From 9edef96106c6e9e85a935a81137e0ea76d129f2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Thu, 21 Aug 2014 16:14:47 +0200 Subject: [PATCH] Sync with native openssl-1.0.1i-3.fc21 Fixes various flaws (RHBZ#1096234 and RHBZ#1127705) CVE-2014-3505 CVE-2014-3506 CVE-2014-3507 CVE-2014-3511 CVE-2014-3510 CVE-2014-3508 CVE-2014-3509 CVE-2014-0221 CVE-2014-0198 CVE-2014-0224 CVE-2014-0195 CVE-2010-5298 CVE-2014-3470 --- .gitignore | 1 + mingw-openssl.spec | 66 +- openssl-1.0.0c-fips-md5-allow.patch | 20 - openssl-1.0.0e-doc-noeof.patch | 23 - openssl-1.0.1-beta2-ssl-op-all.patch | 21 - openssl-1.0.1e-3des-strength.patch | 171 - openssl-1.0.1e-backports.patch | 345 - openssl-1.0.1e-bad-mac.patch | 33 - openssl-1.0.1e-cve-2013-4353.patch | 21 - openssl-1.0.1e-cve-2013-6449.patch | 111 - openssl-1.0.1e-cve-2013-6450.patch | 85 - openssl-1.0.1e-fips-ec.patch | 2 +- openssl-1.0.1e-manfix.patch | 555 -- openssl-1.0.1e-ppc-asm-update.patch | 6664 +++++++++++++++++ openssl-1.0.1e-ppc64le-target.patch | 10 - openssl-1.0.1e-req-keylen.patch | 38 - ...ild.patch => openssl-1.0.1e-rpmbuild.patch | 22 +- ...1e-fips.patch => openssl-1.0.1g-fips.patch | 797 +- openssl-1.0.1h-disable-sslv2v3.patch | 13 + ...ps.patch => openssl-1.0.1h-ipv6-apps.patch | 59 +- openssl-1.0.1h-system-cipherlist.patch | 289 + ...doc.patch => openssl-1.0.1i-algo-doc.patch | 18 +- openssl-1.0.1i-manfix.patch | 86 + ...atch => openssl-1.0.1i-new-fips-reqs.patch | 627 +- openssl-1.0.1i-ppc-asm-update.patch | 6636 ++++++++++++++++ ...atch => openssl-1.0.1i-trusted-first.patch | 134 +- openssl.git-96db902.patch | 108 - sources | 2 +- 28 files changed, 14373 insertions(+), 2584 deletions(-) delete mode 100644 openssl-1.0.0c-fips-md5-allow.patch delete mode 100644 openssl-1.0.0e-doc-noeof.patch delete mode 100644 openssl-1.0.1-beta2-ssl-op-all.patch delete mode 100644 openssl-1.0.1e-3des-strength.patch delete mode 100644 openssl-1.0.1e-backports.patch delete mode 100644 
openssl-1.0.1e-bad-mac.patch delete mode 100644 openssl-1.0.1e-cve-2013-4353.patch delete mode 100644 openssl-1.0.1e-cve-2013-6449.patch delete mode 100644 openssl-1.0.1e-cve-2013-6450.patch delete mode 100644 openssl-1.0.1e-manfix.patch create mode 100644 openssl-1.0.1e-ppc-asm-update.patch delete mode 100644 openssl-1.0.1e-ppc64le-target.patch delete mode 100644 openssl-1.0.1e-req-keylen.patch rename openssl-1.0.1-beta2-rpmbuild.patch => openssl-1.0.1e-rpmbuild.patch (91%) rename openssl-1.0.1e-fips.patch => openssl-1.0.1g-fips.patch (95%) create mode 100644 openssl-1.0.1h-disable-sslv2v3.patch rename openssl-1.0.1c-ipv6-apps.patch => openssl-1.0.1h-ipv6-apps.patch (86%) create mode 100644 openssl-1.0.1h-system-cipherlist.patch rename openssl-1.0.1a-algo-doc.patch => openssl-1.0.1i-algo-doc.patch (80%) create mode 100644 openssl-1.0.1i-manfix.patch rename openssl-1.0.1e-new-fips-reqs.patch => openssl-1.0.1i-new-fips-reqs.patch (73%) create mode 100644 openssl-1.0.1i-ppc-asm-update.patch rename openssl-1.0.1e-trusted-first.patch => openssl-1.0.1i-trusted-first.patch (67%) delete mode 100644 openssl.git-96db902.patch diff --git a/.gitignore b/.gitignore index b9433ec..f36a459 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ openssl-1.0.0a-usa.tar.bz2 /openssl-1.0.1c-usa.tar.xz /openssl-1.0.1e-usa.tar.xz /openssl-1.0.1e-hobbled.tar.xz +/openssl-1.0.1i-hobbled.tar.xz diff --git a/mingw-openssl.spec b/mingw-openssl.spec index 2b67928..c089dbc 100644 --- a/mingw-openssl.spec +++ b/mingw-openssl.spec @@ -23,8 +23,8 @@ %global thread_test_threads %{?threads:%{threads}}%{!?threads:1} Name: mingw-openssl -Version: 1.0.1e -Release: 7%{?dist} +Version: 1.0.1i +Release: 1%{?dist} Summary: MinGW port of the OpenSSL toolkit License: OpenSSL @@ -48,7 +48,7 @@ Source12: ec_curve.c Source13: ectest.c # Build changes -Patch1: openssl-1.0.1-beta2-rpmbuild.patch +Patch1: openssl-1.0.1e-rpmbuild.patch Patch2: openssl-1.0.1e-defaults.patch Patch4: 
openssl-1.0.0-beta5-enginesdir.patch Patch5: openssl-0.9.8a-no-rpath.patch @@ -56,7 +56,8 @@ Patch6: openssl-0.9.8b-test-use-localhost.patch Patch7: openssl-1.0.0-timezone.patch Patch8: openssl-1.0.1c-perlfind.patch Patch9: openssl-1.0.1c-aliasing.patch -Patch10: openssl-1.0.1e-ppc64le-target.patch +# This patch must be applied first +Patch10: openssl-1.0.1i-ppc-asm-update.patch # Bug fixes Patch23: openssl-1.0.1c-default-paths.patch Patch24: openssl-1.0.1e-issuer-hash.patch @@ -64,13 +65,11 @@ Patch24: openssl-1.0.1e-issuer-hash.patch Patch33: openssl-1.0.0-beta4-ca-dir.patch Patch34: openssl-0.9.6-x509.patch Patch35: openssl-0.9.8j-version-add-engines.patch -Patch36: openssl-1.0.0e-doc-noeof.patch -Patch38: openssl-1.0.1-beta2-ssl-op-all.patch -Patch39: openssl-1.0.1c-ipv6-apps.patch -Patch40: openssl-1.0.1e-fips.patch +Patch39: openssl-1.0.1h-ipv6-apps.patch +Patch40: openssl-1.0.1g-fips.patch Patch45: openssl-1.0.1e-env-zlib.patch Patch47: openssl-1.0.0-beta5-readme-warning.patch -Patch49: openssl-1.0.1a-algo-doc.patch +Patch49: openssl-1.0.1i-algo-doc.patch Patch50: openssl-1.0.1-beta2-dtls1-abi.patch Patch51: openssl-1.0.1e-version.patch Patch56: openssl-1.0.0c-rsa-x931.patch @@ -82,28 +81,21 @@ Patch66: openssl-1.0.1-pkgconfig-krb5.patch Patch68: openssl-1.0.1e-secure-getenv.patch Patch69: openssl-1.0.1c-dh-1024.patch Patch70: openssl-1.0.1e-fips-ec.patch -Patch71: openssl-1.0.1e-manfix.patch +Patch71: openssl-1.0.1i-manfix.patch Patch72: openssl-1.0.1e-fips-ctor.patch Patch73: openssl-1.0.1e-ecc-suiteb.patch Patch74: openssl-1.0.1e-no-md5-verify.patch Patch75: openssl-1.0.1e-compat-symbols.patch -Patch76: openssl-1.0.1e-new-fips-reqs.patch +Patch76: openssl-1.0.1i-new-fips-reqs.patch Patch77: openssl-1.0.1e-weak-ciphers.patch -Patch78: openssl-1.0.1e-3des-strength.patch -Patch79: openssl-1.0.1e-req-keylen.patch +Patch90: openssl-1.0.1e-enc-fail.patch +Patch92: openssl-1.0.1h-system-cipherlist.patch +Patch93: openssl-1.0.1h-disable-sslv2v3.patch # Backported 
fixes including security fixes Patch81: openssl-1.0.1-beta2-padlock64.patch -Patch82: openssl-1.0.1e-backports.patch -Patch83: openssl-1.0.1e-bad-mac.patch -Patch84: openssl-1.0.1e-trusted-first.patch +Patch84: openssl-1.0.1i-trusted-first.patch Patch85: openssl-1.0.1e-arm-use-elf-auxv-caps.patch -Patch86: openssl-1.0.1e-cve-2013-6449.patch -Patch87: openssl-1.0.1e-cve-2013-6450.patch -Patch88: openssl-1.0.1e-cve-2013-4353.patch Patch89: openssl-1.0.1e-ephemeral-key-size.patch -Patch90: openssl-1.0.1e-enc-fail.patch -# upstream patch for CVE-2014-0160 -Patch100: openssl.git-96db902.patch # MinGW-specific patches. # Rename *eay32.dll to lib*.dll @@ -217,6 +209,7 @@ Static version of the MinGW port of the OpenSSL toolkit. cp %{SOURCE12} %{SOURCE13} crypto/ec/ +%patch10 -p1 -b .ppc-asm %patch1 -p1 -b .rpmbuild %patch2 -p1 -b .defaults %patch4 -p1 -b .enginesdir %{?_rawbuild} @@ -225,7 +218,6 @@ cp %{SOURCE12} %{SOURCE13} crypto/ec/ %patch7 -p1 -b .timezone %patch8 -p1 -b .perlfind %{?_rawbuild} %patch9 -p1 -b .aliasing -%patch10 -p1 -b .ppc64le %patch23 -p1 -b .default-paths %patch24 -p1 -b .issuer-hash @@ -233,8 +225,6 @@ cp %{SOURCE12} %{SOURCE13} crypto/ec/ %patch33 -p1 -b .ca-dir %patch34 -p1 -b .x509 %patch35 -p1 -b .version-add-engines -%patch36 -p1 -b .doc-noeof -%patch38 -p1 -b .op-all #patch39 -p1 -b .ipv6-apps %patch40 -p1 -b .fips %patch45 -p1 -b .env-zlib @@ -251,27 +241,21 @@ cp %{SOURCE12} %{SOURCE13} crypto/ec/ #patch68 -p1 -b .secure-getenv %patch69 -p1 -b .dh1024 #patch70 -p1 -b .fips-ec +%patch71 -p1 -b .manfix #patch72 -p1 -b .fips-ctor %patch73 -p1 -b .suiteb #patch74 -p1 -b .no-md5-verify %patch75 -p1 -b .compat #patch76 -p1 -b .fips-reqs %patch77 -p1 -b .weak-ciphers -%patch78 -p1 -b .3des-strength -%patch79 -p1 -b .keylen +%patch90 -p1 -b .enc-fail +%patch92 -p1 -b .system +%patch93 -p1 -b .v2v3 %patch81 -p1 -b .padlock64 -%patch82 -p1 -b .backports -%patch71 -p1 -b .manfix -%patch83 -p1 -b .bad-mac %patch84 -p1 -b .trusted-first %patch85 -p1 -b 
.armcap -%patch86 -p1 -b .hash-crash -%patch87 -p1 -b .dtls1-mitm -%patch88 -p1 -b .handshake-crash -#%patch89 -p1 -b .ephemeral -%patch90 -p1 -b .enc-fail -%patch100 -p1 -b .CVE-2014-0160 +#patch89 -p1 -b .ephemeral # MinGW specific patches %patch101 -p1 -b .mingw-libversion @@ -516,6 +500,14 @@ mkdir -m700 $RPM_BUILD_ROOT%{mingw64_sysconfdir}/pki/CA/private %changelog +* Thu Aug 21 2014 Marc-André Lureau - 1.0.1i-1 +- Synced with native openssl-1.0.1i-3.fc21 +- Fixes various flaws (RHBZ#1096234 and RHBZ#1127705) + CVE-2014-3505 CVE-2014-3506 CVE-2014-3507 CVE-2014-3511 + CVE-2014-3510 CVE-2014-3508 CVE-2014-3509 CVE-2014-0221 + CVE-2014-0198 CVE-2014-0224 CVE-2014-0195 CVE-2010-5298 + CVE-2014-3470 + * Sat Jun 07 2014 Fedora Release Engineering - 1.0.1e-7 - Rebuilt for https://fedoraproject.org/wiki/Fedora_21_Mass_Rebuild @@ -612,7 +604,7 @@ mkdir -m700 $RPM_BUILD_ROOT%{mingw64_sysconfdir}/pki/CA/private * Sun Aug 30 2009 Erik van Pienbroek - 1.0.0-0.2.beta3 - Fixed invalid RPM Provides - + * Fri Aug 28 2009 Erik van Pienbroek - 1.0.0-0.1.beta3 - Update to version 1.0.0 beta 3 - Use %%global instead of %%define diff --git a/openssl-1.0.0c-fips-md5-allow.patch b/openssl-1.0.0c-fips-md5-allow.patch deleted file mode 100644 index f9f5e5d..0000000 --- a/openssl-1.0.0c-fips-md5-allow.patch +++ /dev/null @@ -1,20 +0,0 @@ -diff -up openssl-1.0.0c/crypto/md5/md5_dgst.c.md5-allow openssl-1.0.0c/crypto/md5/md5_dgst.c ---- openssl-1.0.0c/crypto/md5/md5_dgst.c.md5-allow 2011-02-03 19:53:28.000000000 +0100 -+++ openssl-1.0.0c/crypto/md5/md5_dgst.c 2011-02-03 20:33:14.000000000 +0100 -@@ -75,7 +75,15 @@ const char MD5_version[]="MD5" OPENSSL_V - #define INIT_DATA_C (unsigned long)0x98badcfeL - #define INIT_DATA_D (unsigned long)0x10325476L - --FIPS_NON_FIPS_MD_Init(MD5) -+int MD5_Init(MD5_CTX *c) -+#ifdef OPENSSL_FIPS -+ { -+ if (FIPS_mode() && getenv("OPENSSL_FIPS_NON_APPROVED_MD5_ALLOW") == NULL) -+ FIPS_BAD_ALGORITHM(alg) -+ return private_MD5_Init(c); -+ } -+int 
private_MD5_Init(MD5_CTX *c) -+#endif - { - memset (c,0,sizeof(*c)); - c->A=INIT_DATA_A; diff --git a/openssl-1.0.0e-doc-noeof.patch b/openssl-1.0.0e-doc-noeof.patch deleted file mode 100644 index 9686575..0000000 --- a/openssl-1.0.0e-doc-noeof.patch +++ /dev/null @@ -1,23 +0,0 @@ -diff -up openssl-1.0.0e/doc/apps/s_client.pod.doc-noeof openssl-1.0.0e/doc/apps/s_client.pod ---- openssl-1.0.0e/doc/apps/s_client.pod.doc-noeof 2009-06-26 13:28:51.000000000 +0200 -+++ openssl-1.0.0e/doc/apps/s_client.pod 2011-11-03 08:30:35.000000000 +0100 -@@ -27,6 +27,7 @@ B B - [B<-nbio>] - [B<-crlf>] - [B<-ign_eof>] -+[B<-no_ign_eof>] - [B<-quiet>] - [B<-ssl2>] - [B<-ssl3>] -@@ -161,6 +162,11 @@ by some servers. - inhibit shutting down the connection when end of file is reached in the - input. - -+=item B<-no_ign_eof> -+ -+shut down the connection when end of file is reached in the -+input. Can be used to override the implicit B<-ign_eof> after B<-quiet>. -+ - =item B<-quiet> - - inhibit printing of session and certificate information. 
This implicitly diff --git a/openssl-1.0.1-beta2-ssl-op-all.patch b/openssl-1.0.1-beta2-ssl-op-all.patch deleted file mode 100644 index 3259d8c..0000000 --- a/openssl-1.0.1-beta2-ssl-op-all.patch +++ /dev/null @@ -1,21 +0,0 @@ -diff -up openssl-1.0.1-beta2/ssl/ssl.h.op-all openssl-1.0.1-beta2/ssl/ssl.h ---- openssl-1.0.1-beta2/ssl/ssl.h.op-all 2012-02-02 12:49:00.828035916 +0100 -+++ openssl-1.0.1-beta2/ssl/ssl.h 2012-02-02 12:52:27.297818182 +0100 -@@ -540,7 +540,7 @@ struct ssl_session_st - #define SSL_OP_NETSCAPE_CHALLENGE_BUG 0x00000002L - /* Allow initial connection to servers that don't support RI */ - #define SSL_OP_LEGACY_SERVER_CONNECT 0x00000004L --#define SSL_OP_NETSCAPE_REUSE_CIPHER_CHANGE_BUG 0x00000008L -+#define SSL_OP_NETSCAPE_REUSE_CIPHER_CHANGE_BUG 0x00000008L /* no effect since 1.0.0c due to CVE-2010-4180 */ - #define SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG 0x00000010L - #define SSL_OP_MICROSOFT_BIG_SSLV3_BUFFER 0x00000020L - #define SSL_OP_MSIE_SSLV2_RSA_PADDING 0x00000040L /* no effect since 0.9.7h and 0.9.8b */ -@@ -558,7 +558,7 @@ struct ssl_session_st - - /* SSL_OP_ALL: various bug workarounds that should be rather harmless. - * This used to be 0x000FFFFFL before 0.9.7. */ --#define SSL_OP_ALL 0x80000BFFL -+#define SSL_OP_ALL 0x80000BF7L /* we still have to include SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS */ - - /* DTLS options */ - #define SSL_OP_NO_QUERY_MTU 0x00001000L diff --git a/openssl-1.0.1e-3des-strength.patch b/openssl-1.0.1e-3des-strength.patch deleted file mode 100644 index 7375b47..0000000 --- a/openssl-1.0.1e-3des-strength.patch +++ /dev/null @@ -1,171 +0,0 @@ -Although the real strength is rather 112 bits we use 128 here as -we do not want to sort it behind more obscure ciphers. -AES-128 is preferred anyway. 
-diff -up openssl-1.0.1e/ssl/s2_lib.c.3des-strength openssl-1.0.1e/ssl/s2_lib.c ---- openssl-1.0.1e/ssl/s2_lib.c.3des-strength 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/ssl/s2_lib.c 2014-01-22 16:32:45.791700322 +0100 -@@ -250,7 +250,7 @@ OPENSSL_GLOBAL const SSL_CIPHER ssl2_cip - SSL_SSLV2, - SSL_NOT_EXP|SSL_HIGH, - 0, -- 168, -+ 128, - 168, - }, - -diff -up openssl-1.0.1e/ssl/s3_lib.c.3des-strength openssl-1.0.1e/ssl/s3_lib.c ---- openssl-1.0.1e/ssl/s3_lib.c.3des-strength 2014-01-17 11:41:11.000000000 +0100 -+++ openssl-1.0.1e/ssl/s3_lib.c 2014-01-22 16:31:14.713666777 +0100 -@@ -328,7 +328,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] - SSL_SSLV3, - SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, -- 168, -+ 128, - 168, - }, - -@@ -377,7 +377,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] - SSL_SSLV3, - SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, -- 168, -+ 128, - 168, - }, - -@@ -425,7 +425,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] - SSL_SSLV3, - SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, -- 168, -+ 128, - 168, - }, - -@@ -474,7 +474,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] - SSL_SSLV3, - SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, -- 168, -+ 128, - 168, - }, - -@@ -522,7 +522,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] - SSL_SSLV3, - SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, -- 168, -+ 128, - 168, - }, - -@@ -602,7 +602,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] - SSL_SSLV3, - SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, -- 168, -+ 128, - 168, - }, - -@@ -687,7 +687,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] - SSL_SSLV3, - SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, -- 168, -+ 128, - 168, - }, - -@@ -751,7 +751,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] - SSL_SSLV3, - SSL_NOT_EXP|SSL_HIGH, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, -- 168, -+ 128, - 168, - }, - 
-@@ -1685,7 +1685,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] - SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, -- 168, -+ 128, - 168, - }, - -@@ -2062,7 +2062,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] - SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, -- 168, -+ 128, - 168, - }, - -@@ -2142,7 +2142,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] - SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, -- 168, -+ 128, - 168, - }, - -@@ -2222,7 +2222,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] - SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, -- 168, -+ 128, - 168, - }, - -@@ -2302,7 +2302,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] - SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, -- 168, -+ 128, - 168, - }, - -@@ -2382,7 +2382,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] - SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, -- 168, -+ 128, - 168, - }, - -@@ -2432,7 +2432,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] - SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, -- 168, -+ 128, - 168, - }, - -@@ -2448,7 +2448,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] - SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, -- 168, -+ 128, - 168, - }, - -@@ -2464,7 +2464,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] - SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, -- 168, -+ 128, - 168, - }, - diff --git a/openssl-1.0.1e-backports.patch b/openssl-1.0.1e-backports.patch deleted file mode 100644 index abe3017..0000000 --- a/openssl-1.0.1e-backports.patch +++ /dev/null @@ -1,345 +0,0 @@ -diff -up openssl-1.0.1e/crypto/pem/pem_info.c.backports openssl-1.0.1e/crypto/pem/pem_info.c ---- openssl-1.0.1e/crypto/pem/pem_info.c.backports 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/pem/pem_info.c 2013-08-16 
15:31:35.726003892 +0200 -@@ -167,6 +167,7 @@ start: - #ifndef OPENSSL_NO_RSA - if (strcmp(name,PEM_STRING_RSA) == 0) - { -+ d2i=(D2I_OF(void))d2i_RSAPrivateKey; - if (xi->x_pkey != NULL) - { - if (!sk_X509_INFO_push(ret,xi)) goto err; -diff -up openssl-1.0.1e/crypto/rsa/rsa_pmeth.c.backports openssl-1.0.1e/crypto/rsa/rsa_pmeth.c ---- openssl-1.0.1e/crypto/rsa/rsa_pmeth.c.backports 2013-08-16 15:31:35.697003256 +0200 -+++ openssl-1.0.1e/crypto/rsa/rsa_pmeth.c 2013-08-16 15:33:37.770673918 +0200 -@@ -582,6 +582,8 @@ static int pkey_rsa_ctrl_str(EVP_PKEY_CT - pm = RSA_NO_PADDING; - else if (!strcmp(value, "oeap")) - pm = RSA_PKCS1_OAEP_PADDING; -+ else if (!strcmp(value, "oaep")) -+ pm = RSA_PKCS1_OAEP_PADDING; - else if (!strcmp(value, "x931")) - pm = RSA_X931_PADDING; - else if (!strcmp(value, "pss")) -diff -up openssl-1.0.1e/crypto/x509/x509_vfy.c.backports openssl-1.0.1e/crypto/x509/x509_vfy.c ---- openssl-1.0.1e/crypto/x509/x509_vfy.c.backports 2013-08-16 15:31:35.721003782 +0200 -+++ openssl-1.0.1e/crypto/x509/x509_vfy.c 2013-08-16 15:31:35.726003892 +0200 -@@ -696,6 +696,7 @@ static int check_cert(X509_STORE_CTX *ct - X509_CRL *crl = NULL, *dcrl = NULL; - X509 *x; - int ok, cnum; -+ unsigned int last_reasons; - cnum = ctx->error_depth; - x = sk_X509_value(ctx->chain, cnum); - ctx->current_cert = x; -@@ -704,6 +705,7 @@ static int check_cert(X509_STORE_CTX *ct - ctx->current_reasons = 0; - while (ctx->current_reasons != CRLDP_ALL_REASONS) - { -+ last_reasons = ctx->current_reasons; - /* Try to retrieve relevant CRL */ - if (ctx->get_crl) - ok = ctx->get_crl(ctx, &crl, x); -@@ -747,6 +749,15 @@ static int check_cert(X509_STORE_CTX *ct - X509_CRL_free(dcrl); - crl = NULL; - dcrl = NULL; -+ /* If reasons not updated we wont get anywhere by -+ * another iteration, so exit loop. 
-+ */ -+ if (last_reasons == ctx->current_reasons) -+ { -+ ctx->error = X509_V_ERR_UNABLE_TO_GET_CRL; -+ ok = ctx->verify_cb(0, ctx); -+ goto err; -+ } - } - err: - X509_CRL_free(crl); -diff -up openssl-1.0.1e/crypto/x509/x_all.c.backports openssl-1.0.1e/crypto/x509/x_all.c ---- openssl-1.0.1e/crypto/x509/x_all.c.backports 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/x509/x_all.c 2013-08-16 15:33:25.247399940 +0200 -@@ -97,6 +97,7 @@ int X509_sign(X509 *x, EVP_PKEY *pkey, c - - int X509_sign_ctx(X509 *x, EVP_MD_CTX *ctx) - { -+ x->cert_info->enc.modified = 1; - return ASN1_item_sign_ctx(ASN1_ITEM_rptr(X509_CINF), - x->cert_info->signature, - x->sig_alg, x->signature, x->cert_info, ctx); -@@ -123,6 +124,7 @@ int X509_CRL_sign(X509_CRL *x, EVP_PKEY - - int X509_CRL_sign_ctx(X509_CRL *x, EVP_MD_CTX *ctx) - { -+ x->crl->enc.modified = 1; - return ASN1_item_sign_ctx(ASN1_ITEM_rptr(X509_CRL_INFO), - x->crl->sig_alg, x->sig_alg, x->signature, x->crl, ctx); - } -diff -up openssl-1.0.1e/doc/crypto/X509_STORE_CTX_get_error.pod.backports openssl-1.0.1e/doc/crypto/X509_STORE_CTX_get_error.pod ---- openssl-1.0.1e/doc/crypto/X509_STORE_CTX_get_error.pod.backports 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/doc/crypto/X509_STORE_CTX_get_error.pod 2013-08-16 15:31:35.727003914 +0200 -@@ -278,6 +278,8 @@ happen if extended CRL checking is enabl - an application specific error. This will never be returned unless explicitly - set by an application. 
- -+=back -+ - =head1 NOTES - - The above functions should be used instead of directly referencing the fields -diff -up openssl-1.0.1e/doc/ssl/SSL_accept.pod.backports openssl-1.0.1e/doc/ssl/SSL_accept.pod ---- openssl-1.0.1e/doc/ssl/SSL_accept.pod.backports 2013-02-11 16:02:48.000000000 +0100 -+++ openssl-1.0.1e/doc/ssl/SSL_accept.pod 2013-08-16 15:31:35.727003914 +0200 -@@ -44,17 +44,17 @@ The following return values can occur: - - =over 4 - --=item 1 -- --The TLS/SSL handshake was successfully completed, a TLS/SSL connection has been --established. -- - =item 0 - - The TLS/SSL handshake was not successful but was shut down controlled and - by the specifications of the TLS/SSL protocol. Call SSL_get_error() with the - return value B to find out the reason. - -+=item 1 -+ -+The TLS/SSL handshake was successfully completed, a TLS/SSL connection has been -+established. -+ - =item E0 - - The TLS/SSL handshake was not successful because a fatal error occurred either -diff -up openssl-1.0.1e/doc/ssl/SSL_connect.pod.backports openssl-1.0.1e/doc/ssl/SSL_connect.pod ---- openssl-1.0.1e/doc/ssl/SSL_connect.pod.backports 2013-02-11 16:02:48.000000000 +0100 -+++ openssl-1.0.1e/doc/ssl/SSL_connect.pod 2013-08-16 15:31:35.727003914 +0200 -@@ -41,17 +41,17 @@ The following return values can occur: - - =over 4 - --=item 1 -- --The TLS/SSL handshake was successfully completed, a TLS/SSL connection has been --established. -- - =item 0 - - The TLS/SSL handshake was not successful but was shut down controlled and - by the specifications of the TLS/SSL protocol. Call SSL_get_error() with the - return value B to find out the reason. - -+=item 1 -+ -+The TLS/SSL handshake was successfully completed, a TLS/SSL connection has been -+established. 
-+ - =item E0 - - The TLS/SSL handshake was not successful, because a fatal error occurred either -diff -up openssl-1.0.1e/doc/ssl/SSL_CTX_set_client_CA_list.pod.backports openssl-1.0.1e/doc/ssl/SSL_CTX_set_client_CA_list.pod ---- openssl-1.0.1e/doc/ssl/SSL_CTX_set_client_CA_list.pod.backports 2013-02-11 16:02:48.000000000 +0100 -+++ openssl-1.0.1e/doc/ssl/SSL_CTX_set_client_CA_list.pod 2013-08-16 15:31:35.727003914 +0200 -@@ -66,16 +66,16 @@ values: - - =over 4 - --=item 1 -- --The operation succeeded. -- - =item 0 - - A failure while manipulating the STACK_OF(X509_NAME) object occurred or - the X509_NAME could not be extracted from B. Check the error stack - to find out the reason. - -+=item 1 -+ -+The operation succeeded. -+ - =back - - =head1 EXAMPLES -diff -up openssl-1.0.1e/doc/ssl/SSL_CTX_use_psk_identity_hint.pod.backports openssl-1.0.1e/doc/ssl/SSL_CTX_use_psk_identity_hint.pod ---- openssl-1.0.1e/doc/ssl/SSL_CTX_use_psk_identity_hint.pod.backports 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/doc/ssl/SSL_CTX_use_psk_identity_hint.pod 2013-08-16 15:31:35.727003914 +0200 -@@ -81,6 +81,8 @@ SSL_CTX_use_psk_identity_hint() and SSL_ - - Return values from the server callback are interpreted as follows: - -+=over 4 -+ - =item > 0 - - PSK identity was found and the server callback has provided the PSK -@@ -99,4 +101,6 @@ completely. - PSK identity was not found. An "unknown_psk_identity" alert message - will be sent and the connection setup fails. - -+=back -+ - =cut -diff -up openssl-1.0.1e/doc/ssl/SSL_do_handshake.pod.backports openssl-1.0.1e/doc/ssl/SSL_do_handshake.pod ---- openssl-1.0.1e/doc/ssl/SSL_do_handshake.pod.backports 2013-02-11 16:02:48.000000000 +0100 -+++ openssl-1.0.1e/doc/ssl/SSL_do_handshake.pod 2013-08-16 15:31:35.727003914 +0200 -@@ -45,17 +45,17 @@ The following return values can occur: - - =over 4 - --=item 1 -- --The TLS/SSL handshake was successfully completed, a TLS/SSL connection has been --established. 
-- - =item 0 - - The TLS/SSL handshake was not successful but was shut down controlled and - by the specifications of the TLS/SSL protocol. Call SSL_get_error() with the - return value B to find out the reason. - -+=item 1 -+ -+The TLS/SSL handshake was successfully completed, a TLS/SSL connection has been -+established. -+ - =item E0 - - The TLS/SSL handshake was not successful because a fatal error occurred either -diff -up openssl-1.0.1e/doc/ssl/SSL_shutdown.pod.backports openssl-1.0.1e/doc/ssl/SSL_shutdown.pod ---- openssl-1.0.1e/doc/ssl/SSL_shutdown.pod.backports 2013-02-11 16:02:48.000000000 +0100 -+++ openssl-1.0.1e/doc/ssl/SSL_shutdown.pod 2013-08-16 15:31:35.728003935 +0200 -@@ -92,11 +92,6 @@ The following return values can occur: - - =over 4 - --=item 1 -- --The shutdown was successfully completed. The "close notify" alert was sent --and the peer's "close notify" alert was received. -- - =item 0 - - The shutdown is not yet finished. Call SSL_shutdown() for a second time, -@@ -104,6 +99,11 @@ if a bidirectional shutdown shall be per - The output of L may be misleading, as an - erroneous SSL_ERROR_SYSCALL may be flagged even though no error occurred. - -+=item 1 -+ -+The shutdown was successfully completed. The "close notify" alert was sent -+and the peer's "close notify" alert was received. 
-+ - =item -1 - - The shutdown was not successful because a fatal error occurred either -diff -up openssl-1.0.1e/ssl/d1_lib.c.backports openssl-1.0.1e/ssl/d1_lib.c ---- openssl-1.0.1e/ssl/d1_lib.c.backports 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/ssl/d1_lib.c 2013-08-16 15:33:33.306576363 +0200 -@@ -196,6 +196,7 @@ void dtls1_free(SSL *s) - pqueue_free(s->d1->buffered_app_data.q); - - OPENSSL_free(s->d1); -+ s->d1 = NULL; - } - - void dtls1_clear(SSL *s) -diff -up openssl-1.0.1e/ssl/d1_pkt.c.backports openssl-1.0.1e/ssl/d1_pkt.c ---- openssl-1.0.1e/ssl/d1_pkt.c.backports 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/ssl/d1_pkt.c 2013-08-16 15:31:35.728003935 +0200 -@@ -847,6 +847,12 @@ start: - } - } - -+ if (s->d1->listen && rr->type != SSL3_RT_HANDSHAKE) -+ { -+ rr->length = 0; -+ goto start; -+ } -+ - /* we now have a packet which can be read and processed */ - - if (s->s3->change_cipher_spec /* set when we receive ChangeCipherSpec, -@@ -1051,6 +1057,7 @@ start: - !(s->s3->flags & SSL3_FLAGS_NO_RENEGOTIATE_CIPHERS) && - !s->s3->renegotiate) - { -+ s->d1->handshake_read_seq++; - s->new_session = 1; - ssl3_renegotiate(s); - if (ssl3_renegotiate_check(s)) -diff -up openssl-1.0.1e/ssl/d1_srvr.c.backports openssl-1.0.1e/ssl/d1_srvr.c ---- openssl-1.0.1e/ssl/d1_srvr.c.backports 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/ssl/d1_srvr.c 2013-08-16 15:31:35.728003935 +0200 -@@ -276,10 +276,11 @@ int dtls1_accept(SSL *s) - case SSL3_ST_SW_HELLO_REQ_B: - - s->shutdown=0; -+ dtls1_clear_record_buffer(s); - dtls1_start_timer(s); - ret=dtls1_send_hello_request(s); - if (ret <= 0) goto end; -- s->s3->tmp.next_state=SSL3_ST_SW_HELLO_REQ_C; -+ s->s3->tmp.next_state=SSL3_ST_SR_CLNT_HELLO_A; - s->state=SSL3_ST_SW_FLUSH; - s->init_num=0; - -diff -up openssl-1.0.1e/ssl/s3_cbc.c.backports openssl-1.0.1e/ssl/s3_cbc.c ---- openssl-1.0.1e/ssl/s3_cbc.c.backports 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/ssl/s3_cbc.c 2013-08-16 
15:31:35.729003956 +0200 -@@ -148,7 +148,7 @@ int tls1_cbc_remove_padding(const SSL* s - unsigned padding_length, good, to_check, i; - const unsigned overhead = 1 /* padding length byte */ + mac_size; - /* Check if version requires explicit IV */ -- if (s->version >= TLS1_1_VERSION || s->version == DTLS1_VERSION) -+ if (s->version >= TLS1_1_VERSION || s->version == DTLS1_BAD_VER) - { - /* These lengths are all public so we can test them in - * non-constant time. -diff -up openssl-1.0.1e/ssl/ssl_lib.c.backports openssl-1.0.1e/ssl/ssl_lib.c ---- openssl-1.0.1e/ssl/ssl_lib.c.backports 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/ssl/ssl_lib.c 2013-08-16 15:31:35.729003956 +0200 -@@ -1797,7 +1797,9 @@ SSL_CTX *SSL_CTX_new(const SSL_METHOD *m - CRYPTO_new_ex_data(CRYPTO_EX_INDEX_SSL_CTX, ret, &ret->ex_data); - - ret->extra_certs=NULL; -- ret->comp_methods=SSL_COMP_get_compression_methods(); -+ /* No compression for DTLS */ -+ if (meth->version != DTLS1_VERSION) -+ ret->comp_methods=SSL_COMP_get_compression_methods(); - - ret->max_send_fragment = SSL3_RT_MAX_PLAIN_LENGTH; - -@@ -2792,9 +2794,7 @@ void ssl_clear_cipher_ctx(SSL *s) - /* Fix this function so that it takes an optional type parameter */ - X509 *SSL_get_certificate(const SSL *s) - { -- if (s->server) -- return(ssl_get_server_send_cert(s)); -- else if (s->cert != NULL) -+ if (s->cert != NULL) - return(s->cert->key->x509); - else - return(NULL); -diff --git a/crypto/x86cpuid.pl b/crypto/x86cpuid.pl -index 3b6c469..e8a7518 100644 ---- a/crypto/x86cpuid.pl -+++ b/crypto/x86cpuid.pl -@@ -69,6 +69,7 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } - &inc ("esi"); # number of cores - - &mov ("eax",1); -+ &xor ("ecx","ecx"); - &cpuid (); - &bt ("edx",28); - &jnc (&label("generic")); -@@ -102,6 +103,7 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } - - &set_label("nocacheinfo"); - &mov ("eax",1); -+ &xor ("ecx","ecx"); - &cpuid (); - &and ("edx",0xbfefffff); # force reserved bits #20, #30 to 0 - 
&cmp ("ebp",0); diff --git a/openssl-1.0.1e-bad-mac.patch b/openssl-1.0.1e-bad-mac.patch deleted file mode 100644 index 06d284e..0000000 --- a/openssl-1.0.1e-bad-mac.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 9ab3ce124616cb12bd39c6aa1e1bde0f46969b29 Mon Sep 17 00:00:00 2001 -From: Andy Polyakov -Date: Mon, 18 Mar 2013 19:29:41 +0100 -Subject: [PATCH] e_aes_cbc_hmac_sha1.c: fix rare bad record mac on AES-NI - plaforms. - -PR: 3002 -(cherry picked from commit 5c60046553716fcf160718f59160493194f212dc) ---- - crypto/evp/e_aes_cbc_hmac_sha1.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/crypto/evp/e_aes_cbc_hmac_sha1.c b/crypto/evp/e_aes_cbc_hmac_sha1.c -index 483e04b..fb2c884 100644 ---- a/crypto/evp/e_aes_cbc_hmac_sha1.c -+++ b/crypto/evp/e_aes_cbc_hmac_sha1.c -@@ -328,10 +328,11 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, - - if (res!=SHA_CBLOCK) continue; - -- mask = 0-((inp_len+8-j)>>(sizeof(j)*8-1)); -+ /* j is not incremented yet */ -+ mask = 0-((inp_len+7-j)>>(sizeof(j)*8-1)); - data->u[SHA_LBLOCK-1] |= bitlen&mask; - sha1_block_data_order(&key->md,data,1); -- mask &= 0-((j-inp_len-73)>>(sizeof(j)*8-1)); -+ mask &= 0-((j-inp_len-72)>>(sizeof(j)*8-1)); - pmac->u[0] |= key->md.h0 & mask; - pmac->u[1] |= key->md.h1 & mask; - pmac->u[2] |= key->md.h2 & mask; --- -1.7.9.5 - diff --git a/openssl-1.0.1e-cve-2013-4353.patch b/openssl-1.0.1e-cve-2013-4353.patch deleted file mode 100644 index 5f96116..0000000 --- a/openssl-1.0.1e-cve-2013-4353.patch +++ /dev/null @@ -1,21 +0,0 @@ -Fix for TLS record tampering bug. A carefully crafted invalid -handshake could crash OpenSSL with a NULL pointer exception. -Thanks to Anton Johansson for reporting this issues. 
-(CVE-2013-4353) -diff --git a/ssl/s3_both.c b/ssl/s3_both.c -index 1e5dcab..53b9390 100644 ---- a/ssl/s3_both.c -+++ b/ssl/s3_both.c -@@ -210,7 +210,11 @@ static void ssl3_take_mac(SSL *s) - { - const char *sender; - int slen; -- -+ /* If no new cipher setup return immediately: other functions will -+ * set the appropriate error. -+ */ -+ if (s->s3->tmp.new_cipher == NULL) -+ return; - if (s->state & SSL_ST_CONNECT) - { - sender=s->method->ssl3_enc->server_finished_label; diff --git a/openssl-1.0.1e-cve-2013-6449.patch b/openssl-1.0.1e-cve-2013-6449.patch deleted file mode 100644 index d80a178..0000000 --- a/openssl-1.0.1e-cve-2013-6449.patch +++ /dev/null @@ -1,111 +0,0 @@ -Use version in SSL_METHOD not SSL structure. - -When deciding whether to use TLS 1.2 PRF and record hash algorithms -use the version number in the corresponding SSL_METHOD structure -instead of the SSL structure. The SSL structure version is sometimes -inaccurate. Note: OpenSSL 1.0.2 and later effectively do this already. -(CVE-2013-6449) - -Also preventively check EVP errors for handshake digests. - -diff --git a/ssl/s3_lib.c b/ssl/s3_lib.c -index bf832bb..c4ef273 100644 ---- a/ssl/s3_lib.c -+++ b/ssl/s3_lib.c -@@ -4286,7 +4286,7 @@ need to go to SSL_ST_ACCEPT. 
- long ssl_get_algorithm2(SSL *s) - { - long alg2 = s->s3->tmp.new_cipher->algorithm2; -- if (TLS1_get_version(s) >= TLS1_2_VERSION && -+ if (s->method->version == TLS1_2_VERSION && - alg2 == (SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF)) - return SSL_HANDSHAKE_MAC_SHA256 | TLS1_PRF_SHA256; - return alg2; -diff --git a/ssl/s3_both.c b/ssl/s3_both.c -index ead01c8..1e5dcab 100644 ---- a/ssl/s3_both.c -+++ b/ssl/s3_both.c -@@ -161,6 +161,8 @@ int ssl3_send_finished(SSL *s, int a, int b, const char *sender, int slen) - - i=s->method->ssl3_enc->final_finish_mac(s, - sender,slen,s->s3->tmp.finish_md); -+ if (i == 0) -+ return 0; - s->s3->tmp.finish_md_len = i; - memcpy(p, s->s3->tmp.finish_md, i); - p+=i; -diff --git a/ssl/s3_pkt.c b/ssl/s3_pkt.c -index 804291e..c4bc4e7 100644 ---- a/ssl/s3_pkt.c -+++ b/ssl/s3_pkt.c -@@ -335,7 +335,7 @@ fprintf(stderr, "Record type=%d, Length=%d\n", rr->type, rr->length); - if (version != s->version) - { - SSLerr(SSL_F_SSL3_GET_RECORD,SSL_R_WRONG_VERSION_NUMBER); -- if ((s->version & 0xFF00) == (version & 0xFF00)) -+ if ((s->version & 0xFF00) == (version & 0xFF00) && !s->enc_write_ctx && !s->write_hash) - /* Send back error using their minor version number :-) */ - s->version = (unsigned short)version; - al=SSL_AD_PROTOCOL_VERSION; -@@ -1459,8 +1459,14 @@ int ssl3_do_change_cipher_spec(SSL *s) - slen=s->method->ssl3_enc->client_finished_label_len; - } - -- s->s3->tmp.peer_finish_md_len = s->method->ssl3_enc->final_finish_mac(s, -+ i = s->method->ssl3_enc->final_finish_mac(s, - sender,slen,s->s3->tmp.peer_finish_md); -+ if (i == 0) -+ { -+ SSLerr(SSL_F_SSL3_DO_CHANGE_CIPHER_SPEC, ERR_R_INTERNAL_ERROR); -+ return 0; -+ } -+ s->s3->tmp.peer_finish_md_len = i; - - return(1); - } -diff --git a/ssl/s3_srvr.c b/ssl/s3_srvr.c -index e5a8b3f..52efed3 100644 ---- a/ssl/s3_srvr.c -+++ b/ssl/s3_srvr.c -@@ -958,7 +958,8 @@ int ssl3_get_client_hello(SSL *s) - (s->version != DTLS1_VERSION && s->client_version < s->version)) - { - 
SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_WRONG_VERSION_NUMBER); -- if ((s->client_version>>8) == SSL3_VERSION_MAJOR) -+ if ((s->client_version>>8) == SSL3_VERSION_MAJOR && -+ !s->enc_write_ctx && !s->write_hash) - { - /* similar to ssl3_get_record, send alert using remote version number */ - s->version = s->client_version; -diff --git a/ssl/t1_enc.c b/ssl/t1_enc.c -index 809ad2e..72015f5 100644 ---- a/ssl/t1_enc.c -+++ b/ssl/t1_enc.c -@@ -915,18 +915,19 @@ int tls1_final_finish_mac(SSL *s, - if (mask & ssl_get_algorithm2(s)) - { - int hashsize = EVP_MD_size(md); -- if (hashsize < 0 || hashsize > (int)(sizeof buf - (size_t)(q-buf))) -+ EVP_MD_CTX *hdgst = s->s3->handshake_dgst[idx]; -+ if (!hdgst || hashsize < 0 || hashsize > (int)(sizeof buf - (size_t)(q-buf))) - { - /* internal error: 'buf' is too small for this cipersuite! */ - err = 1; - } - else - { -- EVP_MD_CTX_copy_ex(&ctx,s->s3->handshake_dgst[idx]); -- EVP_DigestFinal_ex(&ctx,q,&i); -- if (i != (unsigned int)hashsize) /* can't really happen */ -+ if (!EVP_MD_CTX_copy_ex(&ctx, hdgst) || -+ !EVP_DigestFinal_ex(&ctx,q,&i) || -+ (i != (unsigned int)hashsize)) - err = 1; -- q+=i; -+ q+=hashsize; - } - } - } --- -1.8.3.1 - diff --git a/openssl-1.0.1e-cve-2013-6450.patch b/openssl-1.0.1e-cve-2013-6450.patch deleted file mode 100644 index fa096c8..0000000 --- a/openssl-1.0.1e-cve-2013-6450.patch +++ /dev/null @@ -1,85 +0,0 @@ -Fix DTLS retransmission from previous session. - -For DTLS we might need to retransmit messages from the previous session -so keep a copy of write context in DTLS retransmission buffers instead -of replacing it after sending CCS. CVE-2013-6450. 
- -diff --git a/ssl/d1_both.c b/ssl/d1_both.c -index 65ec001..7a5596a 100644 ---- a/ssl/d1_both.c -+++ b/ssl/d1_both.c -@@ -214,6 +214,12 @@ dtls1_hm_fragment_new(unsigned long frag_len, int reassembly) - static void - dtls1_hm_fragment_free(hm_fragment *frag) - { -+ -+ if (frag->msg_header.is_ccs) -+ { -+ EVP_CIPHER_CTX_free(frag->msg_header.saved_retransmit_state.enc_write_ctx); -+ EVP_MD_CTX_destroy(frag->msg_header.saved_retransmit_state.write_hash); -+ } - if (frag->fragment) OPENSSL_free(frag->fragment); - if (frag->reassembly) OPENSSL_free(frag->reassembly); - OPENSSL_free(frag); -diff --git a/ssl/ssl_locl.h b/ssl/ssl_locl.h -index 96ce9a7..e485907 100644 ---- a/ssl/ssl_locl.h -+++ b/ssl/ssl_locl.h -@@ -621,6 +621,8 @@ extern SSL3_ENC_METHOD TLSv1_enc_data; - extern SSL3_ENC_METHOD SSLv3_enc_data; - extern SSL3_ENC_METHOD DTLSv1_enc_data; - -+#define SSL_IS_DTLS(s) (s->method->version == DTLS1_VERSION) -+ - #define IMPLEMENT_tls_meth_func(version, func_name, s_accept, s_connect, \ - s_get_meth) \ - const SSL_METHOD *func_name(void) \ -diff --git a/ssl/t1_enc.c b/ssl/t1_enc.c -index 72015f5..56db834 100644 ---- a/ssl/t1_enc.c -+++ b/ssl/t1_enc.c -@@ -414,15 +414,20 @@ int tls1_change_cipher_state(SSL *s, int which) - s->mac_flags |= SSL_MAC_FLAG_WRITE_MAC_STREAM; - else - s->mac_flags &= ~SSL_MAC_FLAG_WRITE_MAC_STREAM; -- if (s->enc_write_ctx != NULL) -+ if (s->enc_write_ctx != NULL && !SSL_IS_DTLS(s)) - reuse_dd = 1; -- else if ((s->enc_write_ctx=OPENSSL_malloc(sizeof(EVP_CIPHER_CTX))) == NULL) -+ else if ((s->enc_write_ctx=EVP_CIPHER_CTX_new()) == NULL) - goto err; -- else -- /* make sure it's intialized in case we exit later with an error */ -- EVP_CIPHER_CTX_init(s->enc_write_ctx); - dd= s->enc_write_ctx; -- mac_ctx = ssl_replace_hash(&s->write_hash,NULL); -+ if (SSL_IS_DTLS(s)) -+ { -+ mac_ctx = EVP_MD_CTX_create(); -+ if (!mac_ctx) -+ goto err; -+ s->write_hash = mac_ctx; -+ } -+ else -+ mac_ctx = ssl_replace_hash(&s->write_hash,NULL); - #ifndef 
OPENSSL_NO_COMP - if (s->compress != NULL) - { -diff --git a/crypto/evp/digest.c b/crypto/evp/digest.c -index 6fc469f..d14e8e4 100644 ---- a/crypto/evp/digest.c -+++ b/crypto/evp/digest.c -@@ -366,8 +366,11 @@ int EVP_Digest(const void *data, size_t count, - - void EVP_MD_CTX_destroy(EVP_MD_CTX *ctx) - { -- EVP_MD_CTX_cleanup(ctx); -- OPENSSL_free(ctx); -+ if (ctx) -+ { -+ EVP_MD_CTX_cleanup(ctx); -+ OPENSSL_free(ctx); -+ } - } - - /* This call frees resources associated with the context */ diff --git a/openssl-1.0.1e-fips-ec.patch b/openssl-1.0.1e-fips-ec.patch index 7287dae..e1f648c 100644 --- a/openssl-1.0.1e-fips-ec.patch +++ b/openssl-1.0.1e-fips-ec.patch @@ -241,7 +241,7 @@ diff -up openssl-1.0.1e/crypto/ec/ec_key.c.fips-ec openssl-1.0.1e/crypto/ec/ec_k + + EVP_PKEY_set1_EC_KEY(pk, key); + -+ if (fips_pkey_signature_test(pk, tbs, 0, NULL, 0, NULL, 0, NULL)) ++ if (fips_pkey_signature_test(pk, tbs, -1, NULL, 0, NULL, 0, NULL)) + ret = 1; + + err: diff --git a/openssl-1.0.1e-manfix.patch b/openssl-1.0.1e-manfix.patch deleted file mode 100644 index 4ba2abb..0000000 --- a/openssl-1.0.1e-manfix.patch +++ /dev/null @@ -1,555 +0,0 @@ -diff -up openssl-1.0.1e/doc/apps/cms.pod.manfix openssl-1.0.1e/doc/apps/cms.pod ---- openssl-1.0.1e/doc/apps/cms.pod.manfix 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/doc/apps/cms.pod 2013-09-12 11:17:42.147092310 +0200 -@@ -450,28 +450,28 @@ remains DER. - - =over 4 - --=item 0 -+=item C<0> - - the operation was completely successfully. - --=item 1 -+=item C<1> - - an error occurred parsing the command options. - --=item 2 -+=item C<2> - - one of the input files could not be read. - --=item 3 -+=item C<3> - - an error occurred creating the CMS file or when reading the MIME - message. - --=item 4 -+=item C<4> - - an error occurred decrypting or verifying the message. - --=item 5 -+=item C<5> - - the message was verified correctly but an error occurred writing out - the signers certificates. 
-diff -up openssl-1.0.1e/doc/apps/ec.pod.manfix openssl-1.0.1e/doc/apps/ec.pod ---- openssl-1.0.1e/doc/apps/ec.pod.manfix 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/doc/apps/ec.pod 2013-09-12 11:17:42.147092310 +0200 -@@ -93,10 +93,6 @@ prints out the public, private key compo - - this option prevents output of the encoded version of the key. - --=item B<-modulus> -- --this option prints out the value of the public key component of the key. -- - =item B<-pubin> - - by default a private key is read from the input file: with this option a -diff -up openssl-1.0.1e/doc/apps/openssl.pod.manfix openssl-1.0.1e/doc/apps/openssl.pod ---- openssl-1.0.1e/doc/apps/openssl.pod.manfix 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/doc/apps/openssl.pod 2013-09-12 11:17:42.148092331 +0200 -@@ -163,7 +163,7 @@ Create or examine a netscape certificate - - Online Certificate Status Protocol utility. - --=item L|passwd(1)> -+=item L|sslpasswd(1)> - - Generation of hashed passwords. - -@@ -187,7 +187,7 @@ Public key algorithm parameter managemen - - Public key algorithm cryptographic operation utility. - --=item L|rand(1)> -+=item L|sslrand(1)> - - Generate pseudo-random bytes. 
- -@@ -401,9 +401,9 @@ L, L, L, L, - L, L, L, - L, L, L, --L, -+L, - L, L, L, --L, L, L, -+L, L, L, - L, L, - L, L, - L, L, -diff -up openssl-1.0.1e/doc/apps/s_client.pod.manfix openssl-1.0.1e/doc/apps/s_client.pod ---- openssl-1.0.1e/doc/apps/s_client.pod.manfix 2013-09-12 11:17:41.517078502 +0200 -+++ openssl-1.0.1e/doc/apps/s_client.pod 2013-09-12 11:17:42.149092353 +0200 -@@ -32,9 +32,14 @@ B B - [B<-ssl2>] - [B<-ssl3>] - [B<-tls1>] -+[B<-tls1_1>] -+[B<-tls1_2>] -+[B<-dtls1>] - [B<-no_ssl2>] - [B<-no_ssl3>] - [B<-no_tls1>] -+[B<-no_tls1_1>] -+[B<-no_tls1_2>] - [B<-bugs>] - [B<-cipher cipherlist>] - [B<-starttls protocol>] -@@ -44,6 +49,7 @@ B B - [B<-sess_out filename>] - [B<-sess_in filename>] - [B<-rand file(s)>] -+[B<-nextprotoneg protocols>] - - =head1 DESCRIPTION - -@@ -182,7 +188,7 @@ Use the PSK key B when using a PSK - given as a hexadecimal number without leading 0x, for example -psk - 1a2b3c4d. - --=item B<-ssl2>, B<-ssl3>, B<-tls1>, B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1> -+=item B<-ssl2>, B<-ssl3>, B<-tls1>, B<-tls1_1>, B<-tls1_2>, B<-dtls1>, B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1>, B<-no_tls1_1>, B<-no_tls1_2> - - these options disable the use of certain SSL or TLS protocols. By default - the initial handshake uses a method which should be compatible with all -@@ -243,6 +249,17 @@ Multiple files can be specified separate - The separator is B<;> for MS-Windows, B<,> for OpenVMS, and B<:> for - all others. - -+=item B<-nextprotoneg protocols> -+ -+enable Next Protocol Negotiation TLS extension and provide a list of -+comma-separated protocol names that the client should advertise -+support for. The list should contain most wanted protocols first. -+Protocol names are printable ASCII strings, for example "http/1.1" or -+"spdy/3". -+Empty list of protocols is treated specially and will cause the client to -+advertise support for the TLS extension but disconnect just after -+reciving ServerHello with a list of server supported protocols. 
-+ - =back - - =head1 CONNECTED COMMANDS -diff -up openssl-1.0.1e/doc/apps/smime.pod.manfix openssl-1.0.1e/doc/apps/smime.pod ---- openssl-1.0.1e/doc/apps/smime.pod.manfix 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/doc/apps/smime.pod 2013-09-12 11:17:42.150092375 +0200 -@@ -308,28 +308,28 @@ remains DER. - - =over 4 - --=item 0 -+=item C<0> - - the operation was completely successfully. - --=item 1 -+=item C<1> - - an error occurred parsing the command options. - --=item 2 -+=item C<2> - - one of the input files could not be read. - --=item 3 -+=item C<3> - - an error occurred creating the PKCS#7 file or when reading the MIME - message. - --=item 4 -+=item C<4> - - an error occurred decrypting or verifying the message. - --=item 5 -+=item C<5> - - the message was verified correctly but an error occurred writing out - the signers certificates. -diff -up openssl-1.0.1e/doc/apps/s_server.pod.manfix openssl-1.0.1e/doc/apps/s_server.pod ---- openssl-1.0.1e/doc/apps/s_server.pod.manfix 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/doc/apps/s_server.pod 2013-09-12 11:17:42.150092375 +0200 -@@ -40,10 +40,16 @@ B B - [B<-ssl2>] - [B<-ssl3>] - [B<-tls1>] -+[B<-tls1_1>] -+[B<-tls1_2>] -+[B<-dtls1>] - [B<-no_ssl2>] - [B<-no_ssl3>] - [B<-no_tls1>] -+[B<-no_tls1_1>] -+[B<-no_tls1_2>] - [B<-no_dhe>] -+[B<-no_ecdhe>] - [B<-bugs>] - [B<-hack>] - [B<-www>] -@@ -54,6 +60,7 @@ B B - [B<-no_ticket>] - [B<-id_prefix arg>] - [B<-rand file(s)>] -+[B<-nextprotoneg protocols>] - - =head1 DESCRIPTION - -@@ -131,6 +138,10 @@ a static set of parameters hard coded in - if this option is set then no DH parameters will be loaded effectively - disabling the ephemeral DH cipher suites. - -+=item B<-no_ecdhe> -+ -+if this option is set then ephemeral ECDH cipher suites will be disabled. 
-+ - =item B<-no_tmp_rsa> - - certain export cipher suites sometimes use a temporary RSA key, this option -@@ -201,7 +212,7 @@ Use the PSK key B when using a PSK - given as a hexadecimal number without leading 0x, for example -psk - 1a2b3c4d. - --=item B<-ssl2>, B<-ssl3>, B<-tls1>, B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1> -+=item B<-ssl2>, B<-ssl3>, B<-tls1>, B<-tls1_1>, B<-tls1_2>, B<-dtls1>, B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1>, B<-no_tls1_1>, B<-no_tls1_2> - - these options disable the use of certain SSL or TLS protocols. By default - the initial handshake uses a method which should be compatible with all -@@ -276,6 +287,14 @@ Multiple files can be specified separate - The separator is B<;> for MS-Windows, B<,> for OpenVMS, and B<:> for - all others. - -+=item B<-nextprotoneg protocols> -+ -+enable Next Protocol Negotiation TLS extension and provide a -+comma-separated list of supported protocol names. -+The list should contain most wanted protocols first. -+Protocol names are printable ASCII strings, for example "http/1.1" or -+"spdy/3". -+ - =back - - =head1 CONNECTED COMMANDS -diff -up openssl-1.0.1e/doc/apps/verify.pod.manfix openssl-1.0.1e/doc/apps/verify.pod ---- openssl-1.0.1e/doc/apps/verify.pod.manfix 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/doc/apps/verify.pod 2013-09-12 11:25:13.994994992 +0200 -@@ -25,6 +25,7 @@ B B - [B<-untrusted file>] - [B<-help>] - [B<-issuer_checks>] -+[B<-attime timestamp>] - [B<-verbose>] - [B<->] - [certificates] -@@ -80,6 +81,12 @@ rejected. The presence of rejection mess - anything is wrong; during the normal verification process, several - rejections may take place. - -+=item B<-attime timestamp> -+ -+Perform validation checks using the time specified by B and not -+the current system time. B is the number of seconds since -+01.01.1970 (UNIX time). 
-+ - =item B<-policy arg> - - Enable policy processing and add B to the user-initial-policy-set (see -diff -up openssl-1.0.1e/doc/ssl/SSL_accept.pod.manfix openssl-1.0.1e/doc/ssl/SSL_accept.pod ---- openssl-1.0.1e/doc/ssl/SSL_accept.pod.manfix 2013-09-12 11:17:42.129091915 +0200 -+++ openssl-1.0.1e/doc/ssl/SSL_accept.pod 2013-09-12 11:17:42.156092507 +0200 -@@ -44,13 +44,13 @@ The following return values can occur: - - =over 4 - --=item 0 -+=item C<0> - - The TLS/SSL handshake was not successful but was shut down controlled and - by the specifications of the TLS/SSL protocol. Call SSL_get_error() with the - return value B to find out the reason. - --=item 1 -+=item C<1> - - The TLS/SSL handshake was successfully completed, a TLS/SSL connection has been - established. -diff -up openssl-1.0.1e/doc/ssl/SSL_clear.pod.manfix openssl-1.0.1e/doc/ssl/SSL_clear.pod ---- openssl-1.0.1e/doc/ssl/SSL_clear.pod.manfix 2013-02-11 16:02:48.000000000 +0100 -+++ openssl-1.0.1e/doc/ssl/SSL_clear.pod 2013-09-12 11:17:42.158092551 +0200 -@@ -56,12 +56,12 @@ The following return values can occur: - - =over 4 - --=item 0 -+=item C<0> - - The SSL_clear() operation could not be performed. Check the error stack to - find out the reason. - --=item 1 -+=item C<1> - - The SSL_clear() operation was successful. - -diff -up openssl-1.0.1e/doc/ssl/SSL_COMP_add_compression_method.pod.manfix openssl-1.0.1e/doc/ssl/SSL_COMP_add_compression_method.pod ---- openssl-1.0.1e/doc/ssl/SSL_COMP_add_compression_method.pod.manfix 2013-09-12 11:17:42.049090162 +0200 -+++ openssl-1.0.1e/doc/ssl/SSL_COMP_add_compression_method.pod 2013-09-12 11:17:42.159092573 +0200 -@@ -60,11 +60,11 @@ SSL_COMP_add_compression_method() may re - - =over 4 - --=item 0 -+=item C<0> - - The operation succeeded. - --=item 1 -+=item C<1> - - The operation failed. Check the error queue to find out the reason. 
- -diff -up openssl-1.0.1e/doc/ssl/SSL_connect.pod.manfix openssl-1.0.1e/doc/ssl/SSL_connect.pod ---- openssl-1.0.1e/doc/ssl/SSL_connect.pod.manfix 2013-09-12 11:17:42.130091937 +0200 -+++ openssl-1.0.1e/doc/ssl/SSL_connect.pod 2013-09-12 11:17:42.161092616 +0200 -@@ -41,13 +41,13 @@ The following return values can occur: - - =over 4 - --=item 0 -+=item C<0> - - The TLS/SSL handshake was not successful but was shut down controlled and - by the specifications of the TLS/SSL protocol. Call SSL_get_error() with the - return value B to find out the reason. - --=item 1 -+=item C<1> - - The TLS/SSL handshake was successfully completed, a TLS/SSL connection has been - established. -diff -up openssl-1.0.1e/doc/ssl/SSL_CTX_add_session.pod.manfix openssl-1.0.1e/doc/ssl/SSL_CTX_add_session.pod ---- openssl-1.0.1e/doc/ssl/SSL_CTX_add_session.pod.manfix 2013-02-11 16:02:48.000000000 +0100 -+++ openssl-1.0.1e/doc/ssl/SSL_CTX_add_session.pod 2013-09-12 11:17:42.162092638 +0200 -@@ -52,13 +52,13 @@ The following values are returned by all - - =over 4 - --=item 0 -+=item C<0> - - The operation failed. In case of the add operation, it was tried to add - the same (identical) session twice. In case of the remove operation, the - session was not found in the cache. - --=item 1 -+=item C<1> - - The operation succeeded. - -diff -up openssl-1.0.1e/doc/ssl/SSL_CTX_load_verify_locations.pod.manfix openssl-1.0.1e/doc/ssl/SSL_CTX_load_verify_locations.pod ---- openssl-1.0.1e/doc/ssl/SSL_CTX_load_verify_locations.pod.manfix 2013-02-11 16:02:48.000000000 +0100 -+++ openssl-1.0.1e/doc/ssl/SSL_CTX_load_verify_locations.pod 2013-09-12 11:17:42.163092660 +0200 -@@ -100,13 +100,13 @@ The following return values can occur: - - =over 4 - --=item 0 -+=item C<0> - - The operation failed because B and B are NULL or the - processing at one of the locations specified failed. Check the error - stack to find out the reason. - --=item 1 -+=item C<1> - - The operation succeeded. 
- -diff -up openssl-1.0.1e/doc/ssl/SSL_CTX_set_client_CA_list.pod.manfix openssl-1.0.1e/doc/ssl/SSL_CTX_set_client_CA_list.pod ---- openssl-1.0.1e/doc/ssl/SSL_CTX_set_client_CA_list.pod.manfix 2013-09-12 11:17:42.132091981 +0200 -+++ openssl-1.0.1e/doc/ssl/SSL_CTX_set_client_CA_list.pod 2013-09-12 11:17:42.164092682 +0200 -@@ -66,13 +66,13 @@ values: - - =over 4 - --=item 0 -+=item C<0> - - A failure while manipulating the STACK_OF(X509_NAME) object occurred or - the X509_NAME could not be extracted from B. Check the error stack - to find out the reason. - --=item 1 -+=item C<1> - - The operation succeeded. - -diff -up openssl-1.0.1e/doc/ssl/SSL_CTX_set_session_id_context.pod.manfix openssl-1.0.1e/doc/ssl/SSL_CTX_set_session_id_context.pod ---- openssl-1.0.1e/doc/ssl/SSL_CTX_set_session_id_context.pod.manfix 2013-02-11 16:02:48.000000000 +0100 -+++ openssl-1.0.1e/doc/ssl/SSL_CTX_set_session_id_context.pod 2013-09-12 11:17:42.166092726 +0200 -@@ -64,13 +64,13 @@ return the following values: - - =over 4 - --=item 0 -+=item C<0> - - The length B of the session id context B exceeded - the maximum allowed length of B. The error - is logged to the error stack. - --=item 1 -+=item C<1> - - The operation succeeded. - -diff -up openssl-1.0.1e/doc/ssl/SSL_CTX_set_ssl_version.pod.manfix openssl-1.0.1e/doc/ssl/SSL_CTX_set_ssl_version.pod ---- openssl-1.0.1e/doc/ssl/SSL_CTX_set_ssl_version.pod.manfix 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/doc/ssl/SSL_CTX_set_ssl_version.pod 2013-09-12 11:17:42.167092748 +0200 -@@ -42,11 +42,11 @@ and SSL_set_ssl_method(): - - =over 4 - --=item 0 -+=item C<0> - - The new choice failed, check the error stack to find out the reason. - --=item 1 -+=item C<1> - - The operation succeeded. 
- -diff -up openssl-1.0.1e/doc/ssl/SSL_CTX_use_psk_identity_hint.pod.manfix openssl-1.0.1e/doc/ssl/SSL_CTX_use_psk_identity_hint.pod ---- openssl-1.0.1e/doc/ssl/SSL_CTX_use_psk_identity_hint.pod.manfix 2013-09-12 11:17:42.133092003 +0200 -+++ openssl-1.0.1e/doc/ssl/SSL_CTX_use_psk_identity_hint.pod 2013-09-12 11:17:42.168092770 +0200 -@@ -96,7 +96,7 @@ data to B and return the length of - connection will fail with decryption_error before it will be finished - completely. - --=item 0 -+=item C<0> - - PSK identity was not found. An "unknown_psk_identity" alert message - will be sent and the connection setup fails. -diff -up openssl-1.0.1e/doc/ssl/SSL_do_handshake.pod.manfix openssl-1.0.1e/doc/ssl/SSL_do_handshake.pod ---- openssl-1.0.1e/doc/ssl/SSL_do_handshake.pod.manfix 2013-09-12 11:17:42.135092047 +0200 -+++ openssl-1.0.1e/doc/ssl/SSL_do_handshake.pod 2013-09-12 11:17:42.170092814 +0200 -@@ -45,13 +45,13 @@ The following return values can occur: - - =over 4 - --=item 0 -+=item C<0> - - The TLS/SSL handshake was not successful but was shut down controlled and - by the specifications of the TLS/SSL protocol. Call SSL_get_error() with the - return value B to find out the reason. - --=item 1 -+=item C<1> - - The TLS/SSL handshake was successfully completed, a TLS/SSL connection has been - established. -diff -up openssl-1.0.1e/doc/ssl/SSL_read.pod.manfix openssl-1.0.1e/doc/ssl/SSL_read.pod ---- openssl-1.0.1e/doc/ssl/SSL_read.pod.manfix 2013-02-11 16:02:48.000000000 +0100 -+++ openssl-1.0.1e/doc/ssl/SSL_read.pod 2013-09-12 11:17:42.171092836 +0200 -@@ -86,7 +86,7 @@ The following return values can occur: - The read operation was successful; the return value is the number of - bytes actually read from the TLS/SSL connection. - --=item 0 -+=item C<0> - - The read operation was not successful. 
The reason may either be a clean - shutdown due to a "close notify" alert sent by the peer (in which case -diff -up openssl-1.0.1e/doc/ssl/SSL_session_reused.pod.manfix openssl-1.0.1e/doc/ssl/SSL_session_reused.pod ---- openssl-1.0.1e/doc/ssl/SSL_session_reused.pod.manfix 2013-02-11 16:02:48.000000000 +0100 -+++ openssl-1.0.1e/doc/ssl/SSL_session_reused.pod 2013-09-12 11:17:42.172092857 +0200 -@@ -27,11 +27,11 @@ The following return values can occur: - - =over 4 - --=item 0 -+=item C<0> - - A new session was negotiated. - --=item 1 -+=item C<1> - - A session was reused. - -diff -up openssl-1.0.1e/doc/ssl/SSL_set_fd.pod.manfix openssl-1.0.1e/doc/ssl/SSL_set_fd.pod ---- openssl-1.0.1e/doc/ssl/SSL_set_fd.pod.manfix 2013-02-11 16:02:48.000000000 +0100 -+++ openssl-1.0.1e/doc/ssl/SSL_set_fd.pod 2013-09-12 11:17:42.174092901 +0200 -@@ -35,11 +35,11 @@ The following return values can occur: - - =over 4 - --=item 0 -+=item C<0> - - The operation failed. Check the error stack to find out why. - --=item 1 -+=item C<1> - - The operation succeeded. - -diff -up openssl-1.0.1e/doc/ssl/SSL_set_session.pod.manfix openssl-1.0.1e/doc/ssl/SSL_set_session.pod ---- openssl-1.0.1e/doc/ssl/SSL_set_session.pod.manfix 2013-02-11 16:02:48.000000000 +0100 -+++ openssl-1.0.1e/doc/ssl/SSL_set_session.pod 2013-09-12 11:17:42.175092923 +0200 -@@ -37,11 +37,11 @@ The following return values can occur: - - =over 4 - --=item 0 -+=item C<0> - - The operation failed; check the error stack to find out the reason. - --=item 1 -+=item C<1> - - The operation succeeded. - -diff -up openssl-1.0.1e/doc/ssl/SSL_shutdown.pod.manfix openssl-1.0.1e/doc/ssl/SSL_shutdown.pod ---- openssl-1.0.1e/doc/ssl/SSL_shutdown.pod.manfix 2013-09-12 11:17:42.137092090 +0200 -+++ openssl-1.0.1e/doc/ssl/SSL_shutdown.pod 2013-09-12 11:17:42.177092967 +0200 -@@ -92,14 +92,14 @@ The following return values can occur: - - =over 4 - --=item 0 -+=item C<0> - - The shutdown is not yet finished. 
Call SSL_shutdown() for a second time, - if a bidirectional shutdown shall be performed. - The output of L may be misleading, as an - erroneous SSL_ERROR_SYSCALL may be flagged even though no error occurred. - --=item 1 -+=item C<1> - - The shutdown was successfully completed. The "close notify" alert was sent - and the peer's "close notify" alert was received. -diff -up openssl-1.0.1e/doc/ssl/SSL_write.pod.manfix openssl-1.0.1e/doc/ssl/SSL_write.pod ---- openssl-1.0.1e/doc/ssl/SSL_write.pod.manfix 2013-02-11 16:02:48.000000000 +0100 -+++ openssl-1.0.1e/doc/ssl/SSL_write.pod 2013-09-12 11:17:42.177092967 +0200 -@@ -79,7 +79,7 @@ The following return values can occur: - The write operation was successful, the return value is the number of - bytes actually written to the TLS/SSL connection. - --=item 0 -+=item C<0> - - The write operation was not successful. Probably the underlying connection - was closed. Call SSL_get_error() with the return value B to find out, diff --git a/openssl-1.0.1e-ppc-asm-update.patch b/openssl-1.0.1e-ppc-asm-update.patch new file mode 100644 index 0000000..caa92ec --- /dev/null +++ b/openssl-1.0.1e-ppc-asm-update.patch @@ -0,0 +1,6664 @@ +diff --git a/Configure b/Configure +index 9c803dc..5a5c2d8 100755 +--- a/Configure ++++ b/Configure +@@ -139,8 +139,8 @@ my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::aes + my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o::void"; + my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::32"; + my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::64"; +-my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o 
aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::"; +-my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::"; ++my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o aesp8-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o:::::::ghashp8-ppc.o:"; ++my $ppc32_asm=$ppc64_asm; + my $no_asm=":::::::::::::::void"; + + # As for $BSDthreads. Idea is to maintain "collective" set of flags, +@@ -357,6 +357,7 @@ my %table=( + #### + "linux-generic64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", + "linux-ppc64", "gcc:-m64 -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", ++"linux-ppc64le","gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:$ppc64_asm:linux64le:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::", + "linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", + "linux-ia64-ecc","ecc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", + "linux-ia64-icc","icc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +@@ -462,8 +463,8 @@ my %table=( + + #### IBM's AIX. 
+ "aix3-cc", "cc:-O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::BN_LLONG RC4_CHAR:::", +-"aix-gcc", "gcc:-O -DB_ENDIAN::-pthread:AIX::BN_LLONG RC4_CHAR:${ppc32_asm}:aix32:dlfcn:aix-shared::-shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X32", +-"aix64-gcc","gcc:-maix64 -O -DB_ENDIAN::-pthread:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR:${ppc64_asm}:aix64:dlfcn:aix-shared::-maix64 -shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X64", ++"aix-gcc", "gcc:-O -DB_ENDIAN::-pthread:AIX::BN_LLONG RC4_CHAR:$ppc32_asm:aix32:dlfcn:aix-shared::-shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X32", ++"aix64-gcc","gcc:-maix64 -O -DB_ENDIAN::-pthread:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR:$ppc64_asm:aix64:dlfcn:aix-shared::-maix64 -shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X64", + # Below targets assume AIX 5. Idea is to effectively disregard $OBJECT_MODE + # at build time. $OBJECT_MODE is respected at ./config stage! + "aix-cc", "cc:-q32 -O -DB_ENDIAN -qmaxmem=16384 -qro -qroconst::-qthreaded -D_THREAD_SAFE:AIX::BN_LLONG RC4_CHAR:${ppc32_asm}:aix32:dlfcn:aix-shared::-q32 -G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 32", +@@ -1525,7 +1526,7 @@ else { + $wp_obj="wp_block.o"; + } + $cmll_obj=$cmll_enc unless ($cmll_obj =~ /.o$/); +-if ($modes_obj =~ /ghash/) ++if ($modes_obj =~ /ghash\-/) + { + $cflags.=" -DGHASH_ASM"; + } +diff --git a/config b/config +index 88b9bc6..8b80802 100755 +--- a/config ++++ b/config +@@ -587,13 +587,20 @@ case "$GUESSOS" in + fi + ;; + ppc64-*-linux2) +- echo "WARNING! If you wish to build 64-bit library, then you have to" +- echo " invoke './Configure linux-ppc64' *manually*." +- if [ "$TEST" = "false" -a -t 1 ]; then +- echo " You have about 5 seconds to press Ctrl-C to abort." +- (trap "stty `stty -g`" 2 0; stty -icanon min 0 time 50; read waste) <&1 ++ if [ -z "$KERNEL_BITS" ]; then ++ echo "WARNING! If you wish to build 64-bit library, then you have to" ++ echo " invoke './Configure linux-ppc64' *manually*." 
++ if [ "$TEST" = "false" -a -t 1 ]; then ++ echo " You have about 5 seconds to press Ctrl-C to abort." ++ (trap "stty `stty -g`" 2 0; stty -icanon min 0 time 50; read waste) <&1 ++ fi ++ fi ++ if [ "$KERNEL_BITS" = "64" ]; then ++ OUT="linux-ppc64" ++ else ++ OUT="linux-ppc" ++ (echo "__LP64__" | gcc -E -x c - 2>/dev/null | grep "^__LP64__" 2>&1 > /dev/null) || options="$options -m32" + fi +- OUT="linux-ppc" + ;; + ppc-*-linux2) OUT="linux-ppc" ;; + ppc60x-*-vxworks*) OUT="vxworks-ppc60x" ;; +diff --git a/crypto/aes/Makefile b/crypto/aes/Makefile +index 45ede0a..847f4ee 100644 +--- a/crypto/aes/Makefile ++++ b/crypto/aes/Makefile +@@ -71,6 +71,10 @@ aes-sparcv9.s: asm/aes-sparcv9.pl + + aes-ppc.s: asm/aes-ppc.pl + $(PERL) asm/aes-ppc.pl $(PERLASM_SCHEME) $@ ++vpaes-ppc.s: asm/vpaes-ppc.pl ++ $(PERL) asm/vpaes-ppc.pl $(PERLASM_SCHEME) $@ ++aesp8-ppc.s: asm/aesp8-ppc.pl ++ $(PERL) asm/aesp8-ppc.pl $(PERLASM_SCHEME) $@ + + aes-parisc.s: asm/aes-parisc.pl + $(PERL) asm/aes-parisc.pl $(PERLASM_SCHEME) $@ +diff --git a/crypto/aes/asm/aes-ppc.pl b/crypto/aes/asm/aes-ppc.pl +index 7c52cbe..7a99fc3 100644 +--- a/crypto/aes/asm/aes-ppc.pl ++++ b/crypto/aes/asm/aes-ppc.pl +@@ -45,6 +45,8 @@ if ($flavour =~ /64/) { + $PUSH ="stw"; + } else { die "nonsense $flavour"; } + ++$LITTLE_ENDIAN = ($flavour=~/le$/) ? 
$SIZE_T : 0; ++ + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +@@ -68,7 +70,7 @@ $key="r5"; + $Tbl0="r3"; + $Tbl1="r6"; + $Tbl2="r7"; +-$Tbl3="r2"; ++$Tbl3=$out; # stay away from "r2"; $out is offloaded to stack + + $s0="r8"; + $s1="r9"; +@@ -76,7 +78,7 @@ $s2="r10"; + $s3="r11"; + + $t0="r12"; +-$t1="r13"; ++$t1="r0"; # stay away from "r13"; + $t2="r14"; + $t3="r15"; + +@@ -100,9 +102,6 @@ $acc13="r29"; + $acc14="r30"; + $acc15="r31"; + +-# stay away from TLS pointer +-if ($SIZE_T==8) { die if ($t1 ne "r13"); $t1="r0"; } +-else { die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0"; } + $mask80=$Tbl2; + $mask1b=$Tbl3; + +@@ -337,8 +336,7 @@ $code.=<<___; + $STU $sp,-$FRAME($sp) + mflr r0 + +- $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) +- $PUSH r13,`$FRAME-$SIZE_T*19`($sp) ++ $PUSH $out,`$FRAME-$SIZE_T*19`($sp) + $PUSH r14,`$FRAME-$SIZE_T*18`($sp) + $PUSH r15,`$FRAME-$SIZE_T*17`($sp) + $PUSH r16,`$FRAME-$SIZE_T*16`($sp) +@@ -365,16 +363,61 @@ $code.=<<___; + bne Lenc_unaligned + + Lenc_unaligned_ok: ++___ ++$code.=<<___ if (!$LITTLE_ENDIAN); + lwz $s0,0($inp) + lwz $s1,4($inp) + lwz $s2,8($inp) + lwz $s3,12($inp) ++___ ++$code.=<<___ if ($LITTLE_ENDIAN); ++ lwz $t0,0($inp) ++ lwz $t1,4($inp) ++ lwz $t2,8($inp) ++ lwz $t3,12($inp) ++ rotlwi $s0,$t0,8 ++ rotlwi $s1,$t1,8 ++ rotlwi $s2,$t2,8 ++ rotlwi $s3,$t3,8 ++ rlwimi $s0,$t0,24,0,7 ++ rlwimi $s1,$t1,24,0,7 ++ rlwimi $s2,$t2,24,0,7 ++ rlwimi $s3,$t3,24,0,7 ++ rlwimi $s0,$t0,24,16,23 ++ rlwimi $s1,$t1,24,16,23 ++ rlwimi $s2,$t2,24,16,23 ++ rlwimi $s3,$t3,24,16,23 ++___ ++$code.=<<___; + bl LAES_Te + bl Lppc_AES_encrypt_compact ++ $POP $out,`$FRAME-$SIZE_T*19`($sp) ++___ ++$code.=<<___ if ($LITTLE_ENDIAN); ++ rotlwi $t0,$s0,8 ++ rotlwi $t1,$s1,8 ++ rotlwi $t2,$s2,8 ++ rotlwi $t3,$s3,8 ++ rlwimi $t0,$s0,24,0,7 ++ rlwimi $t1,$s1,24,0,7 ++ rlwimi $t2,$s2,24,0,7 ++ rlwimi $t3,$s3,24,0,7 ++ rlwimi $t0,$s0,24,16,23 ++ rlwimi 
$t1,$s1,24,16,23 ++ rlwimi $t2,$s2,24,16,23 ++ rlwimi $t3,$s3,24,16,23 ++ stw $t0,0($out) ++ stw $t1,4($out) ++ stw $t2,8($out) ++ stw $t3,12($out) ++___ ++$code.=<<___ if (!$LITTLE_ENDIAN); + stw $s0,0($out) + stw $s1,4($out) + stw $s2,8($out) + stw $s3,12($out) ++___ ++$code.=<<___; + b Lenc_done + + Lenc_unaligned: +@@ -417,6 +460,7 @@ Lenc_xpage: + + bl LAES_Te + bl Lppc_AES_encrypt_compact ++ $POP $out,`$FRAME-$SIZE_T*19`($sp) + + extrwi $acc00,$s0,8,0 + extrwi $acc01,$s0,8,8 +@@ -449,8 +493,6 @@ Lenc_xpage: + + Lenc_done: + $POP r0,`$FRAME+$LRSAVE`($sp) +- $POP $toc,`$FRAME-$SIZE_T*20`($sp) +- $POP r13,`$FRAME-$SIZE_T*19`($sp) + $POP r14,`$FRAME-$SIZE_T*18`($sp) + $POP r15,`$FRAME-$SIZE_T*17`($sp) + $POP r16,`$FRAME-$SIZE_T*16`($sp) +@@ -764,6 +806,7 @@ Lenc_compact_done: + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ++.size .AES_encrypt,.-.AES_encrypt + + .globl .AES_decrypt + .align 7 +@@ -771,8 +814,7 @@ Lenc_compact_done: + $STU $sp,-$FRAME($sp) + mflr r0 + +- $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) +- $PUSH r13,`$FRAME-$SIZE_T*19`($sp) ++ $PUSH $out,`$FRAME-$SIZE_T*19`($sp) + $PUSH r14,`$FRAME-$SIZE_T*18`($sp) + $PUSH r15,`$FRAME-$SIZE_T*17`($sp) + $PUSH r16,`$FRAME-$SIZE_T*16`($sp) +@@ -799,16 +841,61 @@ Lenc_compact_done: + bne Ldec_unaligned + + Ldec_unaligned_ok: ++___ ++$code.=<<___ if (!$LITTLE_ENDIAN); + lwz $s0,0($inp) + lwz $s1,4($inp) + lwz $s2,8($inp) + lwz $s3,12($inp) ++___ ++$code.=<<___ if ($LITTLE_ENDIAN); ++ lwz $t0,0($inp) ++ lwz $t1,4($inp) ++ lwz $t2,8($inp) ++ lwz $t3,12($inp) ++ rotlwi $s0,$t0,8 ++ rotlwi $s1,$t1,8 ++ rotlwi $s2,$t2,8 ++ rotlwi $s3,$t3,8 ++ rlwimi $s0,$t0,24,0,7 ++ rlwimi $s1,$t1,24,0,7 ++ rlwimi $s2,$t2,24,0,7 ++ rlwimi $s3,$t3,24,0,7 ++ rlwimi $s0,$t0,24,16,23 ++ rlwimi $s1,$t1,24,16,23 ++ rlwimi $s2,$t2,24,16,23 ++ rlwimi $s3,$t3,24,16,23 ++___ ++$code.=<<___; + bl LAES_Td + bl Lppc_AES_decrypt_compact ++ $POP $out,`$FRAME-$SIZE_T*19`($sp) ++___ ++$code.=<<___ if ($LITTLE_ENDIAN); ++ rotlwi $t0,$s0,8 ++ rotlwi 
$t1,$s1,8 ++ rotlwi $t2,$s2,8 ++ rotlwi $t3,$s3,8 ++ rlwimi $t0,$s0,24,0,7 ++ rlwimi $t1,$s1,24,0,7 ++ rlwimi $t2,$s2,24,0,7 ++ rlwimi $t3,$s3,24,0,7 ++ rlwimi $t0,$s0,24,16,23 ++ rlwimi $t1,$s1,24,16,23 ++ rlwimi $t2,$s2,24,16,23 ++ rlwimi $t3,$s3,24,16,23 ++ stw $t0,0($out) ++ stw $t1,4($out) ++ stw $t2,8($out) ++ stw $t3,12($out) ++___ ++$code.=<<___ if (!$LITTLE_ENDIAN); + stw $s0,0($out) + stw $s1,4($out) + stw $s2,8($out) + stw $s3,12($out) ++___ ++$code.=<<___; + b Ldec_done + + Ldec_unaligned: +@@ -851,6 +938,7 @@ Ldec_xpage: + + bl LAES_Td + bl Lppc_AES_decrypt_compact ++ $POP $out,`$FRAME-$SIZE_T*19`($sp) + + extrwi $acc00,$s0,8,0 + extrwi $acc01,$s0,8,8 +@@ -883,8 +971,6 @@ Ldec_xpage: + + Ldec_done: + $POP r0,`$FRAME+$LRSAVE`($sp) +- $POP $toc,`$FRAME-$SIZE_T*20`($sp) +- $POP r13,`$FRAME-$SIZE_T*19`($sp) + $POP r14,`$FRAME-$SIZE_T*18`($sp) + $POP r15,`$FRAME-$SIZE_T*17`($sp) + $POP r16,`$FRAME-$SIZE_T*16`($sp) +@@ -1355,6 +1441,7 @@ Ldec_compact_done: + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ++.size .AES_decrypt,.-.AES_decrypt + + .asciz "AES for PPC, CRYPTOGAMS by " + .align 7 +diff --git a/crypto/aes/asm/aesp8-ppc.pl b/crypto/aes/asm/aesp8-ppc.pl +new file mode 100755 +index 0000000..3ee8979 +--- /dev/null ++++ b/crypto/aes/asm/aesp8-ppc.pl +@@ -0,0 +1,1940 @@ ++#!/usr/bin/env perl ++# ++# ==================================================================== ++# Written by Andy Polyakov for the OpenSSL ++# project. The module is, however, dual licensed under OpenSSL and ++# CRYPTOGAMS licenses depending on where you obtain it. For further ++# details see http://www.openssl.org/~appro/cryptogams/. ++# ==================================================================== ++# ++# This module implements support for AES instructions as per PowerISA ++# specification version 2.07, first implemented by POWER8 processor. ++# The module is endian-agnostic in sense that it supports both big- ++# and little-endian cases. 
Data alignment in parallelizable modes is ++# handled with VSX loads and stores, which implies MSR.VSX flag being ++# set. It should also be noted that ISA specification doesn't prohibit ++# alignment exceptions for these instructions on page boundaries. ++# Initially alignment was handled in pure AltiVec/VMX way [when data ++# is aligned programmatically, which in turn guarantees exception- ++# free execution], but it turned to hamper performance when vcipher ++# instructions are interleaved. It's reckoned that eventual ++# misalignment penalties at page boundaries are in average lower ++# than additional overhead in pure AltiVec approach. ++ ++$flavour = shift; ++ ++if ($flavour =~ /64/) { ++ $SIZE_T =8; ++ $LRSAVE =2*$SIZE_T; ++ $STU ="stdu"; ++ $POP ="ld"; ++ $PUSH ="std"; ++ $UCMP ="cmpld"; ++ $SHL ="sldi"; ++} elsif ($flavour =~ /32/) { ++ $SIZE_T =4; ++ $LRSAVE =$SIZE_T; ++ $STU ="stwu"; ++ $POP ="lwz"; ++ $PUSH ="stw"; ++ $UCMP ="cmplw"; ++ $SHL ="slwi"; ++} else { die "nonsense $flavour"; } ++ ++$LITTLE_ENDIAN = ($flavour=~/le$/) ? 
$SIZE_T : 0; ++ ++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ++( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or ++( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or ++die "can't locate ppc-xlate.pl"; ++ ++open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; ++ ++$FRAME=8*$SIZE_T; ++$prefix="aes_p8"; ++ ++$sp="r1"; ++$vrsave="r12"; ++ ++######################################################################### ++{{{ # Key setup procedures # ++my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8)); ++my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6)); ++my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11)); ++ ++$code.=<<___; ++.machine "any" ++ ++.text ++ ++.align 7 ++rcon: ++.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev ++.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev ++.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev ++.long 0,0,0,0 ?asis ++Lconsts: ++ mflr r0 ++ bcl 20,31,\$+4 ++ mflr $ptr #vvvvv "distance between . and rcon ++ addi $ptr,$ptr,-0x48 ++ mtlr r0 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++.asciz "AES for PowerISA 2.07, CRYPTOGAMS by " ++ ++.globl .${prefix}_set_encrypt_key ++.align 5 ++.${prefix}_set_encrypt_key: ++Lset_encrypt_key: ++ mflr r11 ++ $PUSH r11,$LRSAVE($sp) ++ ++ li $ptr,-1 ++ ${UCMP}i $inp,0 ++ beq- Lenc_key_abort # if ($inp==0) return -1; ++ ${UCMP}i $out,0 ++ beq- Lenc_key_abort # if ($out==0) return -1; ++ li $ptr,-2 ++ cmpwi $bits,128 ++ blt- Lenc_key_abort ++ cmpwi $bits,256 ++ bgt- Lenc_key_abort ++ andi. 
r0,$bits,0x3f ++ bne- Lenc_key_abort ++ ++ lis r0,0xfff0 ++ mfspr $vrsave,256 ++ mtspr 256,r0 ++ ++ bl Lconsts ++ mtlr r11 ++ ++ neg r9,$inp ++ lvx $in0,0,$inp ++ addi $inp,$inp,15 # 15 is not typo ++ lvsr $key,0,r9 # borrow $key ++ li r8,0x20 ++ cmpwi $bits,192 ++ lvx $in1,0,$inp ++ le?vspltisb $mask,0x0f # borrow $mask ++ lvx $rcon,0,$ptr ++ le?vxor $key,$key,$mask # adjust for byte swap ++ lvx $mask,r8,$ptr ++ addi $ptr,$ptr,0x10 ++ vperm $in0,$in0,$in1,$key # align [and byte swap in LE] ++ li $cnt,8 ++ vxor $zero,$zero,$zero ++ mtctr $cnt ++ ++ ?lvsr $outperm,0,$out ++ vspltisb $outmask,-1 ++ lvx $outhead,0,$out ++ ?vperm $outmask,$zero,$outmask,$outperm ++ ++ blt Loop128 ++ addi $inp,$inp,8 ++ beq L192 ++ addi $inp,$inp,8 ++ b L256 ++ ++.align 4 ++Loop128: ++ vperm $key,$in0,$in0,$mask # rotate-n-splat ++ vsldoi $tmp,$zero,$in0,12 # >>32 ++ vperm $outtail,$in0,$in0,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ vcipherlast $key,$key,$rcon ++ stvx $stage,0,$out ++ addi $out,$out,16 ++ ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vadduwm $rcon,$rcon,$rcon ++ vxor $in0,$in0,$key ++ bdnz Loop128 ++ ++ lvx $rcon,0,$ptr # last two round keys ++ ++ vperm $key,$in0,$in0,$mask # rotate-n-splat ++ vsldoi $tmp,$zero,$in0,12 # >>32 ++ vperm $outtail,$in0,$in0,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ vcipherlast $key,$key,$rcon ++ stvx $stage,0,$out ++ addi $out,$out,16 ++ ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vadduwm $rcon,$rcon,$rcon ++ vxor $in0,$in0,$key ++ ++ vperm $key,$in0,$in0,$mask # rotate-n-splat ++ vsldoi $tmp,$zero,$in0,12 # >>32 ++ vperm $outtail,$in0,$in0,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ vcipherlast $key,$key,$rcon 
++ stvx $stage,0,$out ++ addi $out,$out,16 ++ ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vxor $in0,$in0,$key ++ vperm $outtail,$in0,$in0,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ stvx $stage,0,$out ++ ++ addi $inp,$out,15 # 15 is not typo ++ addi $out,$out,0x50 ++ ++ li $rounds,10 ++ b Ldone ++ ++.align 4 ++L192: ++ lvx $tmp,0,$inp ++ li $cnt,4 ++ vperm $outtail,$in0,$in0,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ stvx $stage,0,$out ++ addi $out,$out,16 ++ vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] ++ vspltisb $key,8 # borrow $key ++ mtctr $cnt ++ vsububm $mask,$mask,$key # adjust the mask ++ ++Loop192: ++ vperm $key,$in1,$in1,$mask # roate-n-splat ++ vsldoi $tmp,$zero,$in0,12 # >>32 ++ vcipherlast $key,$key,$rcon ++ ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ ++ vsldoi $stage,$zero,$in1,8 ++ vspltw $tmp,$in0,3 ++ vxor $tmp,$tmp,$in1 ++ vsldoi $in1,$zero,$in1,12 # >>32 ++ vadduwm $rcon,$rcon,$rcon ++ vxor $in1,$in1,$tmp ++ vxor $in0,$in0,$key ++ vxor $in1,$in1,$key ++ vsldoi $stage,$stage,$in0,8 ++ ++ vperm $key,$in1,$in1,$mask # rotate-n-splat ++ vsldoi $tmp,$zero,$in0,12 # >>32 ++ vperm $outtail,$stage,$stage,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ vcipherlast $key,$key,$rcon ++ stvx $stage,0,$out ++ addi $out,$out,16 ++ ++ vsldoi $stage,$in0,$in1,8 ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vperm $outtail,$stage,$stage,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ stvx $stage,0,$out ++ addi $out,$out,16 ++ ++ vspltw $tmp,$in0,3 ++ vxor $tmp,$tmp,$in1 ++ vsldoi 
$in1,$zero,$in1,12 # >>32 ++ vadduwm $rcon,$rcon,$rcon ++ vxor $in1,$in1,$tmp ++ vxor $in0,$in0,$key ++ vxor $in1,$in1,$key ++ vperm $outtail,$in0,$in0,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ stvx $stage,0,$out ++ addi $inp,$out,15 # 15 is not typo ++ addi $out,$out,16 ++ bdnz Loop192 ++ ++ li $rounds,12 ++ addi $out,$out,0x20 ++ b Ldone ++ ++.align 4 ++L256: ++ lvx $tmp,0,$inp ++ li $cnt,7 ++ li $rounds,14 ++ vperm $outtail,$in0,$in0,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ stvx $stage,0,$out ++ addi $out,$out,16 ++ vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] ++ mtctr $cnt ++ ++Loop256: ++ vperm $key,$in1,$in1,$mask # rotate-n-splat ++ vsldoi $tmp,$zero,$in0,12 # >>32 ++ vperm $outtail,$in1,$in1,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ vcipherlast $key,$key,$rcon ++ stvx $stage,0,$out ++ addi $out,$out,16 ++ ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vadduwm $rcon,$rcon,$rcon ++ vxor $in0,$in0,$key ++ vperm $outtail,$in0,$in0,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ stvx $stage,0,$out ++ addi $inp,$out,15 # 15 is not typo ++ addi $out,$out,16 ++ bdz Ldone ++ ++ vspltw $key,$in0,3 # just splat ++ vsldoi $tmp,$zero,$in1,12 # >>32 ++ vsbox $key,$key ++ ++ vxor $in1,$in1,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in1,$in1,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in1,$in1,$tmp ++ ++ vxor $in1,$in1,$key ++ b Loop256 ++ ++.align 4 ++Ldone: ++ lvx $in1,0,$inp # redundant in aligned case ++ vsel $in1,$outhead,$in1,$outmask ++ stvx $in1,0,$inp ++ li $ptr,0 ++ mtspr 256,$vrsave ++ stw $rounds,0($out) ++ ++Lenc_key_abort: ++ mr r3,$ptr ++ blr ++ .long 0 ++ .byte 0,12,0x14,1,0,0,3,0 ++ .long 0 ++.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key ++ 
++.globl .${prefix}_set_decrypt_key ++.align 5 ++.${prefix}_set_decrypt_key: ++ $STU $sp,-$FRAME($sp) ++ mflr r10 ++ $PUSH r10,$FRAME+$LRSAVE($sp) ++ bl Lset_encrypt_key ++ mtlr r10 ++ ++ cmpwi r3,0 ++ bne- Ldec_key_abort ++ ++ slwi $cnt,$rounds,4 ++ subi $inp,$out,240 # first round key ++ srwi $rounds,$rounds,1 ++ add $out,$inp,$cnt # last round key ++ mtctr $rounds ++ ++Ldeckey: ++ lwz r0, 0($inp) ++ lwz r6, 4($inp) ++ lwz r7, 8($inp) ++ lwz r8, 12($inp) ++ addi $inp,$inp,16 ++ lwz r9, 0($out) ++ lwz r10,4($out) ++ lwz r11,8($out) ++ lwz r12,12($out) ++ stw r0, 0($out) ++ stw r6, 4($out) ++ stw r7, 8($out) ++ stw r8, 12($out) ++ subi $out,$out,16 ++ stw r9, -16($inp) ++ stw r10,-12($inp) ++ stw r11,-8($inp) ++ stw r12,-4($inp) ++ bdnz Ldeckey ++ ++ xor r3,r3,r3 # return value ++Ldec_key_abort: ++ addi $sp,$sp,$FRAME ++ blr ++ .long 0 ++ .byte 0,12,4,1,0x80,0,3,0 ++ .long 0 ++.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key ++___ ++}}} ++######################################################################### ++{{{ # Single block en- and decrypt procedures # ++sub gen_block () { ++my $dir = shift; ++my $n = $dir eq "de" ? 
"n" : ""; ++my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); ++ ++$code.=<<___; ++.globl .${prefix}_${dir}crypt ++.align 5 ++.${prefix}_${dir}crypt: ++ lwz $rounds,240($key) ++ lis r0,0xfc00 ++ mfspr $vrsave,256 ++ li $idx,15 # 15 is not typo ++ mtspr 256,r0 ++ ++ lvx v0,0,$inp ++ neg r11,$out ++ lvx v1,$idx,$inp ++ lvsl v2,0,$inp # inpperm ++ le?vspltisb v4,0x0f ++ ?lvsl v3,0,r11 # outperm ++ le?vxor v2,v2,v4 ++ li $idx,16 ++ vperm v0,v0,v1,v2 # align [and byte swap in LE] ++ lvx v1,0,$key ++ ?lvsl v5,0,$key # keyperm ++ srwi $rounds,$rounds,1 ++ lvx v2,$idx,$key ++ addi $idx,$idx,16 ++ subi $rounds,$rounds,1 ++ ?vperm v1,v1,v2,v5 # align round key ++ ++ vxor v0,v0,v1 ++ lvx v1,$idx,$key ++ addi $idx,$idx,16 ++ mtctr $rounds ++ ++Loop_${dir}c: ++ ?vperm v2,v2,v1,v5 ++ v${n}cipher v0,v0,v2 ++ lvx v2,$idx,$key ++ addi $idx,$idx,16 ++ ?vperm v1,v1,v2,v5 ++ v${n}cipher v0,v0,v1 ++ lvx v1,$idx,$key ++ addi $idx,$idx,16 ++ bdnz Loop_${dir}c ++ ++ ?vperm v2,v2,v1,v5 ++ v${n}cipher v0,v0,v2 ++ lvx v2,$idx,$key ++ ?vperm v1,v1,v2,v5 ++ v${n}cipherlast v0,v0,v1 ++ ++ vspltisb v2,-1 ++ vxor v1,v1,v1 ++ li $idx,15 # 15 is not typo ++ ?vperm v2,v1,v2,v3 # outmask ++ le?vxor v3,v3,v4 ++ lvx v1,0,$out # outhead ++ vperm v0,v0,v0,v3 # rotate [and byte swap in LE] ++ vsel v1,v1,v0,v2 ++ lvx v4,$idx,$out ++ stvx v1,0,$out ++ vsel v0,v0,v4,v2 ++ stvx v0,$idx,$out ++ ++ mtspr 256,$vrsave ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,3,0 ++ .long 0 ++.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt ++___ ++} ++&gen_block("en"); ++&gen_block("de"); ++}}} ++######################################################################### ++{{{ # CBC en- and decrypt procedures # ++my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10)); ++my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); ++my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)= ++ map("v$_",(4..10)); ++$code.=<<___; ++.globl .${prefix}_cbc_encrypt ++.align 5 ++.${prefix}_cbc_encrypt: ++ ${UCMP}i 
$len,16 ++ bltlr- ++ ++ cmpwi $enc,0 # test direction ++ lis r0,0xffe0 ++ mfspr $vrsave,256 ++ mtspr 256,r0 ++ ++ li $idx,15 ++ vxor $rndkey0,$rndkey0,$rndkey0 ++ le?vspltisb $tmp,0x0f ++ ++ lvx $ivec,0,$ivp # load [unaligned] iv ++ lvsl $inpperm,0,$ivp ++ lvx $inptail,$idx,$ivp ++ le?vxor $inpperm,$inpperm,$tmp ++ vperm $ivec,$ivec,$inptail,$inpperm ++ ++ neg r11,$inp ++ ?lvsl $keyperm,0,$key # prepare for unaligned key ++ lwz $rounds,240($key) ++ ++ lvsr $inpperm,0,r11 # prepare for unaligned load ++ lvx $inptail,0,$inp ++ addi $inp,$inp,15 # 15 is not typo ++ le?vxor $inpperm,$inpperm,$tmp ++ ++ ?lvsr $outperm,0,$out # prepare for unaligned store ++ vspltisb $outmask,-1 ++ lvx $outhead,0,$out ++ ?vperm $outmask,$rndkey0,$outmask,$outperm ++ le?vxor $outperm,$outperm,$tmp ++ ++ srwi $rounds,$rounds,1 ++ li $idx,16 ++ subi $rounds,$rounds,1 ++ beq Lcbc_dec ++ ++Lcbc_enc: ++ vmr $inout,$inptail ++ lvx $inptail,0,$inp ++ addi $inp,$inp,16 ++ mtctr $rounds ++ subi $len,$len,16 # len-=16 ++ ++ lvx $rndkey0,0,$key ++ vperm $inout,$inout,$inptail,$inpperm ++ lvx $rndkey1,$idx,$key ++ addi $idx,$idx,16 ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vxor $inout,$inout,$rndkey0 ++ lvx $rndkey0,$idx,$key ++ addi $idx,$idx,16 ++ vxor $inout,$inout,$ivec ++ ++Loop_cbc_enc: ++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm ++ vcipher $inout,$inout,$rndkey1 ++ lvx $rndkey1,$idx,$key ++ addi $idx,$idx,16 ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vcipher $inout,$inout,$rndkey0 ++ lvx $rndkey0,$idx,$key ++ addi $idx,$idx,16 ++ bdnz Loop_cbc_enc ++ ++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm ++ vcipher $inout,$inout,$rndkey1 ++ lvx $rndkey1,$idx,$key ++ li $idx,16 ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vcipherlast $ivec,$inout,$rndkey0 ++ ${UCMP}i $len,16 ++ ++ vperm $tmp,$ivec,$ivec,$outperm ++ vsel $inout,$outhead,$tmp,$outmask ++ vmr $outhead,$tmp ++ stvx $inout,0,$out ++ addi $out,$out,16 ++ bge Lcbc_enc ++ ++ b Lcbc_done ++ ++.align 4 ++Lcbc_dec: ++ ${UCMP}i 
$len,128 ++ bge _aesp8_cbc_decrypt8x ++ vmr $tmp,$inptail ++ lvx $inptail,0,$inp ++ addi $inp,$inp,16 ++ mtctr $rounds ++ subi $len,$len,16 # len-=16 ++ ++ lvx $rndkey0,0,$key ++ vperm $tmp,$tmp,$inptail,$inpperm ++ lvx $rndkey1,$idx,$key ++ addi $idx,$idx,16 ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vxor $inout,$tmp,$rndkey0 ++ lvx $rndkey0,$idx,$key ++ addi $idx,$idx,16 ++ ++Loop_cbc_dec: ++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm ++ vncipher $inout,$inout,$rndkey1 ++ lvx $rndkey1,$idx,$key ++ addi $idx,$idx,16 ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vncipher $inout,$inout,$rndkey0 ++ lvx $rndkey0,$idx,$key ++ addi $idx,$idx,16 ++ bdnz Loop_cbc_dec ++ ++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm ++ vncipher $inout,$inout,$rndkey1 ++ lvx $rndkey1,$idx,$key ++ li $idx,16 ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vncipherlast $inout,$inout,$rndkey0 ++ ${UCMP}i $len,16 ++ ++ vxor $inout,$inout,$ivec ++ vmr $ivec,$tmp ++ vperm $tmp,$inout,$inout,$outperm ++ vsel $inout,$outhead,$tmp,$outmask ++ vmr $outhead,$tmp ++ stvx $inout,0,$out ++ addi $out,$out,16 ++ bge Lcbc_dec ++ ++Lcbc_done: ++ addi $out,$out,-1 ++ lvx $inout,0,$out # redundant in aligned case ++ vsel $inout,$outhead,$inout,$outmask ++ stvx $inout,0,$out ++ ++ neg $enc,$ivp # write [unaligned] iv ++ li $idx,15 # 15 is not typo ++ vxor $rndkey0,$rndkey0,$rndkey0 ++ vspltisb $outmask,-1 ++ le?vspltisb $tmp,0x0f ++ ?lvsl $outperm,0,$enc ++ ?vperm $outmask,$rndkey0,$outmask,$outperm ++ le?vxor $outperm,$outperm,$tmp ++ lvx $outhead,0,$ivp ++ vperm $ivec,$ivec,$ivec,$outperm ++ vsel $inout,$outhead,$ivec,$outmask ++ lvx $inptail,$idx,$ivp ++ stvx $inout,0,$ivp ++ vsel $inout,$ivec,$inptail,$outmask ++ stvx $inout,$idx,$ivp ++ ++ mtspr 256,$vrsave ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,6,0 ++ .long 0 ++___ ++######################################################################### ++{{ # Optimized CBC decrypt procedure # ++my $key_="r11"; ++my 
($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); ++my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13)); ++my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21)); ++my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys ++ # v26-v31 last 6 round keys ++my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment ++ ++$code.=<<___; ++.align 5 ++_aesp8_cbc_decrypt8x: ++ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) ++ li r10,`$FRAME+8*16+15` ++ li r11,`$FRAME+8*16+31` ++ stvx v20,r10,$sp # ABI says so ++ addi r10,r10,32 ++ stvx v21,r11,$sp ++ addi r11,r11,32 ++ stvx v22,r10,$sp ++ addi r10,r10,32 ++ stvx v23,r11,$sp ++ addi r11,r11,32 ++ stvx v24,r10,$sp ++ addi r10,r10,32 ++ stvx v25,r11,$sp ++ addi r11,r11,32 ++ stvx v26,r10,$sp ++ addi r10,r10,32 ++ stvx v27,r11,$sp ++ addi r11,r11,32 ++ stvx v28,r10,$sp ++ addi r10,r10,32 ++ stvx v29,r11,$sp ++ addi r11,r11,32 ++ stvx v30,r10,$sp ++ stvx v31,r11,$sp ++ li r0,-1 ++ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave ++ li $x10,0x10 ++ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) ++ li $x20,0x20 ++ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) ++ li $x30,0x30 ++ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) ++ li $x40,0x40 ++ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) ++ li $x50,0x50 ++ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) ++ li $x60,0x60 ++ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) ++ li $x70,0x70 ++ mtspr 256,r0 ++ ++ subi $rounds,$rounds,3 # -4 in total ++ subi $len,$len,128 # bias ++ ++ lvx $rndkey0,$x00,$key # load key schedule ++ lvx v30,$x10,$key ++ addi $key,$key,0x20 ++ lvx v31,$x00,$key ++ ?vperm $rndkey0,$rndkey0,v30,$keyperm ++ addi $key_,$sp,$FRAME+15 ++ mtctr $rounds ++ ++Load_cbc_dec_key: ++ ?vperm v24,v30,v31,$keyperm ++ lvx v30,$x10,$key ++ addi $key,$key,0x20 ++ stvx v24,$x00,$key_ # off-load round[1] ++ ?vperm v25,v31,v30,$keyperm ++ lvx v31,$x00,$key ++ stvx v25,$x10,$key_ # off-load round[2] ++ addi $key_,$key_,0x20 ++ bdnz 
Load_cbc_dec_key ++ ++ lvx v26,$x10,$key ++ ?vperm v24,v30,v31,$keyperm ++ lvx v27,$x20,$key ++ stvx v24,$x00,$key_ # off-load round[3] ++ ?vperm v25,v31,v26,$keyperm ++ lvx v28,$x30,$key ++ stvx v25,$x10,$key_ # off-load round[4] ++ addi $key_,$sp,$FRAME+15 # rewind $key_ ++ ?vperm v26,v26,v27,$keyperm ++ lvx v29,$x40,$key ++ ?vperm v27,v27,v28,$keyperm ++ lvx v30,$x50,$key ++ ?vperm v28,v28,v29,$keyperm ++ lvx v31,$x60,$key ++ ?vperm v29,v29,v30,$keyperm ++ lvx $out0,$x70,$key # borrow $out0 ++ ?vperm v30,v30,v31,$keyperm ++ lvx v24,$x00,$key_ # pre-load round[1] ++ ?vperm v31,v31,$out0,$keyperm ++ lvx v25,$x10,$key_ # pre-load round[2] ++ ++ #lvx $inptail,0,$inp # "caller" already did this ++ #addi $inp,$inp,15 # 15 is not typo ++ subi $inp,$inp,15 # undo "caller" ++ ++ le?li $idx,8 ++ lvx_u $in0,$x00,$inp # load first 8 "words" ++ le?lvsl $inpperm,0,$idx ++ le?vspltisb $tmp,0x0f ++ lvx_u $in1,$x10,$inp ++ le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u ++ lvx_u $in2,$x20,$inp ++ le?vperm $in0,$in0,$in0,$inpperm ++ lvx_u $in3,$x30,$inp ++ le?vperm $in1,$in1,$in1,$inpperm ++ lvx_u $in4,$x40,$inp ++ le?vperm $in2,$in2,$in2,$inpperm ++ vxor $out0,$in0,$rndkey0 ++ lvx_u $in5,$x50,$inp ++ le?vperm $in3,$in3,$in3,$inpperm ++ vxor $out1,$in1,$rndkey0 ++ lvx_u $in6,$x60,$inp ++ le?vperm $in4,$in4,$in4,$inpperm ++ vxor $out2,$in2,$rndkey0 ++ lvx_u $in7,$x70,$inp ++ addi $inp,$inp,0x80 ++ le?vperm $in5,$in5,$in5,$inpperm ++ vxor $out3,$in3,$rndkey0 ++ le?vperm $in6,$in6,$in6,$inpperm ++ vxor $out4,$in4,$rndkey0 ++ le?vperm $in7,$in7,$in7,$inpperm ++ vxor $out5,$in5,$rndkey0 ++ vxor $out6,$in6,$rndkey0 ++ vxor $out7,$in7,$rndkey0 ++ ++ mtctr $rounds ++ b Loop_cbc_dec8x ++.align 5 ++Loop_cbc_dec8x: ++ vncipher $out0,$out0,v24 ++ vncipher $out1,$out1,v24 ++ vncipher $out2,$out2,v24 ++ vncipher $out3,$out3,v24 ++ vncipher $out4,$out4,v24 ++ vncipher $out5,$out5,v24 ++ vncipher $out6,$out6,v24 ++ vncipher $out7,$out7,v24 ++ lvx v24,$x20,$key_ # round[3] ++ addi 
$key_,$key_,0x20 ++ ++ vncipher $out0,$out0,v25 ++ vncipher $out1,$out1,v25 ++ vncipher $out2,$out2,v25 ++ vncipher $out3,$out3,v25 ++ vncipher $out4,$out4,v25 ++ vncipher $out5,$out5,v25 ++ vncipher $out6,$out6,v25 ++ vncipher $out7,$out7,v25 ++ lvx v25,$x10,$key_ # round[4] ++ bdnz Loop_cbc_dec8x ++ ++ subic $len,$len,128 # $len-=128 ++ vncipher $out0,$out0,v24 ++ vncipher $out1,$out1,v24 ++ vncipher $out2,$out2,v24 ++ vncipher $out3,$out3,v24 ++ vncipher $out4,$out4,v24 ++ vncipher $out5,$out5,v24 ++ vncipher $out6,$out6,v24 ++ vncipher $out7,$out7,v24 ++ ++ subfe. r0,r0,r0 # borrow?-1:0 ++ vncipher $out0,$out0,v25 ++ vncipher $out1,$out1,v25 ++ vncipher $out2,$out2,v25 ++ vncipher $out3,$out3,v25 ++ vncipher $out4,$out4,v25 ++ vncipher $out5,$out5,v25 ++ vncipher $out6,$out6,v25 ++ vncipher $out7,$out7,v25 ++ ++ and r0,r0,$len ++ vncipher $out0,$out0,v26 ++ vncipher $out1,$out1,v26 ++ vncipher $out2,$out2,v26 ++ vncipher $out3,$out3,v26 ++ vncipher $out4,$out4,v26 ++ vncipher $out5,$out5,v26 ++ vncipher $out6,$out6,v26 ++ vncipher $out7,$out7,v26 ++ ++ add $inp,$inp,r0 # $inp is adjusted in such ++ # way that at exit from the ++ # loop inX-in7 are loaded ++ # with last "words" ++ vncipher $out0,$out0,v27 ++ vncipher $out1,$out1,v27 ++ vncipher $out2,$out2,v27 ++ vncipher $out3,$out3,v27 ++ vncipher $out4,$out4,v27 ++ vncipher $out5,$out5,v27 ++ vncipher $out6,$out6,v27 ++ vncipher $out7,$out7,v27 ++ ++ addi $key_,$sp,$FRAME+15 # rewind $key_ ++ vncipher $out0,$out0,v28 ++ vncipher $out1,$out1,v28 ++ vncipher $out2,$out2,v28 ++ vncipher $out3,$out3,v28 ++ vncipher $out4,$out4,v28 ++ vncipher $out5,$out5,v28 ++ vncipher $out6,$out6,v28 ++ vncipher $out7,$out7,v28 ++ lvx v24,$x00,$key_ # re-pre-load round[1] ++ ++ vncipher $out0,$out0,v29 ++ vncipher $out1,$out1,v29 ++ vncipher $out2,$out2,v29 ++ vncipher $out3,$out3,v29 ++ vncipher $out4,$out4,v29 ++ vncipher $out5,$out5,v29 ++ vncipher $out6,$out6,v29 ++ vncipher $out7,$out7,v29 ++ lvx v25,$x10,$key_ # 
re-pre-load round[2] ++ ++ vncipher $out0,$out0,v30 ++ vxor $ivec,$ivec,v31 # xor with last round key ++ vncipher $out1,$out1,v30 ++ vxor $in0,$in0,v31 ++ vncipher $out2,$out2,v30 ++ vxor $in1,$in1,v31 ++ vncipher $out3,$out3,v30 ++ vxor $in2,$in2,v31 ++ vncipher $out4,$out4,v30 ++ vxor $in3,$in3,v31 ++ vncipher $out5,$out5,v30 ++ vxor $in4,$in4,v31 ++ vncipher $out6,$out6,v30 ++ vxor $in5,$in5,v31 ++ vncipher $out7,$out7,v30 ++ vxor $in6,$in6,v31 ++ ++ vncipherlast $out0,$out0,$ivec ++ vncipherlast $out1,$out1,$in0 ++ lvx_u $in0,$x00,$inp # load next input block ++ vncipherlast $out2,$out2,$in1 ++ lvx_u $in1,$x10,$inp ++ vncipherlast $out3,$out3,$in2 ++ le?vperm $in0,$in0,$in0,$inpperm ++ lvx_u $in2,$x20,$inp ++ vncipherlast $out4,$out4,$in3 ++ le?vperm $in1,$in1,$in1,$inpperm ++ lvx_u $in3,$x30,$inp ++ vncipherlast $out5,$out5,$in4 ++ le?vperm $in2,$in2,$in2,$inpperm ++ lvx_u $in4,$x40,$inp ++ vncipherlast $out6,$out6,$in5 ++ le?vperm $in3,$in3,$in3,$inpperm ++ lvx_u $in5,$x50,$inp ++ vncipherlast $out7,$out7,$in6 ++ le?vperm $in4,$in4,$in4,$inpperm ++ lvx_u $in6,$x60,$inp ++ vmr $ivec,$in7 ++ le?vperm $in5,$in5,$in5,$inpperm ++ lvx_u $in7,$x70,$inp ++ addi $inp,$inp,0x80 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ le?vperm $out1,$out1,$out1,$inpperm ++ stvx_u $out0,$x00,$out ++ le?vperm $in6,$in6,$in6,$inpperm ++ vxor $out0,$in0,$rndkey0 ++ le?vperm $out2,$out2,$out2,$inpperm ++ stvx_u $out1,$x10,$out ++ le?vperm $in7,$in7,$in7,$inpperm ++ vxor $out1,$in1,$rndkey0 ++ le?vperm $out3,$out3,$out3,$inpperm ++ stvx_u $out2,$x20,$out ++ vxor $out2,$in2,$rndkey0 ++ le?vperm $out4,$out4,$out4,$inpperm ++ stvx_u $out3,$x30,$out ++ vxor $out3,$in3,$rndkey0 ++ le?vperm $out5,$out5,$out5,$inpperm ++ stvx_u $out4,$x40,$out ++ vxor $out4,$in4,$rndkey0 ++ le?vperm $out6,$out6,$out6,$inpperm ++ stvx_u $out5,$x50,$out ++ vxor $out5,$in5,$rndkey0 ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out6,$x60,$out ++ vxor $out6,$in6,$rndkey0 ++ stvx_u $out7,$x70,$out ++ addi 
$out,$out,0x80 ++ vxor $out7,$in7,$rndkey0 ++ ++ mtctr $rounds ++ beq Loop_cbc_dec8x # did $len-=128 borrow? ++ ++ addic. $len,$len,128 ++ beq Lcbc_dec8x_done ++ nop ++ nop ++ ++Loop_cbc_dec8x_tail: # up to 7 "words" tail... ++ vncipher $out1,$out1,v24 ++ vncipher $out2,$out2,v24 ++ vncipher $out3,$out3,v24 ++ vncipher $out4,$out4,v24 ++ vncipher $out5,$out5,v24 ++ vncipher $out6,$out6,v24 ++ vncipher $out7,$out7,v24 ++ lvx v24,$x20,$key_ # round[3] ++ addi $key_,$key_,0x20 ++ ++ vncipher $out1,$out1,v25 ++ vncipher $out2,$out2,v25 ++ vncipher $out3,$out3,v25 ++ vncipher $out4,$out4,v25 ++ vncipher $out5,$out5,v25 ++ vncipher $out6,$out6,v25 ++ vncipher $out7,$out7,v25 ++ lvx v25,$x10,$key_ # round[4] ++ bdnz Loop_cbc_dec8x_tail ++ ++ vncipher $out1,$out1,v24 ++ vncipher $out2,$out2,v24 ++ vncipher $out3,$out3,v24 ++ vncipher $out4,$out4,v24 ++ vncipher $out5,$out5,v24 ++ vncipher $out6,$out6,v24 ++ vncipher $out7,$out7,v24 ++ ++ vncipher $out1,$out1,v25 ++ vncipher $out2,$out2,v25 ++ vncipher $out3,$out3,v25 ++ vncipher $out4,$out4,v25 ++ vncipher $out5,$out5,v25 ++ vncipher $out6,$out6,v25 ++ vncipher $out7,$out7,v25 ++ ++ vncipher $out1,$out1,v26 ++ vncipher $out2,$out2,v26 ++ vncipher $out3,$out3,v26 ++ vncipher $out4,$out4,v26 ++ vncipher $out5,$out5,v26 ++ vncipher $out6,$out6,v26 ++ vncipher $out7,$out7,v26 ++ ++ vncipher $out1,$out1,v27 ++ vncipher $out2,$out2,v27 ++ vncipher $out3,$out3,v27 ++ vncipher $out4,$out4,v27 ++ vncipher $out5,$out5,v27 ++ vncipher $out6,$out6,v27 ++ vncipher $out7,$out7,v27 ++ ++ vncipher $out1,$out1,v28 ++ vncipher $out2,$out2,v28 ++ vncipher $out3,$out3,v28 ++ vncipher $out4,$out4,v28 ++ vncipher $out5,$out5,v28 ++ vncipher $out6,$out6,v28 ++ vncipher $out7,$out7,v28 ++ ++ vncipher $out1,$out1,v29 ++ vncipher $out2,$out2,v29 ++ vncipher $out3,$out3,v29 ++ vncipher $out4,$out4,v29 ++ vncipher $out5,$out5,v29 ++ vncipher $out6,$out6,v29 ++ vncipher $out7,$out7,v29 ++ ++ vncipher $out1,$out1,v30 ++ vxor $ivec,$ivec,v31 # last 
round key ++ vncipher $out2,$out2,v30 ++ vxor $in1,$in1,v31 ++ vncipher $out3,$out3,v30 ++ vxor $in2,$in2,v31 ++ vncipher $out4,$out4,v30 ++ vxor $in3,$in3,v31 ++ vncipher $out5,$out5,v30 ++ vxor $in4,$in4,v31 ++ vncipher $out6,$out6,v30 ++ vxor $in5,$in5,v31 ++ vncipher $out7,$out7,v30 ++ vxor $in6,$in6,v31 ++ ++ cmplwi $len,32 # switch($len) ++ blt Lcbc_dec8x_one ++ nop ++ beq Lcbc_dec8x_two ++ cmplwi $len,64 ++ blt Lcbc_dec8x_three ++ nop ++ beq Lcbc_dec8x_four ++ cmplwi $len,96 ++ blt Lcbc_dec8x_five ++ nop ++ beq Lcbc_dec8x_six ++ ++Lcbc_dec8x_seven: ++ vncipherlast $out1,$out1,$ivec ++ vncipherlast $out2,$out2,$in1 ++ vncipherlast $out3,$out3,$in2 ++ vncipherlast $out4,$out4,$in3 ++ vncipherlast $out5,$out5,$in4 ++ vncipherlast $out6,$out6,$in5 ++ vncipherlast $out7,$out7,$in6 ++ vmr $ivec,$in7 ++ ++ le?vperm $out1,$out1,$out1,$inpperm ++ le?vperm $out2,$out2,$out2,$inpperm ++ stvx_u $out1,$x00,$out ++ le?vperm $out3,$out3,$out3,$inpperm ++ stvx_u $out2,$x10,$out ++ le?vperm $out4,$out4,$out4,$inpperm ++ stvx_u $out3,$x20,$out ++ le?vperm $out5,$out5,$out5,$inpperm ++ stvx_u $out4,$x30,$out ++ le?vperm $out6,$out6,$out6,$inpperm ++ stvx_u $out5,$x40,$out ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out6,$x50,$out ++ stvx_u $out7,$x60,$out ++ addi $out,$out,0x70 ++ b Lcbc_dec8x_done ++ ++.align 5 ++Lcbc_dec8x_six: ++ vncipherlast $out2,$out2,$ivec ++ vncipherlast $out3,$out3,$in2 ++ vncipherlast $out4,$out4,$in3 ++ vncipherlast $out5,$out5,$in4 ++ vncipherlast $out6,$out6,$in5 ++ vncipherlast $out7,$out7,$in6 ++ vmr $ivec,$in7 ++ ++ le?vperm $out2,$out2,$out2,$inpperm ++ le?vperm $out3,$out3,$out3,$inpperm ++ stvx_u $out2,$x00,$out ++ le?vperm $out4,$out4,$out4,$inpperm ++ stvx_u $out3,$x10,$out ++ le?vperm $out5,$out5,$out5,$inpperm ++ stvx_u $out4,$x20,$out ++ le?vperm $out6,$out6,$out6,$inpperm ++ stvx_u $out5,$x30,$out ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out6,$x40,$out ++ stvx_u $out7,$x50,$out ++ addi $out,$out,0x60 ++ b Lcbc_dec8x_done 
++ ++.align 5 ++Lcbc_dec8x_five: ++ vncipherlast $out3,$out3,$ivec ++ vncipherlast $out4,$out4,$in3 ++ vncipherlast $out5,$out5,$in4 ++ vncipherlast $out6,$out6,$in5 ++ vncipherlast $out7,$out7,$in6 ++ vmr $ivec,$in7 ++ ++ le?vperm $out3,$out3,$out3,$inpperm ++ le?vperm $out4,$out4,$out4,$inpperm ++ stvx_u $out3,$x00,$out ++ le?vperm $out5,$out5,$out5,$inpperm ++ stvx_u $out4,$x10,$out ++ le?vperm $out6,$out6,$out6,$inpperm ++ stvx_u $out5,$x20,$out ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out6,$x30,$out ++ stvx_u $out7,$x40,$out ++ addi $out,$out,0x50 ++ b Lcbc_dec8x_done ++ ++.align 5 ++Lcbc_dec8x_four: ++ vncipherlast $out4,$out4,$ivec ++ vncipherlast $out5,$out5,$in4 ++ vncipherlast $out6,$out6,$in5 ++ vncipherlast $out7,$out7,$in6 ++ vmr $ivec,$in7 ++ ++ le?vperm $out4,$out4,$out4,$inpperm ++ le?vperm $out5,$out5,$out5,$inpperm ++ stvx_u $out4,$x00,$out ++ le?vperm $out6,$out6,$out6,$inpperm ++ stvx_u $out5,$x10,$out ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out6,$x20,$out ++ stvx_u $out7,$x30,$out ++ addi $out,$out,0x40 ++ b Lcbc_dec8x_done ++ ++.align 5 ++Lcbc_dec8x_three: ++ vncipherlast $out5,$out5,$ivec ++ vncipherlast $out6,$out6,$in5 ++ vncipherlast $out7,$out7,$in6 ++ vmr $ivec,$in7 ++ ++ le?vperm $out5,$out5,$out5,$inpperm ++ le?vperm $out6,$out6,$out6,$inpperm ++ stvx_u $out5,$x00,$out ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out6,$x10,$out ++ stvx_u $out7,$x20,$out ++ addi $out,$out,0x30 ++ b Lcbc_dec8x_done ++ ++.align 5 ++Lcbc_dec8x_two: ++ vncipherlast $out6,$out6,$ivec ++ vncipherlast $out7,$out7,$in6 ++ vmr $ivec,$in7 ++ ++ le?vperm $out6,$out6,$out6,$inpperm ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out6,$x00,$out ++ stvx_u $out7,$x10,$out ++ addi $out,$out,0x20 ++ b Lcbc_dec8x_done ++ ++.align 5 ++Lcbc_dec8x_one: ++ vncipherlast $out7,$out7,$ivec ++ vmr $ivec,$in7 ++ ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out7,0,$out ++ addi $out,$out,0x10 ++ ++Lcbc_dec8x_done: ++ le?vperm 
$ivec,$ivec,$ivec,$inpperm ++ stvx_u $ivec,0,$ivp # write [unaligned] iv ++ ++ li r10,`$FRAME+15` ++ li r11,`$FRAME+31` ++ stvx $inpperm,r10,$sp # wipe copies of round keys ++ addi r10,r10,32 ++ stvx $inpperm,r11,$sp ++ addi r11,r11,32 ++ stvx $inpperm,r10,$sp ++ addi r10,r10,32 ++ stvx $inpperm,r11,$sp ++ addi r11,r11,32 ++ stvx $inpperm,r10,$sp ++ addi r10,r10,32 ++ stvx $inpperm,r11,$sp ++ addi r11,r11,32 ++ stvx $inpperm,r10,$sp ++ addi r10,r10,32 ++ stvx $inpperm,r11,$sp ++ addi r11,r11,32 ++ ++ mtspr 256,$vrsave ++ lvx v20,r10,$sp # ABI says so ++ addi r10,r10,32 ++ lvx v21,r11,$sp ++ addi r11,r11,32 ++ lvx v22,r10,$sp ++ addi r10,r10,32 ++ lvx v23,r11,$sp ++ addi r11,r11,32 ++ lvx v24,r10,$sp ++ addi r10,r10,32 ++ lvx v25,r11,$sp ++ addi r11,r11,32 ++ lvx v26,r10,$sp ++ addi r10,r10,32 ++ lvx v27,r11,$sp ++ addi r11,r11,32 ++ lvx v28,r10,$sp ++ addi r10,r10,32 ++ lvx v29,r11,$sp ++ addi r11,r11,32 ++ lvx v30,r10,$sp ++ lvx v31,r11,$sp ++ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) ++ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) ++ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) ++ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) ++ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) ++ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) ++ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0x80,6,6,0 ++ .long 0 ++.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt ++___ ++}} }}} ++ ++######################################################################### ++{{{ # CTR procedure[s] # ++my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10)); ++my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); ++my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)= ++ map("v$_",(4..11)); ++my $dat=$tmp; ++ ++$code.=<<___; ++.globl .${prefix}_ctr32_encrypt_blocks ++.align 5 ++.${prefix}_ctr32_encrypt_blocks: ++ ${UCMP}i $len,1 ++ bltlr- ++ ++ lis r0,0xfff0 ++ mfspr $vrsave,256 ++ mtspr 256,r0 ++ ++ li $idx,15 ++ vxor $rndkey0,$rndkey0,$rndkey0 ++ le?vspltisb $tmp,0x0f ++ 
++ lvx $ivec,0,$ivp # load [unaligned] iv ++ lvsl $inpperm,0,$ivp ++ lvx $inptail,$idx,$ivp ++ vspltisb $one,1 ++ le?vxor $inpperm,$inpperm,$tmp ++ vperm $ivec,$ivec,$inptail,$inpperm ++ vsldoi $one,$rndkey0,$one,1 ++ ++ neg r11,$inp ++ ?lvsl $keyperm,0,$key # prepare for unaligned key ++ lwz $rounds,240($key) ++ ++ lvsr $inpperm,0,r11 # prepare for unaligned load ++ lvx $inptail,0,$inp ++ addi $inp,$inp,15 # 15 is not typo ++ le?vxor $inpperm,$inpperm,$tmp ++ ++ srwi $rounds,$rounds,1 ++ li $idx,16 ++ subi $rounds,$rounds,1 ++ ++ ${UCMP}i $len,8 ++ bge _aesp8_ctr32_encrypt8x ++ ++ ?lvsr $outperm,0,$out # prepare for unaligned store ++ vspltisb $outmask,-1 ++ lvx $outhead,0,$out ++ ?vperm $outmask,$rndkey0,$outmask,$outperm ++ le?vxor $outperm,$outperm,$tmp ++ ++ lvx $rndkey0,0,$key ++ mtctr $rounds ++ lvx $rndkey1,$idx,$key ++ addi $idx,$idx,16 ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vxor $inout,$ivec,$rndkey0 ++ lvx $rndkey0,$idx,$key ++ addi $idx,$idx,16 ++ b Loop_ctr32_enc ++ ++.align 5 ++Loop_ctr32_enc: ++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm ++ vcipher $inout,$inout,$rndkey1 ++ lvx $rndkey1,$idx,$key ++ addi $idx,$idx,16 ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vcipher $inout,$inout,$rndkey0 ++ lvx $rndkey0,$idx,$key ++ addi $idx,$idx,16 ++ bdnz Loop_ctr32_enc ++ ++ vadduwm $ivec,$ivec,$one ++ vmr $dat,$inptail ++ lvx $inptail,0,$inp ++ addi $inp,$inp,16 ++ subic. 
$len,$len,1 # blocks-- ++ ++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm ++ vcipher $inout,$inout,$rndkey1 ++ lvx $rndkey1,$idx,$key ++ vperm $dat,$dat,$inptail,$inpperm ++ li $idx,16 ++ ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm ++ lvx $rndkey0,0,$key ++ vxor $dat,$dat,$rndkey1 # last round key ++ vcipherlast $inout,$inout,$dat ++ ++ lvx $rndkey1,$idx,$key ++ addi $idx,$idx,16 ++ vperm $inout,$inout,$inout,$outperm ++ vsel $dat,$outhead,$inout,$outmask ++ mtctr $rounds ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vmr $outhead,$inout ++ vxor $inout,$ivec,$rndkey0 ++ lvx $rndkey0,$idx,$key ++ addi $idx,$idx,16 ++ stvx $dat,0,$out ++ addi $out,$out,16 ++ bne Loop_ctr32_enc ++ ++ addi $out,$out,-1 ++ lvx $inout,0,$out # redundant in aligned case ++ vsel $inout,$outhead,$inout,$outmask ++ stvx $inout,0,$out ++ ++ mtspr 256,$vrsave ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,6,0 ++ .long 0 ++___ ++######################################################################### ++{{ # Optimized CTR procedure # ++my $key_="r11"; ++my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); ++my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14)); ++my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22)); ++my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys ++ # v26-v31 last 6 round keys ++my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment ++my ($two,$three,$four)=($outhead,$outperm,$outmask); ++ ++$code.=<<___; ++.align 5 ++_aesp8_ctr32_encrypt8x: ++ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) ++ li r10,`$FRAME+8*16+15` ++ li r11,`$FRAME+8*16+31` ++ stvx v20,r10,$sp # ABI says so ++ addi r10,r10,32 ++ stvx v21,r11,$sp ++ addi r11,r11,32 ++ stvx v22,r10,$sp ++ addi r10,r10,32 ++ stvx v23,r11,$sp ++ addi r11,r11,32 ++ stvx v24,r10,$sp ++ addi r10,r10,32 ++ stvx v25,r11,$sp ++ addi r11,r11,32 ++ stvx v26,r10,$sp ++ addi r10,r10,32 ++ stvx v27,r11,$sp ++ addi r11,r11,32 ++ stvx v28,r10,$sp 
++ addi r10,r10,32 ++ stvx v29,r11,$sp ++ addi r11,r11,32 ++ stvx v30,r10,$sp ++ stvx v31,r11,$sp ++ li r0,-1 ++ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave ++ li $x10,0x10 ++ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) ++ li $x20,0x20 ++ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) ++ li $x30,0x30 ++ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) ++ li $x40,0x40 ++ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) ++ li $x50,0x50 ++ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) ++ li $x60,0x60 ++ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) ++ li $x70,0x70 ++ mtspr 256,r0 ++ ++ subi $rounds,$rounds,3 # -4 in total ++ ++ lvx $rndkey0,$x00,$key # load key schedule ++ lvx v30,$x10,$key ++ addi $key,$key,0x20 ++ lvx v31,$x00,$key ++ ?vperm $rndkey0,$rndkey0,v30,$keyperm ++ addi $key_,$sp,$FRAME+15 ++ mtctr $rounds ++ ++Load_ctr32_enc_key: ++ ?vperm v24,v30,v31,$keyperm ++ lvx v30,$x10,$key ++ addi $key,$key,0x20 ++ stvx v24,$x00,$key_ # off-load round[1] ++ ?vperm v25,v31,v30,$keyperm ++ lvx v31,$x00,$key ++ stvx v25,$x10,$key_ # off-load round[2] ++ addi $key_,$key_,0x20 ++ bdnz Load_ctr32_enc_key ++ ++ lvx v26,$x10,$key ++ ?vperm v24,v30,v31,$keyperm ++ lvx v27,$x20,$key ++ stvx v24,$x00,$key_ # off-load round[3] ++ ?vperm v25,v31,v26,$keyperm ++ lvx v28,$x30,$key ++ stvx v25,$x10,$key_ # off-load round[4] ++ addi $key_,$sp,$FRAME+15 # rewind $key_ ++ ?vperm v26,v26,v27,$keyperm ++ lvx v29,$x40,$key ++ ?vperm v27,v27,v28,$keyperm ++ lvx v30,$x50,$key ++ ?vperm v28,v28,v29,$keyperm ++ lvx v31,$x60,$key ++ ?vperm v29,v29,v30,$keyperm ++ lvx $out0,$x70,$key # borrow $out0 ++ ?vperm v30,v30,v31,$keyperm ++ lvx v24,$x00,$key_ # pre-load round[1] ++ ?vperm v31,v31,$out0,$keyperm ++ lvx v25,$x10,$key_ # pre-load round[2] ++ ++ vadduwm $two,$one,$one ++ subi $inp,$inp,15 # undo "caller" ++ $SHL $len,$len,4 ++ ++ vadduwm $out1,$ivec,$one # counter values ... ++ vadduwm $out2,$ivec,$two ++ vxor $out0,$ivec,$rndkey0 # ... 
xored with rndkey[0] ++ le?li $idx,8 ++ vadduwm $out3,$out1,$two ++ vxor $out1,$out1,$rndkey0 ++ le?lvsl $inpperm,0,$idx ++ vadduwm $out4,$out2,$two ++ vxor $out2,$out2,$rndkey0 ++ le?vspltisb $tmp,0x0f ++ vadduwm $out5,$out3,$two ++ vxor $out3,$out3,$rndkey0 ++ le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u ++ vadduwm $out6,$out4,$two ++ vxor $out4,$out4,$rndkey0 ++ vadduwm $out7,$out5,$two ++ vxor $out5,$out5,$rndkey0 ++ vadduwm $ivec,$out6,$two # next counter value ++ vxor $out6,$out6,$rndkey0 ++ vxor $out7,$out7,$rndkey0 ++ ++ mtctr $rounds ++ b Loop_ctr32_enc8x ++.align 5 ++Loop_ctr32_enc8x: ++ vcipher $out0,$out0,v24 ++ vcipher $out1,$out1,v24 ++ vcipher $out2,$out2,v24 ++ vcipher $out3,$out3,v24 ++ vcipher $out4,$out4,v24 ++ vcipher $out5,$out5,v24 ++ vcipher $out6,$out6,v24 ++ vcipher $out7,$out7,v24 ++Loop_ctr32_enc8x_middle: ++ lvx v24,$x20,$key_ # round[3] ++ addi $key_,$key_,0x20 ++ ++ vcipher $out0,$out0,v25 ++ vcipher $out1,$out1,v25 ++ vcipher $out2,$out2,v25 ++ vcipher $out3,$out3,v25 ++ vcipher $out4,$out4,v25 ++ vcipher $out5,$out5,v25 ++ vcipher $out6,$out6,v25 ++ vcipher $out7,$out7,v25 ++ lvx v25,$x10,$key_ # round[4] ++ bdnz Loop_ctr32_enc8x ++ ++ subic r11,$len,256 # $len-256, borrow $key_ ++ vcipher $out0,$out0,v24 ++ vcipher $out1,$out1,v24 ++ vcipher $out2,$out2,v24 ++ vcipher $out3,$out3,v24 ++ vcipher $out4,$out4,v24 ++ vcipher $out5,$out5,v24 ++ vcipher $out6,$out6,v24 ++ vcipher $out7,$out7,v24 ++ ++ subfe r0,r0,r0 # borrow?-1:0 ++ vcipher $out0,$out0,v25 ++ vcipher $out1,$out1,v25 ++ vcipher $out2,$out2,v25 ++ vcipher $out3,$out3,v25 ++ vcipher $out4,$out4,v25 ++ vcipher $out5,$out5,v25 ++ vcipher $out6,$out6,v25 ++ vcipher $out7,$out7,v25 ++ ++ and r0,r0,r11 ++ addi $key_,$sp,$FRAME+15 # rewind $key_ ++ vcipher $out0,$out0,v26 ++ vcipher $out1,$out1,v26 ++ vcipher $out2,$out2,v26 ++ vcipher $out3,$out3,v26 ++ vcipher $out4,$out4,v26 ++ vcipher $out5,$out5,v26 ++ vcipher $out6,$out6,v26 ++ vcipher $out7,$out7,v26 ++ lvx 
v24,$x00,$key_ # re-pre-load round[1] ++ ++ subic $len,$len,129 # $len-=129 ++ vcipher $out0,$out0,v27 ++ addi $len,$len,1 # $len-=128 really ++ vcipher $out1,$out1,v27 ++ vcipher $out2,$out2,v27 ++ vcipher $out3,$out3,v27 ++ vcipher $out4,$out4,v27 ++ vcipher $out5,$out5,v27 ++ vcipher $out6,$out6,v27 ++ vcipher $out7,$out7,v27 ++ lvx v25,$x10,$key_ # re-pre-load round[2] ++ ++ vcipher $out0,$out0,v28 ++ lvx_u $in0,$x00,$inp # load input ++ vcipher $out1,$out1,v28 ++ lvx_u $in1,$x10,$inp ++ vcipher $out2,$out2,v28 ++ lvx_u $in2,$x20,$inp ++ vcipher $out3,$out3,v28 ++ lvx_u $in3,$x30,$inp ++ vcipher $out4,$out4,v28 ++ lvx_u $in4,$x40,$inp ++ vcipher $out5,$out5,v28 ++ lvx_u $in5,$x50,$inp ++ vcipher $out6,$out6,v28 ++ lvx_u $in6,$x60,$inp ++ vcipher $out7,$out7,v28 ++ lvx_u $in7,$x70,$inp ++ addi $inp,$inp,0x80 ++ ++ vcipher $out0,$out0,v29 ++ le?vperm $in0,$in0,$in0,$inpperm ++ vcipher $out1,$out1,v29 ++ le?vperm $in1,$in1,$in1,$inpperm ++ vcipher $out2,$out2,v29 ++ le?vperm $in2,$in2,$in2,$inpperm ++ vcipher $out3,$out3,v29 ++ le?vperm $in3,$in3,$in3,$inpperm ++ vcipher $out4,$out4,v29 ++ le?vperm $in4,$in4,$in4,$inpperm ++ vcipher $out5,$out5,v29 ++ le?vperm $in5,$in5,$in5,$inpperm ++ vcipher $out6,$out6,v29 ++ le?vperm $in6,$in6,$in6,$inpperm ++ vcipher $out7,$out7,v29 ++ le?vperm $in7,$in7,$in7,$inpperm ++ ++ add $inp,$inp,r0 # $inp is adjusted in such ++ # way that at exit from the ++ # loop inX-in7 are loaded ++ # with last "words" ++ subfe. r0,r0,r0 # borrow?-1:0 ++ vcipher $out0,$out0,v30 ++ vxor $in0,$in0,v31 # xor with last round key ++ vcipher $out1,$out1,v30 ++ vxor $in1,$in1,v31 ++ vcipher $out2,$out2,v30 ++ vxor $in2,$in2,v31 ++ vcipher $out3,$out3,v30 ++ vxor $in3,$in3,v31 ++ vcipher $out4,$out4,v30 ++ vxor $in4,$in4,v31 ++ vcipher $out5,$out5,v30 ++ vxor $in5,$in5,v31 ++ vcipher $out6,$out6,v30 ++ vxor $in6,$in6,v31 ++ vcipher $out7,$out7,v30 ++ vxor $in7,$in7,v31 ++ ++ bne Lctr32_enc8x_break # did $len-129 borrow? 
++ ++ vcipherlast $in0,$out0,$in0 ++ vcipherlast $in1,$out1,$in1 ++ vadduwm $out1,$ivec,$one # counter values ... ++ vcipherlast $in2,$out2,$in2 ++ vadduwm $out2,$ivec,$two ++ vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] ++ vcipherlast $in3,$out3,$in3 ++ vadduwm $out3,$out1,$two ++ vxor $out1,$out1,$rndkey0 ++ vcipherlast $in4,$out4,$in4 ++ vadduwm $out4,$out2,$two ++ vxor $out2,$out2,$rndkey0 ++ vcipherlast $in5,$out5,$in5 ++ vadduwm $out5,$out3,$two ++ vxor $out3,$out3,$rndkey0 ++ vcipherlast $in6,$out6,$in6 ++ vadduwm $out6,$out4,$two ++ vxor $out4,$out4,$rndkey0 ++ vcipherlast $in7,$out7,$in7 ++ vadduwm $out7,$out5,$two ++ vxor $out5,$out5,$rndkey0 ++ le?vperm $in0,$in0,$in0,$inpperm ++ vadduwm $ivec,$out6,$two # next counter value ++ vxor $out6,$out6,$rndkey0 ++ le?vperm $in1,$in1,$in1,$inpperm ++ vxor $out7,$out7,$rndkey0 ++ mtctr $rounds ++ ++ vcipher $out0,$out0,v24 ++ stvx_u $in0,$x00,$out ++ le?vperm $in2,$in2,$in2,$inpperm ++ vcipher $out1,$out1,v24 ++ stvx_u $in1,$x10,$out ++ le?vperm $in3,$in3,$in3,$inpperm ++ vcipher $out2,$out2,v24 ++ stvx_u $in2,$x20,$out ++ le?vperm $in4,$in4,$in4,$inpperm ++ vcipher $out3,$out3,v24 ++ stvx_u $in3,$x30,$out ++ le?vperm $in5,$in5,$in5,$inpperm ++ vcipher $out4,$out4,v24 ++ stvx_u $in4,$x40,$out ++ le?vperm $in6,$in6,$in6,$inpperm ++ vcipher $out5,$out5,v24 ++ stvx_u $in5,$x50,$out ++ le?vperm $in7,$in7,$in7,$inpperm ++ vcipher $out6,$out6,v24 ++ stvx_u $in6,$x60,$out ++ vcipher $out7,$out7,v24 ++ stvx_u $in7,$x70,$out ++ addi $out,$out,0x80 ++ ++ b Loop_ctr32_enc8x_middle ++ ++.align 5 ++Lctr32_enc8x_break: ++ cmpwi $len,-0x60 ++ blt Lctr32_enc8x_one ++ nop ++ beq Lctr32_enc8x_two ++ cmpwi $len,-0x40 ++ blt Lctr32_enc8x_three ++ nop ++ beq Lctr32_enc8x_four ++ cmpwi $len,-0x20 ++ blt Lctr32_enc8x_five ++ nop ++ beq Lctr32_enc8x_six ++ cmpwi $len,0x00 ++ blt Lctr32_enc8x_seven ++ ++Lctr32_enc8x_eight: ++ vcipherlast $out0,$out0,$in0 ++ vcipherlast $out1,$out1,$in1 ++ vcipherlast $out2,$out2,$in2 ++ 
vcipherlast $out3,$out3,$in3 ++ vcipherlast $out4,$out4,$in4 ++ vcipherlast $out5,$out5,$in5 ++ vcipherlast $out6,$out6,$in6 ++ vcipherlast $out7,$out7,$in7 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ le?vperm $out1,$out1,$out1,$inpperm ++ stvx_u $out0,$x00,$out ++ le?vperm $out2,$out2,$out2,$inpperm ++ stvx_u $out1,$x10,$out ++ le?vperm $out3,$out3,$out3,$inpperm ++ stvx_u $out2,$x20,$out ++ le?vperm $out4,$out4,$out4,$inpperm ++ stvx_u $out3,$x30,$out ++ le?vperm $out5,$out5,$out5,$inpperm ++ stvx_u $out4,$x40,$out ++ le?vperm $out6,$out6,$out6,$inpperm ++ stvx_u $out5,$x50,$out ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out6,$x60,$out ++ stvx_u $out7,$x70,$out ++ addi $out,$out,0x80 ++ b Lctr32_enc8x_done ++ ++.align 5 ++Lctr32_enc8x_seven: ++ vcipherlast $out0,$out0,$in1 ++ vcipherlast $out1,$out1,$in2 ++ vcipherlast $out2,$out2,$in3 ++ vcipherlast $out3,$out3,$in4 ++ vcipherlast $out4,$out4,$in5 ++ vcipherlast $out5,$out5,$in6 ++ vcipherlast $out6,$out6,$in7 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ le?vperm $out1,$out1,$out1,$inpperm ++ stvx_u $out0,$x00,$out ++ le?vperm $out2,$out2,$out2,$inpperm ++ stvx_u $out1,$x10,$out ++ le?vperm $out3,$out3,$out3,$inpperm ++ stvx_u $out2,$x20,$out ++ le?vperm $out4,$out4,$out4,$inpperm ++ stvx_u $out3,$x30,$out ++ le?vperm $out5,$out5,$out5,$inpperm ++ stvx_u $out4,$x40,$out ++ le?vperm $out6,$out6,$out6,$inpperm ++ stvx_u $out5,$x50,$out ++ stvx_u $out6,$x60,$out ++ addi $out,$out,0x70 ++ b Lctr32_enc8x_done ++ ++.align 5 ++Lctr32_enc8x_six: ++ vcipherlast $out0,$out0,$in2 ++ vcipherlast $out1,$out1,$in3 ++ vcipherlast $out2,$out2,$in4 ++ vcipherlast $out3,$out3,$in5 ++ vcipherlast $out4,$out4,$in6 ++ vcipherlast $out5,$out5,$in7 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ le?vperm $out1,$out1,$out1,$inpperm ++ stvx_u $out0,$x00,$out ++ le?vperm $out2,$out2,$out2,$inpperm ++ stvx_u $out1,$x10,$out ++ le?vperm $out3,$out3,$out3,$inpperm ++ stvx_u $out2,$x20,$out ++ le?vperm $out4,$out4,$out4,$inpperm ++ 
stvx_u $out3,$x30,$out ++ le?vperm $out5,$out5,$out5,$inpperm ++ stvx_u $out4,$x40,$out ++ stvx_u $out5,$x50,$out ++ addi $out,$out,0x60 ++ b Lctr32_enc8x_done ++ ++.align 5 ++Lctr32_enc8x_five: ++ vcipherlast $out0,$out0,$in3 ++ vcipherlast $out1,$out1,$in4 ++ vcipherlast $out2,$out2,$in5 ++ vcipherlast $out3,$out3,$in6 ++ vcipherlast $out4,$out4,$in7 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ le?vperm $out1,$out1,$out1,$inpperm ++ stvx_u $out0,$x00,$out ++ le?vperm $out2,$out2,$out2,$inpperm ++ stvx_u $out1,$x10,$out ++ le?vperm $out3,$out3,$out3,$inpperm ++ stvx_u $out2,$x20,$out ++ le?vperm $out4,$out4,$out4,$inpperm ++ stvx_u $out3,$x30,$out ++ stvx_u $out4,$x40,$out ++ addi $out,$out,0x50 ++ b Lctr32_enc8x_done ++ ++.align 5 ++Lctr32_enc8x_four: ++ vcipherlast $out0,$out0,$in4 ++ vcipherlast $out1,$out1,$in5 ++ vcipherlast $out2,$out2,$in6 ++ vcipherlast $out3,$out3,$in7 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ le?vperm $out1,$out1,$out1,$inpperm ++ stvx_u $out0,$x00,$out ++ le?vperm $out2,$out2,$out2,$inpperm ++ stvx_u $out1,$x10,$out ++ le?vperm $out3,$out3,$out3,$inpperm ++ stvx_u $out2,$x20,$out ++ stvx_u $out3,$x30,$out ++ addi $out,$out,0x40 ++ b Lctr32_enc8x_done ++ ++.align 5 ++Lctr32_enc8x_three: ++ vcipherlast $out0,$out0,$in5 ++ vcipherlast $out1,$out1,$in6 ++ vcipherlast $out2,$out2,$in7 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ le?vperm $out1,$out1,$out1,$inpperm ++ stvx_u $out0,$x00,$out ++ le?vperm $out2,$out2,$out2,$inpperm ++ stvx_u $out1,$x10,$out ++ stvx_u $out2,$x20,$out ++ addi $out,$out,0x30 ++ b Lctr32_enc8x_done # CTR exit, no iv write-back ++ ++.align 5 ++Lctr32_enc8x_two: ++ vcipherlast $out0,$out0,$in6 ++ vcipherlast $out1,$out1,$in7 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ le?vperm $out1,$out1,$out1,$inpperm ++ stvx_u $out0,$x00,$out ++ stvx_u $out1,$x10,$out ++ addi $out,$out,0x20 ++ b Lctr32_enc8x_done # CTR exit, no iv write-back ++ ++.align 5 ++Lctr32_enc8x_one: ++ vcipherlast $out0,$out0,$in7 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ stvx_u $out0,0,$out ++ addi
$out,$out,0x10 ++ ++Lctr32_enc8x_done: ++ li r10,`$FRAME+15` ++ li r11,`$FRAME+31` ++ stvx $inpperm,r10,$sp # wipe copies of round keys ++ addi r10,r10,32 ++ stvx $inpperm,r11,$sp ++ addi r11,r11,32 ++ stvx $inpperm,r10,$sp ++ addi r10,r10,32 ++ stvx $inpperm,r11,$sp ++ addi r11,r11,32 ++ stvx $inpperm,r10,$sp ++ addi r10,r10,32 ++ stvx $inpperm,r11,$sp ++ addi r11,r11,32 ++ stvx $inpperm,r10,$sp ++ addi r10,r10,32 ++ stvx $inpperm,r11,$sp ++ addi r11,r11,32 ++ ++ mtspr 256,$vrsave ++ lvx v20,r10,$sp # ABI says so ++ addi r10,r10,32 ++ lvx v21,r11,$sp ++ addi r11,r11,32 ++ lvx v22,r10,$sp ++ addi r10,r10,32 ++ lvx v23,r11,$sp ++ addi r11,r11,32 ++ lvx v24,r10,$sp ++ addi r10,r10,32 ++ lvx v25,r11,$sp ++ addi r11,r11,32 ++ lvx v26,r10,$sp ++ addi r10,r10,32 ++ lvx v27,r11,$sp ++ addi r11,r11,32 ++ lvx v28,r10,$sp ++ addi r10,r10,32 ++ lvx v29,r11,$sp ++ addi r11,r11,32 ++ lvx v30,r10,$sp ++ lvx v31,r11,$sp ++ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) ++ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) ++ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) ++ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) ++ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) ++ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) ++ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0x80,6,6,0 ++ .long 0 ++.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks ++___ ++}} }}} ++ ++my $consts=1; ++foreach(split("\n",$code)) { ++ s/\`([^\`]*)\`/eval($1)/geo; ++ ++ # constants table endian-specific conversion ++ if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) { ++ my $conv=$3; ++ my @bytes=(); ++ ++ # convert to endian-agnostic format ++ if ($1 eq "long") { ++ foreach (split(/,\s*/,$2)) { ++ my $l = /^0/?oct:int; ++ push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; ++ } ++ } else { ++ @bytes = map(/^0/?oct:int,split(/,\s*/,$2)); ++ } ++ ++ # little-endian conversion ++ if ($flavour =~ /le$/o) { ++ SWITCH: for($conv) { ++ /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; ++ /\?rev/ && 
do { @bytes=reverse(@bytes); last; }; ++ } ++ } ++ ++ #emit ++ print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n"; ++ next; ++ } ++ $consts=0 if (m/Lconsts:/o); # end of table ++ ++ # instructions prefixed with '?' are endian-specific and need ++ # to be adjusted accordingly... ++ if ($flavour =~ /le$/o) { # little-endian ++ s/le\?//o or ++ s/be\?/#be#/o or ++ s/\?lvsr/lvsl/o or ++ s/\?lvsl/lvsr/o or ++ s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or ++ s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or ++ s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o; ++ } else { # big-endian ++ s/le\?/#le#/o or ++ s/be\?//o or ++ s/\?([a-z]+)/$1/o; ++ } ++ ++ print $_,"\n"; ++} ++ ++close STDOUT; +diff --git a/crypto/aes/asm/vpaes-ppc.pl b/crypto/aes/asm/vpaes-ppc.pl +new file mode 100644 +index 0000000..7fda60e +--- /dev/null ++++ b/crypto/aes/asm/vpaes-ppc.pl +@@ -0,0 +1,1512 @@ ++#!/usr/bin/env perl ++ ++###################################################################### ++## Constant-time SSSE3 AES core implementation. ++## version 0.1 ++## ++## By Mike Hamburg (Stanford University), 2009 ++## Public domain. ++## ++## For details see http://shiftleft.org/papers/vector_aes/ and ++## http://crypto.stanford.edu/vpaes/. ++ ++# CBC encrypt/decrypt performance in cycles per byte processed with ++# 128-bit key. ++# ++# aes-ppc.pl this ++# G4e 35.5/52.1/(23.8) 11.9(*)/15.4 ++# POWER6 42.7/54.3/(28.2) 63.0/92.8(**) ++# POWER7 32.3/42.9/(18.4) 18.5/23.3 ++# ++# (*) This is ~10% worse than reported in paper. The reason is ++# twofold. This module doesn't make any assumption about ++# key schedule (or data for that matter) alignment and handles ++# it in-line. Secondly it, being transliterated from ++# vpaes-x86_64.pl, relies on "nested inversion" better suited ++# for Intel CPUs. ++# (**) Inadequate POWER6 performance is due to astronomic AltiVec ++# latency, 9 cycles per simple logical operation. 
++ ++$flavour = shift; ++ ++if ($flavour =~ /64/) { ++ $SIZE_T =8; ++ $LRSAVE =2*$SIZE_T; ++ $STU ="stdu"; ++ $POP ="ld"; ++ $PUSH ="std"; ++ $UCMP ="cmpld"; ++} elsif ($flavour =~ /32/) { ++ $SIZE_T =4; ++ $LRSAVE =$SIZE_T; ++ $STU ="stwu"; ++ $POP ="lwz"; ++ $PUSH ="stw"; ++ $UCMP ="cmplw"; ++} else { die "nonsense $flavour"; } ++ ++$sp="r1"; ++$FRAME=6*$SIZE_T+13*16; # 13*16 is for v20-v31 offload ++ ++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ++( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or ++( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or ++die "can't locate ppc-xlate.pl"; ++ ++open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; ++ ++$code.=<<___; ++.machine "any" ++ ++.text ++ ++.align 7 # totally strategic alignment ++_vpaes_consts: ++Lk_mc_forward: # mc_forward ++ .long 0x01020300, 0x05060704, 0x090a0b08, 0x0d0e0f0c ?inv ++ .long 0x05060704, 0x090a0b08, 0x0d0e0f0c, 0x01020300 ?inv ++ .long 0x090a0b08, 0x0d0e0f0c, 0x01020300, 0x05060704 ?inv ++ .long 0x0d0e0f0c, 0x01020300, 0x05060704, 0x090a0b08 ?inv ++Lk_mc_backward: # mc_backward ++ .long 0x03000102, 0x07040506, 0x0b08090a, 0x0f0c0d0e ?inv ++ .long 0x0f0c0d0e, 0x03000102, 0x07040506, 0x0b08090a ?inv ++ .long 0x0b08090a, 0x0f0c0d0e, 0x03000102, 0x07040506 ?inv ++ .long 0x07040506, 0x0b08090a, 0x0f0c0d0e, 0x03000102 ?inv ++Lk_sr: # sr ++ .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f ?inv ++ .long 0x00050a0f, 0x04090e03, 0x080d0207, 0x0c01060b ?inv ++ .long 0x0009020b, 0x040d060f, 0x08010a03, 0x0c050e07 ?inv ++ .long 0x000d0a07, 0x04010e0b, 0x0805020f, 0x0c090603 ?inv ++ ++## ++## "Hot" constants ++## ++Lk_inv: # inv, inva ++ .long 0xf001080d, 0x0f06050e, 0x020c0b0a, 0x09030704 ?rev ++ .long 0xf0070b0f, 0x060a0401, 0x09080502, 0x0c0e0d03 ?rev ++Lk_ipt: # input transform (lo, hi) ++ .long 0x00702a5a, 0x98e8b2c2, 0x08782252, 0x90e0baca ?rev ++ .long 0x004d7c31, 0x7d30014c, 0x81ccfdb0, 0xfcb180cd ?rev ++Lk_sbo: # sbou, sbot ++ .long 0x00c7bd6f, 0x176dd2d0, 0x78a802c5, 
0x7abfaa15 ?rev ++ .long 0x006abb5f, 0xa574e4cf, 0xfa352b41, 0xd1901e8e ?rev ++Lk_sb1: # sb1u, sb1t ++ .long 0x0023e2fa, 0x15d41836, 0xefd92e0d, 0xc1ccf73b ?rev ++ .long 0x003e50cb, 0x8fe19bb1, 0x44f52a14, 0x6e7adfa5 ?rev ++Lk_sb2: # sb2u, sb2t ++ .long 0x0029e10a, 0x4088eb69, 0x4a2382ab, 0xc863a1c2 ?rev ++ .long 0x0024710b, 0xc6937ae2, 0xcd2f98bc, 0x55e9b75e ?rev ++ ++## ++## Decryption stuff ++## ++Lk_dipt: # decryption input transform ++ .long 0x005f540b, 0x045b500f, 0x1a454e11, 0x1e414a15 ?rev ++ .long 0x00650560, 0xe683e386, 0x94f191f4, 0x72177712 ?rev ++Lk_dsbo: # decryption sbox final output ++ .long 0x0040f97e, 0x53ea8713, 0x2d3e94d4, 0xb96daac7 ?rev ++ .long 0x001d4493, 0x0f56d712, 0x9c8ec5d8, 0x59814bca ?rev ++Lk_dsb9: # decryption sbox output *9*u, *9*t ++ .long 0x00d6869a, 0x53031c85, 0xc94c994f, 0x501fd5ca ?rev ++ .long 0x0049d7ec, 0x89173bc0, 0x65a5fbb2, 0x9e2c5e72 ?rev ++Lk_dsbd: # decryption sbox output *D*u, *D*t ++ .long 0x00a2b1e6, 0xdfcc577d, 0x39442a88, 0x139b6ef5 ?rev ++ .long 0x00cbc624, 0xf7fae23c, 0xd3efde15, 0x0d183129 ?rev ++Lk_dsbb: # decryption sbox output *B*u, *B*t ++ .long 0x0042b496, 0x926422d0, 0x04d4f2b0, 0xf6462660 ?rev ++ .long 0x006759cd, 0xa69894c1, 0x6baa5532, 0x3e0cfff3 ?rev ++Lk_dsbe: # decryption sbox output *E*u, *E*t ++ .long 0x00d0d426, 0x9692f246, 0xb0f6b464, 0x04604222 ?rev ++ .long 0x00c1aaff, 0xcda6550c, 0x323e5998, 0x6bf36794 ?rev ++ ++## ++## Key schedule constants ++## ++Lk_dksd: # decryption key schedule: invskew x*D ++ .long 0x0047e4a3, 0x5d1ab9fe, 0xf9be1d5a, 0xa4e34007 ?rev ++ .long 0x008336b5, 0xf477c241, 0x1e9d28ab, 0xea69dc5f ?rev ++Lk_dksb: # decryption key schedule: invskew x*B ++ .long 0x00d55085, 0x1fca4f9a, 0x994cc91c, 0x8653d603 ?rev ++ .long 0x004afcb6, 0xa7ed5b11, 0xc882347e, 0x6f2593d9 ?rev ++Lk_dkse: # decryption key schedule: invskew x*E + 0x63 ++ .long 0x00d6c91f, 0xca1c03d5, 0x86504f99, 0x4c9a8553 ?rev ++ .long 0xe87bdc4f, 0x059631a2, 0x8714b320, 0x6af95ecd ?rev ++Lk_dks9: # decryption key 
schedule: invskew x*9 ++ .long 0x00a7d97e, 0xc86f11b6, 0xfc5b2582, 0x3493ed4a ?rev ++ .long 0x00331427, 0x62517645, 0xcefddae9, 0xac9fb88b ?rev ++ ++Lk_rcon: # rcon ++ .long 0xb6ee9daf, 0xb991831f, 0x817d7c4d, 0x08982a70 ?asis ++Lk_s63: ++ .long 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b ?asis ++ ++Lk_opt: # output transform ++ .long 0x0060b6d6, 0x29499fff, 0x0868bede, 0x214197f7 ?rev ++ .long 0x00ecbc50, 0x51bded01, 0xe00c5cb0, 0xb15d0de1 ?rev ++Lk_deskew: # deskew tables: inverts the sbox's "skew" ++ .long 0x00e3a447, 0x40a3e407, 0x1af9be5d, 0x5ab9fe1d ?rev ++ .long 0x0069ea83, 0xdcb5365f, 0x771e9df4, 0xabc24128 ?rev ++.align 5 ++Lconsts: ++ mflr r0 ++ bcl 20,31,\$+4 ++ mflr r12 #vvvvv "distance between . and _vpaes_consts ++ addi r12,r12,-0x308 ++ mtlr r0 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++.asciz "Vector Permutation AES for AltiVec, Mike Hamburg (Stanford University)" ++.align 6 ++___ ++ ++my ($inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm) = map("v$_",(26..31)); ++{ ++my ($inp,$out,$key) = map("r$_",(3..5)); ++ ++my ($invlo,$invhi,$iptlo,$ipthi,$sbou,$sbot) = map("v$_",(10..15)); ++my ($sb1u,$sb1t,$sb2u,$sb2t) = map("v$_",(16..19)); ++my ($sb9u,$sb9t,$sbdu,$sbdt,$sbbu,$sbbt,$sbeu,$sbet)=map("v$_",(16..23)); ++ ++$code.=<<___; ++## ++## _aes_preheat ++## ++## Fills register %r10 -> .aes_consts (so you can -fPIC) ++## and %xmm9-%xmm15 as specified below. 
++## ++.align 4 ++_vpaes_encrypt_preheat: ++ mflr r8 ++ bl Lconsts ++ mtlr r8 ++ li r11, 0xc0 # Lk_inv ++ li r10, 0xd0 ++ li r9, 0xe0 # Lk_ipt ++ li r8, 0xf0 ++ vxor v7, v7, v7 # 0x00..00 ++ vspltisb v8,4 # 0x04..04 ++ vspltisb v9,0x0f # 0x0f..0f ++ lvx $invlo, r12, r11 ++ li r11, 0x100 ++ lvx $invhi, r12, r10 ++ li r10, 0x110 ++ lvx $iptlo, r12, r9 ++ li r9, 0x120 ++ lvx $ipthi, r12, r8 ++ li r8, 0x130 ++ lvx $sbou, r12, r11 ++ li r11, 0x140 ++ lvx $sbot, r12, r10 ++ li r10, 0x150 ++ lvx $sb1u, r12, r9 ++ lvx $sb1t, r12, r8 ++ lvx $sb2u, r12, r11 ++ lvx $sb2t, r12, r10 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++## ++## _aes_encrypt_core ++## ++## AES-encrypt %xmm0. ++## ++## Inputs: ++## %xmm0 = input ++## %xmm9-%xmm15 as in _vpaes_preheat ++## (%rdx) = scheduled keys ++## ++## Output in %xmm0 ++## Clobbers %xmm1-%xmm6, %r9, %r10, %r11, %rax ++## ++## ++.align 5 ++_vpaes_encrypt_core: ++ lwz r8, 240($key) # pull rounds ++ li r9, 16 ++ lvx v5, 0, $key # vmovdqu (%r9), %xmm5 # round0 key ++ li r11, 0x10 ++ lvx v6, r9, $key ++ addi r9, r9, 16 ++ ?vperm v5, v5, v6, $keyperm # align round key ++ addi r10, r11, 0x40 ++ vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 ++ vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm1 ++ vperm v1, $ipthi, $ipthi, v1 # vpshufb %xmm0, %xmm3, %xmm2 ++ vxor v0, v0, v5 # vpxor %xmm5, %xmm1, %xmm0 ++ vxor v0, v0, v1 # vpxor %xmm2, %xmm0, %xmm0 ++ mtctr r8 ++ b Lenc_entry ++ ++.align 4 ++Lenc_loop: ++ # middle of middle round ++ vperm v4, $sb1t, v7, v2 # vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u ++ lvx v1, r12, r11 # vmovdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[] ++ addi r11, r11, 16 ++ vperm v0, $sb1u, v7, v3 # vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t ++ vxor v4, v4, v5 # vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k ++ andi. r11, r11, 0x30 # and \$0x30, %r11 # ... 
mod 4 ++ vperm v5, $sb2t, v7, v2 # vpshufb %xmm2, %xmm15, %xmm5 # 4 = sb2u ++ vxor v0, v0, v4 # vpxor %xmm4, %xmm0, %xmm0 # 0 = A ++ vperm v2, $sb2u, v7, v3 # vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t ++ lvx v4, r12, r10 # vmovdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[] ++ addi r10, r11, 0x40 ++ vperm v3, v0, v7, v1 # vpshufb %xmm1, %xmm0, %xmm3 # 0 = B ++ vxor v2, v2, v5 # vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A ++ vperm v0, v0, v7, v4 # vpshufb %xmm4, %xmm0, %xmm0 # 3 = D ++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 0 = 2A+B ++ vperm v4, v3, v7, v1 # vpshufb %xmm1, %xmm3, %xmm4 # 0 = 2B+C ++ vxor v0, v0, v3 # vpxor %xmm3, %xmm0, %xmm0 # 3 = 2A+B+D ++ vxor v0, v0, v4 # vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D ++ ++Lenc_entry: ++ # top of round ++ vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i ++ vperm v5, $invhi, $invhi, v0 # vpshufb %xmm1, %xmm11, %xmm5 # 2 = a/k ++ vxor v0, v0, v1 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j ++ vperm v3, $invlo, $invlo, v1 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i ++ vperm v4, $invlo, $invlo, v0 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j ++ vand v0, v0, v9 ++ vxor v3, v3, v5 # vpxor %xmm5, %xmm3, %xmm3 # 3 = iak = 1/i + a/k ++ vxor v4, v4, v5 # vpxor %xmm5, %xmm4, %xmm4 # 4 = jak = 1/j + a/k ++ vperm v2, $invlo, v7, v3 # vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak ++ vmr v5, v6 ++ lvx v6, r9, $key # vmovdqu (%r9), %xmm5 ++ vperm v3, $invlo, v7, v4 # vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak ++ addi r9, r9, 16 ++ vxor v2, v2, v0 # vpxor %xmm1, %xmm2, %xmm2 # 2 = io ++ ?vperm v5, v5, v6, $keyperm # align round key ++ vxor v3, v3, v1 # vpxor %xmm0, %xmm3, %xmm3 # 3 = jo ++ bdnz Lenc_loop ++ ++ # middle of last round ++ addi r10, r11, 0x80 ++ # vmovdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo ++ # vmovdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16 ++ vperm v4, $sbou, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou ++ lvx v1, r12, r10 # vmovdqa 0x40(%r11,%r10), %xmm1 # .Lk_sr[] ++ vperm v0, $sbot, v7, v3 # vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t ++ vxor 
v4, v4, v5 # vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k ++ vxor v0, v0, v4 # vpxor %xmm4, %xmm0, %xmm0 # 0 = A ++ vperm v0, v0, v7, v1 # vpshufb %xmm1, %xmm0, %xmm0 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++.globl .vpaes_encrypt ++.align 5 ++.vpaes_encrypt: ++ $STU $sp,-$FRAME($sp) ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mflr r6 ++ mfspr r7, 256 # save vrsave ++ stvx v20,r10,$sp ++ addi r10,r10,32 ++ stvx v21,r11,$sp ++ addi r11,r11,32 ++ stvx v22,r10,$sp ++ addi r10,r10,32 ++ stvx v23,r11,$sp ++ addi r11,r11,32 ++ stvx v24,r10,$sp ++ addi r10,r10,32 ++ stvx v25,r11,$sp ++ addi r11,r11,32 ++ stvx v26,r10,$sp ++ addi r10,r10,32 ++ stvx v27,r11,$sp ++ addi r11,r11,32 ++ stvx v28,r10,$sp ++ addi r10,r10,32 ++ stvx v29,r11,$sp ++ addi r11,r11,32 ++ stvx v30,r10,$sp ++ stvx v31,r11,$sp ++ stw r7,`$FRAME-4`($sp) # save vrsave ++ li r0, -1 ++ $PUSH r6,`$FRAME+$LRSAVE`($sp) ++ mtspr 256, r0 # preserve all AltiVec registers ++ ++ bl _vpaes_encrypt_preheat ++ ++ ?lvsl $inpperm, 0, $inp # prepare for unaligned access ++ lvx v0, 0, $inp ++ addi $inp, $inp, 15 # 15 is not a typo ++ ?lvsr $outperm, 0, $out ++ ?lvsl $keyperm, 0, $key # prepare for unaligned access ++ vnor $outmask, v7, v7 # 0xff..ff ++ lvx $inptail, 0, $inp # redundant in aligned case ++ ?vperm $outmask, v7, $outmask, $outperm ++ lvx $outhead, 0, $out ++ ?vperm v0, v0, $inptail, $inpperm ++ ++ bl _vpaes_encrypt_core ++ ++ vperm v0, v0, v0, $outperm # rotate right/left ++ vsel v1, $outhead, v0, $outmask ++ vmr $outhead, v0 ++ stvx v1, 0, $out ++ addi $out, $out, 15 # 15 is not a typo ++ ######## ++ ++ lvx v1, 0, $out # redundant in aligned case ++ vsel v1, $outhead, v1, $outmask ++ stvx v1, 0, $out ++ ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mtlr r6 ++ mtspr 256, r7 # restore vrsave ++ lvx v20,r10,$sp ++ addi r10,r10,32 ++ lvx v21,r11,$sp ++ addi r11,r11,32 ++ lvx v22,r10,$sp ++ addi r10,r10,32 ++ lvx v23,r11,$sp ++ addi r11,r11,32 ++ lvx v24,r10,$sp ++ addi r10,r10,32 ++ lvx 
v25,r11,$sp ++ addi r11,r11,32 ++ lvx v26,r10,$sp ++ addi r10,r10,32 ++ lvx v27,r11,$sp ++ addi r11,r11,32 ++ lvx v28,r10,$sp ++ addi r10,r10,32 ++ lvx v29,r11,$sp ++ addi r11,r11,32 ++ lvx v30,r10,$sp ++ lvx v31,r11,$sp ++ addi $sp,$sp,$FRAME ++ blr ++ .long 0 ++ .byte 0,12,0x04,1,0x80,0,3,0 ++ .long 0 ++.size .vpaes_encrypt,.-.vpaes_encrypt ++ ++.align 4 ++_vpaes_decrypt_preheat: ++ mflr r8 ++ bl Lconsts ++ mtlr r8 ++ li r11, 0xc0 # Lk_inv ++ li r10, 0xd0 ++ li r9, 0x160 # Ldipt ++ li r8, 0x170 ++ vxor v7, v7, v7 # 0x00..00 ++ vspltisb v8,4 # 0x04..04 ++ vspltisb v9,0x0f # 0x0f..0f ++ lvx $invlo, r12, r11 ++ li r11, 0x180 ++ lvx $invhi, r12, r10 ++ li r10, 0x190 ++ lvx $iptlo, r12, r9 ++ li r9, 0x1a0 ++ lvx $ipthi, r12, r8 ++ li r8, 0x1b0 ++ lvx $sbou, r12, r11 ++ li r11, 0x1c0 ++ lvx $sbot, r12, r10 ++ li r10, 0x1d0 ++ lvx $sb9u, r12, r9 ++ li r9, 0x1e0 ++ lvx $sb9t, r12, r8 ++ li r8, 0x1f0 ++ lvx $sbdu, r12, r11 ++ li r11, 0x200 ++ lvx $sbdt, r12, r10 ++ li r10, 0x210 ++ lvx $sbbu, r12, r9 ++ lvx $sbbt, r12, r8 ++ lvx $sbeu, r12, r11 ++ lvx $sbet, r12, r10 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++## ++## Decryption core ++## ++## Same API as encryption core. 
++## ++.align 4 ++_vpaes_decrypt_core: ++ lwz r8, 240($key) # pull rounds ++ li r9, 16 ++ lvx v5, 0, $key # vmovdqu (%r9), %xmm4 # round0 key ++ li r11, 0x30 ++ lvx v6, r9, $key ++ addi r9, r9, 16 ++ ?vperm v5, v5, v6, $keyperm # align round key ++ vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 ++ vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm2 ++ vperm v1, $ipthi, $ipthi, v1 # vpshufb %xmm0, %xmm1, %xmm0 ++ vxor v0, v0, v5 # vpxor %xmm4, %xmm2, %xmm2 ++ vxor v0, v0, v1 # vpxor %xmm2, %xmm0, %xmm0 ++ mtctr r8 ++ b Ldec_entry ++ ++.align 4 ++Ldec_loop: ++# ++# Inverse mix columns ++# ++ lvx v0, r12, r11 # v5 and v0 are flipped ++ # vmovdqa -0x20(%r10),%xmm4 # 4 : sb9u ++ # vmovdqa -0x10(%r10),%xmm1 # 0 : sb9t ++ vperm v4, $sb9u, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sb9u ++ subi r11, r11, 16 ++ vperm v1, $sb9t, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb9t ++ andi. r11, r11, 0x30 ++ vxor v5, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 ++ # vmovdqa 0x00(%r10),%xmm4 # 4 : sbdu ++ vxor v5, v5, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch ++ # vmovdqa 0x10(%r10),%xmm1 # 0 : sbdt ++ ++ vperm v4, $sbdu, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbdu ++ vperm v5, v5, v7, v0 # vpshufb %xmm5, %xmm0, %xmm0 # MC ch ++ vperm v1, $sbdt, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbdt ++ vxor v5, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 # 4 = ch ++ # vmovdqa 0x20(%r10), %xmm4 # 4 : sbbu ++ vxor v5, v5, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch ++ # vmovdqa 0x30(%r10), %xmm1 # 0 : sbbt ++ ++ vperm v4, $sbbu, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbbu ++ vperm v5, v5, v7, v0 # vpshufb %xmm5, %xmm0, %xmm0 # MC ch ++ vperm v1, $sbbt, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbbt ++ vxor v5, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 # 4 = ch ++ # vmovdqa 0x40(%r10), %xmm4 # 4 : sbeu ++ vxor v5, v5, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch ++ # vmovdqa 0x50(%r10), %xmm1 # 0 : sbet ++ ++ vperm v4, $sbeu, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbeu ++ vperm v5, v5, v7, v0 # vpshufb %xmm5, 
%xmm0, %xmm0 # MC ch ++ vperm v1, $sbet, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbet ++ vxor v0, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 # 4 = ch ++ vxor v0, v0, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch ++ ++Ldec_entry: ++ # top of round ++ vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i ++ vperm v2, $invhi, $invhi, v0 # vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k ++ vxor v0, v0, v1 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j ++ vperm v3, $invlo, $invlo, v1 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i ++ vperm v4, $invlo, $invlo, v0 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j ++ vand v0, v0, v9 ++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k ++ vxor v4, v4, v2 # vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k ++ vperm v2, $invlo, v7, v3 # vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak ++ vmr v5, v6 ++ lvx v6, r9, $key # vmovdqu (%r9), %xmm0 ++ vperm v3, $invlo, v7, v4 # vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak ++ addi r9, r9, 16 ++ vxor v2, v2, v0 # vpxor %xmm1, %xmm2, %xmm2 # 2 = io ++ ?vperm v5, v5, v6, $keyperm # align round key ++ vxor v3, v3, v1 # vpxor %xmm0, %xmm3, %xmm3 # 3 = jo ++ bdnz Ldec_loop ++ ++ # middle of last round ++ addi r10, r11, 0x80 ++ # vmovdqa 0x60(%r10), %xmm4 # 3 : sbou ++ vperm v4, $sbou, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou ++ # vmovdqa 0x70(%r10), %xmm1 # 0 : sbot ++ lvx v2, r12, r10 # vmovdqa -0x160(%r11), %xmm2 # .Lk_sr-.Lk_dsbd=-0x160 ++ vperm v1, $sbot, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb1t ++ vxor v4, v4, v5 # vpxor %xmm0, %xmm4, %xmm4 # 4 = sb1u + k ++ vxor v0, v1, v4 # vpxor %xmm4, %xmm1, %xmm0 # 0 = A ++ vperm v0, v0, v7, v2 # vpshufb %xmm2, %xmm0, %xmm0 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++.globl .vpaes_decrypt ++.align 5 ++.vpaes_decrypt: ++ $STU $sp,-$FRAME($sp) ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mflr r6 ++ mfspr r7, 256 # save vrsave ++ stvx v20,r10,$sp ++ addi r10,r10,32 ++ stvx v21,r11,$sp ++ addi r11,r11,32 ++ stvx v22,r10,$sp ++ addi r10,r10,32 ++ stvx v23,r11,$sp ++ 
addi r11,r11,32 ++ stvx v24,r10,$sp ++ addi r10,r10,32 ++ stvx v25,r11,$sp ++ addi r11,r11,32 ++ stvx v26,r10,$sp ++ addi r10,r10,32 ++ stvx v27,r11,$sp ++ addi r11,r11,32 ++ stvx v28,r10,$sp ++ addi r10,r10,32 ++ stvx v29,r11,$sp ++ addi r11,r11,32 ++ stvx v30,r10,$sp ++ stvx v31,r11,$sp ++ stw r7,`$FRAME-4`($sp) # save vrsave ++ li r0, -1 ++ $PUSH r6,`$FRAME+$LRSAVE`($sp) ++ mtspr 256, r0 # preserve all AltiVec registers ++ ++ bl _vpaes_decrypt_preheat ++ ++ ?lvsl $inpperm, 0, $inp # prepare for unaligned access ++ lvx v0, 0, $inp ++ addi $inp, $inp, 15 # 15 is not a typo ++ ?lvsr $outperm, 0, $out ++ ?lvsl $keyperm, 0, $key ++ vnor $outmask, v7, v7 # 0xff..ff ++ lvx $inptail, 0, $inp # redundant in aligned case ++ ?vperm $outmask, v7, $outmask, $outperm ++ lvx $outhead, 0, $out ++ ?vperm v0, v0, $inptail, $inpperm ++ ++ bl _vpaes_decrypt_core ++ ++ vperm v0, v0, v0, $outperm # rotate right/left ++ vsel v1, $outhead, v0, $outmask ++ vmr $outhead, v0 ++ stvx v1, 0, $out ++ addi $out, $out, 15 # 15 is not a typo ++ ######## ++ ++ lvx v1, 0, $out # redundant in aligned case ++ vsel v1, $outhead, v1, $outmask ++ stvx v1, 0, $out ++ ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mtlr r6 ++ mtspr 256, r7 # restore vrsave ++ lvx v20,r10,$sp ++ addi r10,r10,32 ++ lvx v21,r11,$sp ++ addi r11,r11,32 ++ lvx v22,r10,$sp ++ addi r10,r10,32 ++ lvx v23,r11,$sp ++ addi r11,r11,32 ++ lvx v24,r10,$sp ++ addi r10,r10,32 ++ lvx v25,r11,$sp ++ addi r11,r11,32 ++ lvx v26,r10,$sp ++ addi r10,r10,32 ++ lvx v27,r11,$sp ++ addi r11,r11,32 ++ lvx v28,r10,$sp ++ addi r10,r10,32 ++ lvx v29,r11,$sp ++ addi r11,r11,32 ++ lvx v30,r10,$sp ++ lvx v31,r11,$sp ++ addi $sp,$sp,$FRAME ++ blr ++ .long 0 ++ .byte 0,12,0x04,1,0x80,0,3,0 ++ .long 0 ++.size .vpaes_decrypt,.-.vpaes_decrypt ++ ++.globl .vpaes_cbc_encrypt ++.align 5 ++.vpaes_cbc_encrypt: ++ ${UCMP}i r5,16 ++ bltlr- ++ ++ $STU $sp,-`($FRAME+2*$SIZE_T)`($sp) ++ mflr r0 ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mfspr r12, 256 
++ stvx v20,r10,$sp ++ addi r10,r10,32 ++ stvx v21,r11,$sp ++ addi r11,r11,32 ++ stvx v22,r10,$sp ++ addi r10,r10,32 ++ stvx v23,r11,$sp ++ addi r11,r11,32 ++ stvx v24,r10,$sp ++ addi r10,r10,32 ++ stvx v25,r11,$sp ++ addi r11,r11,32 ++ stvx v26,r10,$sp ++ addi r10,r10,32 ++ stvx v27,r11,$sp ++ addi r11,r11,32 ++ stvx v28,r10,$sp ++ addi r10,r10,32 ++ stvx v29,r11,$sp ++ addi r11,r11,32 ++ stvx v30,r10,$sp ++ stvx v31,r11,$sp ++ stw r12,`$FRAME-4`($sp) # save vrsave ++ $PUSH r30,`$FRAME+$SIZE_T*0`($sp) ++ $PUSH r31,`$FRAME+$SIZE_T*1`($sp) ++ li r9, -16 ++ $PUSH r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp) ++ ++ and r30, r5, r9 # copy length&-16 ++ mr r5, r6 # copy pointer to key ++ mr r31, r7 # copy pointer to iv ++ blt Lcbc_abort ++ cmpwi r8, 0 # test direction ++ li r6, -1 ++ mr r7, r12 # copy vrsave ++ mtspr 256, r6 # preserve all AltiVec registers ++ ++ lvx v24, 0, r31 # load [potentially unaligned] iv ++ li r9, 15 ++ ?lvsl $inpperm, 0, r31 ++ lvx v25, r9, r31 ++ ?vperm v24, v24, v25, $inpperm ++ ++ neg r8, $inp # prepare for unaligned access ++ vxor v7, v7, v7 ++ ?lvsl $keyperm, 0, $key ++ ?lvsr $outperm, 0, $out ++ ?lvsr $inpperm, 0, r8 # -$inp ++ vnor $outmask, v7, v7 # 0xff..ff ++ lvx $inptail, 0, $inp ++ ?vperm $outmask, v7, $outmask, $outperm ++ addi $inp, $inp, 15 # 15 is not a typo ++ lvx $outhead, 0, $out ++ ++ beq Lcbc_decrypt ++ ++ bl _vpaes_encrypt_preheat ++ li r0, 16 ++ ++Lcbc_enc_loop: ++ vmr v0, $inptail ++ lvx $inptail, 0, $inp ++ addi $inp, $inp, 16 ++ ?vperm v0, v0, $inptail, $inpperm ++ vxor v0, v0, v24 # ^= iv ++ ++ bl _vpaes_encrypt_core ++ ++ vmr v24, v0 # put aside iv ++ sub. 
r30, r30, r0 # len -= 16 ++ vperm v0, v0, v0, $outperm # rotate right/left ++ vsel v1, $outhead, v0, $outmask ++ vmr $outhead, v0 ++ stvx v1, 0, $out ++ addi $out, $out, 16 ++ bne Lcbc_enc_loop ++ ++ b Lcbc_done ++ ++.align 5 ++Lcbc_decrypt: ++ bl _vpaes_decrypt_preheat ++ li r0, 16 ++ ++Lcbc_dec_loop: ++ vmr v0, $inptail ++ lvx $inptail, 0, $inp ++ addi $inp, $inp, 16 ++ ?vperm v0, v0, $inptail, $inpperm ++ vmr v25, v0 # put aside input ++ ++ bl _vpaes_decrypt_core ++ ++ vxor v0, v0, v24 # ^= iv ++ vmr v24, v25 ++ sub. r30, r30, r0 # len -= 16 ++ vperm v0, v0, v0, $outperm # rotate right/left ++ vsel v1, $outhead, v0, $outmask ++ vmr $outhead, v0 ++ stvx v1, 0, $out ++ addi $out, $out, 16 ++ bne Lcbc_dec_loop ++ ++Lcbc_done: ++ addi $out, $out, -1 ++ lvx v1, 0, $out # redundant in aligned case ++ vsel v1, $outhead, v1, $outmask ++ stvx v1, 0, $out ++ ++ neg r8, r31 # write [potentially unaligned] iv ++ ?lvsl $outperm, 0, r8 ++ li r6, 15 ++ vnor $outmask, v7, v7 # 0xff..ff ++ ?vperm $outmask, v7, $outmask, $outperm ++ lvx $outhead, 0, r31 ++ vperm v24, v24, v24, $outperm # rotate right/left ++ vsel v0, $outhead, v24, $outmask ++ lvx v1, r6, r31 ++ stvx v0, 0, r31 ++ vsel v1, v24, v1, $outmask ++ stvx v1, r6, r31 ++ ++ mtspr 256, r7 # restore vrsave ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ lvx v20,r10,$sp ++ addi r10,r10,32 ++ lvx v21,r11,$sp ++ addi r11,r11,32 ++ lvx v22,r10,$sp ++ addi r10,r10,32 ++ lvx v23,r11,$sp ++ addi r11,r11,32 ++ lvx v24,r10,$sp ++ addi r10,r10,32 ++ lvx v25,r11,$sp ++ addi r11,r11,32 ++ lvx v26,r10,$sp ++ addi r10,r10,32 ++ lvx v27,r11,$sp ++ addi r11,r11,32 ++ lvx v28,r10,$sp ++ addi r10,r10,32 ++ lvx v29,r11,$sp ++ addi r11,r11,32 ++ lvx v30,r10,$sp ++ lvx v31,r11,$sp ++Lcbc_abort: ++ $POP r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp) ++ $POP r30,`$FRAME+$SIZE_T*0`($sp) ++ $POP r31,`$FRAME+$SIZE_T*1`($sp) ++ mtlr r0 ++ addi $sp,$sp,`$FRAME+$SIZE_T*2` ++ blr ++ .long 0 ++ .byte 0,12,0x04,1,0x80,2,6,0 ++ .long 0 ++.size 
.vpaes_cbc_encrypt,.-.vpaes_cbc_encrypt ++___ ++} ++{ ++my ($inp,$bits,$out)=map("r$_",(3..5)); ++my $dir="cr1"; ++my ($invlo,$invhi,$iptlo,$ipthi,$rcon) = map("v$_",(10..13,24)); ++ ++$code.=<<___; ++######################################################## ++## ## ++## AES key schedule ## ++## ## ++######################################################## ++.align 4 ++_vpaes_key_preheat: ++ mflr r8 ++ bl Lconsts ++ mtlr r8 ++ li r11, 0xc0 # Lk_inv ++ li r10, 0xd0 ++ li r9, 0xe0 # L_ipt ++ li r8, 0xf0 ++ ++ vspltisb v8,4 # 0x04..04 ++ vxor v9,v9,v9 # 0x00..00 ++ lvx $invlo, r12, r11 # Lk_inv ++ li r11, 0x120 ++ lvx $invhi, r12, r10 ++ li r10, 0x130 ++ lvx $iptlo, r12, r9 # Lk_ipt ++ li r9, 0x220 ++ lvx $ipthi, r12, r8 ++ li r8, 0x230 ++ ++ lvx v14, r12, r11 # Lk_sb1 ++ li r11, 0x240 ++ lvx v15, r12, r10 ++ li r10, 0x250 ++ ++ lvx v16, r12, r9 # Lk_dksd ++ li r9, 0x260 ++ lvx v17, r12, r8 ++ li r8, 0x270 ++ lvx v18, r12, r11 # Lk_dksb ++ li r11, 0x280 ++ lvx v19, r12, r10 ++ li r10, 0x290 ++ lvx v20, r12, r9 # Lk_dkse ++ li r9, 0x2a0 ++ lvx v21, r12, r8 ++ li r8, 0x2b0 ++ lvx v22, r12, r11 # Lk_dks9 ++ lvx v23, r12, r10 ++ ++ lvx v24, r12, r9 # Lk_rcon ++ lvx v25, 0, r12 # Lk_mc_forward[0] ++ lvx v26, r12, r8 # Lks63 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++.align 4 ++_vpaes_schedule_core: ++ mflr r7 ++ ++ bl _vpaes_key_preheat # load the tables ++ ++ #lvx v0, 0, $inp # vmovdqu (%rdi), %xmm0 # load key (unaligned) ++ neg r8, $inp # prepare for unaligned access ++ lvx v0, 0, $inp ++ addi $inp, $inp, 15 # 15 is not typo ++ ?lvsr $inpperm, 0, r8 # -$inp ++ lvx v6, 0, $inp # v6 serves as inptail ++ addi $inp, $inp, 8 ++ ?vperm v0, v0, v6, $inpperm ++ ++ # input transform ++ vmr v3, v0 # vmovdqa %xmm0, %xmm3 ++ bl _vpaes_schedule_transform ++ vmr v7, v0 # vmovdqa %xmm0, %xmm7 ++ ++ bne $dir, Lschedule_am_decrypting ++ ++ # encrypting, output zeroth round key after transform ++ li r8, 0x30 # mov \$0x30,%r8d ++ addi r10, r12, 0x80 # lea .Lk_sr(%rip),%r10 ++ ++ 
?lvsr $outperm, 0, $out # prepare for unaligned access ++ vnor $outmask, v9, v9 # 0xff..ff ++ lvx $outhead, 0, $out ++ ?vperm $outmask, v9, $outmask, $outperm ++ ++ #stvx v0, 0, $out # vmovdqu %xmm0, (%rdx) ++ vperm v1, v0, v0, $outperm # rotate right/left ++ vsel v2, $outhead, v1, $outmask ++ vmr $outhead, v1 ++ stvx v2, 0, $out ++ b Lschedule_go ++ ++Lschedule_am_decrypting: ++ srwi r8, $bits, 1 # shr \$1,%r8d ++ andi. r8, r8, 32 # and \$32,%r8d ++ xori r8, r8, 32 # xor \$32,%r8d # nbits==192?0:32 ++ addi r10, r12, 0x80 # lea .Lk_sr(%rip),%r10 ++ # decrypting, output zeroth round key after shiftrows ++ lvx v1, r8, r10 # vmovdqa (%r8,%r10), %xmm1 ++ vperm v4, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3 ++ ++ neg r0, $out # prepare for unaligned access ++ ?lvsl $outperm, 0, r0 ++ addi $out, $out, 15 # 15 is not typo ++ vnor $outmask, v9, v9 # 0xff..ff ++ lvx $outhead, 0, $out ++ ?vperm $outmask, $outmask, v9, $outperm ++ ++ #stvx v4, 0, $out # vmovdqu %xmm3, (%rdx) ++ vperm v4, v4, v4, $outperm # rotate right/left ++ vsel v2, $outhead, v4, $outmask ++ vmr $outhead, v4 ++ stvx v2, 0, $out ++ xori r8, r8, 0x30 # xor \$0x30, %r8 ++ ++Lschedule_go: ++ cmplwi $bits, 192 # cmp \$192, %esi ++ bgt Lschedule_256 ++ beq Lschedule_192 ++ # 128: fall though ++ ++## ++## .schedule_128 ++## ++## 128-bit specific part of key schedule. ++## ++## This schedule is really simple, because all its parts ++## are accomplished by the subroutines. ++## ++Lschedule_128: ++ li r0, 10 # mov \$10, %esi ++ mtctr r0 ++ ++Loop_schedule_128: ++ bl _vpaes_schedule_round ++ bdz Lschedule_mangle_last # dec %esi ++ bl _vpaes_schedule_mangle # write output ++ b Loop_schedule_128 ++ ++## ++## .aes_schedule_192 ++## ++## 192-bit specific part of key schedule. ++## ++## The main body of this schedule is the same as the 128-bit ++## schedule, but with more smearing. The long, high side is ++## stored in %xmm7 as before, and the short, low side is in ++## the high bits of %xmm6. 
++## ++## This schedule is somewhat nastier, however, because each ++## round produces 192 bits of key material, or 1.5 round keys. ++## Therefore, on each cycle we do 2 rounds and produce 3 round ++## keys. ++## ++.align 4 ++Lschedule_192: ++ li r0, 4 # mov \$4, %esi ++ lvx v0, 0, $inp ++ ?vperm v0, v6, v0, $inpperm ++ ?vsldoi v0, v3, v0, 8 # vmovdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned) ++ bl _vpaes_schedule_transform # input transform ++ ?vsldoi v6, v0, v9, 8 ++ ?vsldoi v6, v9, v6, 8 # clobber "low" side with zeros ++ mtctr r0 ++ ++Loop_schedule_192: ++ bl _vpaes_schedule_round ++ ?vsldoi v0, v6, v0, 8 # vpalignr \$8,%xmm6,%xmm0,%xmm0 ++ bl _vpaes_schedule_mangle # save key n ++ bl _vpaes_schedule_192_smear ++ bl _vpaes_schedule_mangle # save key n+1 ++ bl _vpaes_schedule_round ++ bdz Lschedule_mangle_last # dec %esi ++ bl _vpaes_schedule_mangle # save key n+2 ++ bl _vpaes_schedule_192_smear ++ b Loop_schedule_192 ++ ++## ++## .aes_schedule_256 ++## ++## 256-bit specific part of key schedule. ++## ++## The structure here is very similar to the 128-bit ++## schedule, but with an additional "low side" in ++## %xmm6. The low side's rounds are the same as the ++## high side's, except no rcon and no rotation. ++## ++.align 4 ++Lschedule_256: ++ li r0, 7 # mov \$7, %esi ++ addi $inp, $inp, 8 ++ lvx v0, 0, $inp # vmovdqu 16(%rdi),%xmm0 # load key part 2 (unaligned) ++ ?vperm v0, v6, v0, $inpperm ++ bl _vpaes_schedule_transform # input transform ++ mtctr r0 ++ ++Loop_schedule_256: ++ bl _vpaes_schedule_mangle # output low result ++ vmr v6, v0 # vmovdqa %xmm0, %xmm6 # save cur_lo in xmm6 ++ ++ # high round ++ bl _vpaes_schedule_round ++ bdz Lschedule_mangle_last # dec %esi ++ bl _vpaes_schedule_mangle ++ ++ # low round. 
swap xmm7 and xmm6 ++ ?vspltw v0, v0, 3 # vpshufd \$0xFF, %xmm0, %xmm0 ++ vmr v5, v7 # vmovdqa %xmm7, %xmm5 ++ vmr v7, v6 # vmovdqa %xmm6, %xmm7 ++ bl _vpaes_schedule_low_round ++ vmr v7, v5 # vmovdqa %xmm5, %xmm7 ++ ++ b Loop_schedule_256 ++## ++## .aes_schedule_mangle_last ++## ++## Mangler for last round of key schedule ++## Mangles %xmm0 ++## when encrypting, outputs out(%xmm0) ^ 63 ++## when decrypting, outputs unskew(%xmm0) ++## ++## Always called right before return... jumps to cleanup and exits ++## ++.align 4 ++Lschedule_mangle_last: ++ # schedule last round key from xmm0 ++ li r11, 0x2e0 # lea .Lk_deskew(%rip),%r11 ++ li r9, 0x2f0 ++ bne $dir, Lschedule_mangle_last_dec ++ ++ # encrypting ++ lvx v1, r8, r10 # vmovdqa (%r8,%r10),%xmm1 ++ li r11, 0x2c0 # lea .Lk_opt(%rip), %r11 # prepare to output transform ++ li r9, 0x2d0 # prepare to output transform ++ vperm v0, v0, v0, v1 # vpshufb %xmm1, %xmm0, %xmm0 # output permute ++ ++ lvx $iptlo, r11, r12 # reload $ipt ++ lvx $ipthi, r9, r12 ++ addi $out, $out, 16 # add \$16, %rdx ++ vxor v0, v0, v26 # vpxor .Lk_s63(%rip), %xmm0, %xmm0 ++ bl _vpaes_schedule_transform # output transform ++ ++ #stvx v0, r0, $out # vmovdqu %xmm0, (%rdx) # save last key ++ vperm v0, v0, v0, $outperm # rotate right/left ++ vsel v2, $outhead, v0, $outmask ++ vmr $outhead, v0 ++ stvx v2, 0, $out ++ ++ addi $out, $out, 15 # 15 is not typo ++ lvx v1, 0, $out # redundant in aligned case ++ vsel v1, $outhead, v1, $outmask ++ stvx v1, 0, $out ++ b Lschedule_mangle_done ++ ++.align 4 ++Lschedule_mangle_last_dec: ++ lvx $iptlo, r11, r12 # reload $ipt ++ lvx $ipthi, r9, r12 ++ addi $out, $out, -16 # add \$-16, %rdx ++ vxor v0, v0, v26 # vpxor .Lk_s63(%rip), %xmm0, %xmm0 ++ bl _vpaes_schedule_transform # output transform ++ ++ #stvx v0, r0, $out # vmovdqu %xmm0, (%rdx) # save last key ++ vperm v0, v0, v0, $outperm # rotate right/left ++ vsel v2, $outhead, v0, $outmask ++ vmr $outhead, v0 ++ stvx v2, 0, $out ++ ++ addi $out, $out, -15 # -15 is not 
typo ++ lvx v1, 0, $out # redundant in aligned case ++ vsel v1, $outhead, v1, $outmask ++ stvx v1, 0, $out ++ ++Lschedule_mangle_done: ++ mtlr r7 ++ # cleanup ++ vxor v0, v0, v0 # vpxor %xmm0, %xmm0, %xmm0 ++ vxor v1, v1, v1 # vpxor %xmm1, %xmm1, %xmm1 ++ vxor v2, v2, v2 # vpxor %xmm2, %xmm2, %xmm2 ++ vxor v3, v3, v3 # vpxor %xmm3, %xmm3, %xmm3 ++ vxor v4, v4, v4 # vpxor %xmm4, %xmm4, %xmm4 ++ vxor v5, v5, v5 # vpxor %xmm5, %xmm5, %xmm5 ++ vxor v6, v6, v6 # vpxor %xmm6, %xmm6, %xmm6 ++ vxor v7, v7, v7 # vpxor %xmm7, %xmm7, %xmm7 ++ ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++## ++## .aes_schedule_192_smear ++## ++## Smear the short, low side in the 192-bit key schedule. ++## ++## Inputs: ++## %xmm7: high side, b a x y ++## %xmm6: low side, d c 0 0 ++## %xmm13: 0 ++## ++## Outputs: ++## %xmm6: b+c+d b+c 0 0 ++## %xmm0: b+c+d b+c b a ++## ++.align 4 ++_vpaes_schedule_192_smear: ++ ?vspltw v0, v7, 3 ++ ?vsldoi v1, v9, v6, 12 # vpshufd \$0x80, %xmm6, %xmm1 # d c 0 0 -> c 0 0 0 ++ ?vsldoi v0, v7, v0, 8 # vpshufd \$0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a ++ vxor v6, v6, v1 # vpxor %xmm1, %xmm6, %xmm6 # -> c+d c 0 0 ++ vxor v6, v6, v0 # vpxor %xmm0, %xmm6, %xmm6 # -> b+c+d b+c b a ++ vmr v0, v6 ++ ?vsldoi v6, v6, v9, 8 ++ ?vsldoi v6, v9, v6, 8 # clobber low side with zeros ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++## ++## .aes_schedule_round ++## ++## Runs one main round of the key schedule on %xmm0, %xmm7 ++## ++## Specifically, runs subbytes on the high dword of %xmm0 ++## then rotates it by one byte and xors into the low dword of ++## %xmm7. ++## ++## Adds rcon from low byte of %xmm8, then rotates %xmm8 for ++## next rcon. ++## ++## Smears the dwords of %xmm7 by xoring the low into the ++## second low, result into third, result into highest. ++## ++## Returns results in %xmm7 = %xmm0. ++## Clobbers %xmm1-%xmm4, %r11. 
++## ++.align 4 ++_vpaes_schedule_round: ++ # extract rcon from xmm8 ++ #vxor v4, v4, v4 # vpxor %xmm4, %xmm4, %xmm4 ++ ?vsldoi v1, $rcon, v9, 15 # vpalignr \$15, %xmm8, %xmm4, %xmm1 ++ ?vsldoi $rcon, $rcon, $rcon, 15 # vpalignr \$15, %xmm8, %xmm8, %xmm8 ++ vxor v7, v7, v1 # vpxor %xmm1, %xmm7, %xmm7 ++ ++ # rotate ++ ?vspltw v0, v0, 3 # vpshufd \$0xFF, %xmm0, %xmm0 ++ ?vsldoi v0, v0, v0, 1 # vpalignr \$1, %xmm0, %xmm0, %xmm0 ++ ++ # fall through... ++ ++ # low round: same as high round, but no rotation and no rcon. ++_vpaes_schedule_low_round: ++ # smear xmm7 ++ ?vsldoi v1, v9, v7, 12 # vpslldq \$4, %xmm7, %xmm1 ++ vxor v7, v7, v1 # vpxor %xmm1, %xmm7, %xmm7 ++ vspltisb v1, 0x0f # 0x0f..0f ++ ?vsldoi v4, v9, v7, 8 # vpslldq \$8, %xmm7, %xmm4 ++ ++ # subbytes ++ vand v1, v1, v0 # vpand %xmm9, %xmm0, %xmm1 # 0 = k ++ vsrb v0, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i ++ vxor v7, v7, v4 # vpxor %xmm4, %xmm7, %xmm7 ++ vperm v2, $invhi, v9, v1 # vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k ++ vxor v1, v1, v0 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j ++ vperm v3, $invlo, v9, v0 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i ++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k ++ vperm v4, $invlo, v9, v1 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j ++ vxor v7, v7, v26 # vpxor .Lk_s63(%rip), %xmm7, %xmm7 ++ vperm v3, $invlo, v9, v3 # vpshufb %xmm3, %xmm10, %xmm3 # 2 = 1/iak ++ vxor v4, v4, v2 # vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k ++ vperm v2, $invlo, v9, v4 # vpshufb %xmm4, %xmm10, %xmm2 # 3 = 1/jak ++ vxor v3, v3, v1 # vpxor %xmm1, %xmm3, %xmm3 # 2 = io ++ vxor v2, v2, v0 # vpxor %xmm0, %xmm2, %xmm2 # 3 = jo ++ vperm v4, v15, v9, v3 # vpshufb %xmm3, %xmm13, %xmm4 # 4 = sbou ++ vperm v1, v14, v9, v2 # vpshufb %xmm2, %xmm12, %xmm1 # 0 = sb1t ++ vxor v1, v1, v4 # vpxor %xmm4, %xmm1, %xmm1 # 0 = sbox output ++ ++ # add in smeared stuff ++ vxor v0, v1, v7 # vpxor %xmm7, %xmm1, %xmm0 ++ vxor v7, v1, v7 # vmovdqa %xmm0, %xmm7 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 
++ ++## ++## .aes_schedule_transform ++## ++## Linear-transform %xmm0 according to tables at (%r11) ++## ++## Requires that %xmm9 = 0x0F0F... as in preheat ++## Output in %xmm0 ++## Clobbers %xmm2 ++## ++.align 4 ++_vpaes_schedule_transform: ++ #vand v1, v0, v9 # vpand %xmm9, %xmm0, %xmm1 ++ vsrb v2, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 ++ # vmovdqa (%r11), %xmm2 # lo ++ vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm2 ++ # vmovdqa 16(%r11), %xmm1 # hi ++ vperm v2, $ipthi, $ipthi, v2 # vpshufb %xmm0, %xmm1, %xmm0 ++ vxor v0, v0, v2 # vpxor %xmm2, %xmm0, %xmm0 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++## ++## .aes_schedule_mangle ++## ++## Mangle xmm0 from (basis-transformed) standard version ++## to our version. ++## ++## On encrypt, ++## xor with 0x63 ++## multiply by circulant 0,1,1,1 ++## apply shiftrows transform ++## ++## On decrypt, ++## xor with 0x63 ++## multiply by "inverse mixcolumns" circulant E,B,D,9 ++## deskew ++## apply shiftrows transform ++## ++## ++## Writes out to (%rdx), and increments or decrements it ++## Keeps track of round number mod 4 in %r8 ++## Preserves xmm0 ++## Clobbers xmm1-xmm5 ++## ++.align 4 ++_vpaes_schedule_mangle: ++ #vmr v4, v0 # vmovdqa %xmm0, %xmm4 # save xmm0 for later ++ # vmovdqa .Lk_mc_forward(%rip),%xmm5 ++ bne $dir, Lschedule_mangle_dec ++ ++ # encrypting ++ vxor v4, v0, v26 # vpxor .Lk_s63(%rip), %xmm0, %xmm4 ++ addi $out, $out, 16 # add \$16, %rdx ++ vperm v4, v4, v4, v25 # vpshufb %xmm5, %xmm4, %xmm4 ++ vperm v1, v4, v4, v25 # vpshufb %xmm5, %xmm4, %xmm1 ++ vperm v3, v1, v1, v25 # vpshufb %xmm5, %xmm1, %xmm3 ++ vxor v4, v4, v1 # vpxor %xmm1, %xmm4, %xmm4 ++ lvx v1, r8, r10 # vmovdqa (%r8,%r10), %xmm1 ++ vxor v3, v3, v4 # vpxor %xmm4, %xmm3, %xmm3 ++ ++ vperm v3, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3 ++ addi r8, r8, -16 # add \$-16, %r8 ++ andi. 
r8, r8, 0x30 # and \$0x30, %r8 ++ ++ #stvx v3, 0, $out # vmovdqu %xmm3, (%rdx) ++ vperm v1, v3, v3, $outperm # rotate right/left ++ vsel v2, $outhead, v1, $outmask ++ vmr $outhead, v1 ++ stvx v2, 0, $out ++ blr ++ ++.align 4 ++Lschedule_mangle_dec: ++ # inverse mix columns ++ # lea .Lk_dksd(%rip),%r11 ++ vsrb v1, v0, v8 # vpsrlb \$4, %xmm4, %xmm1 # 1 = hi ++ #and v4, v0, v9 # vpand %xmm9, %xmm4, %xmm4 # 4 = lo ++ ++ # vmovdqa 0x00(%r11), %xmm2 ++ vperm v2, v16, v16, v0 # vpshufb %xmm4, %xmm2, %xmm2 ++ # vmovdqa 0x10(%r11), %xmm3 ++ vperm v3, v17, v17, v1 # vpshufb %xmm1, %xmm3, %xmm3 ++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 ++ vperm v3, v3, v9, v25 # vpshufb %xmm5, %xmm3, %xmm3 ++ ++ # vmovdqa 0x20(%r11), %xmm2 ++ vperm v2, v18, v18, v0 # vpshufb %xmm4, %xmm2, %xmm2 ++ vxor v2, v2, v3 # vpxor %xmm3, %xmm2, %xmm2 ++ # vmovdqa 0x30(%r11), %xmm3 ++ vperm v3, v19, v19, v1 # vpshufb %xmm1, %xmm3, %xmm3 ++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 ++ vperm v3, v3, v9, v25 # vpshufb %xmm5, %xmm3, %xmm3 ++ ++ # vmovdqa 0x40(%r11), %xmm2 ++ vperm v2, v20, v20, v0 # vpshufb %xmm4, %xmm2, %xmm2 ++ vxor v2, v2, v3 # vpxor %xmm3, %xmm2, %xmm2 ++ # vmovdqa 0x50(%r11), %xmm3 ++ vperm v3, v21, v21, v1 # vpshufb %xmm1, %xmm3, %xmm3 ++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 ++ ++ # vmovdqa 0x60(%r11), %xmm2 ++ vperm v2, v22, v22, v0 # vpshufb %xmm4, %xmm2, %xmm2 ++ vperm v3, v3, v9, v25 # vpshufb %xmm5, %xmm3, %xmm3 ++ # vmovdqa 0x70(%r11), %xmm4 ++ vperm v4, v23, v23, v1 # vpshufb %xmm1, %xmm4, %xmm4 ++ lvx v1, r8, r10 # vmovdqa (%r8,%r10), %xmm1 ++ vxor v2, v2, v3 # vpxor %xmm3, %xmm2, %xmm2 ++ vxor v3, v4, v2 # vpxor %xmm2, %xmm4, %xmm3 ++ ++ addi $out, $out, -16 # add \$-16, %rdx ++ ++ vperm v3, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3 ++ addi r8, r8, -16 # add \$-16, %r8 ++ andi. 
r8, r8, 0x30 # and \$0x30, %r8 ++ ++ #stvx v3, 0, $out # vmovdqu %xmm3, (%rdx) ++ vperm v1, v3, v3, $outperm # rotate right/left ++ vsel v2, $outhead, v1, $outmask ++ vmr $outhead, v1 ++ stvx v2, 0, $out ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++.globl .vpaes_set_encrypt_key ++.align 5 ++.vpaes_set_encrypt_key: ++ $STU $sp,-$FRAME($sp) ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mflr r0 ++ mfspr r6, 256 # save vrsave ++ stvx v20,r10,$sp ++ addi r10,r10,32 ++ stvx v21,r11,$sp ++ addi r11,r11,32 ++ stvx v22,r10,$sp ++ addi r10,r10,32 ++ stvx v23,r11,$sp ++ addi r11,r11,32 ++ stvx v24,r10,$sp ++ addi r10,r10,32 ++ stvx v25,r11,$sp ++ addi r11,r11,32 ++ stvx v26,r10,$sp ++ addi r10,r10,32 ++ stvx v27,r11,$sp ++ addi r11,r11,32 ++ stvx v28,r10,$sp ++ addi r10,r10,32 ++ stvx v29,r11,$sp ++ addi r11,r11,32 ++ stvx v30,r10,$sp ++ stvx v31,r11,$sp ++ stw r6,`$FRAME-4`($sp) # save vrsave ++ li r7, -1 ++ $PUSH r0, `$FRAME+$LRSAVE`($sp) ++ mtspr 256, r7 # preserve all AltiVec registers ++ ++ srwi r9, $bits, 5 # shr \$5,%eax ++ addi r9, r9, 6 # add \$5,%eax ++ stw r9, 240($out) # mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; ++ ++ cmplw $dir, $bits, $bits # set encrypt direction ++ li r8, 0x30 # mov \$0x30,%r8d ++ bl _vpaes_schedule_core ++ ++ $POP r0, `$FRAME+$LRSAVE`($sp) ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mtspr 256, r6 # restore vrsave ++ mtlr r0 ++ xor r3, r3, r3 ++ lvx v20,r10,$sp ++ addi r10,r10,32 ++ lvx v21,r11,$sp ++ addi r11,r11,32 ++ lvx v22,r10,$sp ++ addi r10,r10,32 ++ lvx v23,r11,$sp ++ addi r11,r11,32 ++ lvx v24,r10,$sp ++ addi r10,r10,32 ++ lvx v25,r11,$sp ++ addi r11,r11,32 ++ lvx v26,r10,$sp ++ addi r10,r10,32 ++ lvx v27,r11,$sp ++ addi r11,r11,32 ++ lvx v28,r10,$sp ++ addi r10,r10,32 ++ lvx v29,r11,$sp ++ addi r11,r11,32 ++ lvx v30,r10,$sp ++ lvx v31,r11,$sp ++ addi $sp,$sp,$FRAME ++ blr ++ .long 0 ++ .byte 0,12,0x04,1,0x80,0,3,0 ++ .long 0 ++.size .vpaes_set_encrypt_key,.-.vpaes_set_encrypt_key ++ ++.globl 
.vpaes_set_decrypt_key ++.align 4 ++.vpaes_set_decrypt_key: ++ $STU $sp,-$FRAME($sp) ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mflr r0 ++ mfspr r6, 256 # save vrsave ++ stvx v20,r10,$sp ++ addi r10,r10,32 ++ stvx v21,r11,$sp ++ addi r11,r11,32 ++ stvx v22,r10,$sp ++ addi r10,r10,32 ++ stvx v23,r11,$sp ++ addi r11,r11,32 ++ stvx v24,r10,$sp ++ addi r10,r10,32 ++ stvx v25,r11,$sp ++ addi r11,r11,32 ++ stvx v26,r10,$sp ++ addi r10,r10,32 ++ stvx v27,r11,$sp ++ addi r11,r11,32 ++ stvx v28,r10,$sp ++ addi r10,r10,32 ++ stvx v29,r11,$sp ++ addi r11,r11,32 ++ stvx v30,r10,$sp ++ stvx v31,r11,$sp ++ stw r6,`$FRAME-4`($sp) # save vrsave ++ li r7, -1 ++ $PUSH r0, `$FRAME+$LRSAVE`($sp) ++ mtspr 256, r7 # preserve all AltiVec registers ++ ++ srwi r9, $bits, 5 # shr \$5,%eax ++ addi r9, r9, 6 # add \$5,%eax ++ stw r9, 240($out) # mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; ++ ++ slwi r9, r9, 4 # shl \$4,%eax ++ add $out, $out, r9 # lea (%rdx,%rax),%rdx ++ ++ cmplwi $dir, $bits, 0 # set decrypt direction ++ srwi r8, $bits, 1 # shr \$1,%r8d ++ andi. 
r8, r8, 32 # and \$32,%r8d ++ xori r8, r8, 32 # xor \$32,%r8d # nbits==192?0:32 ++ bl _vpaes_schedule_core ++ ++ $POP r0, `$FRAME+$LRSAVE`($sp) ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mtspr 256, r6 # restore vrsave ++ mtlr r0 ++ xor r3, r3, r3 ++ lvx v20,r10,$sp ++ addi r10,r10,32 ++ lvx v21,r11,$sp ++ addi r11,r11,32 ++ lvx v22,r10,$sp ++ addi r10,r10,32 ++ lvx v23,r11,$sp ++ addi r11,r11,32 ++ lvx v24,r10,$sp ++ addi r10,r10,32 ++ lvx v25,r11,$sp ++ addi r11,r11,32 ++ lvx v26,r10,$sp ++ addi r10,r10,32 ++ lvx v27,r11,$sp ++ addi r11,r11,32 ++ lvx v28,r10,$sp ++ addi r10,r10,32 ++ lvx v29,r11,$sp ++ addi r11,r11,32 ++ lvx v30,r10,$sp ++ lvx v31,r11,$sp ++ addi $sp,$sp,$FRAME ++ blr ++ .long 0 ++ .byte 0,12,0x04,1,0x80,0,3,0 ++ .long 0 ++.size .vpaes_set_decrypt_key,.-.vpaes_set_decrypt_key ++___ ++} ++ ++my $consts=1; ++foreach (split("\n",$code)) { ++ s/\`([^\`]*)\`/eval $1/geo; ++ ++ # constants table endian-specific conversion ++ if ($consts && m/\.long\s+(.+)\s+(\?[a-z]*)$/o) { ++ my $conv=$2; ++ my @bytes=(); ++ ++ # convert to endian-agnostic format ++ foreach (split(/,\s+/,$1)) { ++ my $l = /^0/?oct:int; ++ push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; ++ } ++ ++ # little-endian conversion ++ if ($flavour =~ /le$/o) { ++ SWITCH: for($conv) { ++ /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; ++ /\?rev/ && do { @bytes=reverse(@bytes); last; }; ++ } ++ } ++ ++ #emit ++ print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n"; ++ next; ++ } ++ $consts=0 if (m/Lconsts:/o); # end of table ++ ++ # instructions prefixed with '?' are endian-specific and need ++ # to be adjusted accordingly... 
++ if ($flavour =~ /le$/o) { # little-endian ++ s/\?lvsr/lvsl/o or ++ s/\?lvsl/lvsr/o or ++ s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or ++ s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or ++ s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o; ++ } else { # big-endian ++ s/\?([a-z]+)/$1/o; ++ } ++ ++ print $_,"\n"; ++} ++ ++close STDOUT; +diff --git a/crypto/bn/asm/ppc-mont.pl b/crypto/bn/asm/ppc-mont.pl +index f9b6992..da69c6a 100644 +--- a/crypto/bn/asm/ppc-mont.pl ++++ b/crypto/bn/asm/ppc-mont.pl +@@ -325,6 +325,7 @@ Lcopy: ; copy or in-place refresh + .long 0 + .byte 0,12,4,0,0x80,12,6,0 + .long 0 ++.size .bn_mul_mont_int,.-.bn_mul_mont_int + + .asciz "Montgomery Multiplication for PPC, CRYPTOGAMS by " + ___ +diff --git a/crypto/bn/asm/ppc.pl b/crypto/bn/asm/ppc.pl +index 1249ce2..04df1fe 100644 +--- a/crypto/bn/asm/ppc.pl ++++ b/crypto/bn/asm/ppc.pl +@@ -392,6 +392,7 @@ $data=< for the OpenSSL ++# Written by Andy Polyakov for the OpenSSL + # project. The module is, however, dual licensed under OpenSSL and + # CRYPTOGAMS licenses depending on where you obtain it. For further + # details see http://www.openssl.org/~appro/cryptogams/. +@@ -65,6 +65,14 @@ + # others alternative would be to break dependence on upper halves of + # GPRs by sticking to 32-bit integer operations... + ++# December 2012 ++ ++# Remove above mentioned dependence on GPRs' upper halves in 32-bit ++# build. No signal masking overhead, but integer instructions are ++# *more* numerous... It's still "universally" faster than 32-bit ++# ppc-mont.pl, but improvement coefficient is not as impressive ++# for longer keys... 
++ + $flavour = shift; + + if ($flavour =~ /32/) { +@@ -110,6 +118,9 @@ $tp="r10"; + $j="r11"; + $i="r12"; + # non-volatile registers ++$c1="r19"; ++$n1="r20"; ++$a1="r21"; + $nap_d="r22"; # interleaved ap and np in double format + $a0="r23"; # ap[0] + $t0="r24"; # temporary registers +@@ -180,8 +191,8 @@ $T3a="f30"; $T3b="f31"; + # . . + # +-------------------------------+ + # . . +-# -12*size_t +-------------------------------+ +-# | 10 saved gpr, r22-r31 | ++# -13*size_t +-------------------------------+ ++# | 13 saved gpr, r19-r31 | + # . . + # . . + # -12*8 +-------------------------------+ +@@ -215,6 +226,9 @@ $code=<<___; + mr $i,$sp + $STUX $sp,$sp,$tp ; alloca + ++ $PUSH r19,`-12*8-13*$SIZE_T`($i) ++ $PUSH r20,`-12*8-12*$SIZE_T`($i) ++ $PUSH r21,`-12*8-11*$SIZE_T`($i) + $PUSH r22,`-12*8-10*$SIZE_T`($i) + $PUSH r23,`-12*8-9*$SIZE_T`($i) + $PUSH r24,`-12*8-8*$SIZE_T`($i) +@@ -237,40 +251,26 @@ $code=<<___; + stfd f29,`-3*8`($i) + stfd f30,`-2*8`($i) + stfd f31,`-1*8`($i) +-___ +-$code.=<<___ if ($SIZE_T==8); +- ld $a0,0($ap) ; pull ap[0] value +- ld $n0,0($n0) ; pull n0[0] value +- ld $t3,0($bp) ; bp[0] +-___ +-$code.=<<___ if ($SIZE_T==4); +- mr $t1,$n0 +- lwz $a0,0($ap) ; pull ap[0,1] value +- lwz $t0,4($ap) +- lwz $n0,0($t1) ; pull n0[0,1] value +- lwz $t1,4($t1) +- lwz $t3,0($bp) ; bp[0,1] +- lwz $t2,4($bp) +- insrdi $a0,$t0,32,0 +- insrdi $n0,$t1,32,0 +- insrdi $t3,$t2,32,0 +-___ +-$code.=<<___; ++ + addi $tp,$sp,`$FRAME+$TRANSFER+8+64` + li $i,-64 + add $nap_d,$tp,$num + and $nap_d,$nap_d,$i ; align to 64 bytes +- +- mulld $t7,$a0,$t3 ; ap[0]*bp[0] + ; nap_d is off by 1, because it's used with stfdu/lfdu + addi $nap_d,$nap_d,-8 + srwi $j,$num,`3+1` ; counter register, num/2 +- mulld $t7,$t7,$n0 ; tp[0]*n0 + addi $j,$j,-1 + addi $tp,$sp,`$FRAME+$TRANSFER-8` + li $carry,0 + mtctr $j ++___ ++ ++$code.=<<___ if ($SIZE_T==8); ++ ld $a0,0($ap) ; pull ap[0] value ++ ld $t3,0($bp) ; bp[0] ++ ld $n0,0($n0) ; pull n0[0] value + ++ mulld $t7,$a0,$t3 ; ap[0]*bp[0] 
+ ; transfer bp[0] to FPU as 4x16-bit values + extrdi $t0,$t3,16,48 + extrdi $t1,$t3,16,32 +@@ -280,6 +280,8 @@ $code.=<<___; + std $t1,`$FRAME+8`($sp) + std $t2,`$FRAME+16`($sp) + std $t3,`$FRAME+24`($sp) ++ ++ mulld $t7,$t7,$n0 ; tp[0]*n0 + ; transfer (ap[0]*bp[0])*n0 to FPU as 4x16-bit values + extrdi $t4,$t7,16,48 + extrdi $t5,$t7,16,32 +@@ -289,21 +291,61 @@ $code.=<<___; + std $t5,`$FRAME+40`($sp) + std $t6,`$FRAME+48`($sp) + std $t7,`$FRAME+56`($sp) +-___ +-$code.=<<___ if ($SIZE_T==8); +- lwz $t0,4($ap) ; load a[j] as 32-bit word pair +- lwz $t1,0($ap) +- lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair ++ ++ extrdi $t0,$a0,32,32 ; lwz $t0,4($ap) ++ extrdi $t1,$a0,32,0 ; lwz $t1,0($ap) ++ lwz $t2,12($ap) ; load a[1] as 32-bit word pair + lwz $t3,8($ap) +- lwz $t4,4($np) ; load n[j] as 32-bit word pair ++ lwz $t4,4($np) ; load n[0] as 32-bit word pair + lwz $t5,0($np) +- lwz $t6,12($np) ; load n[j+1] as 32-bit word pair ++ lwz $t6,12($np) ; load n[1] as 32-bit word pair + lwz $t7,8($np) + ___ + $code.=<<___ if ($SIZE_T==4); +- lwz $t0,0($ap) ; load a[j..j+3] as 32-bit word pairs +- lwz $t1,4($ap) +- lwz $t2,8($ap) ++ lwz $a0,0($ap) ; pull ap[0,1] value ++ mr $n1,$n0 ++ lwz $a1,4($ap) ++ li $c1,0 ++ lwz $t1,0($bp) ; bp[0,1] ++ lwz $t3,4($bp) ++ lwz $n0,0($n1) ; pull n0[0,1] value ++ lwz $n1,4($n1) ++ ++ mullw $t4,$a0,$t1 ; mulld ap[0]*bp[0] ++ mulhwu $t5,$a0,$t1 ++ mullw $t6,$a1,$t1 ++ mullw $t7,$a0,$t3 ++ add $t5,$t5,$t6 ++ add $t5,$t5,$t7 ++ ; transfer bp[0] to FPU as 4x16-bit values ++ extrwi $t0,$t1,16,16 ++ extrwi $t1,$t1,16,0 ++ extrwi $t2,$t3,16,16 ++ extrwi $t3,$t3,16,0 ++ std $t0,`$FRAME+0`($sp) ; yes, std in 32-bit build ++ std $t1,`$FRAME+8`($sp) ++ std $t2,`$FRAME+16`($sp) ++ std $t3,`$FRAME+24`($sp) ++ ++ mullw $t0,$t4,$n0 ; mulld tp[0]*n0 ++ mulhwu $t1,$t4,$n0 ++ mullw $t2,$t5,$n0 ++ mullw $t3,$t4,$n1 ++ add $t1,$t1,$t2 ++ add $t1,$t1,$t3 ++ ; transfer (ap[0]*bp[0])*n0 to FPU as 4x16-bit values ++ extrwi $t4,$t0,16,16 ++ extrwi $t5,$t0,16,0 ++ 
extrwi $t6,$t1,16,16 ++ extrwi $t7,$t1,16,0 ++ std $t4,`$FRAME+32`($sp) ; yes, std in 32-bit build ++ std $t5,`$FRAME+40`($sp) ++ std $t6,`$FRAME+48`($sp) ++ std $t7,`$FRAME+56`($sp) ++ ++ mr $t0,$a0 ; lwz $t0,0($ap) ++ mr $t1,$a1 ; lwz $t1,4($ap) ++ lwz $t2,8($ap) ; load a[j..j+3] as 32-bit word pairs + lwz $t3,12($ap) + lwz $t4,0($np) ; load n[j..j+3] as 32-bit word pairs + lwz $t5,4($np) +@@ -319,7 +361,7 @@ $code.=<<___; + lfd $nb,`$FRAME+40`($sp) + lfd $nc,`$FRAME+48`($sp) + lfd $nd,`$FRAME+56`($sp) +- std $t0,`$FRAME+64`($sp) ++ std $t0,`$FRAME+64`($sp) ; yes, std even in 32-bit build + std $t1,`$FRAME+72`($sp) + std $t2,`$FRAME+80`($sp) + std $t3,`$FRAME+88`($sp) +@@ -441,7 +483,7 @@ $code.=<<___ if ($SIZE_T==4); + lwz $t7,12($np) + ___ + $code.=<<___; +- std $t0,`$FRAME+64`($sp) ++ std $t0,`$FRAME+64`($sp) ; yes, std even in 32-bit build + std $t1,`$FRAME+72`($sp) + std $t2,`$FRAME+80`($sp) + std $t3,`$FRAME+88`($sp) +@@ -449,6 +491,9 @@ $code.=<<___; + std $t5,`$FRAME+104`($sp) + std $t6,`$FRAME+112`($sp) + std $t7,`$FRAME+120`($sp) ++___ ++if ($SIZE_T==8 or $flavour =~ /osx/) { ++$code.=<<___; + ld $t0,`$FRAME+0`($sp) + ld $t1,`$FRAME+8`($sp) + ld $t2,`$FRAME+16`($sp) +@@ -457,6 +502,20 @@ $code.=<<___; + ld $t5,`$FRAME+40`($sp) + ld $t6,`$FRAME+48`($sp) + ld $t7,`$FRAME+56`($sp) ++___ ++} else { ++$code.=<<___; ++ lwz $t1,`$FRAME+0`($sp) ++ lwz $t0,`$FRAME+4`($sp) ++ lwz $t3,`$FRAME+8`($sp) ++ lwz $t2,`$FRAME+12`($sp) ++ lwz $t5,`$FRAME+16`($sp) ++ lwz $t4,`$FRAME+20`($sp) ++ lwz $t7,`$FRAME+24`($sp) ++ lwz $t6,`$FRAME+28`($sp) ++___ ++} ++$code.=<<___; + lfd $A0,`$FRAME+64`($sp) + lfd $A1,`$FRAME+72`($sp) + lfd $A2,`$FRAME+80`($sp) +@@ -488,7 +547,9 @@ $code.=<<___; + fmadd $T0b,$A0,$bb,$dotb + stfd $A2,24($nap_d) ; save a[j+1] in double format + stfd $A3,32($nap_d) +- ++___ ++if ($SIZE_T==8 or $flavour =~ /osx/) { ++$code.=<<___; + fmadd $T1a,$A0,$bc,$T1a + fmadd $T1b,$A0,$bd,$T1b + fmadd $T2a,$A1,$bc,$T2a +@@ -561,11 +622,123 @@ $code.=<<___; + stfd 
$T3b,`$FRAME+56`($sp) + std $t0,8($tp) ; tp[j-1] + stdu $t4,16($tp) ; tp[j] ++___ ++} else { ++$code.=<<___; ++ fmadd $T1a,$A0,$bc,$T1a ++ fmadd $T1b,$A0,$bd,$T1b ++ addc $t0,$t0,$carry ++ adde $t1,$t1,$c1 ++ srwi $carry,$t0,16 ++ fmadd $T2a,$A1,$bc,$T2a ++ fmadd $T2b,$A1,$bd,$T2b ++ stfd $N0,40($nap_d) ; save n[j] in double format ++ stfd $N1,48($nap_d) ++ srwi $c1,$t1,16 ++ insrwi $carry,$t1,16,0 ++ fmadd $T3a,$A2,$bc,$T3a ++ fmadd $T3b,$A2,$bd,$T3b ++ addc $t2,$t2,$carry ++ adde $t3,$t3,$c1 ++ srwi $carry,$t2,16 ++ fmul $dota,$A3,$bc ++ fmul $dotb,$A3,$bd ++ stfd $N2,56($nap_d) ; save n[j+1] in double format ++ stfdu $N3,64($nap_d) ++ insrwi $t0,$t2,16,0 ; 0..31 bits ++ srwi $c1,$t3,16 ++ insrwi $carry,$t3,16,0 ++ ++ fmadd $T1a,$N1,$na,$T1a ++ fmadd $T1b,$N1,$nb,$T1b ++ lwz $t3,`$FRAME+32`($sp) ; permuted $t1 ++ lwz $t2,`$FRAME+36`($sp) ; permuted $t0 ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ srwi $carry,$t4,16 ++ fmadd $T2a,$N2,$na,$T2a ++ fmadd $T2b,$N2,$nb,$T2b ++ srwi $c1,$t5,16 ++ insrwi $carry,$t5,16,0 ++ fmadd $T3a,$N3,$na,$T3a ++ fmadd $T3b,$N3,$nb,$T3b ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ fmadd $T0a,$N0,$na,$T0a ++ fmadd $T0b,$N0,$nb,$T0b ++ insrwi $t4,$t6,16,0 ; 32..63 bits ++ srwi $c1,$t7,16 ++ insrwi $carry,$t7,16,0 ++ ++ fmadd $T1a,$N0,$nc,$T1a ++ fmadd $T1b,$N0,$nd,$T1b ++ lwz $t7,`$FRAME+40`($sp) ; permuted $t3 ++ lwz $t6,`$FRAME+44`($sp) ; permuted $t2 ++ addc $t2,$t2,$carry ++ adde $t3,$t3,$c1 ++ srwi $carry,$t2,16 ++ fmadd $T2a,$N1,$nc,$T2a ++ fmadd $T2b,$N1,$nd,$T2b ++ stw $t0,12($tp) ; tp[j-1] ++ stw $t4,8($tp) ++ srwi $c1,$t3,16 ++ insrwi $carry,$t3,16,0 ++ fmadd $T3a,$N2,$nc,$T3a ++ fmadd $T3b,$N2,$nd,$T3b ++ lwz $t1,`$FRAME+48`($sp) ; permuted $t5 ++ lwz $t0,`$FRAME+52`($sp) ; permuted $t4 ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ fmadd $dota,$N3,$nc,$dota ++ fmadd $dotb,$N3,$nd,$dotb ++ insrwi $t2,$t6,16,0 ; 64..95 bits ++ srwi $c1,$t7,16 ++ insrwi $carry,$t7,16,0 ++ ++ fctid 
$T0a,$T0a ++ fctid $T0b,$T0b ++ lwz $t5,`$FRAME+56`($sp) ; permuted $t7 ++ lwz $t4,`$FRAME+60`($sp) ; permuted $t6 ++ addc $t0,$t0,$carry ++ adde $t1,$t1,$c1 ++ srwi $carry,$t0,16 ++ fctid $T1a,$T1a ++ fctid $T1b,$T1b ++ srwi $c1,$t1,16 ++ insrwi $carry,$t1,16,0 ++ fctid $T2a,$T2a ++ fctid $T2b,$T2b ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ srwi $carry,$t4,16 ++ fctid $T3a,$T3a ++ fctid $T3b,$T3b ++ insrwi $t0,$t4,16,0 ; 96..127 bits ++ srwi $c1,$t5,16 ++ insrwi $carry,$t5,16,0 ++ ++ stfd $T0a,`$FRAME+0`($sp) ++ stfd $T0b,`$FRAME+8`($sp) ++ stfd $T1a,`$FRAME+16`($sp) ++ stfd $T1b,`$FRAME+24`($sp) ++ stfd $T2a,`$FRAME+32`($sp) ++ stfd $T2b,`$FRAME+40`($sp) ++ stfd $T3a,`$FRAME+48`($sp) ++ stfd $T3b,`$FRAME+56`($sp) ++ stw $t2,20($tp) ; tp[j] ++ stwu $t0,16($tp) ++___ ++} ++$code.=<<___; + bdnz- L1st + + fctid $dota,$dota + fctid $dotb,$dotb +- ++___ ++if ($SIZE_T==8 or $flavour =~ /osx/) { ++$code.=<<___; + ld $t0,`$FRAME+0`($sp) + ld $t1,`$FRAME+8`($sp) + ld $t2,`$FRAME+16`($sp) +@@ -611,33 +784,117 @@ $code.=<<___; + insrdi $t6,$t7,48,0 + srdi $ovf,$t7,48 + std $t6,8($tp) ; tp[num-1] ++___ ++} else { ++$code.=<<___; ++ lwz $t1,`$FRAME+0`($sp) ++ lwz $t0,`$FRAME+4`($sp) ++ lwz $t3,`$FRAME+8`($sp) ++ lwz $t2,`$FRAME+12`($sp) ++ lwz $t5,`$FRAME+16`($sp) ++ lwz $t4,`$FRAME+20`($sp) ++ lwz $t7,`$FRAME+24`($sp) ++ lwz $t6,`$FRAME+28`($sp) ++ stfd $dota,`$FRAME+64`($sp) ++ stfd $dotb,`$FRAME+72`($sp) + ++ addc $t0,$t0,$carry ++ adde $t1,$t1,$c1 ++ srwi $carry,$t0,16 ++ insrwi $carry,$t1,16,0 ++ srwi $c1,$t1,16 ++ addc $t2,$t2,$carry ++ adde $t3,$t3,$c1 ++ srwi $carry,$t2,16 ++ insrwi $t0,$t2,16,0 ; 0..31 bits ++ insrwi $carry,$t3,16,0 ++ srwi $c1,$t3,16 ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ srwi $carry,$t4,16 ++ insrwi $carry,$t5,16,0 ++ srwi $c1,$t5,16 ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ insrwi $t4,$t6,16,0 ; 32..63 bits ++ insrwi $carry,$t7,16,0 ++ srwi $c1,$t7,16 ++ stw $t0,12($tp) ; tp[j-1] ++ stw $t4,8($tp) ++ ++ lwz 
$t3,`$FRAME+32`($sp) ; permuted $t1 ++ lwz $t2,`$FRAME+36`($sp) ; permuted $t0 ++ lwz $t7,`$FRAME+40`($sp) ; permuted $t3 ++ lwz $t6,`$FRAME+44`($sp) ; permuted $t2 ++ lwz $t1,`$FRAME+48`($sp) ; permuted $t5 ++ lwz $t0,`$FRAME+52`($sp) ; permuted $t4 ++ lwz $t5,`$FRAME+56`($sp) ; permuted $t7 ++ lwz $t4,`$FRAME+60`($sp) ; permuted $t6 ++ ++ addc $t2,$t2,$carry ++ adde $t3,$t3,$c1 ++ srwi $carry,$t2,16 ++ insrwi $carry,$t3,16,0 ++ srwi $c1,$t3,16 ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ insrwi $t2,$t6,16,0 ; 64..95 bits ++ insrwi $carry,$t7,16,0 ++ srwi $c1,$t7,16 ++ addc $t0,$t0,$carry ++ adde $t1,$t1,$c1 ++ srwi $carry,$t0,16 ++ insrwi $carry,$t1,16,0 ++ srwi $c1,$t1,16 ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ srwi $carry,$t4,16 ++ insrwi $t0,$t4,16,0 ; 96..127 bits ++ insrwi $carry,$t5,16,0 ++ srwi $c1,$t5,16 ++ stw $t2,20($tp) ; tp[j] ++ stwu $t0,16($tp) ++ ++ lwz $t7,`$FRAME+64`($sp) ++ lwz $t6,`$FRAME+68`($sp) ++ lwz $t5,`$FRAME+72`($sp) ++ lwz $t4,`$FRAME+76`($sp) ++ ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ insrwi $carry,$t7,16,0 ++ srwi $c1,$t7,16 ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ ++ insrwi $t6,$t4,16,0 ++ srwi $t4,$t4,16 ++ insrwi $t4,$t5,16,0 ++ srwi $ovf,$t5,16 ++ stw $t6,12($tp) ; tp[num-1] ++ stw $t4,8($tp) ++___ ++} ++$code.=<<___; + slwi $t7,$num,2 + subf $nap_d,$t7,$nap_d ; rewind pointer + + li $i,8 ; i=1 + .align 5 + Louter: +-___ +-$code.=<<___ if ($SIZE_T==8); +- ldx $t3,$bp,$i ; bp[i] +-___ +-$code.=<<___ if ($SIZE_T==4); +- add $t0,$bp,$i +- lwz $t3,0($t0) ; bp[i,i+1] +- lwz $t0,4($t0) +- insrdi $t3,$t0,32,0 +-___ +-$code.=<<___; +- ld $t6,`$FRAME+$TRANSFER+8`($sp) ; tp[0] +- mulld $t7,$a0,$t3 ; ap[0]*bp[i] +- + addi $tp,$sp,`$FRAME+$TRANSFER` +- add $t7,$t7,$t6 ; ap[0]*bp[i]+tp[0] + li $carry,0 +- mulld $t7,$t7,$n0 ; tp[0]*n0 + mtctr $j ++___ ++$code.=<<___ if ($SIZE_T==8); ++ ldx $t3,$bp,$i ; bp[i] + ++ ld $t6,`$FRAME+$TRANSFER+8`($sp) ; tp[0] ++ mulld $t7,$a0,$t3 ; 
ap[0]*bp[i] ++ add $t7,$t7,$t6 ; ap[0]*bp[i]+tp[0] + ; transfer bp[i] to FPU as 4x16-bit values + extrdi $t0,$t3,16,48 + extrdi $t1,$t3,16,32 +@@ -647,6 +904,8 @@ $code.=<<___; + std $t1,`$FRAME+8`($sp) + std $t2,`$FRAME+16`($sp) + std $t3,`$FRAME+24`($sp) ++ ++ mulld $t7,$t7,$n0 ; tp[0]*n0 + ; transfer (ap[0]*bp[i]+tp[0])*n0 to FPU as 4x16-bit values + extrdi $t4,$t7,16,48 + extrdi $t5,$t7,16,32 +@@ -656,7 +915,50 @@ $code.=<<___; + std $t5,`$FRAME+40`($sp) + std $t6,`$FRAME+48`($sp) + std $t7,`$FRAME+56`($sp) ++___ ++$code.=<<___ if ($SIZE_T==4); ++ add $t0,$bp,$i ++ li $c1,0 ++ lwz $t1,0($t0) ; bp[i,i+1] ++ lwz $t3,4($t0) ++ ++ mullw $t4,$a0,$t1 ; ap[0]*bp[i] ++ lwz $t0,`$FRAME+$TRANSFER+8+4`($sp) ; tp[0] ++ mulhwu $t5,$a0,$t1 ++ lwz $t2,`$FRAME+$TRANSFER+8`($sp) ; tp[0] ++ mullw $t6,$a1,$t1 ++ mullw $t7,$a0,$t3 ++ add $t5,$t5,$t6 ++ add $t5,$t5,$t7 ++ addc $t4,$t4,$t0 ; ap[0]*bp[i]+tp[0] ++ adde $t5,$t5,$t2 ++ ; transfer bp[i] to FPU as 4x16-bit values ++ extrwi $t0,$t1,16,16 ++ extrwi $t1,$t1,16,0 ++ extrwi $t2,$t3,16,16 ++ extrwi $t3,$t3,16,0 ++ std $t0,`$FRAME+0`($sp) ; yes, std in 32-bit build ++ std $t1,`$FRAME+8`($sp) ++ std $t2,`$FRAME+16`($sp) ++ std $t3,`$FRAME+24`($sp) + ++ mullw $t0,$t4,$n0 ; mulld tp[0]*n0 ++ mulhwu $t1,$t4,$n0 ++ mullw $t2,$t5,$n0 ++ mullw $t3,$t4,$n1 ++ add $t1,$t1,$t2 ++ add $t1,$t1,$t3 ++ ; transfer (ap[0]*bp[i]+tp[0])*n0 to FPU as 4x16-bit values ++ extrwi $t4,$t0,16,16 ++ extrwi $t5,$t0,16,0 ++ extrwi $t6,$t1,16,16 ++ extrwi $t7,$t1,16,0 ++ std $t4,`$FRAME+32`($sp) ; yes, std in 32-bit build ++ std $t5,`$FRAME+40`($sp) ++ std $t6,`$FRAME+48`($sp) ++ std $t7,`$FRAME+56`($sp) ++___ ++$code.=<<___; + lfd $A0,8($nap_d) ; load a[j] in double format + lfd $A1,16($nap_d) + lfd $A2,24($nap_d) ; load a[j+1] in double format +@@ -769,7 +1071,9 @@ Linner: + fmul $dotb,$A3,$bd + lfd $A2,24($nap_d) ; load a[j+1] in double format + lfd $A3,32($nap_d) +- ++___ ++if ($SIZE_T==8 or $flavour =~ /osx/) { ++$code.=<<___; + fmadd $T1a,$N1,$na,$T1a 
+ fmadd $T1b,$N1,$nb,$T1b + ld $t0,`$FRAME+0`($sp) +@@ -856,10 +1160,131 @@ $code.=<<___; + addze $carry,$carry + std $t3,-16($tp) ; tp[j-1] + std $t5,-8($tp) ; tp[j] ++___ ++} else { ++$code.=<<___; ++ fmadd $T1a,$N1,$na,$T1a ++ fmadd $T1b,$N1,$nb,$T1b ++ lwz $t1,`$FRAME+0`($sp) ++ lwz $t0,`$FRAME+4`($sp) ++ fmadd $T2a,$N2,$na,$T2a ++ fmadd $T2b,$N2,$nb,$T2b ++ lwz $t3,`$FRAME+8`($sp) ++ lwz $t2,`$FRAME+12`($sp) ++ fmadd $T3a,$N3,$na,$T3a ++ fmadd $T3b,$N3,$nb,$T3b ++ lwz $t5,`$FRAME+16`($sp) ++ lwz $t4,`$FRAME+20`($sp) ++ addc $t0,$t0,$carry ++ adde $t1,$t1,$c1 ++ srwi $carry,$t0,16 ++ fmadd $T0a,$N0,$na,$T0a ++ fmadd $T0b,$N0,$nb,$T0b ++ lwz $t7,`$FRAME+24`($sp) ++ lwz $t6,`$FRAME+28`($sp) ++ srwi $c1,$t1,16 ++ insrwi $carry,$t1,16,0 ++ ++ fmadd $T1a,$N0,$nc,$T1a ++ fmadd $T1b,$N0,$nd,$T1b ++ addc $t2,$t2,$carry ++ adde $t3,$t3,$c1 ++ srwi $carry,$t2,16 ++ fmadd $T2a,$N1,$nc,$T2a ++ fmadd $T2b,$N1,$nd,$T2b ++ insrwi $t0,$t2,16,0 ; 0..31 bits ++ srwi $c1,$t3,16 ++ insrwi $carry,$t3,16,0 ++ fmadd $T3a,$N2,$nc,$T3a ++ fmadd $T3b,$N2,$nd,$T3b ++ lwz $t2,12($tp) ; tp[j] ++ lwz $t3,8($tp) ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ srwi $carry,$t4,16 ++ fmadd $dota,$N3,$nc,$dota ++ fmadd $dotb,$N3,$nd,$dotb ++ srwi $c1,$t5,16 ++ insrwi $carry,$t5,16,0 ++ ++ fctid $T0a,$T0a ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ fctid $T0b,$T0b ++ insrwi $t4,$t6,16,0 ; 32..63 bits ++ srwi $c1,$t7,16 ++ insrwi $carry,$t7,16,0 ++ fctid $T1a,$T1a ++ addc $t0,$t0,$t2 ++ adde $t4,$t4,$t3 ++ lwz $t3,`$FRAME+32`($sp) ; permuted $t1 ++ lwz $t2,`$FRAME+36`($sp) ; permuted $t0 ++ fctid $T1b,$T1b ++ addze $carry,$carry ++ addze $c1,$c1 ++ stw $t0,4($tp) ; tp[j-1] ++ stw $t4,0($tp) ++ fctid $T2a,$T2a ++ addc $t2,$t2,$carry ++ adde $t3,$t3,$c1 ++ srwi $carry,$t2,16 ++ lwz $t7,`$FRAME+40`($sp) ; permuted $t3 ++ lwz $t6,`$FRAME+44`($sp) ; permuted $t2 ++ fctid $T2b,$T2b ++ srwi $c1,$t3,16 ++ insrwi $carry,$t3,16,0 ++ lwz $t1,`$FRAME+48`($sp) ; permuted $t5 ++ lwz 
$t0,`$FRAME+52`($sp) ; permuted $t4 ++ fctid $T3a,$T3a ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ lwz $t5,`$FRAME+56`($sp) ; permuted $t7 ++ lwz $t4,`$FRAME+60`($sp) ; permuted $t6 ++ fctid $T3b,$T3b ++ ++ insrwi $t2,$t6,16,0 ; 64..95 bits ++ insrwi $carry,$t7,16,0 ++ srwi $c1,$t7,16 ++ lwz $t6,20($tp) ++ lwzu $t7,16($tp) ++ addc $t0,$t0,$carry ++ stfd $T0a,`$FRAME+0`($sp) ++ adde $t1,$t1,$c1 ++ srwi $carry,$t0,16 ++ stfd $T0b,`$FRAME+8`($sp) ++ insrwi $carry,$t1,16,0 ++ srwi $c1,$t1,16 ++ addc $t4,$t4,$carry ++ stfd $T1a,`$FRAME+16`($sp) ++ adde $t5,$t5,$c1 ++ srwi $carry,$t4,16 ++ insrwi $t0,$t4,16,0 ; 96..127 bits ++ stfd $T1b,`$FRAME+24`($sp) ++ insrwi $carry,$t5,16,0 ++ srwi $c1,$t5,16 ++ ++ addc $t2,$t2,$t6 ++ stfd $T2a,`$FRAME+32`($sp) ++ adde $t0,$t0,$t7 ++ stfd $T2b,`$FRAME+40`($sp) ++ addze $carry,$carry ++ stfd $T3a,`$FRAME+48`($sp) ++ addze $c1,$c1 ++ stfd $T3b,`$FRAME+56`($sp) ++ stw $t2,-4($tp) ; tp[j] ++ stw $t0,-8($tp) ++___ ++} ++$code.=<<___; + bdnz- Linner + + fctid $dota,$dota + fctid $dotb,$dotb ++___ ++if ($SIZE_T==8 or $flavour =~ /osx/) { ++$code.=<<___; + ld $t0,`$FRAME+0`($sp) + ld $t1,`$FRAME+8`($sp) + ld $t2,`$FRAME+16`($sp) +@@ -926,7 +1351,116 @@ $code.=<<___; + insrdi $t6,$t7,48,0 + srdi $ovf,$t7,48 + std $t6,0($tp) ; tp[num-1] ++___ ++} else { ++$code.=<<___; ++ lwz $t1,`$FRAME+0`($sp) ++ lwz $t0,`$FRAME+4`($sp) ++ lwz $t3,`$FRAME+8`($sp) ++ lwz $t2,`$FRAME+12`($sp) ++ lwz $t5,`$FRAME+16`($sp) ++ lwz $t4,`$FRAME+20`($sp) ++ lwz $t7,`$FRAME+24`($sp) ++ lwz $t6,`$FRAME+28`($sp) ++ stfd $dota,`$FRAME+64`($sp) ++ stfd $dotb,`$FRAME+72`($sp) + ++ addc $t0,$t0,$carry ++ adde $t1,$t1,$c1 ++ srwi $carry,$t0,16 ++ insrwi $carry,$t1,16,0 ++ srwi $c1,$t1,16 ++ addc $t2,$t2,$carry ++ adde $t3,$t3,$c1 ++ srwi $carry,$t2,16 ++ insrwi $t0,$t2,16,0 ; 0..31 bits ++ lwz $t2,12($tp) ; tp[j] ++ insrwi $carry,$t3,16,0 ++ srwi $c1,$t3,16 ++ lwz $t3,8($tp) ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ srwi $carry,$t4,16 ++ insrwi 
$carry,$t5,16,0 ++ srwi $c1,$t5,16 ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ insrwi $t4,$t6,16,0 ; 32..63 bits ++ insrwi $carry,$t7,16,0 ++ srwi $c1,$t7,16 ++ ++ addc $t0,$t0,$t2 ++ adde $t4,$t4,$t3 ++ addze $carry,$carry ++ addze $c1,$c1 ++ stw $t0,4($tp) ; tp[j-1] ++ stw $t4,0($tp) ++ ++ lwz $t3,`$FRAME+32`($sp) ; permuted $t1 ++ lwz $t2,`$FRAME+36`($sp) ; permuted $t0 ++ lwz $t7,`$FRAME+40`($sp) ; permuted $t3 ++ lwz $t6,`$FRAME+44`($sp) ; permuted $t2 ++ lwz $t1,`$FRAME+48`($sp) ; permuted $t5 ++ lwz $t0,`$FRAME+52`($sp) ; permuted $t4 ++ lwz $t5,`$FRAME+56`($sp) ; permuted $t7 ++ lwz $t4,`$FRAME+60`($sp) ; permuted $t6 ++ ++ addc $t2,$t2,$carry ++ adde $t3,$t3,$c1 ++ srwi $carry,$t2,16 ++ insrwi $carry,$t3,16,0 ++ srwi $c1,$t3,16 ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ insrwi $t2,$t6,16,0 ; 64..95 bits ++ lwz $t6,20($tp) ++ insrwi $carry,$t7,16,0 ++ srwi $c1,$t7,16 ++ lwzu $t7,16($tp) ++ addc $t0,$t0,$carry ++ adde $t1,$t1,$c1 ++ srwi $carry,$t0,16 ++ insrwi $carry,$t1,16,0 ++ srwi $c1,$t1,16 ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ srwi $carry,$t4,16 ++ insrwi $t0,$t4,16,0 ; 96..127 bits ++ insrwi $carry,$t5,16,0 ++ srwi $c1,$t5,16 ++ ++ addc $t2,$t2,$t6 ++ adde $t0,$t0,$t7 ++ lwz $t7,`$FRAME+64`($sp) ++ lwz $t6,`$FRAME+68`($sp) ++ addze $carry,$carry ++ addze $c1,$c1 ++ lwz $t5,`$FRAME+72`($sp) ++ lwz $t4,`$FRAME+76`($sp) ++ ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ stw $t2,-4($tp) ; tp[j] ++ stw $t0,-8($tp) ++ addc $t6,$t6,$ovf ++ addze $t7,$t7 ++ srwi $carry,$t6,16 ++ insrwi $carry,$t7,16,0 ++ srwi $c1,$t7,16 ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ ++ insrwi $t6,$t4,16,0 ++ srwi $t4,$t4,16 ++ insrwi $t4,$t5,16,0 ++ srwi $ovf,$t5,16 ++ stw $t6,4($tp) ; tp[num-1] ++ stw $t4,0($tp) ++___ ++} ++$code.=<<___; + slwi $t7,$num,2 + addi $i,$i,8 + subf $nap_d,$t7,$nap_d ; rewind pointer +@@ -994,14 +1528,14 @@ $code.=<<___ if ($SIZE_T==4); + mtctr $j + + .align 4 +-Lsub: ld $t0,8($tp) ; load 
tp[j..j+3] in 64-bit word order +- ldu $t2,16($tp) ++Lsub: lwz $t0,12($tp) ; load tp[j..j+3] in 64-bit word order ++ lwz $t1,8($tp) ++ lwz $t2,20($tp) ++ lwzu $t3,16($tp) + lwz $t4,4($np) ; load np[j..j+3] in 32-bit word order + lwz $t5,8($np) + lwz $t6,12($np) + lwzu $t7,16($np) +- extrdi $t1,$t0,32,0 +- extrdi $t3,$t2,32,0 + subfe $t4,$t4,$t0 ; tp[j]-np[j] + stw $t0,4($ap) ; save tp[j..j+3] in 32-bit word order + subfe $t5,$t5,$t1 ; tp[j+1]-np[j+1] +@@ -1052,6 +1586,9 @@ ___ + $code.=<<___; + $POP $i,0($sp) + li r3,1 ; signal "handled" ++ $POP r19,`-12*8-13*$SIZE_T`($i) ++ $POP r20,`-12*8-12*$SIZE_T`($i) ++ $POP r21,`-12*8-11*$SIZE_T`($i) + $POP r22,`-12*8-10*$SIZE_T`($i) + $POP r23,`-12*8-9*$SIZE_T`($i) + $POP r24,`-12*8-8*$SIZE_T`($i) +@@ -1077,8 +1614,9 @@ $code.=<<___; + mr $sp,$i + blr + .long 0 +- .byte 0,12,4,0,0x8c,10,6,0 ++ .byte 0,12,4,0,0x8c,13,6,0 + .long 0 ++.size .$fname,.-.$fname + + .asciz "Montgomery Multiplication for PPC64, CRYPTOGAMS by " + ___ +diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c +index 1bfb5d9..51137fd 100644 +--- a/crypto/evp/e_aes.c ++++ b/crypto/evp/e_aes.c +@@ -153,6 +153,20 @@ void AES_xts_decrypt(const char *inp,char *out,size_t len, + const unsigned char iv[16]); + #endif + ++#if defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC)) ++# include "ppc_arch.h" ++# ifdef VPAES_ASM ++# define VPAES_CAPABLE (OPENSSL_ppccap_P & PPC_ALTIVEC) ++# endif ++# define HWAES_CAPABLE (OPENSSL_ppccap_P & PPC_CRYPTO207) ++# define HWAES_set_encrypt_key aes_p8_set_encrypt_key ++# define HWAES_set_decrypt_key aes_p8_set_decrypt_key ++# define HWAES_encrypt aes_p8_encrypt ++# define HWAES_decrypt aes_p8_decrypt ++# define HWAES_cbc_encrypt aes_p8_cbc_encrypt ++# define HWAES_ctr32_encrypt_blocks aes_p8_ctr32_encrypt_blocks ++#endif ++ + #if defined(AES_ASM) && !defined(I386_ONLY) && ( \ + ((defined(__i386) || defined(__i386__) || \ + defined(_M_IX86)) && defined(OPENSSL_IA32_SSE2))|| \ +diff --git 
a/crypto/modes/Makefile b/crypto/modes/Makefile +index c825b12..e684e02 100644 +--- a/crypto/modes/Makefile ++++ b/crypto/modes/Makefile +@@ -56,6 +56,10 @@ ghash-alpha.s: asm/ghash-alpha.pl + $(PERL) $< | $(CC) -E - | tee $@ > /dev/null + ghash-parisc.s: asm/ghash-parisc.pl + $(PERL) asm/ghash-parisc.pl $(PERLASM_SCHEME) $@ ++ghashv8-armx.S: asm/ghashv8-armx.pl ++ $(PERL) asm/ghashv8-armx.pl $(PERLASM_SCHEME) $@ ++ghashp8-ppc.s: asm/ghashp8-ppc.pl ++ $(PERL) asm/ghashp8-ppc.pl $(PERLASM_SCHEME) $@ + + # GNU make "catch all" + ghash-%.S: asm/ghash-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@ +diff --git a/crypto/modes/asm/ghashp8-ppc.pl b/crypto/modes/asm/ghashp8-ppc.pl +new file mode 100755 +index 0000000..e76a58c +--- /dev/null ++++ b/crypto/modes/asm/ghashp8-ppc.pl +@@ -0,0 +1,234 @@ ++#!/usr/bin/env perl ++# ++# ==================================================================== ++# Written by Andy Polyakov for the OpenSSL ++# project. The module is, however, dual licensed under OpenSSL and ++# CRYPTOGAMS licenses depending on where you obtain it. For further ++# details see http://www.openssl.org/~appro/cryptogams/. ++# ==================================================================== ++# ++# GHASH for for PowerISA v2.07. ++# ++# July 2014 ++# ++# Accurate performance measurements are problematic, because it's ++# always virtualized setup with possibly throttled processor. ++# Relative comparison is therefore more informative. This initial ++# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x ++# faster than "4-bit" integer-only compiler-generated 64-bit code. ++# "Initial version" means that there is room for futher improvement. 
++ ++$flavour=shift; ++$output =shift; ++ ++if ($flavour =~ /64/) { ++ $SIZE_T=8; ++ $LRSAVE=2*$SIZE_T; ++ $STU="stdu"; ++ $POP="ld"; ++ $PUSH="std"; ++} elsif ($flavour =~ /32/) { ++ $SIZE_T=4; ++ $LRSAVE=$SIZE_T; ++ $STU="stwu"; ++ $POP="lwz"; ++ $PUSH="stw"; ++} else { die "nonsense $flavour"; } ++ ++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ++( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or ++( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or ++die "can't locate ppc-xlate.pl"; ++ ++open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!"; ++ ++my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block ++ ++my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3)); ++my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12)); ++my $vrsave="r12"; ++ ++$code=<<___; ++.machine "any" ++ ++.text ++ ++.globl .gcm_init_p8 ++.align 5 ++.gcm_init_p8: ++ lis r0,0xfff0 ++ li r8,0x10 ++ mfspr $vrsave,256 ++ li r9,0x20 ++ mtspr 256,r0 ++ li r10,0x30 ++ lvx_u $H,0,r4 # load H ++ ++ vspltisb $xC2,-16 # 0xf0 ++ vspltisb $t0,1 # one ++ vaddubm $xC2,$xC2,$xC2 # 0xe0 ++ vxor $zero,$zero,$zero ++ vor $xC2,$xC2,$t0 # 0xe1 ++ vsldoi $xC2,$xC2,$zero,15 # 0xe1... ++ vsldoi $t1,$zero,$t0,1 # ...1 ++ vaddubm $xC2,$xC2,$xC2 # 0xc2... ++ vspltisb $t2,7 ++ vor $xC2,$xC2,$t1 # 0xc2....01 ++ vspltb $t1,$H,0 # most significant byte ++ vsl $H,$H,$t0 # H<<=1 ++ vsrab $t1,$t1,$t2 # broadcast carry bit ++ vand $t1,$t1,$xC2 ++ vxor $H,$H,$t1 # twisted H ++ ++ vsldoi $H,$H,$H,8 # twist even more ... ++ vsldoi $xC2,$zero,$xC2,8 # 0xc2.0 ++ vsldoi $Hl,$zero,$H,8 # ... 
and split ++ vsldoi $Hh,$H,$zero,8 ++ ++ stvx_u $xC2,0,r3 # save pre-computed table ++ stvx_u $Hl,r8,r3 ++ stvx_u $H, r9,r3 ++ stvx_u $Hh,r10,r3 ++ ++ mtspr 256,$vrsave ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,2,0 ++ .long 0 ++.size .gcm_init_p8,.-.gcm_init_p8 ++ ++.globl .gcm_gmult_p8 ++.align 5 ++.gcm_gmult_p8: ++ lis r0,0xfff8 ++ li r8,0x10 ++ mfspr $vrsave,256 ++ li r9,0x20 ++ mtspr 256,r0 ++ li r10,0x30 ++ lvx_u $IN,0,$Xip # load Xi ++ ++ lvx_u $Hl,r8,$Htbl # load pre-computed table ++ le?lvsl $lemask,r0,r0 ++ lvx_u $H, r9,$Htbl ++ le?vspltisb $t0,0x07 ++ lvx_u $Hh,r10,$Htbl ++ le?vxor $lemask,$lemask,$t0 ++ lvx_u $xC2,0,$Htbl ++ le?vperm $IN,$IN,$IN,$lemask ++ vxor $zero,$zero,$zero ++ ++ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo ++ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi ++ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi ++ ++ vpmsumd $t2,$Xl,$xC2 # 1st phase ++ ++ vsldoi $t0,$Xm,$zero,8 ++ vsldoi $t1,$zero,$Xm,8 ++ vxor $Xl,$Xl,$t0 ++ vxor $Xh,$Xh,$t1 ++ ++ vsldoi $Xl,$Xl,$Xl,8 ++ vxor $Xl,$Xl,$t2 ++ ++ vsldoi $t1,$Xl,$Xl,8 # 2nd phase ++ vpmsumd $Xl,$Xl,$xC2 ++ vxor $t1,$t1,$Xh ++ vxor $Xl,$Xl,$t1 ++ ++ le?vperm $Xl,$Xl,$Xl,$lemask ++ stvx_u $Xl,0,$Xip # write out Xi ++ ++ mtspr 256,$vrsave ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,2,0 ++ .long 0 ++.size .gcm_gmult_p8,.-.gcm_gmult_p8 ++ ++.globl .gcm_ghash_p8 ++.align 5 ++.gcm_ghash_p8: ++ lis r0,0xfff8 ++ li r8,0x10 ++ mfspr $vrsave,256 ++ li r9,0x20 ++ mtspr 256,r0 ++ li r10,0x30 ++ lvx_u $Xl,0,$Xip # load Xi ++ ++ lvx_u $Hl,r8,$Htbl # load pre-computed table ++ le?lvsl $lemask,r0,r0 ++ lvx_u $H, r9,$Htbl ++ le?vspltisb $t0,0x07 ++ lvx_u $Hh,r10,$Htbl ++ le?vxor $lemask,$lemask,$t0 ++ lvx_u $xC2,0,$Htbl ++ le?vperm $Xl,$Xl,$Xl,$lemask ++ vxor $zero,$zero,$zero ++ ++ lvx_u $IN,0,$inp ++ addi $inp,$inp,16 ++ subi $len,$len,16 ++ le?vperm $IN,$IN,$IN,$lemask ++ vxor $IN,$IN,$Xl ++ b Loop ++ ++.align 5 ++Loop: ++ subic $len,$len,16 ++ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo ++ subfe. 
r0,r0,r0 # borrow?-1:0 ++ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi ++ and r0,r0,$len ++ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi ++ add $inp,$inp,r0 ++ ++ vpmsumd $t2,$Xl,$xC2 # 1st phase ++ ++ vsldoi $t0,$Xm,$zero,8 ++ vsldoi $t1,$zero,$Xm,8 ++ vxor $Xl,$Xl,$t0 ++ vxor $Xh,$Xh,$t1 ++ ++ vsldoi $Xl,$Xl,$Xl,8 ++ vxor $Xl,$Xl,$t2 ++ lvx_u $IN,0,$inp ++ addi $inp,$inp,16 ++ ++ vsldoi $t1,$Xl,$Xl,8 # 2nd phase ++ vpmsumd $Xl,$Xl,$xC2 ++ le?vperm $IN,$IN,$IN,$lemask ++ vxor $t1,$t1,$Xh ++ vxor $IN,$IN,$t1 ++ vxor $IN,$IN,$Xl ++ beq Loop # did $len-=16 borrow? ++ ++ vxor $Xl,$Xl,$t1 ++ le?vperm $Xl,$Xl,$Xl,$lemask ++ stvx_u $Xl,0,$Xip # write out Xi ++ ++ mtspr 256,$vrsave ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,4,0 ++ .long 0 ++.size .gcm_ghash_p8,.-.gcm_ghash_p8 ++ ++.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by " ++.align 2 ++___ ++ ++foreach (split("\n",$code)) { ++ if ($flavour =~ /le$/o) { # little-endian ++ s/le\?//o or ++ s/be\?/#be#/o; ++ } else { ++ s/le\?/#le#/o or ++ s/be\?//o; ++ } ++ print $_,"\n"; ++} ++ ++close STDOUT; # enforce flush +diff --git a/crypto/modes/gcm128.c b/crypto/modes/gcm128.c +index 0e6ff8b..6f8e7ee 100644 +--- a/crypto/modes/gcm128.c ++++ b/crypto/modes/gcm128.c +@@ -671,6 +671,21 @@ void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len + void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]); + void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); + # endif ++# elif defined(__sparc__) || defined(__sparc) ++# include "sparc_arch.h" ++# define GHASH_ASM_SPARC ++# define GCM_FUNCREF_4BIT ++extern unsigned int OPENSSL_sparcv9cap_P[]; ++void gcm_init_vis3(u128 Htable[16],const u64 Xi[2]); ++void gcm_gmult_vis3(u64 Xi[2],const u128 Htable[16]); ++void gcm_ghash_vis3(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); ++#elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC)) ++# include "ppc_arch.h" ++# define GHASH_ASM_PPC ++# define 
GCM_FUNCREF_4BIT ++void gcm_init_p8(u128 Htable[16],const u64 Xi[2]); ++void gcm_gmult_p8(u64 Xi[2],const u128 Htable[16]); ++void gcm_ghash_p8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); + # endif + #endif + +@@ -747,6 +762,16 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block) + ctx->gmult = gcm_gmult_4bit; + ctx->ghash = gcm_ghash_4bit; + } ++# elif defined(GHASH_ASM_PPC) ++ if (OPENSSL_ppccap_P & PPC_CRYPTO207) { ++ gcm_init_p8(ctx->Htable,ctx->H.u); ++ ctx->gmult = gcm_gmult_p8; ++ ctx->ghash = gcm_ghash_p8; ++ } else { ++ gcm_init_4bit(ctx->Htable,ctx->H.u); ++ ctx->gmult = gcm_gmult_4bit; ++ ctx->ghash = gcm_ghash_4bit; ++ } + # else + gcm_init_4bit(ctx->Htable,ctx->H.u); + # endif +diff --git a/crypto/perlasm/ppc-xlate.pl b/crypto/perlasm/ppc-xlate.pl +index a3edd98..f89e814 100755 +--- a/crypto/perlasm/ppc-xlate.pl ++++ b/crypto/perlasm/ppc-xlate.pl +@@ -27,7 +27,8 @@ my $globl = sub { + /osx/ && do { $name = "_$name"; + last; + }; +- /linux.*32/ && do { $ret .= ".globl $name\n"; ++ /linux.*(32|64le)/ ++ && do { $ret .= ".globl $name\n"; + $ret .= ".type $name,\@function"; + last; + }; +@@ -37,7 +38,6 @@ my $globl = sub { + $ret .= ".align 3\n"; + $ret .= "$name:\n"; + $ret .= ".quad .$name,.TOC.\@tocbase,0\n"; +- $ret .= ".size $name,24\n"; + $ret .= ".previous\n"; + + $name = ".$name"; +@@ -50,7 +50,9 @@ my $globl = sub { + $ret; + }; + my $text = sub { +- ($flavour =~ /aix/) ? ".csect" : ".text"; ++ my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text"; ++ $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/); ++ $ret; + }; + my $machine = sub { + my $junk = shift; +@@ -62,9 +64,12 @@ my $machine = sub { + ".machine $arch"; + }; + my $size = sub { +- if ($flavour =~ /linux.*32/) ++ if ($flavour =~ /linux/) + { shift; +- ".size " . 
join(",",@_); ++ my $name = shift; $name =~ s|^[\.\_]||; ++ my $ret = ".size $name,.-".($flavour=~/64$/?".":"").$name; ++ $ret .= "\n.size .$name,.-.$name" if ($flavour=~/64$/); ++ $ret; + } + else + { ""; } +@@ -77,6 +82,25 @@ my $asciz = sub { + else + { ""; } + }; ++my $quad = sub { ++ shift; ++ my @ret; ++ my ($hi,$lo); ++ for (@_) { ++ if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io) ++ { $hi=$1?"0x$1":"0"; $lo="0x$2"; } ++ elsif (/^([0-9]+)$/o) ++ { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl ++ else ++ { $hi=undef; $lo=$_; } ++ ++ if (defined($hi)) ++ { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); } ++ else ++ { push(@ret,".quad $lo"); } ++ } ++ join("\n",@ret); ++}; + + ################################################################ + # simplified mnemonics not handled by at least one assembler +@@ -122,6 +146,46 @@ my $extrdi = sub { + $b = ($b+$n)&63; $n = 64-$n; + " rldicl $ra,$rs,$b,$n"; + }; ++my $vmr = sub { ++ my ($f,$vx,$vy) = @_; ++ " vor $vx,$vy,$vy"; ++}; ++ ++# PowerISA 2.06 stuff ++sub vsxmem_op { ++ my ($f, $vrt, $ra, $rb, $op) = @_; ++ " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1); ++} ++# made-up unaligned memory reference AltiVec/VMX instructions ++my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x ++my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x ++my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx ++my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx ++my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x ++my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x ++ ++# PowerISA 2.07 stuff ++sub vcrypto_op { ++ my ($f, $vrt, $vra, $vrb, $op) = @_; ++ " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op; ++} ++my $vcipher = sub { vcrypto_op(@_, 1288); }; ++my $vcipherlast = sub { vcrypto_op(@_, 1289); }; ++my $vncipher = sub { vcrypto_op(@_, 1352); }; ++my $vncipherlast= sub { vcrypto_op(@_, 1353); }; ++my $vsbox = sub { vcrypto_op(@_, 0, 1480); }; ++my $vshasigmad = 
sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); }; ++my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); }; ++my $vpmsumb = sub { vcrypto_op(@_, 1032); }; ++my $vpmsumd = sub { vcrypto_op(@_, 1224); }; ++my $vpmsubh = sub { vcrypto_op(@_, 1096); }; ++my $vpmsumw = sub { vcrypto_op(@_, 1160); }; ++my $vaddudm = sub { vcrypto_op(@_, 192); }; ++ ++my $mtsle = sub { ++ my ($f, $arg) = @_; ++ " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2); ++}; + + while($line=<>) { + +@@ -138,7 +202,10 @@ while($line=<>) { + { + $line =~ s|(^[\.\w]+)\:\s*||; + my $label = $1; +- printf "%s:",($GLOBALS{$label} or $label) if ($label); ++ if ($label) { ++ printf "%s:",($GLOBALS{$label} or $label); ++ printf "\n.localentry\t$GLOBALS{$label},0" if ($GLOBALS{$label} && $flavour =~ /linux.*64le/); ++ } + } + + { +@@ -147,7 +214,7 @@ while($line=<>) { + my $mnemonic = $2; + my $f = $3; + my $opcode = eval("\$$mnemonic"); +- $line =~ s|\bc?[rf]([0-9]+)\b|$1|g if ($c ne "." and $flavour !~ /osx/); ++ $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." 
and $flavour !~ /osx/); + if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); } + elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; } + } +diff --git a/crypto/ppc_arch.h b/crypto/ppc_arch.h +new file mode 100644 +index 0000000..1192edf +--- /dev/null ++++ b/crypto/ppc_arch.h +@@ -0,0 +1,10 @@ ++#ifndef __PPC_ARCH_H__ ++#define __PPC_ARCH_H__ ++ ++extern unsigned int OPENSSL_ppccap_P; ++ ++#define PPC_FPU64 (1<<0) ++#define PPC_ALTIVEC (1<<1) ++#define PPC_CRYPTO207 (1<<2) ++ ++#endif +diff --git a/crypto/ppccap.c b/crypto/ppccap.c +index f71ba66..13c2ca5 100644 +--- a/crypto/ppccap.c ++++ b/crypto/ppccap.c +@@ -4,13 +4,15 @@ + #include + #include + #include ++#if defined(__linux) || defined(_AIX) ++#include ++#endif + #include + #include + +-#define PPC_FPU64 (1<<0) +-#define PPC_ALTIVEC (1<<1) ++#include "ppc_arch.h" + +-static int OPENSSL_ppccap_P = 0; ++unsigned int OPENSSL_ppccap_P = 0; + + static sigset_t all_masked; + +@@ -22,7 +24,7 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U + + if (sizeof(size_t)==4) + { +-#if (defined(__APPLE__) && defined(__MACH__)) ++#if 1 || (defined(__APPLE__) && defined(__MACH__)) + if (num>=8 && (num&3)==0 && (OPENSSL_ppccap_P&PPC_FPU64)) + return bn_mul_mont_fpu64(rp,ap,bp,np,n0,num); + #else +@@ -50,11 +52,28 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U + } + #endif + ++void sha256_block_p8(void *ctx,const void *inp,size_t len); ++void sha256_block_ppc(void *ctx,const void *inp,size_t len); ++void sha256_block_data_order(void *ctx,const void *inp,size_t len) ++ { ++ OPENSSL_ppccap_P&PPC_CRYPTO207? sha256_block_p8(ctx,inp,len): ++ sha256_block_ppc(ctx,inp,len); ++ } ++ ++void sha512_block_p8(void *ctx,const void *inp,size_t len); ++void sha512_block_ppc(void *ctx,const void *inp,size_t len); ++void sha512_block_data_order(void *ctx,const void *inp,size_t len) ++ { ++ OPENSSL_ppccap_P&PPC_CRYPTO207? 
sha512_block_p8(ctx,inp,len): ++ sha512_block_ppc(ctx,inp,len); ++ } ++ + static sigjmp_buf ill_jmp; + static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); } + + void OPENSSL_ppc64_probe(void); + void OPENSSL_altivec_probe(void); ++void OPENSSL_crypto207_probe(void); + + void OPENSSL_cpuid_setup(void) + { +@@ -85,12 +104,14 @@ void OPENSSL_cpuid_setup(void) + OPENSSL_ppccap_P = 0; + + #if defined(_AIX) +- if (sizeof(size_t)==4 ++ if (sizeof(size_t)==4) ++ { ++ struct utsname uts; + # if defined(_SC_AIX_KERNEL_BITMODE) +- && sysconf(_SC_AIX_KERNEL_BITMODE)!=64 ++ if (sysconf(_SC_AIX_KERNEL_BITMODE)!=64) return; + # endif +- ) +- return; ++ if (uname(&uts)!=0 || atoi(uts.version)<6) return; ++ } + #endif + + memset(&ill_act,0,sizeof(ill_act)); +@@ -102,6 +123,10 @@ void OPENSSL_cpuid_setup(void) + + if (sizeof(size_t)==4) + { ++#ifdef __linux ++ struct utsname uts; ++ if (uname(&uts)==0 && strcmp(uts.machine,"ppc64")==0) ++#endif + if (sigsetjmp(ill_jmp,1) == 0) + { + OPENSSL_ppc64_probe(); +@@ -119,6 +144,11 @@ void OPENSSL_cpuid_setup(void) + { + OPENSSL_altivec_probe(); + OPENSSL_ppccap_P |= PPC_ALTIVEC; ++ if (sigsetjmp(ill_jmp,1) == 0) ++ { ++ OPENSSL_crypto207_probe(); ++ OPENSSL_ppccap_P |= PPC_CRYPTO207; ++ } + } + + sigaction (SIGILL,&ill_oact,NULL); +diff --git a/crypto/ppccpuid.pl b/crypto/ppccpuid.pl +index 4ba736a..56cc851 100755 +--- a/crypto/ppccpuid.pl ++++ b/crypto/ppccpuid.pl +@@ -31,6 +31,7 @@ $code=<<___; + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ++.size .OPENSSL_ppc64_probe,.-.OPENSSL_ppc64_probe + + .globl .OPENSSL_altivec_probe + .align 4 +@@ -39,6 +40,17 @@ $code=<<___; + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ++.size .OPENSSL_altivec_probe,.-..OPENSSL_altivec_probe ++ ++.globl .OPENSSL_crypto207_probe ++.align 4 ++.OPENSSL_crypto207_probe: ++ lvx_u v0,0,r1 ++ vcipher v0,v0,v0 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++.size .OPENSSL_crypto207_probe,.-.OPENSSL_crypto207_probe + + .globl .OPENSSL_wipe_cpu + .align 4 +@@ -71,6 
+83,7 @@ $code=<<___; + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ++.size .OPENSSL_wipe_cpu,.-.OPENSSL_wipe_cpu + + .globl .OPENSSL_atomic_add + .align 4 +@@ -84,6 +97,7 @@ Ladd: lwarx r5,0,r3 + .long 0 + .byte 0,12,0x14,0,0,0,2,0 + .long 0 ++.size .OPENSSL_atomic_add,.-.OPENSSL_atomic_add + + .globl .OPENSSL_rdtsc + .align 4 +@@ -93,6 +107,7 @@ Ladd: lwarx r5,0,r3 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ++.size .OPENSSL_rdtsc,.-.OPENSSL_rdtsc + + .globl .OPENSSL_cleanse + .align 4 +@@ -125,7 +140,99 @@ Laligned: + .long 0 + .byte 0,12,0x14,0,0,0,2,0 + .long 0 ++.size .OPENSSL_cleanse,.-.OPENSSL_cleanse ++___ ++{ ++my ($out,$cnt,$max)=("r3","r4","r5"); ++my ($tick,$lasttick)=("r6","r7"); ++my ($diff,$lastdiff)=("r8","r9"); ++ ++$code.=<<___; ++.globl .OPENSSL_instrument_bus ++.align 4 ++.OPENSSL_instrument_bus: ++ mtctr $cnt ++ ++ mftb $lasttick # collect 1st tick ++ li $diff,0 ++ ++ dcbf 0,$out # flush cache line ++ lwarx $tick,0,$out # load and lock ++ add $tick,$tick,$diff ++ stwcx. $tick,0,$out ++ stwx $tick,0,$out ++ ++Loop: mftb $tick ++ sub $diff,$tick,$lasttick ++ mr $lasttick,$tick ++ dcbf 0,$out # flush cache line ++ lwarx $tick,0,$out # load and lock ++ add $tick,$tick,$diff ++ stwcx. $tick,0,$out ++ stwx $tick,0,$out ++ addi $out,$out,4 # ++$out ++ bdnz Loop ++ ++ mr r3,$cnt ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,2,0 ++ .long 0 ++.size .OPENSSL_instrument_bus,.-.OPENSSL_instrument_bus ++ ++.globl .OPENSSL_instrument_bus2 ++.align 4 ++.OPENSSL_instrument_bus2: ++ mr r0,$cnt ++ slwi $cnt,$cnt,2 ++ ++ mftb $lasttick # collect 1st tick ++ li $diff,0 ++ ++ dcbf 0,$out # flush cache line ++ lwarx $tick,0,$out # load and lock ++ add $tick,$tick,$diff ++ stwcx. $tick,0,$out ++ stwx $tick,0,$out ++ ++ mftb $tick # collect 1st diff ++ sub $diff,$tick,$lasttick ++ mr $lasttick,$tick ++ mr $lastdiff,$diff ++Loop2: ++ dcbf 0,$out # flush cache line ++ lwarx $tick,0,$out # load and lock ++ add $tick,$tick,$diff ++ stwcx. 
$tick,0,$out ++ stwx $tick,0,$out ++ ++ addic. $max,$max,-1 ++ beq Ldone2 ++ ++ mftb $tick ++ sub $diff,$tick,$lasttick ++ mr $lasttick,$tick ++ cmplw 7,$diff,$lastdiff ++ mr $lastdiff,$diff ++ ++ mfcr $tick # pull cr ++ not $tick,$tick # flip bits ++ rlwinm $tick,$tick,1,29,29 # isolate flipped eq bit and scale ++ ++ sub. $cnt,$cnt,$tick # conditional --$cnt ++ add $out,$out,$tick # conditional ++$out ++ bne Loop2 ++ ++Ldone2: ++ srwi $cnt,$cnt,2 ++ sub r3,r0,$cnt ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,3,0 ++ .long 0 ++.size .OPENSSL_instrument_bus2,.-.OPENSSL_instrument_bus2 + ___ ++} + + $code =~ s/\`([^\`]*)\`/eval $1/gem; + print $code; +diff --git a/crypto/sha/Makefile b/crypto/sha/Makefile +index 6d191d3..58c6705 100644 +--- a/crypto/sha/Makefile ++++ b/crypto/sha/Makefile +@@ -73,6 +73,8 @@ sha512-sparcv9.s:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAG + sha1-ppc.s: asm/sha1-ppc.pl; $(PERL) asm/sha1-ppc.pl $(PERLASM_SCHEME) $@ + sha256-ppc.s: asm/sha512-ppc.pl; $(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@ + sha512-ppc.s: asm/sha512-ppc.pl; $(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@ ++sha256p8-ppc.s: asm/sha512p8-ppc.pl; $(PERL) asm/sha512p8-ppc.pl $(PERLASM_SCHEME) $@ ++sha512p8-ppc.s: asm/sha512p8-ppc.pl; $(PERL) asm/sha512p8-ppc.pl $(PERLASM_SCHEME) $@ + + sha1-parisc.s: asm/sha1-parisc.pl; $(PERL) asm/sha1-parisc.pl $(PERLASM_SCHEME) $@ + sha256-parisc.s:asm/sha512-parisc.pl; $(PERL) asm/sha512-parisc.pl $(PERLASM_SCHEME) $@ +diff --git a/crypto/sha/asm/sha1-ppc.pl b/crypto/sha/asm/sha1-ppc.pl +index 2140dd2..df59896 100755 +--- a/crypto/sha/asm/sha1-ppc.pl ++++ b/crypto/sha/asm/sha1-ppc.pl +@@ -9,8 +9,7 @@ + + # I let hardware handle unaligned input(*), except on page boundaries + # (see below for details). Otherwise straightforward implementation +-# with X vector in register bank. The module is big-endian [which is +-# not big deal as there're no little-endian targets left around]. ++# with X vector in register bank. 
+ # + # (*) this means that this module is inappropriate for PPC403? Does + # anybody know if pre-POWER3 can sustain unaligned load? +@@ -38,6 +37,10 @@ if ($flavour =~ /64/) { + $PUSH ="stw"; + } else { die "nonsense $flavour"; } + ++# Define endianness based on flavour ++# i.e.: linux64le ++$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; ++ + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +@@ -68,14 +71,28 @@ $T ="r12"; + @X=("r16","r17","r18","r19","r20","r21","r22","r23", + "r24","r25","r26","r27","r28","r29","r30","r31"); + ++sub loadbe { ++my ($dst, $src, $temp_reg) = @_; ++$code.=<<___ if (!$LITTLE_ENDIAN); ++ lwz $dst,$src ++___ ++$code.=<<___ if ($LITTLE_ENDIAN); ++ lwz $temp_reg,$src ++ rotlwi $dst,$temp_reg,8 ++ rlwimi $dst,$temp_reg,24,0,7 ++ rlwimi $dst,$temp_reg,24,16,23 ++___ ++} ++ + sub BODY_00_19 { + my ($i,$a,$b,$c,$d,$e,$f)=@_; + my $j=$i+1; +-$code.=<<___ if ($i==0); +- lwz @X[$i],`$i*4`($inp) +-___ ++ ++ # Since the last value of $f is discarded, we can use ++ # it as a temp reg to swap byte-order when needed. 
++ loadbe("@X[$i]","`$i*4`($inp)",$f) if ($i==0); ++ loadbe("@X[$j]","`$j*4`($inp)",$f) if ($i<15); + $code.=<<___ if ($i<15); +- lwz @X[$j],`$j*4`($inp) + add $f,$K,$e + rotlwi $e,$a,5 + add $f,$f,@X[$i] +@@ -108,31 +125,31 @@ my ($i,$a,$b,$c,$d,$e,$f)=@_; + my $j=$i+1; + $code.=<<___ if ($i<79); + add $f,$K,$e ++ xor $t0,$b,$d + rotlwi $e,$a,5 + xor @X[$j%16],@X[$j%16],@X[($j+2)%16] + add $f,$f,@X[$i%16] +- xor $t0,$b,$c ++ xor $t0,$t0,$c + xor @X[$j%16],@X[$j%16],@X[($j+8)%16] +- add $f,$f,$e ++ add $f,$f,$t0 + rotlwi $b,$b,30 +- xor $t0,$t0,$d + xor @X[$j%16],@X[$j%16],@X[($j+13)%16] +- add $f,$f,$t0 ++ add $f,$f,$e + rotlwi @X[$j%16],@X[$j%16],1 + ___ + $code.=<<___ if ($i==79); + add $f,$K,$e ++ xor $t0,$b,$d + rotlwi $e,$a,5 + lwz r16,0($ctx) + add $f,$f,@X[$i%16] +- xor $t0,$b,$c ++ xor $t0,$t0,$c + lwz r17,4($ctx) +- add $f,$f,$e ++ add $f,$f,$t0 + rotlwi $b,$b,30 + lwz r18,8($ctx) +- xor $t0,$t0,$d + lwz r19,12($ctx) +- add $f,$f,$t0 ++ add $f,$f,$e + lwz r20,16($ctx) + ___ + } +@@ -316,6 +333,7 @@ $code.=<<___; + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ++.size .sha1_block_data_order,.-.sha1_block_data_order + ___ + $code.=<<___; + .asciz "SHA1 block transform for PPC, CRYPTOGAMS by " +diff --git a/crypto/sha/asm/sha512-ppc.pl b/crypto/sha/asm/sha512-ppc.pl +index 6b44a68..734f3c1 100755 +--- a/crypto/sha/asm/sha512-ppc.pl ++++ b/crypto/sha/asm/sha512-ppc.pl +@@ -1,7 +1,7 @@ + #!/usr/bin/env perl + + # ==================================================================== +-# Written by Andy Polyakov for the OpenSSL ++# Written by Andy Polyakov for the OpenSSL + # project. The module is, however, dual licensed under OpenSSL and + # CRYPTOGAMS licenses depending on where you obtain it. For further + # details see http://www.openssl.org/~appro/cryptogams/. +@@ -9,8 +9,7 @@ + + # I let hardware handle unaligned input, except on page boundaries + # (see below for details). Otherwise straightforward implementation +-# with X vector in register bank. 
The module is big-endian [which is +-# not big deal as there're no little-endian targets left around]. ++# with X vector in register bank. + + # sha256 | sha512 + # -m64 -m32 | -m64 -m32 +@@ -56,6 +55,8 @@ if ($flavour =~ /64/) { + $PUSH="stw"; + } else { die "nonsense $flavour"; } + ++$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; ++ + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +@@ -64,7 +65,7 @@ die "can't locate ppc-xlate.pl"; + open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!"; + + if ($output =~ /512/) { +- $func="sha512_block_data_order"; ++ $func="sha512_block_ppc"; + $SZ=8; + @Sigma0=(28,34,39); + @Sigma1=(14,18,41); +@@ -76,7 +77,7 @@ if ($output =~ /512/) { + $ROR="rotrdi"; + $SHR="srdi"; + } else { +- $func="sha256_block_data_order"; ++ $func="sha256_block_ppc"; + $SZ=4; + @Sigma0=( 2,13,22); + @Sigma1=( 6,11,25); +@@ -110,7 +111,7 @@ $B ="r9"; + $C ="r10"; + $D ="r11"; + $E ="r12"; +-$F ="r13"; $F="r2" if ($SIZE_T==8);# reassigned to exempt TLS pointer ++$F =$t1; $t1 = "r0"; # stay away from "r13"; + $G ="r14"; + $H ="r15"; + +@@ -118,24 +119,23 @@ $H ="r15"; + @X=("r16","r17","r18","r19","r20","r21","r22","r23", + "r24","r25","r26","r27","r28","r29","r30","r31"); + +-$inp="r31"; # reassigned $inp! aliases with @X[15] ++$inp="r31" if($SZ==4 || $SIZE_T==8); # reassigned $inp! 
aliases with @X[15] + + sub ROUND_00_15 { + my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; + $code.=<<___; +- $LD $T,`$i*$SZ`($Tbl) + $ROR $a0,$e,$Sigma1[0] + $ROR $a1,$e,$Sigma1[1] + and $t0,$f,$e +- andc $t1,$g,$e +- add $T,$T,$h + xor $a0,$a0,$a1 ++ add $h,$h,$t1 ++ andc $t1,$g,$e + $ROR $a1,$a1,`$Sigma1[2]-$Sigma1[1]` + or $t0,$t0,$t1 ; Ch(e,f,g) +- add $T,$T,@X[$i] ++ add $h,$h,@X[$i%16] + xor $a0,$a0,$a1 ; Sigma1(e) +- add $T,$T,$t0 +- add $T,$T,$a0 ++ add $h,$h,$t0 ++ add $h,$h,$a0 + + $ROR $a0,$a,$Sigma0[0] + $ROR $a1,$a,$Sigma0[1] +@@ -146,9 +146,14 @@ $code.=<<___; + xor $t0,$t0,$t1 + and $t1,$b,$c + xor $a0,$a0,$a1 ; Sigma0(a) +- add $d,$d,$T ++ add $d,$d,$h + xor $t0,$t0,$t1 ; Maj(a,b,c) +- add $h,$T,$a0 ++___ ++$code.=<<___ if ($i<15); ++ $LD $t1,`($i+1)*$SZ`($Tbl) ++___ ++$code.=<<___; ++ add $h,$h,$a0 + add $h,$h,$t0 + + ___ +@@ -169,10 +174,11 @@ $code.=<<___; + add @X[$i],@X[$i],@X[($i+9)%16] + xor $a0,$a0,$a1 ; sigma0(X[(i+1)&0x0f]) + xor $t0,$t0,$t1 ; sigma1(X[(i+14)&0x0f]) ++ $LD $t1,`$i*$SZ`($Tbl) + add @X[$i],@X[$i],$a0 + add @X[$i],@X[$i],$t0 + ___ +-&ROUND_00_15($i,$a,$b,$c,$d,$e,$f,$g,$h); ++&ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h); + } + + $code=<<___; +@@ -188,8 +194,6 @@ $func: + + $PUSH $ctx,`$FRAME-$SIZE_T*22`($sp) + +- $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) +- $PUSH r13,`$FRAME-$SIZE_T*19`($sp) + $PUSH r14,`$FRAME-$SIZE_T*18`($sp) + $PUSH r15,`$FRAME-$SIZE_T*17`($sp) + $PUSH r16,`$FRAME-$SIZE_T*16`($sp) +@@ -209,7 +213,10 @@ $func: + $PUSH r30,`$FRAME-$SIZE_T*2`($sp) + $PUSH r31,`$FRAME-$SIZE_T*1`($sp) + $PUSH r0,`$FRAME+$LRSAVE`($sp) ++___ + ++if ($SZ==4 || $SIZE_T==8) { ++$code.=<<___; + $LD $A,`0*$SZ`($ctx) + mr $inp,r4 ; incarnate $inp + $LD $B,`1*$SZ`($ctx) +@@ -219,7 +226,16 @@ $func: + $LD $F,`5*$SZ`($ctx) + $LD $G,`6*$SZ`($ctx) + $LD $H,`7*$SZ`($ctx) ++___ ++} else { ++ for ($i=16;$i<32;$i++) { ++ $code.=<<___; ++ lwz r$i,`$LITTLE_ENDIAN^(4*($i-16))`($ctx) ++___ ++ } ++} + ++$code.=<<___; + bl LPICmeup + LPICedup: + andi. 
r0,$inp,3 +@@ -255,6 +271,9 @@ Lunaligned: + Lcross_page: + li $t1,`16*$SZ/4` + mtctr $t1 ++___ ++if ($SZ==4 || $SIZE_T==8) { ++$code.=<<___; + addi r20,$sp,$LOCALS ; aligned spot below the frame + Lmemcpy: + lbz r16,0($inp) +@@ -268,7 +287,26 @@ Lmemcpy: + stb r19,3(r20) + addi r20,r20,4 + bdnz Lmemcpy ++___ ++} else { ++$code.=<<___; ++ addi r12,$sp,$LOCALS ; aligned spot below the frame ++Lmemcpy: ++ lbz r8,0($inp) ++ lbz r9,1($inp) ++ lbz r10,2($inp) ++ lbz r11,3($inp) ++ addi $inp,$inp,4 ++ stb r8,0(r12) ++ stb r9,1(r12) ++ stb r10,2(r12) ++ stb r11,3(r12) ++ addi r12,r12,4 ++ bdnz Lmemcpy ++___ ++} + ++$code.=<<___; + $PUSH $inp,`$FRAME-$SIZE_T*26`($sp) ; save real inp + addi $t1,$sp,`$LOCALS+16*$SZ` ; fictitious end pointer + addi $inp,$sp,$LOCALS ; fictitious inp pointer +@@ -283,8 +321,6 @@ Lmemcpy: + + Ldone: + $POP r0,`$FRAME+$LRSAVE`($sp) +- $POP $toc,`$FRAME-$SIZE_T*20`($sp) +- $POP r13,`$FRAME-$SIZE_T*19`($sp) + $POP r14,`$FRAME-$SIZE_T*18`($sp) + $POP r15,`$FRAME-$SIZE_T*17`($sp) + $POP r16,`$FRAME-$SIZE_T*16`($sp) +@@ -309,27 +345,48 @@ Ldone: + .long 0 + .byte 0,12,4,1,0x80,18,3,0 + .long 0 ++___ + ++if ($SZ==4 || $SIZE_T==8) { ++$code.=<<___; + .align 4 + Lsha2_block_private: ++ $LD $t1,0($Tbl) + ___ + for($i=0;$i<16;$i++) { +-$code.=<<___ if ($SZ==4); ++$code.=<<___ if ($SZ==4 && !$LITTLE_ENDIAN); + lwz @X[$i],`$i*$SZ`($inp) + ___ ++$code.=<<___ if ($SZ==4 && $LITTLE_ENDIAN); ++ lwz $a0,`$i*$SZ`($inp) ++ rotlwi @X[$i],$a0,8 ++ rlwimi @X[$i],$a0,24,0,7 ++ rlwimi @X[$i],$a0,24,16,23 ++___ + # 64-bit loads are split to 2x32-bit ones, as CPU can't handle + # unaligned 64-bit loads, only 32-bit ones... 
+-$code.=<<___ if ($SZ==8); ++$code.=<<___ if ($SZ==8 && !$LITTLE_ENDIAN); + lwz $t0,`$i*$SZ`($inp) + lwz @X[$i],`$i*$SZ+4`($inp) + insrdi @X[$i],$t0,32,0 + ___ ++$code.=<<___ if ($SZ==8 && $LITTLE_ENDIAN); ++ lwz $a0,`$i*$SZ`($inp) ++ lwz $a1,`$i*$SZ+4`($inp) ++ rotlwi $t0,$a0,8 ++ rotlwi @X[$i],$a1,8 ++ rlwimi $t0,$a0,24,0,7 ++ rlwimi @X[$i],$a1,24,0,7 ++ rlwimi $t0,$a0,24,16,23 ++ rlwimi @X[$i],$a1,24,16,23 ++ insrdi @X[$i],$t0,32,0 ++___ + &ROUND_00_15($i,@V); + unshift(@V,pop(@V)); + } + $code.=<<___; +- li $T,`$rounds/16-1` +- mtctr $T ++ li $t0,`$rounds/16-1` ++ mtctr $t0 + .align 4 + Lrounds: + addi $Tbl,$Tbl,`16*$SZ` +@@ -377,7 +434,282 @@ $code.=<<___; + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ++.size $func,.-$func ++___ ++} else { ++######################################################################## ++# SHA512 for PPC32, X vector is off-loaded to stack... ++# ++# | sha512 ++# | -m32 ++# ----------------------+----------------------- ++# PPC74x0,gcc-4.0.1 | +48% ++# POWER6,gcc-4.4.6 | +124%(*) ++# POWER7,gcc-4.4.6 | +79%(*) ++# e300,gcc-4.1.0 | +167% ++# ++# (*) ~1/3 of -m64 result [and ~20% better than -m32 code generated ++# by xlc-12.1] ++ ++my $XOFF=$LOCALS; ++ ++my @V=map("r$_",(16..31)); # A..H ++ ++my ($s0,$s1,$t0,$t1,$t2,$t3,$a0,$a1,$a2,$a3)=map("r$_",(0,5,6,8..12,14,15)); ++my ($x0,$x1)=("r3","r4"); # zaps $ctx and $inp ++ ++sub ROUND_00_15_ppc32 { ++my ($i, $ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo, ++ $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo)=@_; ++ ++$code.=<<___; ++ lwz $t2,`$SZ*($i%16)+($LITTLE_ENDIAN^4)`($Tbl) ++ xor $a0,$flo,$glo ++ lwz $t3,`$SZ*($i%16)+($LITTLE_ENDIAN^0)`($Tbl) ++ xor $a1,$fhi,$ghi ++ addc $hlo,$hlo,$t0 ; h+=x[i] ++ stw $t0,`$XOFF+0+$SZ*($i%16)`($sp) ; save x[i] ++ ++ srwi $s0,$elo,$Sigma1[0] ++ srwi $s1,$ehi,$Sigma1[0] ++ and $a0,$a0,$elo ++ adde $hhi,$hhi,$t1 ++ and $a1,$a1,$ehi ++ stw $t1,`$XOFF+4+$SZ*($i%16)`($sp) ++ srwi $t0,$elo,$Sigma1[1] ++ srwi $t1,$ehi,$Sigma1[1] ++ addc $hlo,$hlo,$t2 ; h+=K512[i] ++ insrwi 
$s0,$ehi,$Sigma1[0],0 ++ insrwi $s1,$elo,$Sigma1[0],0 ++ xor $a0,$a0,$glo ; Ch(e,f,g) ++ adde $hhi,$hhi,$t3 ++ xor $a1,$a1,$ghi ++ insrwi $t0,$ehi,$Sigma1[1],0 ++ insrwi $t1,$elo,$Sigma1[1],0 ++ addc $hlo,$hlo,$a0 ; h+=Ch(e,f,g) ++ srwi $t2,$ehi,$Sigma1[2]-32 ++ srwi $t3,$elo,$Sigma1[2]-32 ++ xor $s0,$s0,$t0 ++ xor $s1,$s1,$t1 ++ insrwi $t2,$elo,$Sigma1[2]-32,0 ++ insrwi $t3,$ehi,$Sigma1[2]-32,0 ++ xor $a0,$alo,$blo ; a^b, b^c in next round ++ adde $hhi,$hhi,$a1 ++ xor $a1,$ahi,$bhi ++ xor $s0,$s0,$t2 ; Sigma1(e) ++ xor $s1,$s1,$t3 ++ ++ srwi $t0,$alo,$Sigma0[0] ++ and $a2,$a2,$a0 ++ addc $hlo,$hlo,$s0 ; h+=Sigma1(e) ++ and $a3,$a3,$a1 ++ srwi $t1,$ahi,$Sigma0[0] ++ srwi $s0,$ahi,$Sigma0[1]-32 ++ adde $hhi,$hhi,$s1 ++ srwi $s1,$alo,$Sigma0[1]-32 ++ insrwi $t0,$ahi,$Sigma0[0],0 ++ insrwi $t1,$alo,$Sigma0[0],0 ++ xor $a2,$a2,$blo ; Maj(a,b,c) ++ addc $dlo,$dlo,$hlo ; d+=h ++ xor $a3,$a3,$bhi ++ insrwi $s0,$alo,$Sigma0[1]-32,0 ++ insrwi $s1,$ahi,$Sigma0[1]-32,0 ++ adde $dhi,$dhi,$hhi ++ srwi $t2,$ahi,$Sigma0[2]-32 ++ srwi $t3,$alo,$Sigma0[2]-32 ++ xor $s0,$s0,$t0 ++ addc $hlo,$hlo,$a2 ; h+=Maj(a,b,c) ++ xor $s1,$s1,$t1 ++ insrwi $t2,$alo,$Sigma0[2]-32,0 ++ insrwi $t3,$ahi,$Sigma0[2]-32,0 ++ adde $hhi,$hhi,$a3 ++___ ++$code.=<<___ if ($i>=15); ++ lwz $t0,`$XOFF+0+$SZ*(($i+2)%16)`($sp) ++ lwz $t1,`$XOFF+4+$SZ*(($i+2)%16)`($sp) ++___ ++$code.=<<___ if ($i<15 && !$LITTLE_ENDIAN); ++ lwz $t1,`$SZ*($i+1)+0`($inp) ++ lwz $t0,`$SZ*($i+1)+4`($inp) + ___ ++$code.=<<___ if ($i<15 && $LITTLE_ENDIAN); ++ lwz $a2,`$SZ*($i+1)+0`($inp) ++ lwz $a3,`$SZ*($i+1)+4`($inp) ++ rotlwi $t1,$a2,8 ++ rotlwi $t0,$a3,8 ++ rlwimi $t1,$a2,24,0,7 ++ rlwimi $t0,$a3,24,0,7 ++ rlwimi $t1,$a2,24,16,23 ++ rlwimi $t0,$a3,24,16,23 ++___ ++$code.=<<___; ++ xor $s0,$s0,$t2 ; Sigma0(a) ++ xor $s1,$s1,$t3 ++ addc $hlo,$hlo,$s0 ; h+=Sigma0(a) ++ adde $hhi,$hhi,$s1 ++___ ++$code.=<<___ if ($i==15); ++ lwz $x0,`$XOFF+0+$SZ*(($i+1)%16)`($sp) ++ lwz $x1,`$XOFF+4+$SZ*(($i+1)%16)`($sp) ++___ ++} ++sub 
ROUND_16_xx_ppc32 { ++my ($i, $ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo, ++ $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo)=@_; ++ ++$code.=<<___; ++ srwi $s0,$t0,$sigma0[0] ++ srwi $s1,$t1,$sigma0[0] ++ srwi $t2,$t0,$sigma0[1] ++ srwi $t3,$t1,$sigma0[1] ++ insrwi $s0,$t1,$sigma0[0],0 ++ insrwi $s1,$t0,$sigma0[0],0 ++ srwi $a0,$t0,$sigma0[2] ++ insrwi $t2,$t1,$sigma0[1],0 ++ insrwi $t3,$t0,$sigma0[1],0 ++ insrwi $a0,$t1,$sigma0[2],0 ++ xor $s0,$s0,$t2 ++ lwz $t2,`$XOFF+0+$SZ*(($i+14)%16)`($sp) ++ srwi $a1,$t1,$sigma0[2] ++ xor $s1,$s1,$t3 ++ lwz $t3,`$XOFF+4+$SZ*(($i+14)%16)`($sp) ++ xor $a0,$a0,$s0 ++ srwi $s0,$t2,$sigma1[0] ++ xor $a1,$a1,$s1 ++ srwi $s1,$t3,$sigma1[0] ++ addc $x0,$x0,$a0 ; x[i]+=sigma0(x[i+1]) ++ srwi $a0,$t3,$sigma1[1]-32 ++ insrwi $s0,$t3,$sigma1[0],0 ++ insrwi $s1,$t2,$sigma1[0],0 ++ adde $x1,$x1,$a1 ++ srwi $a1,$t2,$sigma1[1]-32 ++ ++ insrwi $a0,$t2,$sigma1[1]-32,0 ++ srwi $t2,$t2,$sigma1[2] ++ insrwi $a1,$t3,$sigma1[1]-32,0 ++ insrwi $t2,$t3,$sigma1[2],0 ++ xor $s0,$s0,$a0 ++ lwz $a0,`$XOFF+0+$SZ*(($i+9)%16)`($sp) ++ srwi $t3,$t3,$sigma1[2] ++ xor $s1,$s1,$a1 ++ lwz $a1,`$XOFF+4+$SZ*(($i+9)%16)`($sp) ++ xor $s0,$s0,$t2 ++ addc $x0,$x0,$a0 ; x[i]+=x[i+9] ++ xor $s1,$s1,$t3 ++ adde $x1,$x1,$a1 ++ addc $x0,$x0,$s0 ; x[i]+=sigma1(x[i+14]) ++ adde $x1,$x1,$s1 ++___ ++ ($t0,$t1,$x0,$x1) = ($x0,$x1,$t0,$t1); ++ &ROUND_00_15_ppc32(@_); ++} ++ ++$code.=<<___; ++.align 4 ++Lsha2_block_private: ++___ ++$code.=<<___ if (!$LITTLE_ENDIAN); ++ lwz $t1,0($inp) ++ xor $a2,@V[3],@V[5] ; B^C, magic seed ++ lwz $t0,4($inp) ++ xor $a3,@V[2],@V[4] ++___ ++$code.=<<___ if ($LITTLE_ENDIAN); ++ lwz $a1,0($inp) ++ xor $a2,@V[3],@V[5] ; B^C, magic seed ++ lwz $a0,4($inp) ++ xor $a3,@V[2],@V[4] ++ rotlwi $t1,$a1,8 ++ rotlwi $t0,$a0,8 ++ rlwimi $t1,$a1,24,0,7 ++ rlwimi $t0,$a0,24,0,7 ++ rlwimi $t1,$a1,24,16,23 ++ rlwimi $t0,$a0,24,16,23 ++___ ++for($i=0;$i<16;$i++) { ++ &ROUND_00_15_ppc32($i,@V); ++ unshift(@V,pop(@V)); unshift(@V,pop(@V)); ++ ($a0,$a1,$a2,$a3) = 
($a2,$a3,$a0,$a1); ++} ++$code.=<<___; ++ li $a0,`$rounds/16-1` ++ mtctr $a0 ++.align 4 ++Lrounds: ++ addi $Tbl,$Tbl,`16*$SZ` ++___ ++for(;$i<32;$i++) { ++ &ROUND_16_xx_ppc32($i,@V); ++ unshift(@V,pop(@V)); unshift(@V,pop(@V)); ++ ($a0,$a1,$a2,$a3) = ($a2,$a3,$a0,$a1); ++} ++$code.=<<___; ++ bdnz- Lrounds ++ ++ $POP $ctx,`$FRAME-$SIZE_T*22`($sp) ++ $POP $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer ++ $POP $num,`$FRAME-$SIZE_T*24`($sp) ; end pointer ++ subi $Tbl,$Tbl,`($rounds-16)*$SZ` ; rewind Tbl ++ ++ lwz $t0,`$LITTLE_ENDIAN^0`($ctx) ++ lwz $t1,`$LITTLE_ENDIAN^4`($ctx) ++ lwz $t2,`$LITTLE_ENDIAN^8`($ctx) ++ lwz $t3,`$LITTLE_ENDIAN^12`($ctx) ++ lwz $a0,`$LITTLE_ENDIAN^16`($ctx) ++ lwz $a1,`$LITTLE_ENDIAN^20`($ctx) ++ lwz $a2,`$LITTLE_ENDIAN^24`($ctx) ++ addc @V[1],@V[1],$t1 ++ lwz $a3,`$LITTLE_ENDIAN^28`($ctx) ++ adde @V[0],@V[0],$t0 ++ lwz $t0,`$LITTLE_ENDIAN^32`($ctx) ++ addc @V[3],@V[3],$t3 ++ lwz $t1,`$LITTLE_ENDIAN^36`($ctx) ++ adde @V[2],@V[2],$t2 ++ lwz $t2,`$LITTLE_ENDIAN^40`($ctx) ++ addc @V[5],@V[5],$a1 ++ lwz $t3,`$LITTLE_ENDIAN^44`($ctx) ++ adde @V[4],@V[4],$a0 ++ lwz $a0,`$LITTLE_ENDIAN^48`($ctx) ++ addc @V[7],@V[7],$a3 ++ lwz $a1,`$LITTLE_ENDIAN^52`($ctx) ++ adde @V[6],@V[6],$a2 ++ lwz $a2,`$LITTLE_ENDIAN^56`($ctx) ++ addc @V[9],@V[9],$t1 ++ lwz $a3,`$LITTLE_ENDIAN^60`($ctx) ++ adde @V[8],@V[8],$t0 ++ stw @V[0],`$LITTLE_ENDIAN^0`($ctx) ++ stw @V[1],`$LITTLE_ENDIAN^4`($ctx) ++ addc @V[11],@V[11],$t3 ++ stw @V[2],`$LITTLE_ENDIAN^8`($ctx) ++ stw @V[3],`$LITTLE_ENDIAN^12`($ctx) ++ adde @V[10],@V[10],$t2 ++ stw @V[4],`$LITTLE_ENDIAN^16`($ctx) ++ stw @V[5],`$LITTLE_ENDIAN^20`($ctx) ++ addc @V[13],@V[13],$a1 ++ stw @V[6],`$LITTLE_ENDIAN^24`($ctx) ++ stw @V[7],`$LITTLE_ENDIAN^28`($ctx) ++ adde @V[12],@V[12],$a0 ++ stw @V[8],`$LITTLE_ENDIAN^32`($ctx) ++ stw @V[9],`$LITTLE_ENDIAN^36`($ctx) ++ addc @V[15],@V[15],$a3 ++ stw @V[10],`$LITTLE_ENDIAN^40`($ctx) ++ stw @V[11],`$LITTLE_ENDIAN^44`($ctx) ++ adde @V[14],@V[14],$a2 ++ stw @V[12],`$LITTLE_ENDIAN^48`($ctx) ++ 
stw @V[13],`$LITTLE_ENDIAN^52`($ctx) ++ stw @V[14],`$LITTLE_ENDIAN^56`($ctx) ++ stw @V[15],`$LITTLE_ENDIAN^60`($ctx) ++ ++ addi $inp,$inp,`16*$SZ` ; advance inp ++ $PUSH $inp,`$FRAME-$SIZE_T*23`($sp) ++ $UCMP $inp,$num ++ bne Lsha2_block_private ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++.size $func,.-$func ++___ ++} + + # Ugly hack here, because PPC assembler syntax seem to vary too + # much from platforms to platform... +@@ -395,46 +727,46 @@ LPICmeup: + .space `64-9*4` + ___ + $code.=<<___ if ($SZ==8); +- .long 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd +- .long 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc +- .long 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019 +- .long 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118 +- .long 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe +- .long 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2 +- .long 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1 +- .long 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694 +- .long 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3 +- .long 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65 +- .long 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483 +- .long 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5 +- .long 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210 +- .long 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4 +- .long 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725 +- .long 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70 +- .long 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926 +- .long 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df +- .long 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8 +- .long 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b +- .long 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001 +- .long 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30 +- .long 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910 +- .long 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8 +- .long 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53 +- .long 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8 +- .long 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb +- .long 
0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3 +- .long 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60 +- .long 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec +- .long 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9 +- .long 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b +- .long 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207 +- .long 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178 +- .long 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6 +- .long 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b +- .long 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493 +- .long 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c +- .long 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a +- .long 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817 ++ .quad 0x428a2f98d728ae22,0x7137449123ef65cd ++ .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc ++ .quad 0x3956c25bf348b538,0x59f111f1b605d019 ++ .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 ++ .quad 0xd807aa98a3030242,0x12835b0145706fbe ++ .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 ++ .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 ++ .quad 0x9bdc06a725c71235,0xc19bf174cf692694 ++ .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 ++ .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 ++ .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 ++ .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 ++ .quad 0x983e5152ee66dfab,0xa831c66d2db43210 ++ .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 ++ .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 ++ .quad 0x06ca6351e003826f,0x142929670a0e6e70 ++ .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 ++ .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df ++ .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 ++ .quad 0x81c2c92e47edaee6,0x92722c851482353b ++ .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 ++ .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 ++ .quad 0xd192e819d6ef5218,0xd69906245565a910 ++ .quad 0xf40e35855771202a,0x106aa07032bbd1b8 ++ .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 ++ .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 ++ .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb ++ .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 ++ 
.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 ++ .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec ++ .quad 0x90befffa23631e28,0xa4506cebde82bde9 ++ .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b ++ .quad 0xca273eceea26619c,0xd186b8c721c0c207 ++ .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 ++ .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 ++ .quad 0x113f9804bef90dae,0x1b710b35131c471b ++ .quad 0x28db77f523047d84,0x32caab7b40c72493 ++ .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c ++ .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a ++ .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + ___ + $code.=<<___ if ($SZ==4); + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +diff --git a/crypto/sha/asm/sha512p8-ppc.pl b/crypto/sha/asm/sha512p8-ppc.pl +new file mode 100755 +index 0000000..a316b31 +--- /dev/null ++++ b/crypto/sha/asm/sha512p8-ppc.pl +@@ -0,0 +1,423 @@ ++#!/usr/bin/env perl ++ ++# ==================================================================== ++# Written by Andy Polyakov for the OpenSSL ++# project. The module is, however, dual licensed under OpenSSL and ++# CRYPTOGAMS licenses depending on where you obtain it. For further ++# details see http://www.openssl.org/~appro/cryptogams/. ++# ==================================================================== ++ ++# SHA256/512 for PowerISA v2.07. ++# ++# Accurate performance measurements are problematic, because it's ++# always virtualized setup with possibly throttled processor. ++# Relative comparison is therefore more informative. This module is ++# ~60% faster than integer-only sha512-ppc.pl. To anchor to something ++# else, SHA256 is 24% slower than sha1-ppc.pl and 2.5x slower than ++# hardware-assisted aes-128-cbc encrypt. SHA512 is 20% faster than ++# sha1-ppc.pl and 1.6x slower than aes-128-cbc. Another interesting ++# result is degree of computational resources' utilization. 
POWER8 is ++# "massively multi-threaded chip" and difference between single- and ++# maximum multi-process benchmark results tells that utilization is ++# whopping 94%. For sha512-ppc.pl we get [not unimpressive] 84% and ++# for sha1-ppc.pl - 73%. 100% means that multi-process result equals ++# to single-process one, given that all threads end up on the same ++# physical core. ++ ++$flavour=shift; ++$output =shift; ++ ++if ($flavour =~ /64/) { ++ $SIZE_T=8; ++ $LRSAVE=2*$SIZE_T; ++ $STU="stdu"; ++ $POP="ld"; ++ $PUSH="std"; ++} elsif ($flavour =~ /32/) { ++ $SIZE_T=4; ++ $LRSAVE=$SIZE_T; ++ $STU="stwu"; ++ $POP="lwz"; ++ $PUSH="stw"; ++} else { die "nonsense $flavour"; } ++ ++$LENDIAN=($flavour=~/le/); ++ ++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ++( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or ++( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or ++die "can't locate ppc-xlate.pl"; ++ ++open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!"; ++ ++if ($output =~ /512/) { ++ $bits=512; ++ $SZ=8; ++ $sz="d"; ++ $rounds=80; ++} else { ++ $bits=256; ++ $SZ=4; ++ $sz="w"; ++ $rounds=64; ++} ++ ++$func="sha${bits}_block_p8"; ++$FRAME=8*$SIZE_T; ++ ++$sp ="r1"; ++$toc="r2"; ++$ctx="r3"; ++$inp="r4"; ++$num="r5"; ++$Tbl="r6"; ++$idx="r7"; ++$lrsave="r8"; ++$offload="r11"; ++$vrsave="r12"; ++($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,10,26..31)); ++ ++@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("v$_",(0..7)); ++@X=map("v$_",(8..23)); ++($Ki,$Func,$S0,$S1,$s0,$s1,$lemask)=map("v$_",(24..31)); ++ ++sub ROUND { ++my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; ++my $j=($i+1)%16; ++ ++$code.=<<___ if ($i<15 && ($i%(16/$SZ))==(16/$SZ-1)); ++ lvx_u @X[$i+1],0,$inp ; load X[i] in advance ++ addi $inp,$inp,16 ++___ ++$code.=<<___ if ($i<16 && ($i%(16/$SZ))); ++ vsldoi @X[$i],@X[$i-1],@X[$i-1],$SZ ++___ ++$code.=<<___ if ($LENDIAN && $i<16 && ($i%(16/$SZ))==0); ++ vperm @X[$i],@X[$i],@X[$i],$lemask ++___ ++$code.=<<___; ++ `"vshasigma${sz} $s0,@X[($j+1)%16],0,0" if 
($i>=15)` ++ vsel $Func,$g,$f,$e ; Ch(e,f,g) ++ vshasigma${sz} $S1,$e,1,15 ; Sigma1(e) ++ vaddu${sz}m $h,$h,@X[$i%16] ; h+=X[i] ++ vshasigma${sz} $S0,$a,1,0 ; Sigma0(a) ++ `"vshasigma${sz} $s1,@X[($j+14)%16],0,15" if ($i>=15)` ++ vaddu${sz}m $h,$h,$Func ; h+=Ch(e,f,g) ++ vxor $Func,$a,$b ++ `"vaddu${sz}m @X[$j],@X[$j],@X[($j+9)%16]" if ($i>=15)` ++ vaddu${sz}m $h,$h,$S1 ; h+=Sigma1(e) ++ vsel $Func,$b,$c,$Func ; Maj(a,b,c) ++ vaddu${sz}m $g,$g,$Ki ; future h+=K[i] ++ vaddu${sz}m $d,$d,$h ; d+=h ++ vaddu${sz}m $S0,$S0,$Func ; Sigma0(a)+Maj(a,b,c) ++ `"vaddu${sz}m @X[$j],@X[$j],$s0" if ($i>=15)` ++ lvx $Ki,$idx,$Tbl ; load next K[i] ++ addi $idx,$idx,16 ++ vaddu${sz}m $h,$h,$S0 ; h+=Sigma0(a)+Maj(a,b,c) ++ `"vaddu${sz}m @X[$j],@X[$j],$s1" if ($i>=15)` ++___ ++} ++ ++$code=<<___; ++.machine "any" ++.text ++ ++.globl $func ++.align 6 ++$func: ++ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) ++ mflr $lrsave ++ li r10,`$FRAME+8*16+15` ++ li r11,`$FRAME+8*16+31` ++ stvx v20,r10,$sp # ABI says so ++ addi r10,r10,32 ++ mfspr $vrsave,256 ++ stvx v21,r11,$sp ++ addi r11,r11,32 ++ stvx v22,r10,$sp ++ addi r10,r10,32 ++ stvx v23,r11,$sp ++ addi r11,r11,32 ++ stvx v24,r10,$sp ++ addi r10,r10,32 ++ stvx v25,r11,$sp ++ addi r11,r11,32 ++ stvx v26,r10,$sp ++ addi r10,r10,32 ++ stvx v27,r11,$sp ++ addi r11,r11,32 ++ stvx v28,r10,$sp ++ addi r10,r10,32 ++ stvx v29,r11,$sp ++ addi r11,r11,32 ++ stvx v30,r10,$sp ++ stvx v31,r11,$sp ++ li r11,-1 ++ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave ++ li $x10,0x10 ++ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) ++ li $x20,0x20 ++ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) ++ li $x30,0x30 ++ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) ++ li $x40,0x40 ++ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) ++ li $x50,0x50 ++ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) ++ li $x60,0x60 ++ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) ++ li $x70,0x70 ++ $PUSH $lrsave,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) ++ mtspr 256,r11 ++ ++ bl LPICmeup ++ addi $offload,$sp,$FRAME+15 ++___ 
++$code.=<<___ if ($LENDIAN); ++ li $idx,8 ++ lvsl $lemask,0,$idx ++ vspltisb $Ki,0x0f ++ vxor $lemask,$lemask,$Ki ++___ ++$code.=<<___ if ($SZ==4); ++ lvx_4w $A,$x00,$ctx ++ lvx_4w $E,$x10,$ctx ++ vsldoi $B,$A,$A,4 # unpack ++ vsldoi $C,$A,$A,8 ++ vsldoi $D,$A,$A,12 ++ vsldoi $F,$E,$E,4 ++ vsldoi $G,$E,$E,8 ++ vsldoi $H,$E,$E,12 ++___ ++$code.=<<___ if ($SZ==8); ++ lvx_u $A,$x00,$ctx ++ lvx_u $C,$x10,$ctx ++ lvx_u $E,$x20,$ctx ++ vsldoi $B,$A,$A,8 # unpack ++ lvx_u $G,$x30,$ctx ++ vsldoi $D,$C,$C,8 ++ vsldoi $F,$E,$E,8 ++ vsldoi $H,$G,$G,8 ++___ ++$code.=<<___; ++ li r0,`($rounds-16)/16` # inner loop counter ++ b Loop ++.align 5 ++Loop: ++ lvx $Ki,$x00,$Tbl ++ li $idx,16 ++ lvx_u @X[0],0,$inp ++ addi $inp,$inp,16 ++ stvx $A,$x00,$offload # offload $A-$H ++ stvx $B,$x10,$offload ++ stvx $C,$x20,$offload ++ stvx $D,$x30,$offload ++ stvx $E,$x40,$offload ++ stvx $F,$x50,$offload ++ stvx $G,$x60,$offload ++ stvx $H,$x70,$offload ++ vaddu${sz}m $H,$H,$Ki # h+K[i] ++ lvx $Ki,$idx,$Tbl ++ addi $idx,$idx,16 ++___ ++for ($i=0;$i<16;$i++) { &ROUND($i,@V); unshift(@V,pop(@V)); } ++$code.=<<___; ++ mtctr r0 ++ b L16_xx ++.align 5 ++L16_xx: ++___ ++for (;$i<32;$i++) { &ROUND($i,@V); unshift(@V,pop(@V)); } ++$code.=<<___; ++ bdnz L16_xx ++ ++ lvx @X[2],$x00,$offload ++ subic. 
$num,$num,1 ++ lvx @X[3],$x10,$offload ++ vaddu${sz}m $A,$A,@X[2] ++ lvx @X[4],$x20,$offload ++ vaddu${sz}m $B,$B,@X[3] ++ lvx @X[5],$x30,$offload ++ vaddu${sz}m $C,$C,@X[4] ++ lvx @X[6],$x40,$offload ++ vaddu${sz}m $D,$D,@X[5] ++ lvx @X[7],$x50,$offload ++ vaddu${sz}m $E,$E,@X[6] ++ lvx @X[8],$x60,$offload ++ vaddu${sz}m $F,$F,@X[7] ++ lvx @X[9],$x70,$offload ++ vaddu${sz}m $G,$G,@X[8] ++ vaddu${sz}m $H,$H,@X[9] ++ bne Loop ++___ ++$code.=<<___ if ($SZ==4); ++ lvx @X[0],$idx,$Tbl ++ addi $idx,$idx,16 ++ vperm $A,$A,$B,$Ki # pack the answer ++ lvx @X[1],$idx,$Tbl ++ vperm $E,$E,$F,$Ki ++ vperm $A,$A,$C,@X[0] ++ vperm $E,$E,$G,@X[0] ++ vperm $A,$A,$D,@X[1] ++ vperm $E,$E,$H,@X[1] ++ stvx_4w $A,$x00,$ctx ++ stvx_4w $E,$x10,$ctx ++___ ++$code.=<<___ if ($SZ==8); ++ vperm $A,$A,$B,$Ki # pack the answer ++ vperm $C,$C,$D,$Ki ++ vperm $E,$E,$F,$Ki ++ vperm $G,$G,$H,$Ki ++ stvx_u $A,$x00,$ctx ++ stvx_u $C,$x10,$ctx ++ stvx_u $E,$x20,$ctx ++ stvx_u $G,$x30,$ctx ++___ ++$code.=<<___; ++ li r10,`$FRAME+8*16+15` ++ mtlr $lrsave ++ li r11,`$FRAME+8*16+31` ++ mtspr 256,$vrsave ++ lvx v20,r10,$sp # ABI says so ++ addi r10,r10,32 ++ lvx v21,r11,$sp ++ addi r11,r11,32 ++ lvx v22,r10,$sp ++ addi r10,r10,32 ++ lvx v23,r11,$sp ++ addi r11,r11,32 ++ lvx v24,r10,$sp ++ addi r10,r10,32 ++ lvx v25,r11,$sp ++ addi r11,r11,32 ++ lvx v26,r10,$sp ++ addi r10,r10,32 ++ lvx v27,r11,$sp ++ addi r11,r11,32 ++ lvx v28,r10,$sp ++ addi r10,r10,32 ++ lvx v29,r11,$sp ++ addi r11,r11,32 ++ lvx v30,r10,$sp ++ lvx v31,r11,$sp ++ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) ++ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) ++ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) ++ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) ++ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) ++ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) ++ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` ++ blr ++ .long 0 ++ .byte 0,12,4,1,0x80,6,3,0 ++ .long 0 ++.size $func,.-$func ++___ ++ ++# Ugly hack here, because PPC assembler syntax seem to vary too ++# much from platforms to platform... 
++$code.=<<___; ++.align 6 ++LPICmeup: ++ mflr r0 ++ bcl 20,31,\$+4 ++ mflr $Tbl ; vvvvvv "distance" between . and 1st data entry ++ addi $Tbl,$Tbl,`64-8` ++ mtlr r0 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ .space `64-9*4` ++___ ++ ++if ($SZ==8) { ++ local *table = sub { ++ foreach(@_) { $code.=".quad $_,$_\n"; } ++ }; ++ table( ++ "0x428a2f98d728ae22","0x7137449123ef65cd", ++ "0xb5c0fbcfec4d3b2f","0xe9b5dba58189dbbc", ++ "0x3956c25bf348b538","0x59f111f1b605d019", ++ "0x923f82a4af194f9b","0xab1c5ed5da6d8118", ++ "0xd807aa98a3030242","0x12835b0145706fbe", ++ "0x243185be4ee4b28c","0x550c7dc3d5ffb4e2", ++ "0x72be5d74f27b896f","0x80deb1fe3b1696b1", ++ "0x9bdc06a725c71235","0xc19bf174cf692694", ++ "0xe49b69c19ef14ad2","0xefbe4786384f25e3", ++ "0x0fc19dc68b8cd5b5","0x240ca1cc77ac9c65", ++ "0x2de92c6f592b0275","0x4a7484aa6ea6e483", ++ "0x5cb0a9dcbd41fbd4","0x76f988da831153b5", ++ "0x983e5152ee66dfab","0xa831c66d2db43210", ++ "0xb00327c898fb213f","0xbf597fc7beef0ee4", ++ "0xc6e00bf33da88fc2","0xd5a79147930aa725", ++ "0x06ca6351e003826f","0x142929670a0e6e70", ++ "0x27b70a8546d22ffc","0x2e1b21385c26c926", ++ "0x4d2c6dfc5ac42aed","0x53380d139d95b3df", ++ "0x650a73548baf63de","0x766a0abb3c77b2a8", ++ "0x81c2c92e47edaee6","0x92722c851482353b", ++ "0xa2bfe8a14cf10364","0xa81a664bbc423001", ++ "0xc24b8b70d0f89791","0xc76c51a30654be30", ++ "0xd192e819d6ef5218","0xd69906245565a910", ++ "0xf40e35855771202a","0x106aa07032bbd1b8", ++ "0x19a4c116b8d2d0c8","0x1e376c085141ab53", ++ "0x2748774cdf8eeb99","0x34b0bcb5e19b48a8", ++ "0x391c0cb3c5c95a63","0x4ed8aa4ae3418acb", ++ "0x5b9cca4f7763e373","0x682e6ff3d6b2b8a3", ++ "0x748f82ee5defb2fc","0x78a5636f43172f60", ++ "0x84c87814a1f0ab72","0x8cc702081a6439ec", ++ "0x90befffa23631e28","0xa4506cebde82bde9", ++ "0xbef9a3f7b2c67915","0xc67178f2e372532b", ++ "0xca273eceea26619c","0xd186b8c721c0c207", ++ "0xeada7dd6cde0eb1e","0xf57d4f7fee6ed178", ++ "0x06f067aa72176fba","0x0a637dc5a2c898a6", ++ "0x113f9804bef90dae","0x1b710b35131c471b", ++ 
"0x28db77f523047d84","0x32caab7b40c72493", ++ "0x3c9ebe0a15c9bebc","0x431d67c49c100d4c", ++ "0x4cc5d4becb3e42b6","0x597f299cfc657e2a", ++ "0x5fcb6fab3ad6faec","0x6c44198c4a475817","0"); ++$code.=<<___ if (!$LENDIAN); ++.quad 0x0001020304050607,0x1011121314151617 ++___ ++$code.=<<___ if ($LENDIAN); # quad-swapped ++.quad 0x1011121314151617,0x0001020304050607 ++___ ++} else { ++ local *table = sub { ++ foreach(@_) { $code.=".long $_,$_,$_,$_\n"; } ++ }; ++ table( ++ "0x428a2f98","0x71374491","0xb5c0fbcf","0xe9b5dba5", ++ "0x3956c25b","0x59f111f1","0x923f82a4","0xab1c5ed5", ++ "0xd807aa98","0x12835b01","0x243185be","0x550c7dc3", ++ "0x72be5d74","0x80deb1fe","0x9bdc06a7","0xc19bf174", ++ "0xe49b69c1","0xefbe4786","0x0fc19dc6","0x240ca1cc", ++ "0x2de92c6f","0x4a7484aa","0x5cb0a9dc","0x76f988da", ++ "0x983e5152","0xa831c66d","0xb00327c8","0xbf597fc7", ++ "0xc6e00bf3","0xd5a79147","0x06ca6351","0x14292967", ++ "0x27b70a85","0x2e1b2138","0x4d2c6dfc","0x53380d13", ++ "0x650a7354","0x766a0abb","0x81c2c92e","0x92722c85", ++ "0xa2bfe8a1","0xa81a664b","0xc24b8b70","0xc76c51a3", ++ "0xd192e819","0xd6990624","0xf40e3585","0x106aa070", ++ "0x19a4c116","0x1e376c08","0x2748774c","0x34b0bcb5", ++ "0x391c0cb3","0x4ed8aa4a","0x5b9cca4f","0x682e6ff3", ++ "0x748f82ee","0x78a5636f","0x84c87814","0x8cc70208", ++ "0x90befffa","0xa4506ceb","0xbef9a3f7","0xc67178f2","0"); ++$code.=<<___ if (!$LENDIAN); ++.long 0x00010203,0x10111213,0x10111213,0x10111213 ++.long 0x00010203,0x04050607,0x10111213,0x10111213 ++.long 0x00010203,0x04050607,0x08090a0b,0x10111213 ++___ ++$code.=<<___ if ($LENDIAN); # word-swapped ++.long 0x10111213,0x10111213,0x10111213,0x00010203 ++.long 0x10111213,0x10111213,0x04050607,0x00010203 ++.long 0x10111213,0x08090a0b,0x04050607,0x00010203 ++___ ++} ++$code.=<<___; ++.asciz "SHA${bits} for PowerISA 2.07, CRYPTOGAMS by " ++.align 2 ++___ ++ ++$code =~ s/\`([^\`]*)\`/eval $1/gem; ++print $code; ++close STDOUT; diff --git a/openssl-1.0.1e-ppc64le-target.patch 
b/openssl-1.0.1e-ppc64le-target.patch deleted file mode 100644 index 00d0079..0000000 --- a/openssl-1.0.1e-ppc64le-target.patch +++ /dev/null @@ -1,10 +0,0 @@ ---- openssl-1.0.1e.orig/Configure 2013-08-20 13:42:58.996358664 +1000 -+++ openssl-1.0.1e/Configure 2013-08-20 13:43:54.246608197 +1000 -@@ -357,6 +357,7 @@ - #### - "linux-generic64","gcc:-DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)", - "linux-ppc64", "gcc:-m64 -DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64 \$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64", -+"linux-ppc64le", "gcc:-m64 -DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${no_asm}:dlfcn:linux-shared:-fPIC:-m64 \$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64", - "linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)", - "linux-ia64-ecc","ecc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", - "linux-ia64-icc","icc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", diff --git a/openssl-1.0.1e-req-keylen.patch b/openssl-1.0.1e-req-keylen.patch deleted file mode 100644 index 1574bb8..0000000 --- a/openssl-1.0.1e-req-keylen.patch +++ /dev/null @@ -1,38 +0,0 @@ -diff -up openssl-1.0.1e/apps/req.c.keylen openssl-1.0.1e/apps/req.c ---- 
openssl-1.0.1e/apps/req.c.keylen 2014-02-12 14:58:29.000000000 +0100 -+++ openssl-1.0.1e/apps/req.c 2014-02-14 13:52:48.692325000 +0100 -@@ -644,6 +644,12 @@ bad: - if (inrand) - app_RAND_load_files(inrand); - -+ if (newkey <= 0) -+ { -+ if (!NCONF_get_number(req_conf,SECTION,BITS, &newkey)) -+ newkey=DEFAULT_KEY_LENGTH; -+ } -+ - if (keyalg) - { - genctx = set_keygen_ctx(bio_err, keyalg, &pkey_type, &newkey, -@@ -651,12 +657,6 @@ bad: - if (!genctx) - goto end; - } -- -- if (newkey <= 0) -- { -- if (!NCONF_get_number(req_conf,SECTION,BITS, &newkey)) -- newkey=DEFAULT_KEY_LENGTH; -- } - - if (newkey < MIN_KEY_LENGTH && (pkey_type == EVP_PKEY_RSA || pkey_type == EVP_PKEY_DSA)) - { -@@ -1649,6 +1649,8 @@ static EVP_PKEY_CTX *set_keygen_ctx(BIO - keylen = atol(p + 1); - *pkeylen = keylen; - } -+ else -+ keylen = *pkeylen; - } - else if (p) - paramfile = p + 1; diff --git a/openssl-1.0.1-beta2-rpmbuild.patch b/openssl-1.0.1e-rpmbuild.patch similarity index 91% rename from openssl-1.0.1-beta2-rpmbuild.patch rename to openssl-1.0.1e-rpmbuild.patch index a4bb691..14b2ba9 100644 --- a/openssl-1.0.1-beta2-rpmbuild.patch +++ b/openssl-1.0.1e-rpmbuild.patch @@ -1,7 +1,7 @@ -diff -up openssl-1.0.1-beta2/Configure.rpmbuild openssl-1.0.1-beta2/Configure ---- openssl-1.0.1-beta2/Configure.rpmbuild 2012-01-05 01:07:34.000000000 +0100 -+++ openssl-1.0.1-beta2/Configure 2012-02-02 12:43:56.547409325 +0100 -@@ -343,23 +343,23 @@ my %table=( +diff -up openssl-1.0.1e/Configure.rpmbuild openssl-1.0.1e/Configure +--- openssl-1.0.1e/Configure.rpmbuild 2014-08-13 19:19:53.211005598 +0200 ++++ openssl-1.0.1e/Configure 2014-08-13 19:29:21.704099285 +0200 +@@ -345,24 +345,24 @@ my %table=( #### # *-generic* is endian-neutral target, but ./config is free to # throw in -D[BL]_ENDIAN, whichever appropriate... 
@@ -21,9 +21,11 @@ diff -up openssl-1.0.1-beta2/Configure.rpmbuild openssl-1.0.1-beta2/Configure #### -"linux-generic64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -"linux-ppc64", "gcc:-m64 -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", +-"linux-ppc64le","gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:$ppc64_asm:linux64le:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::", -"linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"linux-generic64","gcc:-DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)", +"linux-ppc64", "gcc:-m64 -DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64 \$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64", ++"linux-ppc64le","gcc:-m64 -DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64le:dlfcn:linux-shared:-fPIC:-m64 \$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64", +"linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)", "linux-ia64-ecc","ecc:-DL_ENDIAN -DTERMIO -O2 
-Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-ia64-icc","icc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", @@ -34,7 +36,7 @@ diff -up openssl-1.0.1-beta2/Configure.rpmbuild openssl-1.0.1-beta2/Configure #### So called "highgprs" target for z/Architecture CPUs # "Highgprs" is kernel feature first implemented in Linux 2.6.32, see # /proc/cpuinfo. The idea is to preserve most significant bits of -@@ -373,16 +373,17 @@ my %table=( +@@ -376,16 +376,17 @@ my %table=( # ldconfig and run-time linker to autodiscover. Unfortunately it # doesn't work just yet, because of couple of bugs in glibc # sysdeps/s390/dl-procinfo.c affecting ldconfig and ld.so.1... @@ -56,7 +58,7 @@ diff -up openssl-1.0.1-beta2/Configure.rpmbuild openssl-1.0.1-beta2/Configure #### Alpha Linux with GNU C and Compaq C setups # Special notes: # - linux-alpha+bwx-gcc is ment to be used from ./config only. 
If you -@@ -396,8 +397,8 @@ my %table=( +@@ -399,8 +400,8 @@ my %table=( # # # @@ -67,7 +69,7 @@ diff -up openssl-1.0.1-beta2/Configure.rpmbuild openssl-1.0.1-beta2/Configure "linux-alpha-ccc","ccc:-fast -readonly_strings -DL_ENDIAN -DTERMIO::-D_REENTRANT:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL:${alpha_asm}", "linux-alpha+bwx-ccc","ccc:-fast -readonly_strings -DL_ENDIAN -DTERMIO::-D_REENTRANT:::SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL:${alpha_asm}", -@@ -1678,7 +1679,7 @@ while () +@@ -1675,7 +1676,7 @@ while () elsif ($shared_extension ne "" && $shared_extension =~ /^\.s([ol])\.[^\.]*\.[^\.]*$/) { my $sotmp = $1; @@ -76,9 +78,9 @@ diff -up openssl-1.0.1-beta2/Configure.rpmbuild openssl-1.0.1-beta2/Configure } elsif ($shared_extension ne "" && $shared_extension =~ /^\.[^\.]*\.[^\.]*\.dylib$/) { -diff -up openssl-1.0.1-beta2/Makefile.org.rpmbuild openssl-1.0.1-beta2/Makefile.org ---- openssl-1.0.1-beta2/Makefile.org.rpmbuild 2011-12-27 16:17:50.000000000 +0100 -+++ openssl-1.0.1-beta2/Makefile.org 2012-02-02 12:30:23.652495435 +0100 +diff -up openssl-1.0.1e/Makefile.org.rpmbuild openssl-1.0.1e/Makefile.org +--- openssl-1.0.1e/Makefile.org.rpmbuild 2013-02-11 16:26:04.000000000 +0100 ++++ openssl-1.0.1e/Makefile.org 2014-08-13 19:19:53.218005759 +0200 @@ -10,6 +10,7 @@ SHLIB_VERSION_HISTORY= SHLIB_MAJOR= SHLIB_MINOR= diff --git a/openssl-1.0.1e-fips.patch b/openssl-1.0.1g-fips.patch similarity index 95% rename from openssl-1.0.1e-fips.patch rename to openssl-1.0.1g-fips.patch index f5496a0..449ca1c 100644 --- a/openssl-1.0.1e-fips.patch +++ b/openssl-1.0.1g-fips.patch @@ -1,31 +1,6 @@ -diff -up openssl-1.0.1e/apps/pkcs12.c.fips openssl-1.0.1e/apps/pkcs12.c ---- openssl-1.0.1e/apps/pkcs12.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/apps/pkcs12.c 2013-10-04 11:48:04.172693955 +0200 -@@ -67,6 +67,9 @@ - #include - #include - #include -+#ifdef OPENSSL_FIPS -+#include -+#endif - - #define 
PROG pkcs12_main - -@@ -130,6 +133,11 @@ int MAIN(int argc, char **argv) - - apps_startup(); - -+#ifdef OPENSSL_FIPS -+ if (FIPS_mode()) -+ cert_pbe = key_pbe; /* cannot use RC2 in the FIPS mode */ -+#endif -+ - enc = EVP_des_ede3_cbc(); - if (bio_err == NULL ) bio_err = BIO_new_fp (stderr, BIO_NOCLOSE); - -diff -up openssl-1.0.1e/apps/speed.c.fips openssl-1.0.1e/apps/speed.c ---- openssl-1.0.1e/apps/speed.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/apps/speed.c 2013-10-04 11:49:56.384227859 +0200 +diff -up openssl-1.0.1g/apps/speed.c.fips openssl-1.0.1g/apps/speed.c +--- openssl-1.0.1g/apps/speed.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/apps/speed.c 2014-05-06 16:29:50.536922993 +0200 @@ -195,7 +195,6 @@ #ifdef OPENSSL_DOING_MAKEDEPEND #undef AES_set_encrypt_key @@ -151,10 +126,10 @@ diff -up openssl-1.0.1e/apps/speed.c.fips openssl-1.0.1e/apps/speed.c HMAC_Init_ex(&hctx,(unsigned char *)"This is a key...", 16,EVP_md5(), NULL); -diff -up openssl-1.0.1e/Configure.fips openssl-1.0.1e/Configure ---- openssl-1.0.1e/Configure.fips 2013-10-04 11:48:04.153693526 +0200 -+++ openssl-1.0.1e/Configure 2013-10-04 11:48:04.173693978 +0200 -@@ -995,11 +995,6 @@ if (defined($disabled{"md5"}) || defined +diff -up openssl-1.0.1g/Configure.fips openssl-1.0.1g/Configure +--- openssl-1.0.1g/Configure.fips 2014-05-06 16:29:50.523922693 +0200 ++++ openssl-1.0.1g/Configure 2014-05-06 16:29:50.536922993 +0200 +@@ -997,11 +997,6 @@ if (defined($disabled{"md5"}) || defined $disabled{"ssl2"} = "forced"; } @@ -166,7 +141,7 @@ diff -up openssl-1.0.1e/Configure.fips openssl-1.0.1e/Configure # RSAX ENGINE sets default non-FIPS RSA method. 
if ($fips) { -@@ -1474,7 +1469,6 @@ $cflags.=" -DOPENSSL_BN_ASM_GF2m" if ($b +@@ -1476,7 +1471,6 @@ $cflags.=" -DOPENSSL_BN_ASM_GF2m" if ($b if ($fips) { $openssl_other_defines.="#define OPENSSL_FIPS\n"; @@ -174,7 +149,7 @@ diff -up openssl-1.0.1e/Configure.fips openssl-1.0.1e/Configure } $cpuid_obj="mem_clr.o" unless ($cpuid_obj =~ /\.o$/); -@@ -1661,9 +1655,12 @@ while () +@@ -1663,9 +1657,12 @@ while () s/^FIPSDIR=.*/FIPSDIR=$fipsdir/; s/^FIPSLIBDIR=.*/FIPSLIBDIR=$fipslibdir/; @@ -188,9 +163,9 @@ diff -up openssl-1.0.1e/Configure.fips openssl-1.0.1e/Configure s/^SHLIB_TARGET=.*/SHLIB_TARGET=$shared_target/; s/^SHLIB_MARK=.*/SHLIB_MARK=$shared_mark/; s/^SHARED_LIBS=.*/SHARED_LIBS=\$(SHARED_CRYPTO) \$(SHARED_SSL)/ if (!$no_shared); -diff -up openssl-1.0.1e/crypto/aes/aes_misc.c.fips openssl-1.0.1e/crypto/aes/aes_misc.c ---- openssl-1.0.1e/crypto/aes/aes_misc.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/aes/aes_misc.c 2013-10-04 11:48:04.173693978 +0200 +diff -up openssl-1.0.1g/crypto/aes/aes_misc.c.fips openssl-1.0.1g/crypto/aes/aes_misc.c +--- openssl-1.0.1g/crypto/aes/aes_misc.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/aes/aes_misc.c 2014-05-06 16:29:50.536922993 +0200 @@ -69,17 +69,11 @@ const char *AES_options(void) { int AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key) @@ -209,9 +184,9 @@ diff -up openssl-1.0.1e/crypto/aes/aes_misc.c.fips openssl-1.0.1e/crypto/aes/aes -#endif return private_AES_set_decrypt_key(userKey, bits, key); } -diff -up openssl-1.0.1e/crypto/cmac/cmac.c.fips openssl-1.0.1e/crypto/cmac/cmac.c ---- openssl-1.0.1e/crypto/cmac/cmac.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/cmac/cmac.c 2013-10-04 11:48:04.173693978 +0200 +diff -up openssl-1.0.1g/crypto/cmac/cmac.c.fips openssl-1.0.1g/crypto/cmac/cmac.c +--- openssl-1.0.1g/crypto/cmac/cmac.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/cmac/cmac.c 2014-05-06 
16:29:50.537923016 +0200 @@ -107,13 +107,6 @@ CMAC_CTX *CMAC_CTX_new(void) void CMAC_CTX_cleanup(CMAC_CTX *ctx) @@ -260,9 +235,9 @@ diff -up openssl-1.0.1e/crypto/cmac/cmac.c.fips openssl-1.0.1e/crypto/cmac/cmac. if (ctx->nlast_block == -1) return 0; bl = EVP_CIPHER_CTX_block_size(&ctx->cctx); -diff -up openssl-1.0.1e/crypto/crypto.h.fips openssl-1.0.1e/crypto/crypto.h ---- openssl-1.0.1e/crypto/crypto.h.fips 2013-10-04 11:48:04.058691381 +0200 -+++ openssl-1.0.1e/crypto/crypto.h 2013-10-04 11:48:04.173693978 +0200 +diff -up openssl-1.0.1g/crypto/crypto.h.fips openssl-1.0.1g/crypto/crypto.h +--- openssl-1.0.1g/crypto/crypto.h.fips 2014-05-06 16:29:50.419920288 +0200 ++++ openssl-1.0.1g/crypto/crypto.h 2014-05-06 16:29:50.537923016 +0200 @@ -553,24 +553,29 @@ int FIPS_mode_set(int r); void OPENSSL_init(void); @@ -308,9 +283,9 @@ diff -up openssl-1.0.1e/crypto/crypto.h.fips openssl-1.0.1e/crypto/crypto.h /* Error codes for the CRYPTO functions. */ /* Function codes. */ -diff -up openssl-1.0.1e/crypto/des/des.h.fips openssl-1.0.1e/crypto/des/des.h ---- openssl-1.0.1e/crypto/des/des.h.fips 2013-10-04 11:48:04.088692058 +0200 -+++ openssl-1.0.1e/crypto/des/des.h 2013-10-04 11:48:04.173693978 +0200 +diff -up openssl-1.0.1g/crypto/des/des.h.fips openssl-1.0.1g/crypto/des/des.h +--- openssl-1.0.1g/crypto/des/des.h.fips 2014-05-06 16:29:50.449920982 +0200 ++++ openssl-1.0.1g/crypto/des/des.h 2014-05-06 16:29:50.537923016 +0200 @@ -224,9 +224,6 @@ int DES_set_key(const_DES_cblock *key,DE int DES_key_sched(const_DES_cblock *key,DES_key_schedule *schedule); int DES_set_key_checked(const_DES_cblock *key,DES_key_schedule *schedule); @@ -321,9 +296,9 @@ diff -up openssl-1.0.1e/crypto/des/des.h.fips openssl-1.0.1e/crypto/des/des.h void DES_string_to_key(const char *str,DES_cblock *key); void DES_string_to_2keys(const char *str,DES_cblock *key1,DES_cblock *key2); void DES_cfb64_encrypt(const unsigned char *in,unsigned char *out,long length, -diff -up 
openssl-1.0.1e/crypto/des/set_key.c.fips openssl-1.0.1e/crypto/des/set_key.c ---- openssl-1.0.1e/crypto/des/set_key.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/des/set_key.c 2013-10-04 11:48:04.174694001 +0200 +diff -up openssl-1.0.1g/crypto/des/set_key.c.fips openssl-1.0.1g/crypto/des/set_key.c +--- openssl-1.0.1g/crypto/des/set_key.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/des/set_key.c 2014-05-06 16:29:50.537923016 +0200 @@ -336,13 +336,6 @@ int DES_set_key_checked(const_DES_cblock } @@ -338,9 +313,9 @@ diff -up openssl-1.0.1e/crypto/des/set_key.c.fips openssl-1.0.1e/crypto/des/set_ { static const int shifts2[16]={0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0}; register DES_LONG c,d,t,s,t2; -diff -up openssl-1.0.1e/crypto/dh/dh_gen.c.fips openssl-1.0.1e/crypto/dh/dh_gen.c ---- openssl-1.0.1e/crypto/dh/dh_gen.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/dh/dh_gen.c 2013-10-04 11:48:04.174694001 +0200 +diff -up openssl-1.0.1g/crypto/dh/dh_gen.c.fips openssl-1.0.1g/crypto/dh/dh_gen.c +--- openssl-1.0.1g/crypto/dh/dh_gen.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/dh/dh_gen.c 2014-05-06 16:29:50.537923016 +0200 @@ -84,11 +84,6 @@ int DH_generate_parameters_ex(DH *ret, i #endif if(ret->meth->generate_params) @@ -374,9 +349,9 @@ diff -up openssl-1.0.1e/crypto/dh/dh_gen.c.fips openssl-1.0.1e/crypto/dh/dh_gen. 
ctx=BN_CTX_new(); if (ctx == NULL) goto err; BN_CTX_start(ctx); -diff -up openssl-1.0.1e/crypto/dh/dh.h.fips openssl-1.0.1e/crypto/dh/dh.h ---- openssl-1.0.1e/crypto/dh/dh.h.fips 2014-02-06 18:04:19.000000000 +0100 -+++ openssl-1.0.1e/crypto/dh/dh.h 2014-02-11 16:01:17.039345356 +0100 +diff -up openssl-1.0.1g/crypto/dh/dh.h.fips openssl-1.0.1g/crypto/dh/dh.h +--- openssl-1.0.1g/crypto/dh/dh.h.fips 2014-05-06 16:29:50.394919710 +0200 ++++ openssl-1.0.1g/crypto/dh/dh.h 2014-05-06 16:29:50.537923016 +0200 @@ -77,6 +77,8 @@ # define OPENSSL_DH_MAX_MODULUS_BITS 10000 #endif @@ -394,9 +369,9 @@ diff -up openssl-1.0.1e/crypto/dh/dh.h.fips openssl-1.0.1e/crypto/dh/dh.h DH * d2i_DHparams(DH **a,const unsigned char **pp, long length); int i2d_DHparams(const DH *a,unsigned char **pp); #ifndef OPENSSL_NO_FP_API -diff -up openssl-1.0.1e/crypto/dh/dh_key.c.fips openssl-1.0.1e/crypto/dh/dh_key.c ---- openssl-1.0.1e/crypto/dh/dh_key.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/dh/dh_key.c 2014-02-11 15:57:55.266840301 +0100 +diff -up openssl-1.0.1g/crypto/dh/dh_key.c.fips openssl-1.0.1g/crypto/dh/dh_key.c +--- openssl-1.0.1g/crypto/dh/dh_key.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/dh/dh_key.c 2014-05-06 16:29:50.538923040 +0200 @@ -61,6 +61,9 @@ #include #include @@ -477,9 +452,9 @@ diff -up openssl-1.0.1e/crypto/dh/dh_key.c.fips openssl-1.0.1e/crypto/dh/dh_key. 
dh->flags |= DH_FLAG_CACHE_MONT_P; return(1); } -diff -up openssl-1.0.1e/crypto/dh/dh_lib.c.fips openssl-1.0.1e/crypto/dh/dh_lib.c ---- openssl-1.0.1e/crypto/dh/dh_lib.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/dh/dh_lib.c 2013-10-04 11:48:04.174694001 +0200 +diff -up openssl-1.0.1g/crypto/dh/dh_lib.c.fips openssl-1.0.1g/crypto/dh/dh_lib.c +--- openssl-1.0.1g/crypto/dh/dh_lib.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/dh/dh_lib.c 2014-05-06 16:29:50.538923040 +0200 @@ -81,14 +81,7 @@ const DH_METHOD *DH_get_default_method(v { if(!default_DH_method) @@ -495,9 +470,9 @@ diff -up openssl-1.0.1e/crypto/dh/dh_lib.c.fips openssl-1.0.1e/crypto/dh/dh_lib. } return default_DH_method; } -diff -up openssl-1.0.1e/crypto/dsa/dsa_err.c.fips openssl-1.0.1e/crypto/dsa/dsa_err.c ---- openssl-1.0.1e/crypto/dsa/dsa_err.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/dsa/dsa_err.c 2013-10-04 11:48:04.174694001 +0200 +diff -up openssl-1.0.1g/crypto/dsa/dsa_err.c.fips openssl-1.0.1g/crypto/dsa/dsa_err.c +--- openssl-1.0.1g/crypto/dsa/dsa_err.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/dsa/dsa_err.c 2014-05-06 16:29:50.538923040 +0200 @@ -74,6 +74,8 @@ static ERR_STRING_DATA DSA_str_functs[]= {ERR_FUNC(DSA_F_DO_DSA_PRINT), "DO_DSA_PRINT"}, {ERR_FUNC(DSA_F_DSAPARAMS_PRINT), "DSAparams_print"}, @@ -516,9 +491,9 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa_err.c.fips openssl-1.0.1e/crypto/dsa/dsa_ {ERR_REASON(DSA_R_MISSING_PARAMETERS) ,"missing parameters"}, {ERR_REASON(DSA_R_MODULUS_TOO_LARGE) ,"modulus too large"}, {ERR_REASON(DSA_R_NEED_NEW_SETUP_VALUES) ,"need new setup values"}, -diff -up openssl-1.0.1e/crypto/dsa/dsa_gen.c.fips openssl-1.0.1e/crypto/dsa/dsa_gen.c ---- openssl-1.0.1e/crypto/dsa/dsa_gen.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/dsa/dsa_gen.c 2013-10-04 11:48:04.175694023 +0200 +diff -up openssl-1.0.1g/crypto/dsa/dsa_gen.c.fips 
openssl-1.0.1g/crypto/dsa/dsa_gen.c +--- openssl-1.0.1g/crypto/dsa/dsa_gen.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/dsa/dsa_gen.c 2014-05-06 16:29:50.538923040 +0200 @@ -85,6 +85,14 @@ #include #endif @@ -925,9 +900,9 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa_gen.c.fips openssl-1.0.1e/crypto/dsa/dsa_ } if (mont != NULL) BN_MONT_CTX_free(mont); return ok; -diff -up openssl-1.0.1e/crypto/dsa/dsa.h.fips openssl-1.0.1e/crypto/dsa/dsa.h ---- openssl-1.0.1e/crypto/dsa/dsa.h.fips 2013-10-04 11:48:03.956689078 +0200 -+++ openssl-1.0.1e/crypto/dsa/dsa.h 2013-10-04 11:48:04.175694023 +0200 +diff -up openssl-1.0.1g/crypto/dsa/dsa.h.fips openssl-1.0.1g/crypto/dsa/dsa.h +--- openssl-1.0.1g/crypto/dsa/dsa.h.fips 2014-05-06 16:29:50.316917907 +0200 ++++ openssl-1.0.1g/crypto/dsa/dsa.h 2014-05-06 16:29:50.538923040 +0200 @@ -88,6 +88,8 @@ # define OPENSSL_DSA_MAX_MODULUS_BITS 10000 #endif @@ -988,9 +963,9 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa.h.fips openssl-1.0.1e/crypto/dsa/dsa.h #define DSA_R_PARAMETER_ENCODING_ERROR 105 #ifdef __cplusplus -diff -up openssl-1.0.1e/crypto/dsa/dsa_key.c.fips openssl-1.0.1e/crypto/dsa/dsa_key.c ---- openssl-1.0.1e/crypto/dsa/dsa_key.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/dsa/dsa_key.c 2013-10-04 11:48:04.175694023 +0200 +diff -up openssl-1.0.1g/crypto/dsa/dsa_key.c.fips openssl-1.0.1g/crypto/dsa/dsa_key.c +--- openssl-1.0.1g/crypto/dsa/dsa_key.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/dsa/dsa_key.c 2014-05-06 16:29:50.539923063 +0200 @@ -66,6 +66,35 @@ #ifdef OPENSSL_FIPS @@ -1008,7 +983,7 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa_key.c.fips openssl-1.0.1e/crypto/dsa/dsa_ + + EVP_PKEY_set1_DSA(pk, dsa); + -+ if (fips_pkey_signature_test(pk, tbs, 0, NULL, 0, NULL, 0, NULL)) ++ if (fips_pkey_signature_test(pk, tbs, -1, NULL, 0, NULL, 0, NULL)) + ret = 1; + + err: @@ -1069,9 +1044,9 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa_key.c.fips 
openssl-1.0.1e/crypto/dsa/dsa_ ok=1; err: -diff -up openssl-1.0.1e/crypto/dsa/dsa_lib.c.fips openssl-1.0.1e/crypto/dsa/dsa_lib.c ---- openssl-1.0.1e/crypto/dsa/dsa_lib.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/dsa/dsa_lib.c 2013-10-04 11:48:04.175694023 +0200 +diff -up openssl-1.0.1g/crypto/dsa/dsa_lib.c.fips openssl-1.0.1g/crypto/dsa/dsa_lib.c +--- openssl-1.0.1g/crypto/dsa/dsa_lib.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/dsa/dsa_lib.c 2014-05-06 16:29:50.539923063 +0200 @@ -87,14 +87,7 @@ const DSA_METHOD *DSA_get_default_method { if(!default_DSA_method) @@ -1087,18 +1062,18 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa_lib.c.fips openssl-1.0.1e/crypto/dsa/dsa_ } return default_DSA_method; } -diff -up openssl-1.0.1e/crypto/dsa/dsa_locl.h.fips openssl-1.0.1e/crypto/dsa/dsa_locl.h ---- openssl-1.0.1e/crypto/dsa/dsa_locl.h.fips 2013-10-04 11:48:03.958689123 +0200 -+++ openssl-1.0.1e/crypto/dsa/dsa_locl.h 2013-10-04 11:48:04.175694023 +0200 +diff -up openssl-1.0.1g/crypto/dsa/dsa_locl.h.fips openssl-1.0.1g/crypto/dsa/dsa_locl.h +--- openssl-1.0.1g/crypto/dsa/dsa_locl.h.fips 2014-05-06 16:29:50.317917930 +0200 ++++ openssl-1.0.1g/crypto/dsa/dsa_locl.h 2014-05-06 16:29:50.539923063 +0200 @@ -56,5 +56,4 @@ int dsa_builtin_paramgen(DSA *ret, size_t bits, size_t qbits, const EVP_MD *evpmd, const unsigned char *seed_in, size_t seed_len, - unsigned char *seed_out, int *counter_ret, unsigned long *h_ret, BN_GENCB *cb); -diff -up openssl-1.0.1e/crypto/dsa/dsa_ossl.c.fips openssl-1.0.1e/crypto/dsa/dsa_ossl.c ---- openssl-1.0.1e/crypto/dsa/dsa_ossl.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/dsa/dsa_ossl.c 2013-10-04 11:48:04.175694023 +0200 +diff -up openssl-1.0.1g/crypto/dsa/dsa_ossl.c.fips openssl-1.0.1g/crypto/dsa/dsa_ossl.c +--- openssl-1.0.1g/crypto/dsa/dsa_ossl.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/dsa/dsa_ossl.c 2014-05-06 16:29:50.539923063 +0200 @@ -65,6 
+65,9 @@ #include #include @@ -1172,9 +1147,9 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa_ossl.c.fips openssl-1.0.1e/crypto/dsa/dsa dsa->flags|=DSA_FLAG_CACHE_MONT_P; return(1); } -diff -up openssl-1.0.1e/crypto/dsa/dsa_pmeth.c.fips openssl-1.0.1e/crypto/dsa/dsa_pmeth.c ---- openssl-1.0.1e/crypto/dsa/dsa_pmeth.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/dsa/dsa_pmeth.c 2013-10-04 11:48:04.175694023 +0200 +diff -up openssl-1.0.1g/crypto/dsa/dsa_pmeth.c.fips openssl-1.0.1g/crypto/dsa/dsa_pmeth.c +--- openssl-1.0.1g/crypto/dsa/dsa_pmeth.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/dsa/dsa_pmeth.c 2014-05-06 16:29:50.539923063 +0200 @@ -255,7 +255,7 @@ static int pkey_dsa_paramgen(EVP_PKEY_CT if (!dsa) return 0; @@ -1184,9 +1159,9 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa_pmeth.c.fips openssl-1.0.1e/crypto/dsa/ds if (ret) EVP_PKEY_assign_DSA(pkey, dsa); else -diff -up openssl-1.0.1e/crypto/dsa/dsatest.c.fips openssl-1.0.1e/crypto/dsa/dsatest.c ---- openssl-1.0.1e/crypto/dsa/dsatest.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/dsa/dsatest.c 2013-10-04 11:48:04.176694045 +0200 +diff -up openssl-1.0.1g/crypto/dsa/dsatest.c.fips openssl-1.0.1g/crypto/dsa/dsatest.c +--- openssl-1.0.1g/crypto/dsa/dsatest.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/dsa/dsatest.c 2014-05-06 16:29:50.539923063 +0200 @@ -96,36 +96,41 @@ static int MS_CALLBACK dsa_cb(int p, int /* seed, out_p, out_q, out_g are taken from the updated Appendix 5 to * FIPS PUB 186 and also appear in Appendix 5 to FIPS PIB 186-1 */ @@ -1271,9 +1246,9 @@ diff -up openssl-1.0.1e/crypto/dsa/dsatest.c.fips openssl-1.0.1e/crypto/dsa/dsat goto end; } if (h != 2) -diff -up openssl-1.0.1e/crypto/engine/eng_all.c.fips openssl-1.0.1e/crypto/engine/eng_all.c ---- openssl-1.0.1e/crypto/engine/eng_all.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/engine/eng_all.c 2013-10-04 11:48:04.176694045 +0200 +diff -up 
openssl-1.0.1g/crypto/engine/eng_all.c.fips openssl-1.0.1g/crypto/engine/eng_all.c +--- openssl-1.0.1g/crypto/engine/eng_all.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/engine/eng_all.c 2014-05-06 16:29:50.539923063 +0200 @@ -58,11 +58,25 @@ #include "cryptlib.h" @@ -1300,9 +1275,9 @@ diff -up openssl-1.0.1e/crypto/engine/eng_all.c.fips openssl-1.0.1e/crypto/engin #if 0 /* There's no longer any need for an "openssl" ENGINE unless, one day, * it is the *only* way for standard builtin implementations to be be -diff -up openssl-1.0.1e/crypto/evp/c_allc.c.fips openssl-1.0.1e/crypto/evp/c_allc.c ---- openssl-1.0.1e/crypto/evp/c_allc.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/evp/c_allc.c 2013-10-04 11:48:04.176694045 +0200 +diff -up openssl-1.0.1g/crypto/evp/c_allc.c.fips openssl-1.0.1g/crypto/evp/c_allc.c +--- openssl-1.0.1g/crypto/evp/c_allc.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/evp/c_allc.c 2014-05-06 16:29:50.540923086 +0200 @@ -65,6 +65,11 @@ void OpenSSL_add_all_ciphers(void) { @@ -1376,9 +1351,9 @@ diff -up openssl-1.0.1e/crypto/evp/c_allc.c.fips openssl-1.0.1e/crypto/evp/c_all + } +#endif } -diff -up openssl-1.0.1e/crypto/evp/c_alld.c.fips openssl-1.0.1e/crypto/evp/c_alld.c ---- openssl-1.0.1e/crypto/evp/c_alld.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/evp/c_alld.c 2013-10-04 11:48:04.176694045 +0200 +diff -up openssl-1.0.1g/crypto/evp/c_alld.c.fips openssl-1.0.1g/crypto/evp/c_alld.c +--- openssl-1.0.1g/crypto/evp/c_alld.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/evp/c_alld.c 2014-05-06 16:29:50.540923086 +0200 @@ -64,6 +64,11 @@ void OpenSSL_add_all_digests(void) @@ -1424,9 +1399,9 @@ diff -up openssl-1.0.1e/crypto/evp/c_alld.c.fips openssl-1.0.1e/crypto/evp/c_all + } +#endif } -diff -up openssl-1.0.1e/crypto/evp/digest.c.fips openssl-1.0.1e/crypto/evp/digest.c ---- openssl-1.0.1e/crypto/evp/digest.c.fips 2013-02-11 
16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/evp/digest.c 2013-10-04 11:48:04.176694045 +0200 +diff -up openssl-1.0.1g/crypto/evp/digest.c.fips openssl-1.0.1g/crypto/evp/digest.c +--- openssl-1.0.1g/crypto/evp/digest.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/evp/digest.c 2014-05-06 16:29:50.540923086 +0200 @@ -142,9 +142,50 @@ int EVP_DigestInit(EVP_MD_CTX *ctx, cons return EVP_DigestInit_ex(ctx, type, NULL); } @@ -1549,7 +1524,7 @@ diff -up openssl-1.0.1e/crypto/evp/digest.c.fips openssl-1.0.1e/crypto/evp/diges } int EVP_MD_CTX_copy(EVP_MD_CTX *out, const EVP_MD_CTX *in) -@@ -373,7 +414,6 @@ void EVP_MD_CTX_destroy(EVP_MD_CTX *ctx) +@@ -376,7 +417,6 @@ void EVP_MD_CTX_destroy(EVP_MD_CTX *ctx) /* This call frees resources associated with the context */ int EVP_MD_CTX_cleanup(EVP_MD_CTX *ctx) { @@ -1557,7 +1532,7 @@ diff -up openssl-1.0.1e/crypto/evp/digest.c.fips openssl-1.0.1e/crypto/evp/diges /* Don't assume ctx->md_data was cleaned in EVP_Digest_Final, * because sometimes only copies of the context are ever finalised. */ -@@ -386,7 +426,6 @@ int EVP_MD_CTX_cleanup(EVP_MD_CTX *ctx) +@@ -389,7 +429,6 @@ int EVP_MD_CTX_cleanup(EVP_MD_CTX *ctx) OPENSSL_cleanse(ctx->md_data,ctx->digest->ctx_size); OPENSSL_free(ctx->md_data); } @@ -1565,7 +1540,7 @@ diff -up openssl-1.0.1e/crypto/evp/digest.c.fips openssl-1.0.1e/crypto/evp/diges if (ctx->pctx) EVP_PKEY_CTX_free(ctx->pctx); #ifndef OPENSSL_NO_ENGINE -@@ -395,9 +434,6 @@ int EVP_MD_CTX_cleanup(EVP_MD_CTX *ctx) +@@ -398,9 +437,6 @@ int EVP_MD_CTX_cleanup(EVP_MD_CTX *ctx) * functional reference we held for this reason. 
*/ ENGINE_finish(ctx->engine); #endif @@ -1575,9 +1550,9 @@ diff -up openssl-1.0.1e/crypto/evp/digest.c.fips openssl-1.0.1e/crypto/evp/diges memset(ctx,'\0',sizeof *ctx); return 1; -diff -up openssl-1.0.1e/crypto/evp/e_aes.c.fips openssl-1.0.1e/crypto/evp/e_aes.c ---- openssl-1.0.1e/crypto/evp/e_aes.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/evp/e_aes.c 2013-10-04 11:48:04.177694068 +0200 +diff -up openssl-1.0.1g/crypto/evp/e_aes.c.fips openssl-1.0.1g/crypto/evp/e_aes.c +--- openssl-1.0.1g/crypto/evp/e_aes.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/evp/e_aes.c 2014-05-06 16:29:50.540923086 +0200 @@ -56,7 +56,6 @@ #include #include @@ -1595,7 +1570,7 @@ diff -up openssl-1.0.1e/crypto/evp/e_aes.c.fips openssl-1.0.1e/crypto/evp/e_aes. && arg < 12) return 0; #endif -@@ -1128,7 +1127,7 @@ static int aes_xts_cipher(EVP_CIPHER_CTX +@@ -1134,7 +1133,7 @@ static int aes_xts_cipher(EVP_CIPHER_CTX return 0; #ifdef OPENSSL_FIPS /* Requirement of SP800-38E */ @@ -1604,14 +1579,14 @@ diff -up openssl-1.0.1e/crypto/evp/e_aes.c.fips openssl-1.0.1e/crypto/evp/e_aes. 
(len > (1UL<<20)*16)) { EVPerr(EVP_F_AES_XTS_CIPHER, EVP_R_TOO_LARGE); -@@ -1311,4 +1310,3 @@ BLOCK_CIPHER_custom(NID_aes,192,1,12,ccm +@@ -1317,4 +1316,3 @@ BLOCK_CIPHER_custom(NID_aes,192,1,12,ccm BLOCK_CIPHER_custom(NID_aes,256,1,12,ccm,CCM,EVP_CIPH_FLAG_FIPS|CUSTOM_FLAGS) #endif -#endif -diff -up openssl-1.0.1e/crypto/evp/e_des3.c.fips openssl-1.0.1e/crypto/evp/e_des3.c ---- openssl-1.0.1e/crypto/evp/e_des3.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/evp/e_des3.c 2013-10-04 11:48:04.177694068 +0200 +diff -up openssl-1.0.1g/crypto/evp/e_des3.c.fips openssl-1.0.1g/crypto/evp/e_des3.c +--- openssl-1.0.1g/crypto/evp/e_des3.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/evp/e_des3.c 2014-05-06 16:29:50.540923086 +0200 @@ -65,8 +65,6 @@ #include #include @@ -1670,9 +1645,9 @@ diff -up openssl-1.0.1e/crypto/evp/e_des3.c.fips openssl-1.0.1e/crypto/evp/e_des } #endif -#endif -diff -up openssl-1.0.1e/crypto/evp/e_null.c.fips openssl-1.0.1e/crypto/evp/e_null.c ---- openssl-1.0.1e/crypto/evp/e_null.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/evp/e_null.c 2013-10-04 11:48:04.177694068 +0200 +diff -up openssl-1.0.1g/crypto/evp/e_null.c.fips openssl-1.0.1g/crypto/evp/e_null.c +--- openssl-1.0.1g/crypto/evp/e_null.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/evp/e_null.c 2014-05-06 16:29:50.540923086 +0200 @@ -61,8 +61,6 @@ #include #include @@ -1696,9 +1671,9 @@ diff -up openssl-1.0.1e/crypto/evp/e_null.c.fips openssl-1.0.1e/crypto/evp/e_nul return 1; } -#endif -diff -up openssl-1.0.1e/crypto/evp/evp_enc.c.fips openssl-1.0.1e/crypto/evp/evp_enc.c ---- openssl-1.0.1e/crypto/evp/evp_enc.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/evp/evp_enc.c 2013-10-04 11:48:04.177694068 +0200 +diff -up openssl-1.0.1g/crypto/evp/evp_enc.c.fips openssl-1.0.1g/crypto/evp/evp_enc.c +--- openssl-1.0.1g/crypto/evp/evp_enc.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ 
openssl-1.0.1g/crypto/evp/evp_enc.c 2014-05-06 16:29:50.541923109 +0200 @@ -69,17 +69,58 @@ #endif #include "evp_locl.h" @@ -1847,9 +1822,9 @@ diff -up openssl-1.0.1e/crypto/evp/evp_enc.c.fips openssl-1.0.1e/crypto/evp/evp_ memset(c,0,sizeof(EVP_CIPHER_CTX)); return 1; } -diff -up openssl-1.0.1e/crypto/evp/evp.h.fips openssl-1.0.1e/crypto/evp/evp.h ---- openssl-1.0.1e/crypto/evp/evp.h.fips 2013-10-04 11:48:04.071691675 +0200 -+++ openssl-1.0.1e/crypto/evp/evp.h 2013-10-04 11:48:04.177694068 +0200 +diff -up openssl-1.0.1g/crypto/evp/evp.h.fips openssl-1.0.1g/crypto/evp/evp.h +--- openssl-1.0.1g/crypto/evp/evp.h.fips 2014-05-06 16:29:50.432920589 +0200 ++++ openssl-1.0.1g/crypto/evp/evp.h 2014-05-06 16:29:50.541923109 +0200 @@ -75,6 +75,10 @@ #include #endif @@ -1902,9 +1877,9 @@ diff -up openssl-1.0.1e/crypto/evp/evp.h.fips openssl-1.0.1e/crypto/evp/evp.h /* Cipher handles any and all padding logic as well * as finalisation. */ -diff -up openssl-1.0.1e/crypto/evp/evp_lib.c.fips openssl-1.0.1e/crypto/evp/evp_lib.c ---- openssl-1.0.1e/crypto/evp/evp_lib.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/evp/evp_lib.c 2013-10-04 11:48:04.177694068 +0200 +diff -up openssl-1.0.1g/crypto/evp/evp_lib.c.fips openssl-1.0.1g/crypto/evp/evp_lib.c +--- openssl-1.0.1g/crypto/evp/evp_lib.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/evp/evp_lib.c 2014-05-06 16:29:50.541923109 +0200 @@ -190,6 +190,9 @@ int EVP_CIPHER_CTX_block_size(const EVP_ int EVP_Cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, unsigned int inl) @@ -1915,9 +1890,9 @@ diff -up openssl-1.0.1e/crypto/evp/evp_lib.c.fips openssl-1.0.1e/crypto/evp/evp_ return ctx->cipher->do_cipher(ctx,out,in,inl); } -diff -up openssl-1.0.1e/crypto/evp/evp_locl.h.fips openssl-1.0.1e/crypto/evp/evp_locl.h ---- openssl-1.0.1e/crypto/evp/evp_locl.h.fips 2013-10-04 11:48:04.067691584 +0200 -+++ openssl-1.0.1e/crypto/evp/evp_locl.h 2013-10-04 11:48:04.178694091 +0200 
+diff -up openssl-1.0.1g/crypto/evp/evp_locl.h.fips openssl-1.0.1g/crypto/evp/evp_locl.h +--- openssl-1.0.1g/crypto/evp/evp_locl.h.fips 2014-05-06 16:29:50.428920496 +0200 ++++ openssl-1.0.1g/crypto/evp/evp_locl.h 2014-05-06 16:29:50.541923109 +0200 @@ -258,10 +258,9 @@ const EVP_CIPHER *EVP_##cname##_ecb(void BLOCK_CIPHER_func_cfb(cipher##_##keysize,cprefix,cbits,kstruct,ksched) \ BLOCK_CIPHER_def_cfb(cipher##_##keysize,kstruct, \ @@ -1952,9 +1927,9 @@ diff -up openssl-1.0.1e/crypto/evp/evp_locl.h.fips openssl-1.0.1e/crypto/evp/evp #define Camellia_set_key private_Camellia_set_key #endif -diff -up openssl-1.0.1e/crypto/evp/Makefile.fips openssl-1.0.1e/crypto/evp/Makefile ---- openssl-1.0.1e/crypto/evp/Makefile.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/evp/Makefile 2013-10-04 11:48:04.178694091 +0200 +diff -up openssl-1.0.1g/crypto/evp/Makefile.fips openssl-1.0.1g/crypto/evp/Makefile +--- openssl-1.0.1g/crypto/evp/Makefile.fips 2014-04-07 18:55:33.000000000 +0200 ++++ openssl-1.0.1g/crypto/evp/Makefile 2014-05-06 16:29:50.541923109 +0200 @@ -28,7 +28,7 @@ LIBSRC= encode.c digest.c evp_enc.c evp_ bio_md.c bio_b64.c bio_enc.c evp_err.c e_null.c \ c_all.c c_allc.c c_alld.c evp_lib.c bio_ok.c \ @@ -1973,9 +1948,9 @@ diff -up openssl-1.0.1e/crypto/evp/Makefile.fips openssl-1.0.1e/crypto/evp/Makef e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o SRC= $(LIBSRC) -diff -up openssl-1.0.1e/crypto/evp/m_dss.c.fips openssl-1.0.1e/crypto/evp/m_dss.c ---- openssl-1.0.1e/crypto/evp/m_dss.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/evp/m_dss.c 2013-10-04 11:48:04.178694091 +0200 +diff -up openssl-1.0.1g/crypto/evp/m_dss.c.fips openssl-1.0.1g/crypto/evp/m_dss.c +--- openssl-1.0.1g/crypto/evp/m_dss.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/evp/m_dss.c 2014-05-06 16:29:50.542923132 +0200 @@ -66,7 +66,6 @@ #endif @@ -1998,9 +1973,9 @@ diff -up openssl-1.0.1e/crypto/evp/m_dss.c.fips openssl-1.0.1e/crypto/evp/m_dss. 
} #endif -#endif -diff -up openssl-1.0.1e/crypto/evp/m_dss1.c.fips openssl-1.0.1e/crypto/evp/m_dss1.c ---- openssl-1.0.1e/crypto/evp/m_dss1.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/evp/m_dss1.c 2013-10-04 11:48:04.178694091 +0200 +diff -up openssl-1.0.1g/crypto/evp/m_dss1.c.fips openssl-1.0.1g/crypto/evp/m_dss1.c +--- openssl-1.0.1g/crypto/evp/m_dss1.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/evp/m_dss1.c 2014-05-06 16:29:50.542923132 +0200 @@ -68,8 +68,6 @@ #include #endif @@ -2024,9 +1999,9 @@ diff -up openssl-1.0.1e/crypto/evp/m_dss1.c.fips openssl-1.0.1e/crypto/evp/m_dss } #endif -#endif -diff -up openssl-1.0.1e/crypto/evp/m_md2.c.fips openssl-1.0.1e/crypto/evp/m_md2.c ---- openssl-1.0.1e/crypto/evp/m_md2.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/evp/m_md2.c 2013-10-04 11:48:04.178694091 +0200 +diff -up openssl-1.0.1g/crypto/evp/m_md2.c.fips openssl-1.0.1g/crypto/evp/m_md2.c +--- openssl-1.0.1g/crypto/evp/m_md2.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/evp/m_md2.c 2014-05-06 16:29:50.542923132 +0200 @@ -68,6 +68,7 @@ #ifndef OPENSSL_NO_RSA #include @@ -2035,9 +2010,9 @@ diff -up openssl-1.0.1e/crypto/evp/m_md2.c.fips openssl-1.0.1e/crypto/evp/m_md2. 
static int init(EVP_MD_CTX *ctx) { return MD2_Init(ctx->md_data); } -diff -up openssl-1.0.1e/crypto/evp/m_sha1.c.fips openssl-1.0.1e/crypto/evp/m_sha1.c ---- openssl-1.0.1e/crypto/evp/m_sha1.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/evp/m_sha1.c 2013-10-04 11:48:04.178694091 +0200 +diff -up openssl-1.0.1g/crypto/evp/m_sha1.c.fips openssl-1.0.1g/crypto/evp/m_sha1.c +--- openssl-1.0.1g/crypto/evp/m_sha1.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/evp/m_sha1.c 2014-05-06 16:29:50.542923132 +0200 @@ -59,8 +59,6 @@ #include #include "cryptlib.h" @@ -2102,9 +2077,9 @@ diff -up openssl-1.0.1e/crypto/evp/m_sha1.c.fips openssl-1.0.1e/crypto/evp/m_sha #endif /* ifndef OPENSSL_NO_SHA512 */ -#endif -diff -up openssl-1.0.1e/crypto/evp/p_sign.c.fips openssl-1.0.1e/crypto/evp/p_sign.c ---- openssl-1.0.1e/crypto/evp/p_sign.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/evp/p_sign.c 2013-10-04 11:48:04.179694114 +0200 +diff -up openssl-1.0.1g/crypto/evp/p_sign.c.fips openssl-1.0.1g/crypto/evp/p_sign.c +--- openssl-1.0.1g/crypto/evp/p_sign.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/evp/p_sign.c 2014-05-06 16:29:50.542923132 +0200 @@ -61,6 +61,7 @@ #include #include @@ -2136,9 +2111,9 @@ diff -up openssl-1.0.1e/crypto/evp/p_sign.c.fips openssl-1.0.1e/crypto/evp/p_sig if (EVP_PKEY_sign(pkctx, sigret, &sltmp, m, m_len) <= 0) goto err; *siglen = sltmp; -diff -up openssl-1.0.1e/crypto/evp/p_verify.c.fips openssl-1.0.1e/crypto/evp/p_verify.c ---- openssl-1.0.1e/crypto/evp/p_verify.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/evp/p_verify.c 2013-10-04 11:48:04.179694114 +0200 +diff -up openssl-1.0.1g/crypto/evp/p_verify.c.fips openssl-1.0.1g/crypto/evp/p_verify.c +--- openssl-1.0.1g/crypto/evp/p_verify.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/evp/p_verify.c 2014-05-06 16:29:50.542923132 +0200 @@ -61,6 +61,7 @@ #include #include @@ 
-2170,9 +2145,9 @@ diff -up openssl-1.0.1e/crypto/evp/p_verify.c.fips openssl-1.0.1e/crypto/evp/p_v i = EVP_PKEY_verify(pkctx, sigbuf, siglen, m, m_len); err: EVP_PKEY_CTX_free(pkctx); -diff -up openssl-1.0.1e/crypto/fips/cavs/fips_aesavs.c.fips openssl-1.0.1e/crypto/fips/cavs/fips_aesavs.c ---- openssl-1.0.1e/crypto/fips/cavs/fips_aesavs.c.fips 2013-10-04 11:48:04.179694114 +0200 -+++ openssl-1.0.1e/crypto/fips/cavs/fips_aesavs.c 2013-10-04 11:48:04.179694114 +0200 +diff -up openssl-1.0.1g/crypto/fips/cavs/fips_aesavs.c.fips openssl-1.0.1g/crypto/fips/cavs/fips_aesavs.c +--- openssl-1.0.1g/crypto/fips/cavs/fips_aesavs.c.fips 2014-05-06 16:29:50.543923155 +0200 ++++ openssl-1.0.1g/crypto/fips/cavs/fips_aesavs.c 2014-05-06 16:29:50.543923155 +0200 @@ -0,0 +1,939 @@ +/* ==================================================================== + * Copyright (c) 2004 The OpenSSL Project. All rights reserved. @@ -3113,9 +3088,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_aesavs.c.fips openssl-1.0.1e/crypt + } + +#endif -diff -up openssl-1.0.1e/crypto/fips/cavs/fips_cmactest.c.fips openssl-1.0.1e/crypto/fips/cavs/fips_cmactest.c ---- openssl-1.0.1e/crypto/fips/cavs/fips_cmactest.c.fips 2013-10-04 11:48:04.179694114 +0200 -+++ openssl-1.0.1e/crypto/fips/cavs/fips_cmactest.c 2013-10-04 11:48:04.179694114 +0200 +diff -up openssl-1.0.1g/crypto/fips/cavs/fips_cmactest.c.fips openssl-1.0.1g/crypto/fips/cavs/fips_cmactest.c +--- openssl-1.0.1g/crypto/fips/cavs/fips_cmactest.c.fips 2014-05-06 16:29:50.543923155 +0200 ++++ openssl-1.0.1g/crypto/fips/cavs/fips_cmactest.c 2014-05-06 16:29:50.543923155 +0200 @@ -0,0 +1,517 @@ +/* fips_cmactest.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL @@ -3634,9 +3609,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_cmactest.c.fips openssl-1.0.1e/cry + } + +#endif -diff -up openssl-1.0.1e/crypto/fips/cavs/fips_desmovs.c.fips openssl-1.0.1e/crypto/fips/cavs/fips_desmovs.c ---- 
openssl-1.0.1e/crypto/fips/cavs/fips_desmovs.c.fips 2013-10-04 11:48:04.180694136 +0200 -+++ openssl-1.0.1e/crypto/fips/cavs/fips_desmovs.c 2013-10-04 11:48:04.180694136 +0200 +diff -up openssl-1.0.1g/crypto/fips/cavs/fips_desmovs.c.fips openssl-1.0.1g/crypto/fips/cavs/fips_desmovs.c +--- openssl-1.0.1g/crypto/fips/cavs/fips_desmovs.c.fips 2014-05-06 16:29:50.543923155 +0200 ++++ openssl-1.0.1g/crypto/fips/cavs/fips_desmovs.c 2014-05-06 16:29:50.543923155 +0200 @@ -0,0 +1,702 @@ +/* ==================================================================== + * Copyright (c) 2004 The OpenSSL Project. All rights reserved. @@ -4340,9 +4315,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_desmovs.c.fips openssl-1.0.1e/cryp + } + +#endif -diff -up openssl-1.0.1e/crypto/fips/cavs/fips_dhvs.c.fips openssl-1.0.1e/crypto/fips/cavs/fips_dhvs.c ---- openssl-1.0.1e/crypto/fips/cavs/fips_dhvs.c.fips 2013-10-04 11:48:04.180694136 +0200 -+++ openssl-1.0.1e/crypto/fips/cavs/fips_dhvs.c 2013-10-04 11:48:04.180694136 +0200 +diff -up openssl-1.0.1g/crypto/fips/cavs/fips_dhvs.c.fips openssl-1.0.1g/crypto/fips/cavs/fips_dhvs.c +--- openssl-1.0.1g/crypto/fips/cavs/fips_dhvs.c.fips 2014-05-06 16:29:50.543923155 +0200 ++++ openssl-1.0.1g/crypto/fips/cavs/fips_dhvs.c 2014-05-06 16:29:50.543923155 +0200 @@ -0,0 +1,292 @@ +/* fips/dh/fips_dhvs.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL @@ -4636,9 +4611,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_dhvs.c.fips openssl-1.0.1e/crypto/ + } + +#endif -diff -up openssl-1.0.1e/crypto/fips/cavs/fips_drbgvs.c.fips openssl-1.0.1e/crypto/fips/cavs/fips_drbgvs.c ---- openssl-1.0.1e/crypto/fips/cavs/fips_drbgvs.c.fips 2013-10-04 11:48:04.180694136 +0200 -+++ openssl-1.0.1e/crypto/fips/cavs/fips_drbgvs.c 2013-10-04 11:48:04.180694136 +0200 +diff -up openssl-1.0.1g/crypto/fips/cavs/fips_drbgvs.c.fips openssl-1.0.1g/crypto/fips/cavs/fips_drbgvs.c +--- openssl-1.0.1g/crypto/fips/cavs/fips_drbgvs.c.fips 2014-05-06 
16:29:50.544923178 +0200 ++++ openssl-1.0.1g/crypto/fips/cavs/fips_drbgvs.c 2014-05-06 16:29:50.544923178 +0200 @@ -0,0 +1,416 @@ +/* fips/rand/fips_drbgvs.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL @@ -5056,9 +5031,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_drbgvs.c.fips openssl-1.0.1e/crypt + } + +#endif -diff -up openssl-1.0.1e/crypto/fips/cavs/fips_dssvs.c.fips openssl-1.0.1e/crypto/fips/cavs/fips_dssvs.c ---- openssl-1.0.1e/crypto/fips/cavs/fips_dssvs.c.fips 2013-10-04 11:48:04.180694136 +0200 -+++ openssl-1.0.1e/crypto/fips/cavs/fips_dssvs.c 2013-10-04 11:48:04.180694136 +0200 +diff -up openssl-1.0.1g/crypto/fips/cavs/fips_dssvs.c.fips openssl-1.0.1g/crypto/fips/cavs/fips_dssvs.c +--- openssl-1.0.1g/crypto/fips/cavs/fips_dssvs.c.fips 2014-05-06 16:29:50.544923178 +0200 ++++ openssl-1.0.1g/crypto/fips/cavs/fips_dssvs.c 2014-05-06 16:29:50.544923178 +0200 @@ -0,0 +1,537 @@ +#include + @@ -5597,9 +5572,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_dssvs.c.fips openssl-1.0.1e/crypto + } + +#endif -diff -up openssl-1.0.1e/crypto/fips/cavs/fips_gcmtest.c.fips openssl-1.0.1e/crypto/fips/cavs/fips_gcmtest.c ---- openssl-1.0.1e/crypto/fips/cavs/fips_gcmtest.c.fips 2013-10-04 11:48:04.181694158 +0200 -+++ openssl-1.0.1e/crypto/fips/cavs/fips_gcmtest.c 2013-10-04 11:48:04.181694158 +0200 +diff -up openssl-1.0.1g/crypto/fips/cavs/fips_gcmtest.c.fips openssl-1.0.1g/crypto/fips/cavs/fips_gcmtest.c +--- openssl-1.0.1g/crypto/fips/cavs/fips_gcmtest.c.fips 2014-05-06 16:29:50.544923178 +0200 ++++ openssl-1.0.1g/crypto/fips/cavs/fips_gcmtest.c 2014-05-06 16:29:50.544923178 +0200 @@ -0,0 +1,571 @@ +/* fips/aes/fips_gcmtest.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL @@ -6172,9 +6147,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_gcmtest.c.fips openssl-1.0.1e/cryp +} + +#endif -diff -up openssl-1.0.1e/crypto/fips/cavs/fips_rngvs.c.fips openssl-1.0.1e/crypto/fips/cavs/fips_rngvs.c ---- 
openssl-1.0.1e/crypto/fips/cavs/fips_rngvs.c.fips 2013-10-04 11:48:04.181694158 +0200 -+++ openssl-1.0.1e/crypto/fips/cavs/fips_rngvs.c 2013-10-04 11:48:04.181694158 +0200 +diff -up openssl-1.0.1g/crypto/fips/cavs/fips_rngvs.c.fips openssl-1.0.1g/crypto/fips/cavs/fips_rngvs.c +--- openssl-1.0.1g/crypto/fips/cavs/fips_rngvs.c.fips 2014-05-06 16:29:50.544923178 +0200 ++++ openssl-1.0.1g/crypto/fips/cavs/fips_rngvs.c 2014-05-06 16:29:50.544923178 +0200 @@ -0,0 +1,230 @@ +/* + * Crude test driver for processing the VST and MCT testvector files @@ -6406,9 +6381,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_rngvs.c.fips openssl-1.0.1e/crypto + return 0; + } +#endif -diff -up openssl-1.0.1e/crypto/fips/cavs/fips_rsagtest.c.fips openssl-1.0.1e/crypto/fips/cavs/fips_rsagtest.c ---- openssl-1.0.1e/crypto/fips/cavs/fips_rsagtest.c.fips 2013-10-04 11:48:04.181694158 +0200 -+++ openssl-1.0.1e/crypto/fips/cavs/fips_rsagtest.c 2013-10-04 11:48:04.181694158 +0200 +diff -up openssl-1.0.1g/crypto/fips/cavs/fips_rsagtest.c.fips openssl-1.0.1g/crypto/fips/cavs/fips_rsagtest.c +--- openssl-1.0.1g/crypto/fips/cavs/fips_rsagtest.c.fips 2014-05-06 16:29:50.545923201 +0200 ++++ openssl-1.0.1g/crypto/fips/cavs/fips_rsagtest.c 2014-05-06 16:29:50.545923201 +0200 @@ -0,0 +1,390 @@ +/* fips_rsagtest.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL @@ -6800,9 +6775,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_rsagtest.c.fips openssl-1.0.1e/cry + } + +#endif -diff -up openssl-1.0.1e/crypto/fips/cavs/fips_rsastest.c.fips openssl-1.0.1e/crypto/fips/cavs/fips_rsastest.c ---- openssl-1.0.1e/crypto/fips/cavs/fips_rsastest.c.fips 2013-10-04 11:48:04.181694158 +0200 -+++ openssl-1.0.1e/crypto/fips/cavs/fips_rsastest.c 2013-10-04 11:48:04.181694158 +0200 +diff -up openssl-1.0.1g/crypto/fips/cavs/fips_rsastest.c.fips openssl-1.0.1g/crypto/fips/cavs/fips_rsastest.c +--- openssl-1.0.1g/crypto/fips/cavs/fips_rsastest.c.fips 2014-05-06 16:29:50.545923201 +0200 ++++ 
openssl-1.0.1g/crypto/fips/cavs/fips_rsastest.c 2014-05-06 16:29:50.545923201 +0200 @@ -0,0 +1,370 @@ +/* fips_rsastest.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL @@ -7174,9 +7149,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_rsastest.c.fips openssl-1.0.1e/cry + return ret; + } +#endif -diff -up openssl-1.0.1e/crypto/fips/cavs/fips_rsavtest.c.fips openssl-1.0.1e/crypto/fips/cavs/fips_rsavtest.c ---- openssl-1.0.1e/crypto/fips/cavs/fips_rsavtest.c.fips 2013-10-04 11:48:04.181694158 +0200 -+++ openssl-1.0.1e/crypto/fips/cavs/fips_rsavtest.c 2013-10-04 11:48:04.181694158 +0200 +diff -up openssl-1.0.1g/crypto/fips/cavs/fips_rsavtest.c.fips openssl-1.0.1g/crypto/fips/cavs/fips_rsavtest.c +--- openssl-1.0.1g/crypto/fips/cavs/fips_rsavtest.c.fips 2014-05-06 16:29:50.545923201 +0200 ++++ openssl-1.0.1g/crypto/fips/cavs/fips_rsavtest.c 2014-05-06 16:29:50.545923201 +0200 @@ -0,0 +1,377 @@ +/* fips_rsavtest.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL @@ -7555,9 +7530,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_rsavtest.c.fips openssl-1.0.1e/cry + return ret; + } +#endif -diff -up openssl-1.0.1e/crypto/fips/cavs/fips_shatest.c.fips openssl-1.0.1e/crypto/fips/cavs/fips_shatest.c ---- openssl-1.0.1e/crypto/fips/cavs/fips_shatest.c.fips 2013-10-04 11:48:04.182694181 +0200 -+++ openssl-1.0.1e/crypto/fips/cavs/fips_shatest.c 2013-10-04 11:48:04.182694181 +0200 +diff -up openssl-1.0.1g/crypto/fips/cavs/fips_shatest.c.fips openssl-1.0.1g/crypto/fips/cavs/fips_shatest.c +--- openssl-1.0.1g/crypto/fips/cavs/fips_shatest.c.fips 2014-05-06 16:29:50.545923201 +0200 ++++ openssl-1.0.1g/crypto/fips/cavs/fips_shatest.c 2014-05-06 16:29:50.545923201 +0200 @@ -0,0 +1,388 @@ +/* fips_shatest.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL @@ -7947,9 +7922,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_shatest.c.fips openssl-1.0.1e/cryp + } + +#endif -diff -up 
openssl-1.0.1e/crypto/fips/cavs/fips_utl.h.fips openssl-1.0.1e/crypto/fips/cavs/fips_utl.h ---- openssl-1.0.1e/crypto/fips/cavs/fips_utl.h.fips 2013-10-04 11:48:04.182694181 +0200 -+++ openssl-1.0.1e/crypto/fips/cavs/fips_utl.h 2013-10-04 11:48:04.182694181 +0200 +diff -up openssl-1.0.1g/crypto/fips/cavs/fips_utl.h.fips openssl-1.0.1g/crypto/fips/cavs/fips_utl.h +--- openssl-1.0.1g/crypto/fips/cavs/fips_utl.h.fips 2014-05-06 16:29:50.545923201 +0200 ++++ openssl-1.0.1g/crypto/fips/cavs/fips_utl.h 2014-05-06 16:29:50.545923201 +0200 @@ -0,0 +1,343 @@ +/* ==================================================================== + * Copyright (c) 2007 The OpenSSL Project. All rights reserved. @@ -8294,9 +8269,9 @@ diff -up openssl-1.0.1e/crypto/fips/cavs/fips_utl.h.fips openssl-1.0.1e/crypto/f +#endif + } + -diff -up openssl-1.0.1e/crypto/fips/fips_aes_selftest.c.fips openssl-1.0.1e/crypto/fips/fips_aes_selftest.c ---- openssl-1.0.1e/crypto/fips/fips_aes_selftest.c.fips 2013-10-04 11:48:04.182694181 +0200 -+++ openssl-1.0.1e/crypto/fips/fips_aes_selftest.c 2013-10-04 11:48:04.182694181 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_aes_selftest.c.fips openssl-1.0.1g/crypto/fips/fips_aes_selftest.c +--- openssl-1.0.1g/crypto/fips/fips_aes_selftest.c.fips 2014-05-06 16:29:50.546923224 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_aes_selftest.c 2014-05-06 16:29:50.546923224 +0200 @@ -0,0 +1,359 @@ +/* ==================================================================== + * Copyright (c) 2003 The OpenSSL Project. All rights reserved. 
@@ -8657,10 +8632,10 @@ diff -up openssl-1.0.1e/crypto/fips/fips_aes_selftest.c.fips openssl-1.0.1e/cryp + } + +#endif -diff -up openssl-1.0.1e/crypto/fips/fips.c.fips openssl-1.0.1e/crypto/fips/fips.c ---- openssl-1.0.1e/crypto/fips/fips.c.fips 2013-10-04 11:48:04.182694181 +0200 -+++ openssl-1.0.1e/crypto/fips/fips.c 2013-10-04 11:48:04.182694181 +0200 -@@ -0,0 +1,489 @@ +diff -up openssl-1.0.1g/crypto/fips/fips.c.fips openssl-1.0.1g/crypto/fips/fips.c +--- openssl-1.0.1g/crypto/fips/fips.c.fips 2014-05-06 16:29:50.546923224 +0200 ++++ openssl-1.0.1g/crypto/fips/fips.c 2014-05-06 16:33:24.309865160 +0200 +@@ -0,0 +1,491 @@ +/* ==================================================================== + * Copyright (c) 2003 The OpenSSL Project. All rights reserved. + * @@ -8990,6 +8965,8 @@ diff -up openssl-1.0.1e/crypto/fips/fips.c.fips openssl-1.0.1e/crypto/fips/fips. + } + free(buf); + free(hex); ++ } else { ++ rv = -1; + } + +end: @@ -9150,9 +9127,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips.c.fips openssl-1.0.1e/crypto/fips/fips. + + +#endif -diff -up openssl-1.0.1e/crypto/fips/fips_cmac_selftest.c.fips openssl-1.0.1e/crypto/fips/fips_cmac_selftest.c ---- openssl-1.0.1e/crypto/fips/fips_cmac_selftest.c.fips 2013-10-04 11:48:04.183694204 +0200 -+++ openssl-1.0.1e/crypto/fips/fips_cmac_selftest.c 2013-10-04 11:48:04.183694204 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_cmac_selftest.c.fips openssl-1.0.1g/crypto/fips/fips_cmac_selftest.c +--- openssl-1.0.1g/crypto/fips/fips_cmac_selftest.c.fips 2014-05-06 16:29:50.546923224 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_cmac_selftest.c 2014-05-06 16:29:50.546923224 +0200 @@ -0,0 +1,161 @@ +/* ==================================================================== + * Copyright (c) 2011 The OpenSSL Project. All rights reserved. 
@@ -9315,9 +9292,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_cmac_selftest.c.fips openssl-1.0.1e/cry + return rv; + } +#endif -diff -up openssl-1.0.1e/crypto/fips/fips_des_selftest.c.fips openssl-1.0.1e/crypto/fips/fips_des_selftest.c ---- openssl-1.0.1e/crypto/fips/fips_des_selftest.c.fips 2013-10-04 11:48:04.183694204 +0200 -+++ openssl-1.0.1e/crypto/fips/fips_des_selftest.c 2013-10-04 11:48:04.183694204 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_des_selftest.c.fips openssl-1.0.1g/crypto/fips/fips_des_selftest.c +--- openssl-1.0.1g/crypto/fips/fips_des_selftest.c.fips 2014-05-06 16:29:50.546923224 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_des_selftest.c 2014-05-06 16:29:50.546923224 +0200 @@ -0,0 +1,147 @@ +/* ==================================================================== + * Copyright (c) 2003 The OpenSSL Project. All rights reserved. @@ -9466,9 +9443,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_des_selftest.c.fips openssl-1.0.1e/cryp + return ret; + } +#endif -diff -up openssl-1.0.1e/crypto/fips/fips_drbg_ctr.c.fips openssl-1.0.1e/crypto/fips/fips_drbg_ctr.c ---- openssl-1.0.1e/crypto/fips/fips_drbg_ctr.c.fips 2013-10-04 11:48:04.183694204 +0200 -+++ openssl-1.0.1e/crypto/fips/fips_drbg_ctr.c 2013-10-04 11:48:04.183694204 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_drbg_ctr.c.fips openssl-1.0.1g/crypto/fips/fips_drbg_ctr.c +--- openssl-1.0.1g/crypto/fips/fips_drbg_ctr.c.fips 2014-05-06 16:29:50.546923224 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_drbg_ctr.c 2014-05-06 16:29:50.546923224 +0200 @@ -0,0 +1,436 @@ +/* fips/rand/fips_drbg_ctr.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL @@ -9906,9 +9883,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_drbg_ctr.c.fips openssl-1.0.1e/crypto/f + + return 1; + } -diff -up openssl-1.0.1e/crypto/fips/fips_drbg_hash.c.fips openssl-1.0.1e/crypto/fips/fips_drbg_hash.c ---- openssl-1.0.1e/crypto/fips/fips_drbg_hash.c.fips 2013-10-04 11:48:04.183694204 +0200 -+++ 
openssl-1.0.1e/crypto/fips/fips_drbg_hash.c 2013-10-04 11:48:04.183694204 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_drbg_hash.c.fips openssl-1.0.1g/crypto/fips/fips_drbg_hash.c +--- openssl-1.0.1g/crypto/fips/fips_drbg_hash.c.fips 2014-05-06 16:29:50.547923248 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_drbg_hash.c 2014-05-06 16:29:50.547923248 +0200 @@ -0,0 +1,378 @@ +/* fips/rand/fips_drbg_hash.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL @@ -10288,9 +10265,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_drbg_hash.c.fips openssl-1.0.1e/crypto/ + + return 1; + } -diff -up openssl-1.0.1e/crypto/fips/fips_drbg_hmac.c.fips openssl-1.0.1e/crypto/fips/fips_drbg_hmac.c ---- openssl-1.0.1e/crypto/fips/fips_drbg_hmac.c.fips 2013-10-04 11:48:04.183694204 +0200 -+++ openssl-1.0.1e/crypto/fips/fips_drbg_hmac.c 2013-10-04 11:48:04.183694204 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_drbg_hmac.c.fips openssl-1.0.1g/crypto/fips/fips_drbg_hmac.c +--- openssl-1.0.1g/crypto/fips/fips_drbg_hmac.c.fips 2014-05-06 16:29:50.547923248 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_drbg_hmac.c 2014-05-06 16:29:50.547923248 +0200 @@ -0,0 +1,281 @@ +/* fips/rand/fips_drbg_hmac.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL @@ -10573,9 +10550,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_drbg_hmac.c.fips openssl-1.0.1e/crypto/ + + return 1; + } -diff -up openssl-1.0.1e/crypto/fips/fips_drbg_lib.c.fips openssl-1.0.1e/crypto/fips/fips_drbg_lib.c ---- openssl-1.0.1e/crypto/fips/fips_drbg_lib.c.fips 2013-10-04 11:48:04.184694226 +0200 -+++ openssl-1.0.1e/crypto/fips/fips_drbg_lib.c 2013-10-04 11:48:04.184694226 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_drbg_lib.c.fips openssl-1.0.1g/crypto/fips/fips_drbg_lib.c +--- openssl-1.0.1g/crypto/fips/fips_drbg_lib.c.fips 2014-05-06 16:29:50.547923248 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_drbg_lib.c 2014-05-06 16:29:50.547923248 +0200 @@ -0,0 +1,578 @@ +/* Written by Dr Stephen N 
Henson (steve@openssl.org) for the OpenSSL + * project. @@ -11155,9 +11132,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_drbg_lib.c.fips openssl-1.0.1e/crypto/f + memcpy(dctx->lb, out, dctx->blocklength); + return 1; + } -diff -up openssl-1.0.1e/crypto/fips/fips_drbg_rand.c.fips openssl-1.0.1e/crypto/fips/fips_drbg_rand.c ---- openssl-1.0.1e/crypto/fips/fips_drbg_rand.c.fips 2013-10-04 11:48:04.184694226 +0200 -+++ openssl-1.0.1e/crypto/fips/fips_drbg_rand.c 2013-10-04 11:48:04.184694226 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_drbg_rand.c.fips openssl-1.0.1g/crypto/fips/fips_drbg_rand.c +--- openssl-1.0.1g/crypto/fips/fips_drbg_rand.c.fips 2014-05-06 16:29:50.547923248 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_drbg_rand.c 2014-05-06 16:29:50.547923248 +0200 @@ -0,0 +1,172 @@ +/* fips/rand/fips_drbg_rand.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL @@ -11331,9 +11308,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_drbg_rand.c.fips openssl-1.0.1e/crypto/ + return &rand_drbg_meth; + } + -diff -up openssl-1.0.1e/crypto/fips/fips_drbg_selftest.c.fips openssl-1.0.1e/crypto/fips/fips_drbg_selftest.c ---- openssl-1.0.1e/crypto/fips/fips_drbg_selftest.c.fips 2013-10-04 11:48:04.184694226 +0200 -+++ openssl-1.0.1e/crypto/fips/fips_drbg_selftest.c 2013-10-04 11:48:04.184694226 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_drbg_selftest.c.fips openssl-1.0.1g/crypto/fips/fips_drbg_selftest.c +--- openssl-1.0.1g/crypto/fips/fips_drbg_selftest.c.fips 2014-05-06 16:29:50.548923271 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_drbg_selftest.c 2014-05-06 16:29:50.548923271 +0200 @@ -0,0 +1,862 @@ +/* fips/rand/fips_drbg_selftest.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL @@ -12197,9 +12174,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_drbg_selftest.c.fips openssl-1.0.1e/cry + return rv; + } + -diff -up openssl-1.0.1e/crypto/fips/fips_drbg_selftest.h.fips openssl-1.0.1e/crypto/fips/fips_drbg_selftest.h ---- 
openssl-1.0.1e/crypto/fips/fips_drbg_selftest.h.fips 2013-10-04 11:48:04.185694249 +0200 -+++ openssl-1.0.1e/crypto/fips/fips_drbg_selftest.h 2013-10-04 11:48:04.185694249 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_drbg_selftest.h.fips openssl-1.0.1g/crypto/fips/fips_drbg_selftest.h +--- openssl-1.0.1g/crypto/fips/fips_drbg_selftest.h.fips 2014-05-06 16:29:50.548923271 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_drbg_selftest.h 2014-05-06 16:29:50.548923271 +0200 @@ -0,0 +1,2335 @@ +/* ==================================================================== + * Copyright (c) 2011 The OpenSSL Project. All rights reserved. @@ -14536,9 +14513,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_drbg_selftest.h.fips openssl-1.0.1e/cry + 0xc2,0xd6,0xfd,0xa5 + }; + -diff -up openssl-1.0.1e/crypto/fips/fips_dsa_selftest.c.fips openssl-1.0.1e/crypto/fips/fips_dsa_selftest.c ---- openssl-1.0.1e/crypto/fips/fips_dsa_selftest.c.fips 2013-10-04 11:48:04.185694249 +0200 -+++ openssl-1.0.1e/crypto/fips/fips_dsa_selftest.c 2013-10-04 11:48:04.185694249 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_dsa_selftest.c.fips openssl-1.0.1g/crypto/fips/fips_dsa_selftest.c +--- openssl-1.0.1g/crypto/fips/fips_dsa_selftest.c.fips 2014-05-06 16:29:50.549923294 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_dsa_selftest.c 2014-05-06 16:29:50.548923271 +0200 @@ -0,0 +1,193 @@ +/* ==================================================================== + * Copyright (c) 2011 The OpenSSL Project. All rights reserved. 
@@ -14733,9 +14710,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_dsa_selftest.c.fips openssl-1.0.1e/cryp + return ret; + } +#endif -diff -up openssl-1.0.1e/crypto/fips/fips_enc.c.fips openssl-1.0.1e/crypto/fips/fips_enc.c ---- openssl-1.0.1e/crypto/fips/fips_enc.c.fips 2013-10-04 11:48:04.185694249 +0200 -+++ openssl-1.0.1e/crypto/fips/fips_enc.c 2013-10-04 11:48:04.185694249 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_enc.c.fips openssl-1.0.1g/crypto/fips/fips_enc.c +--- openssl-1.0.1g/crypto/fips/fips_enc.c.fips 2014-05-06 16:29:50.549923294 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_enc.c 2014-05-06 16:29:50.549923294 +0200 @@ -0,0 +1,191 @@ +/* fipe/evp/fips_enc.c */ +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) @@ -14928,9 +14905,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_enc.c.fips openssl-1.0.1e/crypto/fips/f + } + } + -diff -up openssl-1.0.1e/crypto/fips/fips.h.fips openssl-1.0.1e/crypto/fips/fips.h ---- openssl-1.0.1e/crypto/fips/fips.h.fips 2013-10-04 11:48:04.186694271 +0200 -+++ openssl-1.0.1e/crypto/fips/fips.h 2013-10-04 11:48:04.186694271 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips.h.fips openssl-1.0.1g/crypto/fips/fips.h +--- openssl-1.0.1g/crypto/fips/fips.h.fips 2014-05-06 16:29:50.549923294 +0200 ++++ openssl-1.0.1g/crypto/fips/fips.h 2014-05-06 16:29:50.549923294 +0200 @@ -0,0 +1,279 @@ +/* ==================================================================== + * Copyright (c) 2003 The OpenSSL Project. All rights reserved. @@ -15211,9 +15188,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips.h.fips openssl-1.0.1e/crypto/fips/fips. 
+} +#endif +#endif -diff -up openssl-1.0.1e/crypto/fips/fips_hmac_selftest.c.fips openssl-1.0.1e/crypto/fips/fips_hmac_selftest.c ---- openssl-1.0.1e/crypto/fips/fips_hmac_selftest.c.fips 2013-10-04 11:48:04.186694271 +0200 -+++ openssl-1.0.1e/crypto/fips/fips_hmac_selftest.c 2013-10-04 11:48:04.186694271 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_hmac_selftest.c.fips openssl-1.0.1g/crypto/fips/fips_hmac_selftest.c +--- openssl-1.0.1g/crypto/fips/fips_hmac_selftest.c.fips 2014-05-06 16:29:50.549923294 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_hmac_selftest.c 2014-05-06 16:29:50.549923294 +0200 @@ -0,0 +1,137 @@ +/* ==================================================================== + * Copyright (c) 2005 The OpenSSL Project. All rights reserved. @@ -15352,9 +15329,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_hmac_selftest.c.fips openssl-1.0.1e/cry + return 1; + } +#endif -diff -up openssl-1.0.1e/crypto/fips/fips_locl.h.fips openssl-1.0.1e/crypto/fips/fips_locl.h ---- openssl-1.0.1e/crypto/fips/fips_locl.h.fips 2013-10-04 11:48:04.186694271 +0200 -+++ openssl-1.0.1e/crypto/fips/fips_locl.h 2013-10-04 11:48:04.186694271 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_locl.h.fips openssl-1.0.1g/crypto/fips/fips_locl.h +--- openssl-1.0.1g/crypto/fips/fips_locl.h.fips 2014-05-06 16:29:50.549923294 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_locl.h 2014-05-06 16:29:50.549923294 +0200 @@ -0,0 +1,71 @@ +/* ==================================================================== + * Copyright (c) 2011 The OpenSSL Project. All rights reserved. 
@@ -15427,9 +15404,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_locl.h.fips openssl-1.0.1e/crypto/fips/ +} +#endif +#endif -diff -up openssl-1.0.1e/crypto/fips/fips_md.c.fips openssl-1.0.1e/crypto/fips/fips_md.c ---- openssl-1.0.1e/crypto/fips/fips_md.c.fips 2013-10-04 11:48:04.186694271 +0200 -+++ openssl-1.0.1e/crypto/fips/fips_md.c 2013-10-04 11:48:04.186694271 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_md.c.fips openssl-1.0.1g/crypto/fips/fips_md.c +--- openssl-1.0.1g/crypto/fips/fips_md.c.fips 2014-05-06 16:29:50.549923294 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_md.c 2014-05-06 16:29:50.549923294 +0200 @@ -0,0 +1,145 @@ +/* fips/evp/fips_md.c */ +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) @@ -15576,9 +15553,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_md.c.fips openssl-1.0.1e/crypto/fips/fi + return NULL; + } + } -diff -up openssl-1.0.1e/crypto/fips/fips_post.c.fips openssl-1.0.1e/crypto/fips/fips_post.c ---- openssl-1.0.1e/crypto/fips/fips_post.c.fips 2013-10-04 11:48:04.186694271 +0200 -+++ openssl-1.0.1e/crypto/fips/fips_post.c 2013-10-04 11:48:04.186694271 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_post.c.fips openssl-1.0.1g/crypto/fips/fips_post.c +--- openssl-1.0.1g/crypto/fips/fips_post.c.fips 2014-05-06 16:29:50.549923294 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_post.c 2014-05-06 16:29:50.549923294 +0200 @@ -0,0 +1,205 @@ +/* ==================================================================== + * Copyright (c) 2011 The OpenSSL Project. All rights reserved. 
@@ -15785,9 +15762,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_post.c.fips openssl-1.0.1e/crypto/fips/ + return 1; + } +#endif -diff -up openssl-1.0.1e/crypto/fips/fips_rand.c.fips openssl-1.0.1e/crypto/fips/fips_rand.c ---- openssl-1.0.1e/crypto/fips/fips_rand.c.fips 2013-10-04 11:48:04.187694294 +0200 -+++ openssl-1.0.1e/crypto/fips/fips_rand.c 2013-10-04 11:48:04.187694294 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_rand.c.fips openssl-1.0.1g/crypto/fips/fips_rand.c +--- openssl-1.0.1g/crypto/fips/fips_rand.c.fips 2014-05-06 16:29:50.550923317 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_rand.c 2014-05-06 16:29:50.550923317 +0200 @@ -0,0 +1,457 @@ +/* ==================================================================== + * Copyright (c) 2007 The OpenSSL Project. All rights reserved. @@ -16246,9 +16223,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rand.c.fips openssl-1.0.1e/crypto/fips/ +} + +#endif -diff -up openssl-1.0.1e/crypto/fips/fips_rand.h.fips openssl-1.0.1e/crypto/fips/fips_rand.h ---- openssl-1.0.1e/crypto/fips/fips_rand.h.fips 2013-10-04 11:48:04.187694294 +0200 -+++ openssl-1.0.1e/crypto/fips/fips_rand.h 2013-10-04 11:48:04.187694294 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_rand.h.fips openssl-1.0.1g/crypto/fips/fips_rand.h +--- openssl-1.0.1g/crypto/fips/fips_rand.h.fips 2014-05-06 16:29:50.550923317 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_rand.h 2014-05-06 16:29:50.550923317 +0200 @@ -0,0 +1,145 @@ +/* ==================================================================== + * Copyright (c) 2003 The OpenSSL Project. All rights reserved. 
@@ -16395,9 +16372,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rand.h.fips openssl-1.0.1e/crypto/fips/ +#endif +#endif +#endif -diff -up openssl-1.0.1e/crypto/fips/fips_rand_lcl.h.fips openssl-1.0.1e/crypto/fips/fips_rand_lcl.h ---- openssl-1.0.1e/crypto/fips/fips_rand_lcl.h.fips 2013-10-04 11:48:04.187694294 +0200 -+++ openssl-1.0.1e/crypto/fips/fips_rand_lcl.h 2013-10-04 11:48:04.187694294 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_rand_lcl.h.fips openssl-1.0.1g/crypto/fips/fips_rand_lcl.h +--- openssl-1.0.1g/crypto/fips/fips_rand_lcl.h.fips 2014-05-06 16:29:50.550923317 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_rand_lcl.h 2014-05-06 16:29:50.550923317 +0200 @@ -0,0 +1,219 @@ +/* fips/rand/fips_rand_lcl.h */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL @@ -16618,9 +16595,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rand_lcl.h.fips openssl-1.0.1e/crypto/f +#define FIPS_digestupdate EVP_DigestUpdate +#define FIPS_digestfinal EVP_DigestFinal +#define M_EVP_MD_size EVP_MD_size -diff -up openssl-1.0.1e/crypto/fips/fips_rand_lib.c.fips openssl-1.0.1e/crypto/fips/fips_rand_lib.c ---- openssl-1.0.1e/crypto/fips/fips_rand_lib.c.fips 2013-10-04 11:48:04.187694294 +0200 -+++ openssl-1.0.1e/crypto/fips/fips_rand_lib.c 2013-10-04 11:48:04.187694294 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_rand_lib.c.fips openssl-1.0.1g/crypto/fips/fips_rand_lib.c +--- openssl-1.0.1g/crypto/fips/fips_rand_lib.c.fips 2014-05-06 16:29:50.550923317 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_rand_lib.c 2014-05-06 16:29:50.550923317 +0200 @@ -0,0 +1,191 @@ +/* ==================================================================== + * Copyright (c) 2011 The OpenSSL Project. All rights reserved. 
@@ -16813,9 +16790,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rand_lib.c.fips openssl-1.0.1e/crypto/f + } + return 0; + } -diff -up openssl-1.0.1e/crypto/fips/fips_rand_selftest.c.fips openssl-1.0.1e/crypto/fips/fips_rand_selftest.c ---- openssl-1.0.1e/crypto/fips/fips_rand_selftest.c.fips 2013-10-04 11:48:04.187694294 +0200 -+++ openssl-1.0.1e/crypto/fips/fips_rand_selftest.c 2013-10-04 11:48:04.187694294 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_rand_selftest.c.fips openssl-1.0.1g/crypto/fips/fips_rand_selftest.c +--- openssl-1.0.1g/crypto/fips/fips_rand_selftest.c.fips 2014-05-06 16:29:50.550923317 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_rand_selftest.c 2014-05-06 16:29:50.550923317 +0200 @@ -0,0 +1,183 @@ +/* ==================================================================== + * Copyright (c) 2003 The OpenSSL Project. All rights reserved. @@ -17000,9 +16977,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rand_selftest.c.fips openssl-1.0.1e/cry + } + +#endif -diff -up openssl-1.0.1e/crypto/fips/fips_randtest.c.fips openssl-1.0.1e/crypto/fips/fips_randtest.c ---- openssl-1.0.1e/crypto/fips/fips_randtest.c.fips 2013-10-04 11:48:04.188694316 +0200 -+++ openssl-1.0.1e/crypto/fips/fips_randtest.c 2013-10-04 11:48:04.187694294 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_randtest.c.fips openssl-1.0.1g/crypto/fips/fips_randtest.c +--- openssl-1.0.1g/crypto/fips/fips_randtest.c.fips 2014-05-06 16:29:50.551923340 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_randtest.c 2014-05-06 16:29:50.551923340 +0200 @@ -0,0 +1,250 @@ +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. 
@@ -17254,9 +17231,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_randtest.c.fips openssl-1.0.1e/crypto/f + } + +#endif -diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c ---- openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips 2013-10-04 11:48:04.188694316 +0200 -+++ openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c 2013-10-04 11:48:04.188694316 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_rsa_selftest.c.fips openssl-1.0.1g/crypto/fips/fips_rsa_selftest.c +--- openssl-1.0.1g/crypto/fips/fips_rsa_selftest.c.fips 2014-05-06 16:29:50.551923340 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_rsa_selftest.c 2014-05-06 16:29:50.551923340 +0200 @@ -0,0 +1,444 @@ +/* ==================================================================== + * Copyright (c) 2003-2007 The OpenSSL Project. All rights reserved. @@ -17702,9 +17679,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips openssl-1.0.1e/cryp + } + +#endif /* def OPENSSL_FIPS */ -diff -up openssl-1.0.1e/crypto/fips/fips_rsa_x931g.c.fips openssl-1.0.1e/crypto/fips/fips_rsa_x931g.c ---- openssl-1.0.1e/crypto/fips/fips_rsa_x931g.c.fips 2013-10-04 11:48:04.188694316 +0200 -+++ openssl-1.0.1e/crypto/fips/fips_rsa_x931g.c 2013-10-04 11:48:04.188694316 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_rsa_x931g.c.fips openssl-1.0.1g/crypto/fips/fips_rsa_x931g.c +--- openssl-1.0.1g/crypto/fips/fips_rsa_x931g.c.fips 2014-05-06 16:29:50.551923340 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_rsa_x931g.c 2014-05-06 16:29:50.551923340 +0200 @@ -0,0 +1,282 @@ +/* crypto/rsa/rsa_gen.c */ +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) @@ -17988,9 +17965,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_x931g.c.fips openssl-1.0.1e/crypto/ + return 0; + + } -diff -up openssl-1.0.1e/crypto/fips/fips_sha_selftest.c.fips openssl-1.0.1e/crypto/fips/fips_sha_selftest.c ---- openssl-1.0.1e/crypto/fips/fips_sha_selftest.c.fips 2013-10-04 11:48:04.188694316 +0200 -+++ 
openssl-1.0.1e/crypto/fips/fips_sha_selftest.c 2013-10-04 11:48:04.188694316 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_sha_selftest.c.fips openssl-1.0.1g/crypto/fips/fips_sha_selftest.c +--- openssl-1.0.1g/crypto/fips/fips_sha_selftest.c.fips 2014-05-06 16:29:50.551923340 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_sha_selftest.c 2014-05-06 16:29:50.551923340 +0200 @@ -0,0 +1,140 @@ +/* ==================================================================== + * Copyright (c) 2003 The OpenSSL Project. All rights reserved. @@ -18132,10 +18109,10 @@ diff -up openssl-1.0.1e/crypto/fips/fips_sha_selftest.c.fips openssl-1.0.1e/cryp + } + +#endif -diff -up openssl-1.0.1e/crypto/fips/fips_standalone_hmac.c.fips openssl-1.0.1e/crypto/fips/fips_standalone_hmac.c ---- openssl-1.0.1e/crypto/fips/fips_standalone_hmac.c.fips 2013-10-04 11:48:04.188694316 +0200 -+++ openssl-1.0.1e/crypto/fips/fips_standalone_hmac.c 2013-10-04 11:48:04.188694316 +0200 -@@ -0,0 +1,180 @@ +diff -up openssl-1.0.1g/crypto/fips/fips_standalone_hmac.c.fips openssl-1.0.1g/crypto/fips/fips_standalone_hmac.c +--- openssl-1.0.1g/crypto/fips/fips_standalone_hmac.c.fips 2014-05-06 16:29:50.551923340 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_standalone_hmac.c 2014-05-06 16:29:50.551923340 +0200 +@@ -0,0 +1,236 @@ +/* ==================================================================== + * Copyright (c) 2003 The OpenSSL Project. All rights reserved. 
+ * @@ -18195,17 +18172,73 @@ diff -up openssl-1.0.1e/crypto/fips/fips_standalone_hmac.c.fips openssl-1.0.1e/c +#ifndef FIPSCANISTER_O +int FIPS_selftest_failed() { return 0; } +void FIPS_selftest_check() {} -+void OPENSSL_cleanse(void *p,size_t len) {} +#endif + ++#ifdef OPENSSL_FIPS ++int bn_mul_mont_fpu64(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num) { return 0; }; ++int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num) { return 0; }; ++ +#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ + defined(__INTEL__) || \ + defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64) + +unsigned int OPENSSL_ia32cap_P[2]; ++unsigned long *OPENSSL_ia32cap_loc(void) ++{ if (sizeof(long)==4) ++ /* ++ * If 32-bit application pulls address of OPENSSL_ia32cap_P[0] ++ * clear second element to maintain the illusion that vector ++ * is 32-bit. ++ */ ++ OPENSSL_ia32cap_P[1]=0; ++ return (unsigned long *)OPENSSL_ia32cap_P; ++} ++ ++#if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM) && !defined(I386_ONLY) ++#define OPENSSL_CPUID_SETUP ++#if defined(_WIN32) ++typedef unsigned __int64 IA32CAP; ++#else ++typedef unsigned long long IA32CAP; ++#endif ++void OPENSSL_cpuid_setup(void) ++{ static int trigger=0; ++ IA32CAP OPENSSL_ia32_cpuid(void); ++ IA32CAP vec; ++ char *env; ++ ++ if (trigger) return; ++ ++ trigger=1; ++ if ((env=getenv("OPENSSL_ia32cap"))) { ++ int off = (env[0]=='~')?1:0; ++#if defined(_WIN32) ++ if (!sscanf(env+off,"%I64i",&vec)) vec = strtoul(env+off,NULL,0); ++#else ++ if (!sscanf(env+off,"%lli",(long long *)&vec)) vec = strtoul(env+off,NULL,0); ++#endif ++ if (off) vec = OPENSSL_ia32_cpuid()&~vec; ++ } ++ else ++ vec = OPENSSL_ia32_cpuid(); ++ ++ /* ++ * |(1<<10) sets a reserved bit to signal that variable ++ * was initialized already... 
This is to avoid interference ++ * with cpuid snippets in ELF .init segment. ++ */ ++ OPENSSL_ia32cap_P[0] = (unsigned int)vec|(1<<10); ++ OPENSSL_ia32cap_P[1] = (unsigned int)(vec>>32); ++} +#endif + -+#ifdef OPENSSL_FIPS ++#else ++unsigned long *OPENSSL_ia32cap_loc(void) { return NULL; } ++#endif ++int OPENSSL_NONPIC_relocated = 0; ++#if !defined(OPENSSL_CPUID_SETUP) && !defined(OPENSSL_CPUID_OBJ) ++void OPENSSL_cpuid_setup(void) {} ++#endif + +static void hmac_init(SHA256_CTX *md_ctx,SHA256_CTX *o_ctx, + const char *key) @@ -18316,9 +18349,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_standalone_hmac.c.fips openssl-1.0.1e/c + } + + -diff -up openssl-1.0.1e/crypto/fips/fips_test_suite.c.fips openssl-1.0.1e/crypto/fips/fips_test_suite.c ---- openssl-1.0.1e/crypto/fips/fips_test_suite.c.fips 2013-10-04 11:48:04.189694339 +0200 -+++ openssl-1.0.1e/crypto/fips/fips_test_suite.c 2013-10-04 11:48:04.189694339 +0200 +diff -up openssl-1.0.1g/crypto/fips/fips_test_suite.c.fips openssl-1.0.1g/crypto/fips/fips_test_suite.c +--- openssl-1.0.1g/crypto/fips/fips_test_suite.c.fips 2014-05-06 16:29:50.552923363 +0200 ++++ openssl-1.0.1g/crypto/fips/fips_test_suite.c 2014-05-06 16:29:50.552923363 +0200 @@ -0,0 +1,588 @@ +/* ==================================================================== + * Copyright (c) 2003 The OpenSSL Project. All rights reserved. 
@@ -18908,10 +18941,10 @@ diff -up openssl-1.0.1e/crypto/fips/fips_test_suite.c.fips openssl-1.0.1e/crypto + } + +#endif -diff -up openssl-1.0.1e/crypto/fips/Makefile.fips openssl-1.0.1e/crypto/fips/Makefile ---- openssl-1.0.1e/crypto/fips/Makefile.fips 2013-10-04 11:48:04.189694339 +0200 -+++ openssl-1.0.1e/crypto/fips/Makefile 2013-10-04 11:48:04.189694339 +0200 -@@ -0,0 +1,340 @@ +diff -up openssl-1.0.1g/crypto/fips/Makefile.fips openssl-1.0.1g/crypto/fips/Makefile +--- openssl-1.0.1g/crypto/fips/Makefile.fips 2014-05-06 16:29:50.552923363 +0200 ++++ openssl-1.0.1g/crypto/fips/Makefile 2014-05-06 16:29:50.552923363 +0200 +@@ -0,0 +1,341 @@ +# +# OpenSSL/crypto/fips/Makefile +# @@ -19004,6 +19037,7 @@ diff -up openssl-1.0.1e/crypto/fips/Makefile.fips openssl-1.0.1e/crypto/fips/Mak + +$(EXE): $(PROGRAM).o + FIPS_SHA_ASM=""; for i in $(SHA1_ASM_OBJ) sha256.o; do FIPS_SHA_ASM="$$FIPS_SHA_ASM ../sha/$$i" ; done; \ ++ for i in $(CPUID_OBJ); do FIPS_SHA_ASM="$$FIPS_SHA_ASM ../$$i" ; done; \ + $(CC) -o $@ $(CFLAGS) $(PROGRAM).o $$FIPS_SHA_ASM + +# DO NOT DELETE THIS LINE -- make depend depends on it. 
@@ -19252,9 +19286,9 @@ diff -up openssl-1.0.1e/crypto/fips/Makefile.fips openssl-1.0.1e/crypto/fips/Mak +fips_sha_selftest.o: ../../include/openssl/safestack.h +fips_sha_selftest.o: ../../include/openssl/sha.h ../../include/openssl/stack.h +fips_sha_selftest.o: ../../include/openssl/symhacks.h fips_sha_selftest.c -diff -up openssl-1.0.1e/crypto/hmac/hmac.c.fips openssl-1.0.1e/crypto/hmac/hmac.c ---- openssl-1.0.1e/crypto/hmac/hmac.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/hmac/hmac.c 2013-10-04 11:48:04.189694339 +0200 +diff -up openssl-1.0.1g/crypto/hmac/hmac.c.fips openssl-1.0.1g/crypto/hmac/hmac.c +--- openssl-1.0.1g/crypto/hmac/hmac.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/hmac/hmac.c 2014-05-06 16:29:50.552923363 +0200 @@ -81,11 +81,6 @@ int HMAC_Init_ex(HMAC_CTX *ctx, const vo EVPerr(EVP_F_HMAC_INIT_EX, EVP_R_DISABLED_FOR_FIPS); return 0; @@ -19317,9 +19351,9 @@ diff -up openssl-1.0.1e/crypto/hmac/hmac.c.fips openssl-1.0.1e/crypto/hmac/hmac. 
EVP_MD_CTX_cleanup(&ctx->i_ctx); EVP_MD_CTX_cleanup(&ctx->o_ctx); EVP_MD_CTX_cleanup(&ctx->md_ctx); -diff -up openssl-1.0.1e/crypto/mdc2/mdc2dgst.c.fips openssl-1.0.1e/crypto/mdc2/mdc2dgst.c ---- openssl-1.0.1e/crypto/mdc2/mdc2dgst.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/mdc2/mdc2dgst.c 2013-10-04 11:48:04.189694339 +0200 +diff -up openssl-1.0.1g/crypto/mdc2/mdc2dgst.c.fips openssl-1.0.1g/crypto/mdc2/mdc2dgst.c +--- openssl-1.0.1g/crypto/mdc2/mdc2dgst.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/mdc2/mdc2dgst.c 2014-05-06 16:29:50.552923363 +0200 @@ -76,7 +76,7 @@ *((c)++)=(unsigned char)(((l)>>24L)&0xff)) @@ -19329,9 +19363,9 @@ diff -up openssl-1.0.1e/crypto/mdc2/mdc2dgst.c.fips openssl-1.0.1e/crypto/mdc2/m { c->num=0; c->pad_type=1; -diff -up openssl-1.0.1e/crypto/md2/md2_dgst.c.fips openssl-1.0.1e/crypto/md2/md2_dgst.c ---- openssl-1.0.1e/crypto/md2/md2_dgst.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/md2/md2_dgst.c 2013-10-04 11:48:04.189694339 +0200 +diff -up openssl-1.0.1g/crypto/md2/md2_dgst.c.fips openssl-1.0.1g/crypto/md2/md2_dgst.c +--- openssl-1.0.1g/crypto/md2/md2_dgst.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/md2/md2_dgst.c 2014-05-06 16:29:50.552923363 +0200 @@ -62,6 +62,11 @@ #include #include @@ -19353,9 +19387,9 @@ diff -up openssl-1.0.1e/crypto/md2/md2_dgst.c.fips openssl-1.0.1e/crypto/md2/md2 { c->num=0; memset(c->state,0,sizeof c->state); -diff -up openssl-1.0.1e/crypto/md4/md4_dgst.c.fips openssl-1.0.1e/crypto/md4/md4_dgst.c ---- openssl-1.0.1e/crypto/md4/md4_dgst.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/md4/md4_dgst.c 2013-10-04 11:48:04.190694362 +0200 +diff -up openssl-1.0.1g/crypto/md4/md4_dgst.c.fips openssl-1.0.1g/crypto/md4/md4_dgst.c +--- openssl-1.0.1g/crypto/md4/md4_dgst.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/md4/md4_dgst.c 2014-05-06 16:29:50.552923363 +0200 @@ -71,7 
+71,7 @@ const char MD4_version[]="MD4" OPENSSL_V #define INIT_DATA_C (unsigned long)0x98badcfeL #define INIT_DATA_D (unsigned long)0x10325476L @@ -19365,9 +19399,9 @@ diff -up openssl-1.0.1e/crypto/md4/md4_dgst.c.fips openssl-1.0.1e/crypto/md4/md4 { memset (c,0,sizeof(*c)); c->A=INIT_DATA_A; -diff -up openssl-1.0.1e/crypto/md5/md5_dgst.c.fips openssl-1.0.1e/crypto/md5/md5_dgst.c ---- openssl-1.0.1e/crypto/md5/md5_dgst.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/md5/md5_dgst.c 2013-10-04 11:48:04.190694362 +0200 +diff -up openssl-1.0.1g/crypto/md5/md5_dgst.c.fips openssl-1.0.1g/crypto/md5/md5_dgst.c +--- openssl-1.0.1g/crypto/md5/md5_dgst.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/md5/md5_dgst.c 2014-05-06 16:29:50.553923386 +0200 @@ -71,7 +71,7 @@ const char MD5_version[]="MD5" OPENSSL_V #define INIT_DATA_C (unsigned long)0x98badcfeL #define INIT_DATA_D (unsigned long)0x10325476L @@ -19377,9 +19411,9 @@ diff -up openssl-1.0.1e/crypto/md5/md5_dgst.c.fips openssl-1.0.1e/crypto/md5/md5 { memset (c,0,sizeof(*c)); c->A=INIT_DATA_A; -diff -up openssl-1.0.1e/crypto/o_fips.c.fips openssl-1.0.1e/crypto/o_fips.c ---- openssl-1.0.1e/crypto/o_fips.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/o_fips.c 2013-10-04 11:48:04.190694362 +0200 +diff -up openssl-1.0.1g/crypto/o_fips.c.fips openssl-1.0.1g/crypto/o_fips.c +--- openssl-1.0.1g/crypto/o_fips.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/o_fips.c 2014-05-06 16:29:50.553923386 +0200 @@ -79,6 +79,8 @@ int FIPS_mode_set(int r) #ifndef FIPS_AUTH_USER_PASS #define FIPS_AUTH_USER_PASS "Default FIPS Crypto User Password" @@ -19389,9 +19423,9 @@ diff -up openssl-1.0.1e/crypto/o_fips.c.fips openssl-1.0.1e/crypto/o_fips.c if (!FIPS_module_mode_set(r, FIPS_AUTH_USER_PASS)) return 0; if (r) -diff -up openssl-1.0.1e/crypto/o_init.c.fips openssl-1.0.1e/crypto/o_init.c ---- openssl-1.0.1e/crypto/o_init.c.fips 2013-02-11 16:26:04.000000000 
+0100 -+++ openssl-1.0.1e/crypto/o_init.c 2013-10-04 11:48:04.190694362 +0200 +diff -up openssl-1.0.1g/crypto/o_init.c.fips openssl-1.0.1g/crypto/o_init.c +--- openssl-1.0.1g/crypto/o_init.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/o_init.c 2014-05-06 16:29:50.553923386 +0200 @@ -55,28 +55,68 @@ #include #include @@ -19465,9 +19499,9 @@ diff -up openssl-1.0.1e/crypto/o_init.c.fips openssl-1.0.1e/crypto/o_init.c + { + OPENSSL_init_library(); + } -diff -up openssl-1.0.1e/crypto/opensslconf.h.in.fips openssl-1.0.1e/crypto/opensslconf.h.in ---- openssl-1.0.1e/crypto/opensslconf.h.in.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/opensslconf.h.in 2013-10-04 11:48:04.190694362 +0200 +diff -up openssl-1.0.1g/crypto/opensslconf.h.in.fips openssl-1.0.1g/crypto/opensslconf.h.in +--- openssl-1.0.1g/crypto/opensslconf.h.in.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/opensslconf.h.in 2014-05-06 16:29:50.553923386 +0200 @@ -1,5 +1,20 @@ /* crypto/opensslconf.h.in */ @@ -19489,39 +19523,10 @@ diff -up openssl-1.0.1e/crypto/opensslconf.h.in.fips openssl-1.0.1e/crypto/opens /* Generate 80386 code? 
*/ #undef I386_ONLY -diff -up openssl-1.0.1e/crypto/pkcs12/p12_crt.c.fips openssl-1.0.1e/crypto/pkcs12/p12_crt.c ---- openssl-1.0.1e/crypto/pkcs12/p12_crt.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/pkcs12/p12_crt.c 2013-10-04 11:48:04.190694362 +0200 -@@ -59,6 +59,10 @@ - #include - #include "cryptlib.h" - #include -+#ifdef OPENSSL_FIPS -+#include -+#endif -+ - - - static int pkcs12_add_bag(STACK_OF(PKCS12_SAFEBAG) **pbags, PKCS12_SAFEBAG *bag); -@@ -90,7 +94,14 @@ PKCS12 *PKCS12_create(char *pass, char * - - /* Set defaults */ - if (!nid_cert) -+ { -+#ifdef OPENSSL_FIPS -+ if (FIPS_mode()) -+ nid_cert = NID_pbe_WithSHA1And3_Key_TripleDES_CBC; -+ else -+#endif - nid_cert = NID_pbe_WithSHA1And40BitRC2_CBC; -+ } - if (!nid_key) - nid_key = NID_pbe_WithSHA1And3_Key_TripleDES_CBC; - if (!iter) -diff -up openssl-1.0.1e/crypto/rand/md_rand.c.fips openssl-1.0.1e/crypto/rand/md_rand.c ---- openssl-1.0.1e/crypto/rand/md_rand.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/rand/md_rand.c 2013-10-04 11:48:04.190694362 +0200 -@@ -389,7 +389,10 @@ static int ssleay_rand_bytes(unsigned ch +diff -up openssl-1.0.1g/crypto/rand/md_rand.c.fips openssl-1.0.1g/crypto/rand/md_rand.c +--- openssl-1.0.1g/crypto/rand/md_rand.c.fips 2014-04-07 18:54:21.000000000 +0200 ++++ openssl-1.0.1g/crypto/rand/md_rand.c 2014-05-06 16:29:50.553923386 +0200 +@@ -395,7 +395,10 @@ static int ssleay_rand_bytes(unsigned ch CRYPTO_w_unlock(CRYPTO_LOCK_RAND2); crypto_lock_rand = 1; @@ -19533,10 +19538,10 @@ diff -up openssl-1.0.1e/crypto/rand/md_rand.c.fips openssl-1.0.1e/crypto/rand/md { RAND_poll(); initialized = 1; -diff -up openssl-1.0.1e/crypto/rand/rand.h.fips openssl-1.0.1e/crypto/rand/rand.h ---- openssl-1.0.1e/crypto/rand/rand.h.fips 2013-10-04 11:48:03.945688829 +0200 -+++ openssl-1.0.1e/crypto/rand/rand.h 2013-10-04 11:48:04.190694362 +0200 -@@ -133,15 +133,33 @@ void ERR_load_RAND_strings(void); +diff -up openssl-1.0.1g/crypto/rand/rand.h.fips 
openssl-1.0.1g/crypto/rand/rand.h +--- openssl-1.0.1g/crypto/rand/rand.h.fips 2014-05-06 16:29:50.303917606 +0200 ++++ openssl-1.0.1g/crypto/rand/rand.h 2014-05-06 16:29:50.553923386 +0200 +@@ -133,16 +133,34 @@ void ERR_load_RAND_strings(void); /* Error codes for the RAND functions. */ /* Function codes. */ @@ -19554,9 +19559,11 @@ diff -up openssl-1.0.1e/crypto/rand/rand.h.fips openssl-1.0.1e/crypto/rand/rand. #define RAND_F_SSLEAY_RAND_BYTES 100 /* Reason codes. */ +-#define RAND_R_DUAL_EC_DRBG_DISABLED 104 -#define RAND_R_ERROR_INITIALISING_DRBG 102 -#define RAND_R_ERROR_INSTANTIATING_DRBG 103 -#define RAND_R_NO_FIPS_RANDOM_METHOD_SET 101 ++#define RAND_R_DUAL_EC_DRBG_DISABLED 114 +#define RAND_R_ERROR_INITIALISING_DRBG 112 +#define RAND_R_ERROR_INSTANTIATING_DRBG 113 +#define RAND_R_NON_FIPS_METHOD 105 @@ -19574,9 +19581,9 @@ diff -up openssl-1.0.1e/crypto/rand/rand.h.fips openssl-1.0.1e/crypto/rand/rand. #ifdef __cplusplus } -diff -up openssl-1.0.1e/crypto/ripemd/rmd_dgst.c.fips openssl-1.0.1e/crypto/ripemd/rmd_dgst.c ---- openssl-1.0.1e/crypto/ripemd/rmd_dgst.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/ripemd/rmd_dgst.c 2013-10-04 11:48:04.191694385 +0200 +diff -up openssl-1.0.1g/crypto/ripemd/rmd_dgst.c.fips openssl-1.0.1g/crypto/ripemd/rmd_dgst.c +--- openssl-1.0.1g/crypto/ripemd/rmd_dgst.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/ripemd/rmd_dgst.c 2014-05-06 16:29:50.553923386 +0200 @@ -70,7 +70,7 @@ const char RMD160_version[]="RIPE-MD160" void ripemd160_block(RIPEMD160_CTX *c, unsigned long *p,size_t num); # endif @@ -19586,9 +19593,9 @@ diff -up openssl-1.0.1e/crypto/ripemd/rmd_dgst.c.fips openssl-1.0.1e/crypto/ripe { memset (c,0,sizeof(*c)); c->A=RIPEMD160_A; -diff -up openssl-1.0.1e/crypto/rsa/rsa_crpt.c.fips openssl-1.0.1e/crypto/rsa/rsa_crpt.c ---- openssl-1.0.1e/crypto/rsa/rsa_crpt.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/rsa/rsa_crpt.c 2013-10-04 11:48:04.191694385 
+0200 +diff -up openssl-1.0.1g/crypto/rsa/rsa_crpt.c.fips openssl-1.0.1g/crypto/rsa/rsa_crpt.c +--- openssl-1.0.1g/crypto/rsa/rsa_crpt.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/rsa/rsa_crpt.c 2014-05-06 16:29:50.554923409 +0200 @@ -90,10 +90,9 @@ int RSA_private_encrypt(int flen, const RSA *rsa, int padding) { @@ -19615,9 +19622,9 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_crpt.c.fips openssl-1.0.1e/crypto/rsa/rsa return -1; } #endif -diff -up openssl-1.0.1e/crypto/rsa/rsa_eay.c.fips openssl-1.0.1e/crypto/rsa/rsa_eay.c ---- openssl-1.0.1e/crypto/rsa/rsa_eay.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/rsa/rsa_eay.c 2013-10-04 11:48:04.191694385 +0200 +diff -up openssl-1.0.1g/crypto/rsa/rsa_eay.c.fips openssl-1.0.1g/crypto/rsa/rsa_eay.c +--- openssl-1.0.1g/crypto/rsa/rsa_eay.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/rsa/rsa_eay.c 2014-05-06 16:29:50.554923409 +0200 @@ -114,6 +114,10 @@ #include #include @@ -19748,9 +19755,9 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_eay.c.fips openssl-1.0.1e/crypto/rsa/rsa_ rsa->flags|=RSA_FLAG_CACHE_PUBLIC|RSA_FLAG_CACHE_PRIVATE; return(1); } -diff -up openssl-1.0.1e/crypto/rsa/rsa_err.c.fips openssl-1.0.1e/crypto/rsa/rsa_err.c ---- openssl-1.0.1e/crypto/rsa/rsa_err.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/rsa/rsa_err.c 2013-10-04 11:48:04.191694385 +0200 +diff -up openssl-1.0.1g/crypto/rsa/rsa_err.c.fips openssl-1.0.1g/crypto/rsa/rsa_err.c +--- openssl-1.0.1g/crypto/rsa/rsa_err.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/rsa/rsa_err.c 2014-05-06 16:29:50.554923409 +0200 @@ -121,6 +121,8 @@ static ERR_STRING_DATA RSA_str_functs[]= {ERR_FUNC(RSA_F_RSA_PUBLIC_ENCRYPT), "RSA_public_encrypt"}, {ERR_FUNC(RSA_F_RSA_PUB_DECODE), "RSA_PUB_DECODE"}, @@ -19760,9 +19767,9 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_err.c.fips openssl-1.0.1e/crypto/rsa/rsa_ {ERR_FUNC(RSA_F_RSA_SIGN), "RSA_sign"}, 
{ERR_FUNC(RSA_F_RSA_SIGN_ASN1_OCTET_STRING), "RSA_sign_ASN1_OCTET_STRING"}, {ERR_FUNC(RSA_F_RSA_VERIFY), "RSA_verify"}, -diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips openssl-1.0.1e/crypto/rsa/rsa_gen.c ---- openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/rsa/rsa_gen.c 2013-10-04 11:48:04.191694385 +0200 +diff -up openssl-1.0.1g/crypto/rsa/rsa_gen.c.fips openssl-1.0.1g/crypto/rsa/rsa_gen.c +--- openssl-1.0.1g/crypto/rsa/rsa_gen.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/rsa/rsa_gen.c 2014-05-06 16:29:50.554923409 +0200 @@ -69,6 +69,78 @@ #include #ifdef OPENSSL_FIPS @@ -19904,9 +19911,9 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips openssl-1.0.1e/crypto/rsa/rsa_ ok=1; err: if (ok == -1) -diff -up openssl-1.0.1e/crypto/rsa/rsa.h.fips openssl-1.0.1e/crypto/rsa/rsa.h ---- openssl-1.0.1e/crypto/rsa/rsa.h.fips 2013-10-04 11:48:04.075691765 +0200 -+++ openssl-1.0.1e/crypto/rsa/rsa.h 2013-10-04 11:48:04.192694407 +0200 +diff -up openssl-1.0.1g/crypto/rsa/rsa.h.fips openssl-1.0.1g/crypto/rsa/rsa.h +--- openssl-1.0.1g/crypto/rsa/rsa.h.fips 2014-05-06 16:29:50.436920681 +0200 ++++ openssl-1.0.1g/crypto/rsa/rsa.h 2014-05-06 16:29:50.554923409 +0200 @@ -164,6 +164,8 @@ struct rsa_st # define OPENSSL_RSA_MAX_MODULUS_BITS 16384 #endif @@ -19991,9 +19998,9 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa.h.fips openssl-1.0.1e/crypto/rsa/rsa.h #define RSA_R_OPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPE 148 #define RSA_R_PADDING_CHECK_FAILED 114 #define RSA_R_P_NOT_PRIME 128 -diff -up openssl-1.0.1e/crypto/rsa/rsa_lib.c.fips openssl-1.0.1e/crypto/rsa/rsa_lib.c ---- openssl-1.0.1e/crypto/rsa/rsa_lib.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/rsa/rsa_lib.c 2013-10-04 11:48:04.192694407 +0200 +diff -up openssl-1.0.1g/crypto/rsa/rsa_lib.c.fips openssl-1.0.1g/crypto/rsa/rsa_lib.c +--- openssl-1.0.1g/crypto/rsa/rsa_lib.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ 
openssl-1.0.1g/crypto/rsa/rsa_lib.c 2014-05-06 16:29:50.554923409 +0200 @@ -84,6 +84,13 @@ RSA *RSA_new(void) void RSA_set_default_method(const RSA_METHOD *meth) @@ -20069,9 +20076,9 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_lib.c.fips openssl-1.0.1e/crypto/rsa/rsa_ if (!CRYPTO_new_ex_data(CRYPTO_EX_INDEX_RSA, ret, &ret->ex_data)) { #ifndef OPENSSL_NO_ENGINE -diff -up openssl-1.0.1e/crypto/rsa/rsa_pmeth.c.fips openssl-1.0.1e/crypto/rsa/rsa_pmeth.c ---- openssl-1.0.1e/crypto/rsa/rsa_pmeth.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/rsa/rsa_pmeth.c 2013-10-04 11:48:04.192694407 +0200 +diff -up openssl-1.0.1g/crypto/rsa/rsa_pmeth.c.fips openssl-1.0.1g/crypto/rsa/rsa_pmeth.c +--- openssl-1.0.1g/crypto/rsa/rsa_pmeth.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/rsa/rsa_pmeth.c 2014-05-06 16:29:50.555923432 +0200 @@ -206,22 +206,6 @@ static int pkey_rsa_sign(EVP_PKEY_CTX *c RSA_R_INVALID_DIGEST_LENGTH); return -1; @@ -20115,9 +20122,9 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_pmeth.c.fips openssl-1.0.1e/crypto/rsa/rs if (rctx->pad_mode == RSA_PKCS1_PADDING) return RSA_verify(EVP_MD_type(rctx->md), tbs, tbslen, sig, siglen, rsa); -diff -up openssl-1.0.1e/crypto/rsa/rsa_sign.c.fips openssl-1.0.1e/crypto/rsa/rsa_sign.c ---- openssl-1.0.1e/crypto/rsa/rsa_sign.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/rsa/rsa_sign.c 2013-10-04 11:48:04.192694407 +0200 +diff -up openssl-1.0.1g/crypto/rsa/rsa_sign.c.fips openssl-1.0.1g/crypto/rsa/rsa_sign.c +--- openssl-1.0.1g/crypto/rsa/rsa_sign.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/rsa/rsa_sign.c 2014-05-06 16:29:50.555923432 +0200 @@ -138,7 +138,8 @@ int RSA_sign(int type, const unsigned ch i2d_X509_SIG(&sig,&p); s=tmps; @@ -20149,9 +20156,9 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_sign.c.fips openssl-1.0.1e/crypto/rsa/rsa if (i <= 0) goto err; /* Oddball MDC2 case: signature can be OCTET STRING. 
-diff -up openssl-1.0.1e/crypto/sha/sha.h.fips openssl-1.0.1e/crypto/sha/sha.h ---- openssl-1.0.1e/crypto/sha/sha.h.fips 2013-10-04 11:48:03.867687068 +0200 -+++ openssl-1.0.1e/crypto/sha/sha.h 2013-10-04 11:48:04.192694407 +0200 +diff -up openssl-1.0.1g/crypto/sha/sha.h.fips openssl-1.0.1g/crypto/sha/sha.h +--- openssl-1.0.1g/crypto/sha/sha.h.fips 2014-05-06 16:29:50.224915780 +0200 ++++ openssl-1.0.1g/crypto/sha/sha.h 2014-05-06 16:29:50.555923432 +0200 @@ -116,9 +116,6 @@ unsigned char *SHA(const unsigned char * void SHA_Transform(SHA_CTX *c, const unsigned char *data); #endif @@ -20184,9 +20191,9 @@ diff -up openssl-1.0.1e/crypto/sha/sha.h.fips openssl-1.0.1e/crypto/sha/sha.h int SHA384_Init(SHA512_CTX *c); int SHA384_Update(SHA512_CTX *c, const void *data, size_t len); int SHA384_Final(unsigned char *md, SHA512_CTX *c); -diff -up openssl-1.0.1e/crypto/sha/sha_locl.h.fips openssl-1.0.1e/crypto/sha/sha_locl.h ---- openssl-1.0.1e/crypto/sha/sha_locl.h.fips 2013-10-04 11:48:03.870687135 +0200 -+++ openssl-1.0.1e/crypto/sha/sha_locl.h 2013-10-04 11:48:04.192694407 +0200 +diff -up openssl-1.0.1g/crypto/sha/sha_locl.h.fips openssl-1.0.1g/crypto/sha/sha_locl.h +--- openssl-1.0.1g/crypto/sha/sha_locl.h.fips 2014-05-06 16:29:50.226915826 +0200 ++++ openssl-1.0.1g/crypto/sha/sha_locl.h 2014-05-06 16:29:50.555923432 +0200 @@ -123,11 +123,14 @@ void sha1_block_data_order (SHA_CTX *c, #define INIT_DATA_h4 0xc3d2e1f0UL @@ -20203,9 +20210,9 @@ diff -up openssl-1.0.1e/crypto/sha/sha_locl.h.fips openssl-1.0.1e/crypto/sha/sha memset (c,0,sizeof(*c)); c->h0=INIT_DATA_h0; c->h1=INIT_DATA_h1; -diff -up openssl-1.0.1e/crypto/sha/sha256.c.fips openssl-1.0.1e/crypto/sha/sha256.c ---- openssl-1.0.1e/crypto/sha/sha256.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/sha/sha256.c 2013-10-04 11:48:04.193694429 +0200 +diff -up openssl-1.0.1g/crypto/sha/sha256.c.fips openssl-1.0.1g/crypto/sha/sha256.c +--- openssl-1.0.1g/crypto/sha/sha256.c.fips 2014-03-17 
17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/sha/sha256.c 2014-05-06 16:29:50.555923432 +0200 @@ -12,12 +12,19 @@ #include @@ -20236,9 +20243,9 @@ diff -up openssl-1.0.1e/crypto/sha/sha256.c.fips openssl-1.0.1e/crypto/sha/sha25 memset (c,0,sizeof(*c)); c->h[0]=0x6a09e667UL; c->h[1]=0xbb67ae85UL; c->h[2]=0x3c6ef372UL; c->h[3]=0xa54ff53aUL; -diff -up openssl-1.0.1e/crypto/sha/sha512.c.fips openssl-1.0.1e/crypto/sha/sha512.c ---- openssl-1.0.1e/crypto/sha/sha512.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/sha/sha512.c 2013-10-04 11:48:04.193694429 +0200 +diff -up openssl-1.0.1g/crypto/sha/sha512.c.fips openssl-1.0.1g/crypto/sha/sha512.c +--- openssl-1.0.1g/crypto/sha/sha512.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/sha/sha512.c 2014-05-06 16:29:50.555923432 +0200 @@ -5,6 +5,10 @@ * ==================================================================== */ @@ -20270,9 +20277,9 @@ diff -up openssl-1.0.1e/crypto/sha/sha512.c.fips openssl-1.0.1e/crypto/sha/sha51 c->h[0]=U64(0x6a09e667f3bcc908); c->h[1]=U64(0xbb67ae8584caa73b); c->h[2]=U64(0x3c6ef372fe94f82b); -diff -up openssl-1.0.1e/crypto/whrlpool/wp_dgst.c.fips openssl-1.0.1e/crypto/whrlpool/wp_dgst.c ---- openssl-1.0.1e/crypto/whrlpool/wp_dgst.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/whrlpool/wp_dgst.c 2013-10-04 11:48:04.193694429 +0200 +diff -up openssl-1.0.1g/crypto/whrlpool/wp_dgst.c.fips openssl-1.0.1g/crypto/whrlpool/wp_dgst.c +--- openssl-1.0.1g/crypto/whrlpool/wp_dgst.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/crypto/whrlpool/wp_dgst.c 2014-05-06 16:29:50.555923432 +0200 @@ -55,7 +55,7 @@ #include #include @@ -20282,9 +20289,9 @@ diff -up openssl-1.0.1e/crypto/whrlpool/wp_dgst.c.fips openssl-1.0.1e/crypto/whr { memset (c,0,sizeof(*c)); return(1); -diff -up openssl-1.0.1e/Makefile.org.fips openssl-1.0.1e/Makefile.org ---- openssl-1.0.1e/Makefile.org.fips 2013-10-04 11:48:04.157693616 +0200 -+++ 
openssl-1.0.1e/Makefile.org 2013-10-04 11:48:04.193694429 +0200 +diff -up openssl-1.0.1g/Makefile.org.fips openssl-1.0.1g/Makefile.org +--- openssl-1.0.1g/Makefile.org.fips 2014-05-06 16:29:50.519922600 +0200 ++++ openssl-1.0.1g/Makefile.org 2014-05-06 16:29:50.556923456 +0200 @@ -136,6 +136,9 @@ FIPSCANLIB= BASEADDR= @@ -20312,10 +20319,10 @@ diff -up openssl-1.0.1e/Makefile.org.fips openssl-1.0.1e/Makefile.org THIS=$${THIS:-$@} MAKEFILE=Makefile MAKEOVERRIDES= # MAKEOVERRIDES= effectively "equalizes" GNU-ish and SysV-ish make flavors, # which in turn eliminates ambiguities in variable treatment with -e. -diff -up openssl-1.0.1e/ssl/d1_srvr.c.fips openssl-1.0.1e/ssl/d1_srvr.c ---- openssl-1.0.1e/ssl/d1_srvr.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/ssl/d1_srvr.c 2013-10-04 11:48:04.193694429 +0200 -@@ -1372,6 +1372,8 @@ int dtls1_send_server_key_exchange(SSL * +diff -up openssl-1.0.1g/ssl/d1_srvr.c.fips openssl-1.0.1g/ssl/d1_srvr.c +--- openssl-1.0.1g/ssl/d1_srvr.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/ssl/d1_srvr.c 2014-05-06 16:29:50.556923456 +0200 +@@ -1383,6 +1383,8 @@ int dtls1_send_server_key_exchange(SSL * j=0; for (num=2; num > 0; num--) { @@ -20324,9 +20331,9 @@ diff -up openssl-1.0.1e/ssl/d1_srvr.c.fips openssl-1.0.1e/ssl/d1_srvr.c EVP_DigestInit_ex(&md_ctx,(num == 2) ?s->ctx->md5:s->ctx->sha1, NULL); EVP_DigestUpdate(&md_ctx,&(s->s3->client_random[0]),SSL3_RANDOM_SIZE); -diff -up openssl-1.0.1e/ssl/ssl_algs.c.fips openssl-1.0.1e/ssl/ssl_algs.c ---- openssl-1.0.1e/ssl/ssl_algs.c.fips 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/ssl/ssl_algs.c 2013-10-04 11:48:04.193694429 +0200 +diff -up openssl-1.0.1g/ssl/ssl_algs.c.fips openssl-1.0.1g/ssl/ssl_algs.c +--- openssl-1.0.1g/ssl/ssl_algs.c.fips 2014-03-17 17:14:20.000000000 +0100 ++++ openssl-1.0.1g/ssl/ssl_algs.c 2014-05-06 16:29:50.556923456 +0200 @@ -64,6 +64,12 @@ int SSL_library_init(void) { diff --git a/openssl-1.0.1h-disable-sslv2v3.patch 
b/openssl-1.0.1h-disable-sslv2v3.patch new file mode 100644 index 0000000..7a028aa --- /dev/null +++ b/openssl-1.0.1h-disable-sslv2v3.patch @@ -0,0 +1,13 @@ +diff -up openssl-1.0.1h/ssl/ssl_lib.c.v2v3 openssl-1.0.1h/ssl/ssl_lib.c +--- openssl-1.0.1h/ssl/ssl_lib.c.v2v3 2014-06-11 16:02:52.000000000 +0200 ++++ openssl-1.0.1h/ssl/ssl_lib.c 2014-06-30 14:18:04.290248080 +0200 +@@ -1875,6 +1875,9 @@ SSL_CTX *SSL_CTX_new(const SSL_METHOD *m + */ + ret->options |= SSL_OP_LEGACY_SERVER_CONNECT; + ++ /* Disable SSLv2 by default (affects the SSLv23_method() only) */ ++ ret->options |= SSL_OP_NO_SSLv2; ++ + return(ret); + err: + SSLerr(SSL_F_SSL_CTX_NEW,ERR_R_MALLOC_FAILURE); diff --git a/openssl-1.0.1c-ipv6-apps.patch b/openssl-1.0.1h-ipv6-apps.patch similarity index 86% rename from openssl-1.0.1c-ipv6-apps.patch rename to openssl-1.0.1h-ipv6-apps.patch index 41e0b36..db6b543 100644 --- a/openssl-1.0.1c-ipv6-apps.patch +++ b/openssl-1.0.1h-ipv6-apps.patch @@ -1,6 +1,6 @@ -diff -up openssl-1.0.1c/apps/s_apps.h.ipv6-apps openssl-1.0.1c/apps/s_apps.h ---- openssl-1.0.1c/apps/s_apps.h.ipv6-apps 2012-07-11 22:46:02.409221206 +0200 -+++ openssl-1.0.1c/apps/s_apps.h 2012-07-11 22:46:02.451222165 +0200 +diff -up openssl-1.0.1h/apps/s_apps.h.ipv6-apps openssl-1.0.1h/apps/s_apps.h +--- openssl-1.0.1h/apps/s_apps.h.ipv6-apps 2014-06-05 14:33:38.515668750 +0200 ++++ openssl-1.0.1h/apps/s_apps.h 2014-06-05 14:33:38.540669335 +0200 @@ -148,7 +148,7 @@ typedef fd_mask fd_set; #define PORT_STR "4433" #define PROTOCOL "tcp" @@ -23,10 +23,10 @@ diff -up openssl-1.0.1c/apps/s_apps.h.ipv6-apps openssl-1.0.1c/apps/s_apps.h long MS_CALLBACK bio_dump_callback(BIO *bio, int cmd, const char *argp, int argi, long argl, long ret); -diff -up openssl-1.0.1c/apps/s_client.c.ipv6-apps openssl-1.0.1c/apps/s_client.c ---- openssl-1.0.1c/apps/s_client.c.ipv6-apps 2012-07-11 22:46:02.433221754 +0200 -+++ openssl-1.0.1c/apps/s_client.c 2012-07-11 22:46:02.452222187 +0200 -@@ -563,7 +563,7 @@ int MAIN(int argc, 
char **argv) +diff -up openssl-1.0.1h/apps/s_client.c.ipv6-apps openssl-1.0.1h/apps/s_client.c +--- openssl-1.0.1h/apps/s_client.c.ipv6-apps 2014-06-05 14:33:38.533669171 +0200 ++++ openssl-1.0.1h/apps/s_client.c 2014-06-05 14:33:38.540669335 +0200 +@@ -567,7 +567,7 @@ int MAIN(int argc, char **argv) int cbuf_len,cbuf_off; int sbuf_len,sbuf_off; fd_set readfds,writefds; @@ -35,7 +35,7 @@ diff -up openssl-1.0.1c/apps/s_client.c.ipv6-apps openssl-1.0.1c/apps/s_client.c int full_log=1; char *host=SSL_HOST_NAME; char *cert_file=NULL,*key_file=NULL; -@@ -664,13 +664,12 @@ int MAIN(int argc, char **argv) +@@ -668,13 +668,12 @@ int MAIN(int argc, char **argv) else if (strcmp(*argv,"-port") == 0) { if (--argc < 1) goto bad; @@ -51,7 +51,7 @@ diff -up openssl-1.0.1c/apps/s_client.c.ipv6-apps openssl-1.0.1c/apps/s_client.c goto bad; } else if (strcmp(*argv,"-verify") == 0) -@@ -1253,7 +1252,7 @@ bad: +@@ -1267,7 +1266,7 @@ bad: re_start: @@ -60,10 +60,10 @@ diff -up openssl-1.0.1c/apps/s_client.c.ipv6-apps openssl-1.0.1c/apps/s_client.c { BIO_printf(bio_err,"connect:errno=%d\n",get_last_socket_error()); SHUTDOWN(s); -diff -up openssl-1.0.1c/apps/s_server.c.ipv6-apps openssl-1.0.1c/apps/s_server.c ---- openssl-1.0.1c/apps/s_server.c.ipv6-apps 2012-07-11 22:46:02.434221777 +0200 -+++ openssl-1.0.1c/apps/s_server.c 2012-07-11 22:46:02.453222210 +0200 -@@ -929,7 +929,7 @@ int MAIN(int argc, char *argv[]) +diff -up openssl-1.0.1h/apps/s_server.c.ipv6-apps openssl-1.0.1h/apps/s_server.c +--- openssl-1.0.1h/apps/s_server.c.ipv6-apps 2014-06-05 14:33:38.533669171 +0200 ++++ openssl-1.0.1h/apps/s_server.c 2014-06-05 14:33:38.541669358 +0200 +@@ -933,7 +933,7 @@ int MAIN(int argc, char *argv[]) { X509_VERIFY_PARAM *vpm = NULL; int badarg = 0; @@ -72,7 +72,7 @@ diff -up openssl-1.0.1c/apps/s_server.c.ipv6-apps openssl-1.0.1c/apps/s_server.c char *CApath=NULL,*CAfile=NULL; unsigned char *context = NULL; char *dhfile = NULL; -@@ -1000,8 +1000,7 @@ int MAIN(int argc, char *argv[]) +@@ 
-1004,8 +1004,7 @@ int MAIN(int argc, char *argv[]) (strcmp(*argv,"-accept") == 0)) { if (--argc < 1) goto bad; @@ -82,7 +82,7 @@ diff -up openssl-1.0.1c/apps/s_server.c.ipv6-apps openssl-1.0.1c/apps/s_server.c } else if (strcmp(*argv,"-verify") == 0) { -@@ -1878,9 +1877,9 @@ bad: +@@ -1892,9 +1891,9 @@ bad: BIO_printf(bio_s_out,"ACCEPT\n"); (void)BIO_flush(bio_s_out); if (www) @@ -94,9 +94,9 @@ diff -up openssl-1.0.1c/apps/s_server.c.ipv6-apps openssl-1.0.1c/apps/s_server.c print_stats(bio_s_out,ctx); ret=0; end: -diff -up openssl-1.0.1c/apps/s_socket.c.ipv6-apps openssl-1.0.1c/apps/s_socket.c ---- openssl-1.0.1c/apps/s_socket.c.ipv6-apps 2011-12-02 15:39:40.000000000 +0100 -+++ openssl-1.0.1c/apps/s_socket.c 2012-07-11 22:49:05.411400450 +0200 +diff -up openssl-1.0.1h/apps/s_socket.c.ipv6-apps openssl-1.0.1h/apps/s_socket.c +--- openssl-1.0.1h/apps/s_socket.c.ipv6-apps 2014-06-05 11:44:33.000000000 +0200 ++++ openssl-1.0.1h/apps/s_socket.c 2014-06-05 14:39:53.226442195 +0200 @@ -102,9 +102,7 @@ static struct hostent *GetHostByName(cha static void ssl_sock_cleanup(void); #endif @@ -108,7 +108,7 @@ diff -up openssl-1.0.1c/apps/s_socket.c.ipv6-apps openssl-1.0.1c/apps/s_socket.c static int do_accept(int acc_sock, int *sock, char **host); static int host_ip(char *str, unsigned char ip[4]); -@@ -234,57 +232,70 @@ static int ssl_sock_init(void) +@@ -234,57 +232,71 @@ static int ssl_sock_init(void) return(1); } @@ -178,7 +178,7 @@ diff -up openssl-1.0.1c/apps/s_socket.c.ipv6-apps openssl-1.0.1c/apps/s_socket.c { - i=0; - i=setsockopt(s,SOL_SOCKET,SO_KEEPALIVE,(char *)&i,sizeof(i)); -- if (i < 0) { perror("keepalive"); return(0); } +- if (i < 0) { closesocket(s); perror("keepalive"); return(0); } + int i=0; + i=setsockopt(s,SOL_SOCKET,SO_KEEPALIVE, + (char *)&i,sizeof(i)); @@ -207,6 +207,7 @@ diff -up openssl-1.0.1c/apps/s_socket.c.ipv6-apps openssl-1.0.1c/apps/s_socket.c + res = res->ai_next; + } + freeaddrinfo(res0); ++ closesocket(s); + + perror(failed_call); + 
return(0); @@ -216,7 +217,7 @@ diff -up openssl-1.0.1c/apps/s_socket.c.ipv6-apps openssl-1.0.1c/apps/s_socket.c { int sock; char *name = NULL; -@@ -322,33 +333,50 @@ int do_server(int port, int type, int *r +@@ -322,33 +334,50 @@ int do_server(int port, int type, int *r } } @@ -288,7 +289,7 @@ diff -up openssl-1.0.1c/apps/s_socket.c.ipv6-apps openssl-1.0.1c/apps/s_socket.c #if defined SOL_SOCKET && defined SO_REUSEADDR { int j = 1; -@@ -356,35 +384,49 @@ static int init_server_long(int *sock, i +@@ -356,35 +385,49 @@ static int init_server_long(int *sock, i (void *) &j, sizeof j); } #endif @@ -355,7 +356,16 @@ diff -up openssl-1.0.1c/apps/s_socket.c.ipv6-apps openssl-1.0.1c/apps/s_socket.c int len; /* struct linger ling; */ -@@ -431,135 +473,58 @@ redoit: +@@ -424,145 +467,66 @@ redoit: + ling.l_onoff=1; + ling.l_linger=0; + i=setsockopt(ret,SOL_SOCKET,SO_LINGER,(char *)&ling,sizeof(ling)); +- if (i < 0) { perror("linger"); return(0); } ++ if (i < 0) { closesocket(ret); perror("linger"); return(0); } + i=0; + i=setsockopt(ret,SOL_SOCKET,SO_KEEPALIVE,(char *)&i,sizeof(i)); +- if (i < 0) { perror("keepalive"); return(0); } ++ if (i < 0) { closesocket(ret); perror("keepalive"); return(0); } */ if (host == NULL) goto end; @@ -384,6 +394,7 @@ diff -up openssl-1.0.1c/apps/s_socket.c.ipv6-apps openssl-1.0.1c/apps/s_socket.c + if ((*host=(char *)OPENSSL_malloc(strlen(buffer)+1)) == NULL) { perror("OPENSSL_malloc"); + closesocket(ret); return(0); } - BUF_strlcpy(*host,h1->h_name,strlen(h1->h_name)+1); @@ -392,11 +403,13 @@ diff -up openssl-1.0.1c/apps/s_socket.c.ipv6-apps openssl-1.0.1c/apps/s_socket.c - if (h2 == NULL) - { - BIO_printf(bio_err,"gethostbyname failure\n"); +- closesocket(ret); - return(0); - } - if (h2->h_addrtype != AF_INET) - { - BIO_printf(bio_err,"gethostbyname addr is not AF_INET\n"); +- closesocket(ret); - return(0); - } + strcpy(*host, buffer); diff --git a/openssl-1.0.1h-system-cipherlist.patch b/openssl-1.0.1h-system-cipherlist.patch new file mode 
100644 index 0000000..c7c5106 --- /dev/null +++ b/openssl-1.0.1h-system-cipherlist.patch @@ -0,0 +1,289 @@ +diff -up openssl-1.0.1h/Configure.system openssl-1.0.1h/Configure +--- openssl-1.0.1h/Configure.system 2014-06-05 14:47:37.509312875 +0200 ++++ openssl-1.0.1h/Configure 2014-06-11 14:05:28.560359069 +0200 +@@ -10,7 +10,7 @@ use strict; + + # see INSTALL for instructions. + +-my $usage="Usage: Configure [no- ...] [enable- ...] [experimental- ...] [-Dxxx] [-lxxx] [-Lxxx] [-fxxx] [-Kxxx] [no-hw-xxx|no-hw] [[no-]threads] [[no-]shared] [[no-]zlib|zlib-dynamic] [no-asm] [no-dso] [no-krb5] [sctp] [386] [--prefix=DIR] [--openssldir=OPENSSLDIR] [--with-xxx[=vvv]] [--test-sanity] os/compiler[:flags]\n"; ++my $usage="Usage: Configure [no- ...] [enable- ...] [experimental- ...] [-Dxxx] [-lxxx] [-Lxxx] [-fxxx] [-Kxxx] [no-hw-xxx|no-hw] [[no-]threads] [[no-]shared] [[no-]zlib|zlib-dynamic] [no-asm] [no-dso] [no-krb5] [sctp] [386] [--prefix=DIR] [--openssldir=OPENSSLDIR] [--system-ciphers-file=SYSTEMCIPHERFILE] [--with-xxx[=vvv]] [--test-sanity] os/compiler[:flags]\n"; + + # Options: + # +@@ -35,6 +35,9 @@ my $usage="Usage: Configure [no- + # --with-krb5-flavor Declare what flavor of Kerberos 5 is used. Currently + # supported values are "MIT" and "Heimdal". A value is required. + # ++# --system-ciphers-file A file to read cipher string from when the PROFILE=SYSTEM ++# cipher is specified (default). ++# + # --test-sanity Make a number of sanity checks on the data in this file. + # This is a debugging tool for OpenSSL developers. 
+ # +@@ -663,6 +666,7 @@ my $prefix=""; + my $libdir=""; + my $openssldir=""; + my $enginesdir=""; ++my $system_ciphers_file=""; + my $exe_ext=""; + my $install_prefix= "$ENV{'INSTALL_PREFIX'}"; + my $cross_compile_prefix=""; +@@ -895,6 +899,10 @@ PROCESS_ARGS: + { + $enginesdir=$1; + } ++ elsif (/^--system-ciphers-file=(.*)$/) ++ { ++ $system_ciphers_file=$1; ++ } + elsif (/^--install.prefix=(.*)$/) + { + $install_prefix=$1; +@@ -1053,6 +1061,7 @@ print "Configuring for $target\n"; + + &usage if (!defined($table{$target})); + ++chop $system_ciphers_file if $system_ciphers_file =~ /\/$/; + + foreach (sort (keys %disabled)) + { +@@ -1607,6 +1616,7 @@ while () + s/^INSTALLTOP=.*$/INSTALLTOP=$prefix/; + s/^MULTILIB=.*$/MULTILIB=$multilib/; + s/^OPENSSLDIR=.*$/OPENSSLDIR=$openssldir/; ++ s/^SYSTEM_CIPHERS_FILE=.*$/SYSTEM_CIPHERS_FILE=$system_ciphers_file/; + s/^LIBDIR=.*$/LIBDIR=$libdir/; + s/^INSTALL_PREFIX=.*$/INSTALL_PREFIX=$install_prefix/; + s/^PLATFORM=.*$/PLATFORM=$target/; +@@ -1812,6 +1822,14 @@ while () + $foo =~ s/\\/\\\\/g; + print OUT "#define ENGINESDIR \"$foo\"\n"; + } ++ elsif (/^#((define)|(undef))\s+SYSTEM_CIPHERS_FILE/) ++ { ++ my $foo = "$system_ciphers_file"; ++ if ($foo ne '') { ++ $foo =~ s/\\/\\\\/g; ++ print OUT "#define SYSTEM_CIPHERS_FILE \"$foo\"\n"; ++ } ++ } + elsif (/^#((define)|(undef))\s+OPENSSL_EXPORT_VAR_AS_FUNCTION/) + { printf OUT "#undef OPENSSL_EXPORT_VAR_AS_FUNCTION\n" + if $export_var_as_fn; +diff -up openssl-1.0.1h/crypto/opensslconf.h.in.system openssl-1.0.1h/crypto/opensslconf.h.in +--- openssl-1.0.1h/crypto/opensslconf.h.in.system 2014-06-05 14:47:37.437311188 +0200 ++++ openssl-1.0.1h/crypto/opensslconf.h.in 2014-06-11 13:31:44.497722345 +0200 +@@ -25,6 +25,8 @@ + #endif + #endif + ++#undef SYSTEM_CIPHERS_FILE ++ + #undef OPENSSL_UNISTD + #define OPENSSL_UNISTD + +diff -up openssl-1.0.1h/ssl/ssl_ciph.c.system openssl-1.0.1h/ssl/ssl_ciph.c +--- openssl-1.0.1h/ssl/ssl_ciph.c.system 2014-06-05 14:47:37.441311282 +0200 ++++ 
openssl-1.0.1h/ssl/ssl_ciph.c 2014-06-11 13:55:28.194381937 +0200 +@@ -1352,6 +1352,54 @@ static int ssl_cipher_process_rulestr(co + return(retval); + } + ++#ifdef SYSTEM_CIPHERS_FILE ++static char* load_system_str(const char* suffix) ++ { ++ FILE* fp; ++ char buf[1024]; ++ char *new_rules; ++ unsigned len, slen; ++ ++ fp = fopen(SYSTEM_CIPHERS_FILE, "r"); ++ if (fp == NULL || fgets(buf, sizeof(buf), fp) == NULL) ++ { ++ /* cannot open or file is empty */ ++ snprintf(buf, sizeof(buf), "%s", SSL_DEFAULT_CIPHER_LIST); ++ } ++ ++ if (fp) ++ fclose(fp); ++ ++ slen = strlen(suffix); ++ len = strlen(buf); ++ ++ if (buf[len-1] == '\n') ++ { ++ len--; ++ buf[len] = 0; ++ } ++ if (buf[len-1] == '\r') ++ { ++ len--; ++ buf[len] = 0; ++ } ++ ++ new_rules = OPENSSL_malloc(len + slen + 1); ++ if (new_rules == 0) ++ return NULL; ++ ++ memcpy(new_rules, buf, len); ++ if (slen > 0) ++ { ++ memcpy(&new_rules[len], suffix, slen); ++ len += slen; ++ } ++ new_rules[len] = 0; ++ ++ return new_rules; ++ } ++#endif ++ + STACK_OF(SSL_CIPHER) *ssl_create_cipher_list(const SSL_METHOD *ssl_method, + STACK_OF(SSL_CIPHER) **cipher_list, + STACK_OF(SSL_CIPHER) **cipher_list_by_id, +@@ -1359,16 +1407,29 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_ + { + int ok, num_of_ciphers, num_of_alias_max, num_of_group_aliases; + unsigned long disabled_mkey, disabled_auth, disabled_enc, disabled_mac, disabled_ssl; +- STACK_OF(SSL_CIPHER) *cipherstack, *tmp_cipher_list; ++ STACK_OF(SSL_CIPHER) *cipherstack = NULL, *tmp_cipher_list; + const char *rule_p; + CIPHER_ORDER *co_list = NULL, *head = NULL, *tail = NULL, *curr; + const SSL_CIPHER **ca_list = NULL; ++#ifdef SYSTEM_CIPHERS_FILE ++ char *new_rules = NULL; ++ ++ if (rule_str != NULL && strncmp(rule_str, "PROFILE=SYSTEM", 14) == 0) ++ { ++ char *p = rule_str + 14; ++ ++ new_rules = load_system_str(p); ++ rule_str = new_rules; ++ } ++#endif + + /* + * Return with error if nothing to do. 
+ */ + if (rule_str == NULL || cipher_list == NULL || cipher_list_by_id == NULL) +- return NULL; ++ { ++ goto end; ++ } + + /* + * To reduce the work to do we only want to process the compiled +@@ -1389,7 +1450,7 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_ + if (co_list == NULL) + { + SSLerr(SSL_F_SSL_CREATE_CIPHER_LIST,ERR_R_MALLOC_FAILURE); +- return(NULL); /* Failure */ ++ goto end; + } + + ssl_cipher_collect_ciphers(ssl_method, num_of_ciphers, +@@ -1431,8 +1492,7 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_ + * in force within each class */ + if (!ssl_cipher_strength_sort(&head, &tail)) + { +- OPENSSL_free(co_list); +- return NULL; ++ goto end; + } + + /* Now disable everything (maintaining the ordering!) */ +@@ -1452,9 +1512,8 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_ + ca_list = OPENSSL_malloc(sizeof(SSL_CIPHER *) * num_of_alias_max); + if (ca_list == NULL) + { +- OPENSSL_free(co_list); + SSLerr(SSL_F_SSL_CREATE_CIPHER_LIST,ERR_R_MALLOC_FAILURE); +- return(NULL); /* Failure */ ++ goto end; + } + ssl_cipher_collect_aliases(ca_list, num_of_group_aliases, + disabled_mkey, disabled_auth, disabled_enc, +@@ -1482,8 +1541,7 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_ + + if (!ok) + { /* Rule processing failure */ +- OPENSSL_free(co_list); +- return(NULL); ++ goto end; + } + + /* +@@ -1492,8 +1550,7 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_ + */ + if ((cipherstack = sk_SSL_CIPHER_new_null()) == NULL) + { +- OPENSSL_free(co_list); +- return(NULL); ++ goto end; + } + + /* +@@ -1514,13 +1571,13 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_ + #endif + } + } +- OPENSSL_free(co_list); /* Not needed any longer */ + + tmp_cipher_list = sk_SSL_CIPHER_dup(cipherstack); + if (tmp_cipher_list == NULL) + { + sk_SSL_CIPHER_free(cipherstack); +- return NULL; ++ cipherstack = NULL; ++ goto end; + } + if (*cipher_list != NULL) + sk_SSL_CIPHER_free(*cipher_list); +@@ -1531,6 +1588,12 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_ + 
(void)sk_SSL_CIPHER_set_cmp_func(*cipher_list_by_id,ssl_cipher_ptr_id_cmp); + + sk_SSL_CIPHER_sort(*cipher_list_by_id); ++ ++end: ++ OPENSSL_free(co_list); ++#ifdef SYSTEM_CIPHERS_FILE ++ OPENSSL_free(new_rules); ++#endif + return(cipherstack); + } + +diff -up openssl-1.0.1h/ssl/ssl.h.system openssl-1.0.1h/ssl/ssl.h +--- openssl-1.0.1h/ssl/ssl.h.system 2014-06-05 14:47:37.000000000 +0200 ++++ openssl-1.0.1h/ssl/ssl.h 2014-06-11 14:08:35.243461447 +0200 +@@ -338,6 +338,11 @@ extern "C" { + * (The latter are not actually enabled by ALL, but "ALL:RSA" would enable + * some of them.) + */ ++#ifdef SYSTEM_CIPHERS_FILE ++#define SSL_SYSTEM_DEFAULT_CIPHER_LIST "PROFILE=SYSTEM" ++#else ++#define SSL_SYSTEM_DEFAULT_CIPHER_LIST SSL_DEFAULT_CIPHER_LIST ++#endif + + /* Used in SSL_set_shutdown()/SSL_get_shutdown(); */ + #define SSL_SENT_SHUTDOWN 1 +diff -up openssl-1.0.1h/ssl/ssl_lib.c.system openssl-1.0.1h/ssl/ssl_lib.c +--- openssl-1.0.1h/ssl/ssl_lib.c.system 2014-06-05 11:44:33.000000000 +0200 ++++ openssl-1.0.1h/ssl/ssl_lib.c 2014-06-11 13:59:40.696578139 +0200 +@@ -263,7 +263,7 @@ int SSL_CTX_set_ssl_version(SSL_CTX *ctx + + sk=ssl_create_cipher_list(ctx->method,&(ctx->cipher_list), + &(ctx->cipher_list_by_id), +- meth->version == SSL2_VERSION ? "SSLv2" : SSL_DEFAULT_CIPHER_LIST); ++ meth->version == SSL2_VERSION ? "SSLv2" : SSL_SYSTEM_DEFAULT_CIPHER_LIST); + if ((sk == NULL) || (sk_SSL_CIPHER_num(sk) <= 0)) + { + SSLerr(SSL_F_SSL_CTX_SET_SSL_VERSION,SSL_R_SSL_LIBRARY_HAS_NO_CIPHERS); +@@ -1767,7 +1767,7 @@ SSL_CTX *SSL_CTX_new(const SSL_METHOD *m + + ssl_create_cipher_list(ret->method, + &ret->cipher_list,&ret->cipher_list_by_id, +- meth->version == SSL2_VERSION ? "SSLv2" : SSL_DEFAULT_CIPHER_LIST); ++ meth->version == SSL2_VERSION ? 
"SSLv2" : SSL_SYSTEM_DEFAULT_CIPHER_LIST); + if (ret->cipher_list == NULL + || sk_SSL_CIPHER_num(ret->cipher_list) <= 0) + { diff --git a/openssl-1.0.1a-algo-doc.patch b/openssl-1.0.1i-algo-doc.patch similarity index 80% rename from openssl-1.0.1a-algo-doc.patch rename to openssl-1.0.1i-algo-doc.patch index c4aaa89..a19877d 100644 --- a/openssl-1.0.1a-algo-doc.patch +++ b/openssl-1.0.1i-algo-doc.patch @@ -1,6 +1,6 @@ -diff -up openssl-1.0.1a/doc/crypto/EVP_DigestInit.pod.algo-doc openssl-1.0.1a/doc/crypto/EVP_DigestInit.pod ---- openssl-1.0.1a/doc/crypto/EVP_DigestInit.pod.algo-doc 2012-04-11 00:28:22.000000000 +0200 -+++ openssl-1.0.1a/doc/crypto/EVP_DigestInit.pod 2012-04-20 09:14:01.865167011 +0200 +diff -up openssl-1.0.1i/doc/crypto/EVP_DigestInit.pod.algo-doc openssl-1.0.1i/doc/crypto/EVP_DigestInit.pod +--- openssl-1.0.1i/doc/crypto/EVP_DigestInit.pod.algo-doc 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/doc/crypto/EVP_DigestInit.pod 2014-08-07 11:18:01.290773970 +0200 @@ -75,7 +75,7 @@ EVP_MD_CTX_create() allocates, initializ EVP_DigestInit_ex() sets up digest context B to use a digest @@ -10,9 +10,9 @@ diff -up openssl-1.0.1a/doc/crypto/EVP_DigestInit.pod.algo-doc openssl-1.0.1a/do If B is NULL then the default implementation of digest B is used. EVP_DigestUpdate() hashes B bytes of data at B into the -@@ -165,7 +165,8 @@ EVP_MD_size(), EVP_MD_block_size(), EVP_ - EVP_MD_CTX_block_size() and EVP_MD_block_size() return the digest or block - size in bytes. +@@ -164,7 +164,8 @@ corresponding OBJECT IDENTIFIER or NID_u + EVP_MD_size(), EVP_MD_block_size(), EVP_MD_CTX_size() and + EVP_MD_CTX_block_size() return the digest or block size in bytes. 
-EVP_md_null(), EVP_md2(), EVP_md5(), EVP_sha(), EVP_sha1(), EVP_dss(), +EVP_md_null(), EVP_md2(), EVP_md5(), EVP_sha(), EVP_sha1(), @@ -20,9 +20,9 @@ diff -up openssl-1.0.1a/doc/crypto/EVP_DigestInit.pod.algo-doc openssl-1.0.1a/do EVP_dss1(), EVP_mdc2() and EVP_ripemd160() return pointers to the corresponding EVP_MD structures. -diff -up openssl-1.0.1a/doc/crypto/EVP_EncryptInit.pod.algo-doc openssl-1.0.1a/doc/crypto/EVP_EncryptInit.pod ---- openssl-1.0.1a/doc/crypto/EVP_EncryptInit.pod.algo-doc 2005-04-15 18:01:35.000000000 +0200 -+++ openssl-1.0.1a/doc/crypto/EVP_EncryptInit.pod 2012-04-20 09:10:59.114736465 +0200 +diff -up openssl-1.0.1i/doc/crypto/EVP_EncryptInit.pod.algo-doc openssl-1.0.1i/doc/crypto/EVP_EncryptInit.pod +--- openssl-1.0.1i/doc/crypto/EVP_EncryptInit.pod.algo-doc 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/doc/crypto/EVP_EncryptInit.pod 2014-08-07 10:55:25.100638252 +0200 @@ -91,6 +91,32 @@ EVP_CIPHER_CTX_set_padding - EVP cipher int EVP_CIPHER_param_to_asn1(EVP_CIPHER_CTX *c, ASN1_TYPE *type); int EVP_CIPHER_asn1_to_param(EVP_CIPHER_CTX *c, ASN1_TYPE *type); diff --git a/openssl-1.0.1i-manfix.patch b/openssl-1.0.1i-manfix.patch new file mode 100644 index 0000000..f2f8be7 --- /dev/null +++ b/openssl-1.0.1i-manfix.patch @@ -0,0 +1,86 @@ +diff -up openssl-1.0.1i/doc/apps/ec.pod.manfix openssl-1.0.1i/doc/apps/ec.pod +--- openssl-1.0.1i/doc/apps/ec.pod.manfix 2014-07-22 21:41:23.000000000 +0200 ++++ openssl-1.0.1i/doc/apps/ec.pod 2014-08-07 11:21:57.258887741 +0200 +@@ -93,10 +93,6 @@ prints out the public, private key compo + + this option prevents output of the encoded version of the key. + +-=item B<-modulus> +- +-this option prints out the value of the public key component of the key. 
+- + =item B<-pubin> + + by default a private key is read from the input file: with this option a +diff -up openssl-1.0.1i/doc/apps/openssl.pod.manfix openssl-1.0.1i/doc/apps/openssl.pod +--- openssl-1.0.1i/doc/apps/openssl.pod.manfix 2014-07-22 21:43:11.000000000 +0200 ++++ openssl-1.0.1i/doc/apps/openssl.pod 2014-08-07 11:21:57.259887746 +0200 +@@ -163,7 +163,7 @@ Create or examine a netscape certificate + + Online Certificate Status Protocol utility. + +-=item L|passwd(1)> ++=item L|sslpasswd(1)> + + Generation of hashed passwords. + +@@ -187,7 +187,7 @@ Public key algorithm parameter managemen + + Public key algorithm cryptographic operation utility. + +-=item L|rand(1)> ++=item L|sslrand(1)> + + Generate pseudo-random bytes. + +@@ -401,9 +401,9 @@ L, L, L, L, + L, L, L, + L, L, L, +-L, ++L, + L, L, L, +-L, L, L, ++L, L, L, + L, L, + L, L, + L, L, +diff -up openssl-1.0.1i/doc/apps/s_client.pod.manfix openssl-1.0.1i/doc/apps/s_client.pod +--- openssl-1.0.1i/doc/apps/s_client.pod.manfix 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/doc/apps/s_client.pod 2014-08-07 11:24:28.736604443 +0200 +@@ -34,9 +34,14 @@ B B + [B<-ssl2>] + [B<-ssl3>] + [B<-tls1>] ++[B<-tls1_1>] ++[B<-tls1_2>] ++[B<-dtls1>] + [B<-no_ssl2>] + [B<-no_ssl3>] + [B<-no_tls1>] ++[B<-no_tls1_1>] ++[B<-no_tls1_2>] + [B<-bugs>] + [B<-cipher cipherlist>] + [B<-serverpref>] +@@ -196,7 +201,7 @@ Use the PSK key B when using a PSK + given as a hexadecimal number without leading 0x, for example -psk + 1a2b3c4d. + +-=item B<-ssl2>, B<-ssl3>, B<-tls1>, B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1> ++=item B<-ssl2>, B<-ssl3>, B<-tls1>, B<-tls1_1>, B<-tls1_2>, B<-dtls1>, B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1>, B<-no_tls1_1>, B<-no_tls1_2> + + these options disable the use of certain SSL or TLS protocols. 
By default + the initial handshake uses a method which should be compatible with all +diff -up openssl-1.0.1i/doc/apps/s_server.pod.manfix openssl-1.0.1i/doc/apps/s_server.pod +--- openssl-1.0.1i/doc/apps/s_server.pod.manfix 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/doc/apps/s_server.pod 2014-08-07 11:21:57.259887746 +0200 +@@ -216,7 +216,7 @@ Use the PSK key B when using a PSK + given as a hexadecimal number without leading 0x, for example -psk + 1a2b3c4d. + +-=item B<-ssl2>, B<-ssl3>, B<-tls1>, B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1> ++=item B<-ssl2>, B<-ssl3>, B<-tls1>, B<-tls1_1>, B<-tls1_2>, B<-dtls1>, B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1>, B<-no_tls1_1>, B<-no_tls1_2> + + these options disable the use of certain SSL or TLS protocols. By default + the initial handshake uses a method which should be compatible with all diff --git a/openssl-1.0.1e-new-fips-reqs.patch b/openssl-1.0.1i-new-fips-reqs.patch similarity index 73% rename from openssl-1.0.1e-new-fips-reqs.patch rename to openssl-1.0.1i-new-fips-reqs.patch index 4afdde1..e33494c 100644 --- a/openssl-1.0.1e-new-fips-reqs.patch +++ b/openssl-1.0.1i-new-fips-reqs.patch @@ -1,6 +1,6 @@ -diff -up openssl-1.0.1e/crypto/bn/bn_rand.c.fips-reqs openssl-1.0.1e/crypto/bn/bn_rand.c ---- openssl-1.0.1e/crypto/bn/bn_rand.c.fips-reqs 2013-02-11 16:02:47.000000000 +0100 -+++ openssl-1.0.1e/crypto/bn/bn_rand.c 2014-02-13 16:50:34.280893285 +0100 +diff -up openssl-1.0.1i/crypto/bn/bn_rand.c.fips-reqs openssl-1.0.1i/crypto/bn/bn_rand.c +--- openssl-1.0.1i/crypto/bn/bn_rand.c.fips-reqs 2014-07-22 21:43:11.000000000 +0200 ++++ openssl-1.0.1i/crypto/bn/bn_rand.c 2014-08-13 19:58:06.818832577 +0200 @@ -138,9 +138,12 @@ static int bnrand(int pseudorand, BIGNUM goto err; } @@ -17,9 +17,9 @@ diff -up openssl-1.0.1e/crypto/bn/bn_rand.c.fips-reqs openssl-1.0.1e/crypto/bn/b if (pseudorand) { -diff -up openssl-1.0.1e/crypto/dh/dh_gen.c.fips-reqs openssl-1.0.1e/crypto/dh/dh_gen.c ---- 
openssl-1.0.1e/crypto/dh/dh_gen.c.fips-reqs 2013-12-18 12:17:09.748636614 +0100 -+++ openssl-1.0.1e/crypto/dh/dh_gen.c 2013-12-18 12:17:09.798637687 +0100 +diff -up openssl-1.0.1i/crypto/dh/dh_gen.c.fips-reqs openssl-1.0.1i/crypto/dh/dh_gen.c +--- openssl-1.0.1i/crypto/dh/dh_gen.c.fips-reqs 2014-08-13 19:58:06.765831356 +0200 ++++ openssl-1.0.1i/crypto/dh/dh_gen.c 2014-08-13 19:58:06.818832577 +0200 @@ -125,7 +125,7 @@ static int dh_builtin_genparams(DH *ret, return 0; } @@ -29,9 +29,9 @@ diff -up openssl-1.0.1e/crypto/dh/dh_gen.c.fips-reqs openssl-1.0.1e/crypto/dh/dh { DHerr(DH_F_DH_BUILTIN_GENPARAMS, DH_R_KEY_SIZE_TOO_SMALL); goto err; -diff -up openssl-1.0.1e/crypto/dh/dh.h.fips-reqs openssl-1.0.1e/crypto/dh/dh.h ---- openssl-1.0.1e/crypto/dh/dh.h.fips-reqs 2013-12-18 12:17:09.748636614 +0100 -+++ openssl-1.0.1e/crypto/dh/dh.h 2013-12-18 12:17:09.798637687 +0100 +diff -up openssl-1.0.1i/crypto/dh/dh.h.fips-reqs openssl-1.0.1i/crypto/dh/dh.h +--- openssl-1.0.1i/crypto/dh/dh.h.fips-reqs 2014-08-13 19:58:06.765831356 +0200 ++++ openssl-1.0.1i/crypto/dh/dh.h 2014-08-13 19:58:06.818832577 +0200 @@ -78,6 +78,7 @@ #endif @@ -40,9 +40,9 @@ diff -up openssl-1.0.1e/crypto/dh/dh.h.fips-reqs openssl-1.0.1e/crypto/dh/dh.h #define DH_FLAG_CACHE_MONT_P 0x01 #define DH_FLAG_NO_EXP_CONSTTIME 0x02 /* new with 0.9.7h; the built-in DH -diff -up openssl-1.0.1e/crypto/dh/dh_check.c.fips-reqs openssl-1.0.1e/crypto/dh/dh_check.c ---- openssl-1.0.1e/crypto/dh/dh_check.c.fips-reqs 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/dh/dh_check.c 2013-12-18 12:17:09.799637708 +0100 +diff -up openssl-1.0.1i/crypto/dh/dh_check.c.fips-reqs openssl-1.0.1i/crypto/dh/dh_check.c +--- openssl-1.0.1i/crypto/dh/dh_check.c.fips-reqs 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/crypto/dh/dh_check.c 2014-08-13 19:58:06.818832577 +0200 @@ -134,7 +134,33 @@ int DH_check_pub_key(const DH *dh, const BN_sub_word(q,1); if (BN_cmp(pub_key,q)>=0) @@ -77,25 +77,26 @@ diff -up 
openssl-1.0.1e/crypto/dh/dh_check.c.fips-reqs openssl-1.0.1e/crypto/dh/ ok = 1; err: if (q != NULL) BN_free(q); -diff -up openssl-1.0.1e/crypto/dsa/dsa_gen.c.fips-reqs openssl-1.0.1e/crypto/dsa/dsa_gen.c ---- openssl-1.0.1e/crypto/dsa/dsa_gen.c.fips-reqs 2013-12-18 12:17:09.749636636 +0100 -+++ openssl-1.0.1e/crypto/dsa/dsa_gen.c 2013-12-18 12:17:09.799637708 +0100 -@@ -159,7 +159,6 @@ int dsa_builtin_paramgen(DSA *ret, size_ +diff -up openssl-1.0.1i/crypto/dsa/dsa_gen.c.fips-reqs openssl-1.0.1i/crypto/dsa/dsa_gen.c +--- openssl-1.0.1i/crypto/dsa/dsa_gen.c.fips-reqs 2014-08-13 19:58:06.766831380 +0200 ++++ openssl-1.0.1i/crypto/dsa/dsa_gen.c 2014-08-13 19:58:06.818832577 +0200 +@@ -159,7 +159,7 @@ int dsa_builtin_paramgen(DSA *ret, size_ } if (FIPS_module_mode() && - (bits != 1024 || qbits != 160) && ++ (getenv("OPENSSL_ENFORCE_MODULUS_BITS") || bits != 1024 || qbits != 160) && (bits != 2048 || qbits != 224) && (bits != 2048 || qbits != 256) && (bits != 3072 || qbits != 256)) -diff -up openssl-1.0.1e/crypto/dsa/dsa.h.fips-reqs openssl-1.0.1e/crypto/dsa/dsa.h ---- openssl-1.0.1e/crypto/dsa/dsa.h.fips-reqs 2013-12-18 12:17:09.749636636 +0100 -+++ openssl-1.0.1e/crypto/dsa/dsa.h 2013-12-18 12:17:09.799637708 +0100 +diff -up openssl-1.0.1i/crypto/dsa/dsa.h.fips-reqs openssl-1.0.1i/crypto/dsa/dsa.h +--- openssl-1.0.1i/crypto/dsa/dsa.h.fips-reqs 2014-08-13 19:58:06.766831380 +0200 ++++ openssl-1.0.1i/crypto/dsa/dsa.h 2014-08-13 19:58:06.818832577 +0200 @@ -89,6 +89,7 @@ #endif #define OPENSSL_DSA_FIPS_MIN_MODULUS_BITS 1024 -+#define OPENSSL_DSA_FIPS_MIN_MODULUS_BITS_GEN 2048 ++#define OPENSSL_DSA_FIPS_MIN_MODULUS_BITS_GEN (getenv("OPENSSL_ENFORCE_MODULUS_BITS")?2048:1024) #define DSA_FLAG_CACHE_MONT_P 0x01 #define DSA_FLAG_NO_EXP_CONSTTIME 0x02 /* new with 0.9.7h; the built-in DSA @@ -112,9 +113,9 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa.h.fips-reqs openssl-1.0.1e/crypto/dsa/dsa #define DSA_is_prime(n, callback, cb_arg) \ BN_is_prime(n, DSS_prime_checks, callback, NULL, 
cb_arg) -diff -up openssl-1.0.1e/crypto/dsa/dsa_key.c.fips-reqs openssl-1.0.1e/crypto/dsa/dsa_key.c ---- openssl-1.0.1e/crypto/dsa/dsa_key.c.fips-reqs 2013-12-18 12:17:09.797637665 +0100 -+++ openssl-1.0.1e/crypto/dsa/dsa_key.c 2013-12-18 12:17:09.799637708 +0100 +diff -up openssl-1.0.1i/crypto/dsa/dsa_key.c.fips-reqs openssl-1.0.1i/crypto/dsa/dsa_key.c +--- openssl-1.0.1i/crypto/dsa/dsa_key.c.fips-reqs 2014-08-13 19:58:06.816832531 +0200 ++++ openssl-1.0.1i/crypto/dsa/dsa_key.c 2014-08-13 19:58:06.818832577 +0200 @@ -127,7 +127,7 @@ static int dsa_builtin_keygen(DSA *dsa) #ifdef OPENSSL_FIPS @@ -124,9 +125,9 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa_key.c.fips-reqs openssl-1.0.1e/crypto/dsa { DSAerr(DSA_F_DSA_BUILTIN_KEYGEN, DSA_R_KEY_SIZE_TOO_SMALL); goto err; -diff -up openssl-1.0.1e/crypto/fips/fips_dh_selftest.c.fips-reqs openssl-1.0.1e/crypto/fips/fips_dh_selftest.c ---- openssl-1.0.1e/crypto/fips/fips_dh_selftest.c.fips-reqs 2013-12-18 17:06:36.575114314 +0100 -+++ openssl-1.0.1e/crypto/fips/fips_dh_selftest.c 2013-12-18 17:26:14.409036334 +0100 +diff -up openssl-1.0.1i/crypto/fips/fips_dh_selftest.c.fips-reqs openssl-1.0.1i/crypto/fips/fips_dh_selftest.c +--- openssl-1.0.1i/crypto/fips/fips_dh_selftest.c.fips-reqs 2014-08-13 19:58:06.819832600 +0200 ++++ openssl-1.0.1i/crypto/fips/fips_dh_selftest.c 2014-08-13 19:58:06.819832600 +0200 @@ -0,0 +1,162 @@ +/* ==================================================================== + * Copyright (c) 2011 The OpenSSL Project. All rights reserved. 
@@ -290,92 +291,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_dh_selftest.c.fips-reqs openssl-1.0.1e/ + return ret; + } +#endif -diff -up openssl-1.0.1e/crypto/fips/fips_drbg_rand.c.fips-reqs openssl-1.0.1e/crypto/fips/fips_drbg_rand.c ---- openssl-1.0.1e/crypto/fips/fips_drbg_rand.c.fips-reqs 2013-12-18 12:17:09.757636808 +0100 -+++ openssl-1.0.1e/crypto/fips/fips_drbg_rand.c 2013-12-18 18:53:33.263711297 +0100 -@@ -77,7 +77,8 @@ static int fips_drbg_bytes(unsigned char - int rv = 0; - unsigned char *adin = NULL; - size_t adinlen = 0; -- CRYPTO_w_lock(CRYPTO_LOCK_RAND); -+ int locked; -+ locked = private_RAND_lock(1); - do - { - size_t rcnt; -@@ -109,7 +110,8 @@ static int fips_drbg_bytes(unsigned char - while (count); - rv = 1; - err: -- CRYPTO_w_unlock(CRYPTO_LOCK_RAND); -+ if (locked) -+ private_RAND_lock(0); - return rv; - } - -@@ -124,35 +126,51 @@ static int fips_drbg_status(void) - { - DRBG_CTX *dctx = &ossl_dctx; - int rv; -- CRYPTO_r_lock(CRYPTO_LOCK_RAND); -+ int locked; -+ locked = private_RAND_lock(1); - rv = dctx->status == DRBG_STATUS_READY ? 
1 : 0; -- CRYPTO_r_unlock(CRYPTO_LOCK_RAND); -+ if (locked) -+ private_RAND_lock(0); - return rv; - } - - static void fips_drbg_cleanup(void) - { - DRBG_CTX *dctx = &ossl_dctx; -- CRYPTO_w_lock(CRYPTO_LOCK_RAND); -+ int locked; -+ locked = private_RAND_lock(1); - FIPS_drbg_uninstantiate(dctx); -- CRYPTO_w_unlock(CRYPTO_LOCK_RAND); -+ if (locked) -+ private_RAND_lock(0); - } - - static int fips_drbg_seed(const void *seed, int seedlen) - { - DRBG_CTX *dctx = &ossl_dctx; -+ int locked; -+ int ret = 1; -+ -+ locked = private_RAND_lock(1); - if (dctx->rand_seed_cb) -- return dctx->rand_seed_cb(dctx, seed, seedlen); -- return 1; -+ ret = dctx->rand_seed_cb(dctx, seed, seedlen); -+ if (locked) -+ private_RAND_lock(0); -+ return ret; - } - - static int fips_drbg_add(const void *seed, int seedlen, - double add_entropy) - { - DRBG_CTX *dctx = &ossl_dctx; -+ int locked; -+ int ret = 1; -+ -+ locked = private_RAND_lock(1); - if (dctx->rand_add_cb) -- return dctx->rand_add_cb(dctx, seed, seedlen, add_entropy); -- return 1; -+ ret = dctx->rand_add_cb(dctx, seed, seedlen, add_entropy); -+ if (locked) -+ private_RAND_lock(0); -+ return ret; - } - - static const RAND_METHOD rand_drbg_meth = -diff -up openssl-1.0.1e/crypto/fips/fips.h.fips-reqs openssl-1.0.1e/crypto/fips/fips.h ---- openssl-1.0.1e/crypto/fips/fips.h.fips-reqs 2013-12-18 12:17:09.000000000 +0100 -+++ openssl-1.0.1e/crypto/fips/fips.h 2013-12-18 17:13:00.928586689 +0100 +diff -up openssl-1.0.1i/crypto/fips/fips.h.fips-reqs openssl-1.0.1i/crypto/fips/fips.h +--- openssl-1.0.1i/crypto/fips/fips.h.fips-reqs 2014-08-13 19:58:06.812832439 +0200 ++++ openssl-1.0.1i/crypto/fips/fips.h 2014-08-13 19:58:06.819832600 +0200 @@ -96,6 +96,7 @@ void FIPS_corrupt_dsa_keygen(void); int FIPS_selftest_dsa(void); int FIPS_selftest_ecdsa(void); @@ -384,9 +302,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips.h.fips-reqs openssl-1.0.1e/crypto/fips/ void FIPS_corrupt_rng(void); void FIPS_rng_stick(void); void FIPS_x931_stick(int onoff); -diff 
-up openssl-1.0.1e/crypto/fips/fips_post.c.fips-reqs openssl-1.0.1e/crypto/fips/fips_post.c ---- openssl-1.0.1e/crypto/fips/fips_post.c.fips-reqs 2013-12-18 12:17:09.000000000 +0100 -+++ openssl-1.0.1e/crypto/fips/fips_post.c 2013-12-18 17:12:26.721832716 +0100 +diff -up openssl-1.0.1i/crypto/fips/fips_post.c.fips-reqs openssl-1.0.1i/crypto/fips/fips_post.c +--- openssl-1.0.1i/crypto/fips/fips_post.c.fips-reqs 2014-08-13 19:58:06.809832370 +0200 ++++ openssl-1.0.1i/crypto/fips/fips_post.c 2014-08-13 19:58:06.819832600 +0200 @@ -99,6 +99,8 @@ int FIPS_selftest(void) rv = 0; if (!FIPS_selftest_dsa()) @@ -396,9 +314,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_post.c.fips-reqs openssl-1.0.1e/crypto/ if (!FIPS_selftest_ecdh()) rv = 0; return rv; -diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c ---- openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs 2014-03-14 14:47:18.809259727 +0100 -+++ openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c 2014-03-14 15:37:26.295687852 +0100 +diff -up openssl-1.0.1i/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1i/crypto/fips/fips_rsa_selftest.c +--- openssl-1.0.1i/crypto/fips/fips_rsa_selftest.c.fips-reqs 2014-08-13 19:58:06.779831679 +0200 ++++ openssl-1.0.1i/crypto/fips/fips_rsa_selftest.c 2014-08-13 19:59:16.491437297 +0200 @@ -60,69 +60,113 @@ #ifdef OPENSSL_FIPS @@ -557,7 +475,7 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e key->e = BN_bin2bn(e, sizeof(e)-1, key->e); key->d = BN_bin2bn(d, sizeof(d)-1, key->d); key->p = BN_bin2bn(p, sizeof(p)-1, key->p); -@@ -145,201 +189,391 @@ void FIPS_corrupt_rsa() +@@ -145,201 +189,291 @@ void FIPS_corrupt_rsa() static const unsigned char kat_tbs[] = "OpenSSL FIPS 140-2 Public Key RSA KAT"; static const unsigned char kat_RSA_PSS_SHA1[] = { @@ -920,7 +838,7 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e + 0x43, 0xA8, 0x34, 0x0A }; - static const 
unsigned char kat_RSA_X931_SHA1[] = { +-static const unsigned char kat_RSA_X931_SHA1[] = { - 0x86, 0xB4, 0x18, 0xBA, 0xD1, 0x80, 0xB6, 0x7C, 0x42, 0x45, 0x4D, 0xDF, - 0xE9, 0x2D, 0xE1, 0x83, 0x5F, 0xB5, 0x2F, 0xC9, 0xCD, 0xC4, 0xB2, 0x75, - 0x80, 0xA4, 0xF1, 0x4A, 0xE7, 0x83, 0x12, 0x1E, 0x1E, 0x14, 0xB8, 0xAC, @@ -932,31 +850,14 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e - 0x48, 0xAF, 0x82, 0xFE, 0x32, 0x41, 0x9B, 0xB2, 0xDB, 0xEA, 0xED, 0x76, - 0x8E, 0x6E, 0xCA, 0x7E, 0x4E, 0x14, 0xBA, 0x30, 0x84, 0x1C, 0xB3, 0x67, - 0xA3, 0x29, 0x80, 0x70, 0x54, 0x68, 0x7D, 0x49 -+ 0xB1, 0x0E, 0x4F, 0xC6, 0xE0, 0x95, 0x85, 0x7B, 0xBE, 0xDE, 0xC4, 0xE6, -+ 0x1F, 0x12, 0x2E, 0x9B, 0x3E, 0x11, 0xA3, 0xF0, 0xF0, 0xA8, 0x23, 0x1A, -+ 0x96, 0x6E, 0x99, 0xB5, 0x5F, 0x82, 0xC5, 0x87, 0x75, 0xE9, 0xD4, 0xBF, -+ 0x9F, 0xE0, 0xA4, 0xED, 0xC7, 0x01, 0x2A, 0x3F, 0x6F, 0x43, 0x1D, 0x4F, -+ 0xE8, 0x05, 0x34, 0x32, 0x20, 0x36, 0x94, 0xA0, 0x6D, 0xCC, 0xF6, 0x41, -+ 0x49, 0x56, 0x96, 0xEC, 0x9C, 0x7C, 0xD1, 0x0E, 0x9E, 0xD8, 0x1B, 0x48, -+ 0xD9, 0xDF, 0x99, 0x9F, 0x92, 0x17, 0x96, 0xA4, 0xF1, 0x87, 0x64, 0x61, -+ 0x3C, 0xAF, 0x00, 0x24, 0xB3, 0x64, 0x88, 0x8E, 0x41, 0xBF, 0x29, 0x1F, -+ 0xA3, 0x28, 0xAD, 0x21, 0x1E, 0xA3, 0x96, 0x40, 0x0A, 0x0B, 0x82, 0xCD, -+ 0x97, 0x58, 0x33, 0xB6, 0x52, 0xAC, 0xC5, 0x3B, 0x14, 0xE7, 0x1E, 0x5D, -+ 0x09, 0xC9, 0x76, 0xB5, 0x89, 0xC6, 0x9B, 0x4C, 0xC2, 0xC2, 0x31, 0x0E, -+ 0xBA, 0x1E, 0xB5, 0x11, 0xD0, 0xFD, 0xC1, 0xDA, 0x64, 0x17, 0xA8, 0xCB, -+ 0xF0, 0x94, 0xF4, 0xDD, 0x84, 0xB7, 0xEF, 0x9C, 0x13, 0x4F, 0xDD, 0x06, -+ 0x0C, 0xE4, 0xC7, 0xFD, 0x69, 0x10, 0x20, 0xD3, 0x93, 0x5E, 0xF8, 0xBA, -+ 0x21, 0xFB, 0x62, 0xC4, 0x63, 0x76, 0x43, 0xAA, 0x7E, 0x3C, 0x56, 0x5E, -+ 0xB4, 0x47, 0x3A, 0x05, 0x0D, 0xBB, 0x13, 0xC4, 0x93, 0xFB, 0x29, 0xA8, -+ 0x3E, 0x76, 0x41, 0x54, 0x9E, 0x7B, 0xE2, 0xE0, 0x07, 0x1D, 0xA7, 0x9C, -+ 0x85, 0x11, 0xB5, 0xA5, 0x88, 0x58, 0x02, 0xD8, 0xC0, 0x4B, 0x81, 0xBF, -+ 0x2B, 0x38, 0xE2, 0x2F, 0x42, 
0xCA, 0x63, 0x8A, 0x0A, 0x78, 0xBA, 0x50, -+ 0xE5, 0x84, 0x35, 0xD3, 0x6A, 0x1E, 0x96, 0x0B, 0x91, 0xB1, 0x0E, 0x85, -+ 0xA8, 0x5C, 0x6E, 0x46, 0x5C, 0x61, 0x8C, 0x4F, 0x5B, 0x61, 0xB6, 0x3C, -+ 0xB7, 0x2C, 0xA5, 0x1A - }; +-}; ++static int fips_rsa_encrypt_test(RSA *rsa, const unsigned char *plaintext, int ptlen) ++ { ++ unsigned char *ctbuf = NULL, *ptbuf = NULL; ++ int ret = 0; ++ int len; - static const unsigned char kat_RSA_X931_SHA256[] = { +-static const unsigned char kat_RSA_X931_SHA256[] = { - 0x7E, 0xA2, 0x77, 0xFE, 0xB8, 0x54, 0x8A, 0xC7, 0x7F, 0x64, 0x54, 0x89, - 0xE5, 0x52, 0x15, 0x8E, 0x52, 0x96, 0x4E, 0xA6, 0x58, 0x92, 0x1C, 0xDD, - 0xEA, 0xA2, 0x2D, 0x5C, 0xD1, 0x62, 0x00, 0x49, 0x05, 0x95, 0x73, 0xCF, @@ -968,31 +869,12 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e - 0x33, 0x1D, 0x82, 0x8C, 0x03, 0xEA, 0x69, 0x88, 0x35, 0xA1, 0x42, 0xBD, - 0x21, 0xED, 0x8D, 0xBC, 0xBC, 0xDB, 0x30, 0xFF, 0x86, 0xF0, 0x5B, 0xDC, - 0xE3, 0xE2, 0xE8, 0x0A, 0x0A, 0x29, 0x94, 0x80 -+ 0xC6, 0x6C, 0x01, 0x7F, 0xB6, 0x8C, 0xD4, 0x61, 0x83, 0xC5, 0xBC, 0x75, -+ 0x39, 0x22, 0xDD, 0x17, 0x5B, 0x95, 0x4B, 0x4C, 0x46, 0x39, 0x37, 0xA7, -+ 0x54, 0x6C, 0x49, 0x5A, 0x67, 0x90, 0x47, 0xF6, 0x59, 0xAE, 0xFC, 0xDD, -+ 0xDF, 0xDB, 0xC7, 0x91, 0xB9, 0xB6, 0xCE, 0xD8, 0xFA, 0x30, 0x01, 0x9F, -+ 0xCA, 0xE5, 0x4A, 0x51, 0xB7, 0xBE, 0xBD, 0x4E, 0x56, 0x25, 0x0B, 0x49, -+ 0xE0, 0x46, 0xBB, 0x81, 0x0E, 0x14, 0x47, 0xFF, 0xCB, 0xBB, 0xA1, 0x6D, -+ 0x44, 0x9B, 0xF7, 0xEE, 0x81, 0xEB, 0xF6, 0x62, 0xEA, 0x0D, 0x76, 0x76, -+ 0x4E, 0x25, 0xD7, 0x9A, 0x2B, 0xB1, 0x92, 0xED, 0x5C, 0x7F, 0x9D, 0x99, -+ 0x07, 0x9E, 0xBF, 0x62, 0x83, 0x12, 0x61, 0x99, 0x3E, 0xF5, 0x6A, 0x4C, -+ 0x58, 0xB0, 0x2A, 0x15, 0x1C, 0xA0, 0xD2, 0x91, 0x87, 0x9C, 0x7D, 0x4F, -+ 0xEF, 0x3B, 0x0F, 0x60, 0xD7, 0x1E, 0xEF, 0x7C, 0xBE, 0x68, 0x95, 0xE6, -+ 0xBA, 0xFA, 0xF6, 0xD1, 0x67, 0x3D, 0x9D, 0x39, 0xAE, 0xC2, 0x85, 0xD2, -+ 0xDE, 0xA5, 0x85, 0x1E, 0x4D, 0x2B, 0x2C, 0x06, 0x44, 0x98, 0x17, 
0x46, -+ 0x89, 0x41, 0x13, 0xFC, 0x99, 0xD6, 0x6C, 0xCF, 0x26, 0xA2, 0x77, 0x8A, -+ 0x3F, 0x10, 0xF8, 0xC5, 0xC9, 0x4A, 0xB6, 0x93, 0xF5, 0x38, 0x89, 0xBD, -+ 0xFF, 0xAE, 0x42, 0x06, 0x2D, 0xCD, 0x1B, 0x3D, 0x5A, 0xCD, 0xF2, 0x8A, -+ 0x65, 0xA4, 0xB7, 0xB6, 0xF6, 0x5B, 0xE8, 0xA4, 0x68, 0xB4, 0x27, 0xDA, -+ 0xF1, 0x59, 0x37, 0x24, 0x18, 0xB5, 0x5B, 0x15, 0x62, 0x64, 0x6F, 0x78, -+ 0xBB, 0x17, 0x94, 0x42, 0xAD, 0xB3, 0x0D, 0x18, 0xB0, 0x1B, 0x28, 0x29, -+ 0x3B, 0x15, 0xBF, 0xD1, 0xC8, 0x28, 0x4F, 0xDF, 0x7F, 0x34, 0x49, 0x2A, -+ 0x44, 0xD5, 0x4C, 0x59, 0x90, 0x83, 0x8D, 0xFC, 0x58, 0x7E, 0xEC, 0x4B, -+ 0x54, 0xF0, 0xB5, 0xBD - }; +-}; ++ ctbuf = OPENSSL_malloc(RSA_size(rsa)); ++ if (!ctbuf) ++ goto err; - static const unsigned char kat_RSA_X931_SHA384[] = { +-static const unsigned char kat_RSA_X931_SHA384[] = { - 0x5C, 0x7D, 0x96, 0x35, 0xEC, 0x7E, 0x11, 0x38, 0xBB, 0x7B, 0xEC, 0x7B, - 0xF2, 0x82, 0x8E, 0x99, 0xBD, 0xEF, 0xD8, 0xAE, 0xD7, 0x39, 0x37, 0xCB, - 0xE6, 0x4F, 0x5E, 0x0A, 0x13, 0xE4, 0x2E, 0x40, 0xB9, 0xBE, 0x2E, 0xE3, @@ -1004,31 +886,15 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e - 0x0E, 0x09, 0xEE, 0x2E, 0xE2, 0x37, 0xB9, 0xDE, 0xC5, 0x12, 0x44, 0x35, - 0xEF, 0x01, 0xE6, 0x5E, 0x39, 0x31, 0x2D, 0x71, 0xA5, 0xDC, 0xC6, 0x6D, - 0xE2, 0xCD, 0x85, 0xDB, 0x73, 0x82, 0x65, 0x28 -+ 0x88, 0x85, 0xE1, 0xC1, 0xE2, 0xE5, 0x0B, 0x6C, 0x03, 0x27, 0xAC, 0xC8, -+ 0x3A, 0x72, 0xB4, 0x9A, 0xF3, 0xAE, 0x9C, 0x88, 0x8C, 0xBE, 0x28, 0x0D, -+ 0x89, 0x5F, 0x06, 0x0F, 0x5F, 0x08, 0xE3, 0x9C, 0xF9, 0x28, 0x4F, 0xBB, -+ 0x24, 0xDD, 0x21, 0x4C, 0x44, 0x96, 0x50, 0xB5, 0xD4, 0x8E, 0x13, 0x60, -+ 0x7C, 0xCB, 0xD9, 0x5E, 0x7C, 0xB6, 0xAD, 0xA5, 0x6A, 0x41, 0x04, 0xA7, -+ 0x8E, 0xF0, 0x39, 0x08, 0x7E, 0x18, 0x91, 0xF9, 0x46, 0x97, 0xEF, 0xF2, -+ 0x14, 0xB2, 0x01, 0xFD, 0xB2, 0x2B, 0x3A, 0xF8, 0x4A, 0x59, 0xD1, 0x36, -+ 0x1A, 0x7D, 0x2D, 0xB9, 0xC6, 0x7F, 0xDE, 0x62, 0xB6, 0x56, 0xBA, 0xFA, -+ 0x5A, 0xA1, 0x5B, 0x8C, 0x5F, 0x98, 0xEC, 0xF8, 
0x93, 0x13, 0x11, 0x42, -+ 0xEE, 0xC4, 0x6C, 0x4A, 0x87, 0x4E, 0x98, 0x22, 0xB6, 0xBB, 0xB0, 0x3A, -+ 0x70, 0xA9, 0xCC, 0xBC, 0x31, 0x27, 0xE7, 0xBC, 0xCA, 0xEC, 0x52, 0x81, -+ 0x76, 0x9A, 0x3F, 0x18, 0xC1, 0x1C, 0x4A, 0xC7, 0x56, 0xE3, 0xF0, 0x6F, -+ 0x36, 0xBB, 0x9B, 0xF9, 0x43, 0x90, 0xBE, 0x79, 0x59, 0x63, 0x1C, 0xFE, -+ 0xB6, 0x46, 0x8B, 0xBA, 0xBD, 0xAA, 0x28, 0x71, 0x9B, 0xD6, 0xDD, 0x05, -+ 0x00, 0x3B, 0xBC, 0x2D, 0x48, 0xE7, 0x6E, 0x6E, 0x42, 0x95, 0x27, 0xAE, -+ 0x93, 0x92, 0x6D, 0x59, 0x47, 0x10, 0x59, 0xAC, 0xDD, 0x95, 0x29, 0xC3, -+ 0x1B, 0x86, 0x67, 0x12, 0x98, 0x48, 0x10, 0xA6, 0x90, 0xA3, 0x59, 0x9D, -+ 0x10, 0x4E, 0xEA, 0xD8, 0xCB, 0xE3, 0x81, 0xBA, 0xA1, 0x52, 0x55, 0x78, -+ 0xFF, 0x95, 0x40, 0xE0, 0xAE, 0x93, 0x38, 0x5D, 0x21, 0x13, 0x8A, 0xFC, -+ 0x72, 0xC7, 0xFB, 0x70, 0x1C, 0xEE, 0x5D, 0xB0, 0xE5, 0xFA, 0x44, 0x86, -+ 0x67, 0x97, 0x66, 0x64, 0xA4, 0x1E, 0xF8, 0x3A, 0x16, 0xF8, 0xC9, 0xE0, -+ 0x09, 0xF3, 0x61, 0x4F - }; +-}; ++ len = RSA_public_encrypt(ptlen, plaintext, ctbuf, rsa, RSA_PKCS1_PADDING); ++ if (len <= 0) ++ goto err; ++ /* Check ciphertext doesn't match plaintext */ ++ if (len >= ptlen && !memcmp(plaintext, ctbuf, ptlen)) ++ goto err; - static const unsigned char kat_RSA_X931_SHA512[] = { +-static const unsigned char kat_RSA_X931_SHA512[] = { - 0xA6, 0x65, 0xA2, 0x77, 0x4F, 0xB3, 0x86, 0xCB, 0x64, 0x3A, 0xC1, 0x63, - 0xFC, 0xA1, 0xAA, 0xCB, 0x9B, 0x79, 0xDD, 0x4B, 0xE1, 0xD9, 0xDA, 0xAC, - 0xE7, 0x47, 0x09, 0xB2, 0x11, 0x4B, 0x8A, 0xAA, 0x05, 0x9E, 0x77, 0xD7, @@ -1040,47 +906,7 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e - 0x9F, 0x09, 0xCA, 0x84, 0x15, 0x85, 0xE0, 0xED, 0x04, 0x2D, 0xFB, 0x7C, - 0x36, 0x35, 0x21, 0x31, 0xC3, 0xFD, 0x92, 0x42, 0x11, 0x30, 0x71, 0x1B, - 0x60, 0x83, 0x18, 0x88, 0xA3, 0xF5, 0x59, 0xC3 -+ 0xC9, 0x2B, 0x6D, 0x50, 0xBB, 0xD8, 0x0B, 0x35, 0xE8, 0x78, 0xF5, 0xFC, -+ 0xBB, 0x6A, 0xB4, 0x32, 0x63, 0x9C, 0x75, 0x19, 0x1D, 0xFB, 0x68, 0xC0, -+ 0xFC, 0x34, 0xCE, 0x09, 
0xFD, 0xF4, 0x33, 0x42, 0x70, 0x24, 0x57, 0xBC, -+ 0xB3, 0xBD, 0x24, 0x33, 0x9E, 0x4B, 0x00, 0xCE, 0x15, 0xB3, 0x27, 0xC6, -+ 0x39, 0x7C, 0xC1, 0x28, 0x75, 0xFE, 0x7B, 0x76, 0x4F, 0xFB, 0x60, 0xA0, -+ 0x30, 0xBF, 0x74, 0x2C, 0x9D, 0xE4, 0xC8, 0x03, 0xA8, 0xDE, 0xB9, 0x2A, -+ 0xD9, 0x23, 0x24, 0xDC, 0xEE, 0xF0, 0xC1, 0x8B, 0x4D, 0x12, 0x4A, 0x41, -+ 0x33, 0x3B, 0x23, 0xFE, 0xDD, 0xE9, 0xE8, 0x55, 0x2B, 0x3E, 0xA4, 0x1B, -+ 0x95, 0x21, 0x2A, 0xEF, 0x84, 0x2E, 0x13, 0x3D, 0x97, 0x7C, 0x08, 0x86, -+ 0xB1, 0x60, 0xA4, 0xB9, 0xC4, 0x5A, 0x5B, 0x2D, 0x3F, 0xD7, 0x0D, 0xB2, -+ 0x41, 0x72, 0x7A, 0x7F, 0xA3, 0x12, 0xB0, 0xAD, 0x80, 0x2E, 0xD6, 0xD3, -+ 0x8A, 0x71, 0x72, 0x67, 0x94, 0x6F, 0x51, 0x05, 0x39, 0xFD, 0xBE, 0x91, -+ 0xDE, 0x1D, 0x65, 0xE4, 0xA7, 0xA6, 0x0F, 0xA5, 0x08, 0x1F, 0xFC, 0x53, -+ 0x48, 0x7B, 0xB8, 0xCE, 0x79, 0xDA, 0xDC, 0x18, 0xD1, 0xD3, 0x8A, 0x73, -+ 0xCE, 0x5A, 0x62, 0x1E, 0x33, 0xD0, 0x21, 0x9C, 0xF9, 0xDE, 0x9E, 0x7E, -+ 0x4D, 0x0E, 0x24, 0x30, 0x94, 0xB8, 0xDC, 0x8B, 0x57, 0x7E, 0x3B, 0xC6, -+ 0xD7, 0x0F, 0xFC, 0xA6, 0x1F, 0xEB, 0xAF, 0x19, 0xD0, 0xFF, 0x3D, 0x63, -+ 0x03, 0x1D, 0xAB, 0x11, 0x0C, 0xAD, 0x45, 0x46, 0x67, 0x76, 0xC8, 0x26, -+ 0xD4, 0xD4, 0x70, 0x1F, 0xDF, 0xEB, 0xE5, 0x7D, 0x75, 0xD8, 0x3B, 0x52, -+ 0x6C, 0xE7, 0x23, 0xCB, 0xB9, 0x1B, 0xA4, 0x2E, 0x5B, 0xEC, 0xB4, 0xB6, -+ 0xB6, 0x2D, 0x0B, 0x60, 0xE3, 0x7B, 0x05, 0xE8, 0x1E, 0xAD, 0xC7, 0xE7, -+ 0xBE, 0xF4, 0x71, 0xAE - }; - -+static int fips_rsa_encrypt_test(RSA *rsa, const unsigned char *plaintext, int ptlen) -+ { -+ unsigned char *ctbuf = NULL, *ptbuf = NULL; -+ int ret = 0; -+ int len; -+ -+ ctbuf = OPENSSL_malloc(RSA_size(rsa)); -+ if (!ctbuf) -+ goto err; -+ -+ len = RSA_public_encrypt(ptlen, plaintext, ctbuf, rsa, RSA_PKCS1_PADDING); -+ if (len <= 0) -+ goto err; -+ /* Check ciphertext doesn't match plaintext */ -+ if (len >= ptlen && !memcmp(plaintext, ctbuf, ptlen)) -+ goto err; -+ +-}; + ptbuf = OPENSSL_malloc(RSA_size(rsa)); + if (!ptbuf) + goto err; @@ -1092,7 +918,7 @@ 
diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e + goto err; + + ret = 1; -+ + + err: + if (ctbuf) + OPENSSL_free(ctbuf); @@ -1103,7 +929,7 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e int FIPS_selftest_rsa() { -@@ -353,7 +587,7 @@ int FIPS_selftest_rsa() +@@ -353,7 +487,7 @@ int FIPS_selftest_rsa() if ((pk=EVP_PKEY_new()) == NULL) goto err; @@ -1112,13 +938,35 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e if (!fips_pkey_signature_test(pk, kat_tbs, sizeof(kat_tbs) - 1, kat_RSA_SHA1, sizeof(kat_RSA_SHA1), -@@ -430,13 +664,15 @@ int FIPS_selftest_rsa() - "RSA SHA512 X931")) +@@ -407,36 +541,15 @@ int FIPS_selftest_rsa() + "RSA SHA512 PSS")) goto err; +- +- if (!fips_pkey_signature_test(pk, kat_tbs, sizeof(kat_tbs) - 1, +- kat_RSA_X931_SHA1, sizeof(kat_RSA_X931_SHA1), +- EVP_sha1(), EVP_MD_CTX_FLAG_PAD_X931, +- "RSA SHA1 X931")) +- goto err; +- /* NB: SHA224 not supported in X9.31 */ +- if (!fips_pkey_signature_test(pk, kat_tbs, sizeof(kat_tbs) - 1, +- kat_RSA_X931_SHA256, sizeof(kat_RSA_X931_SHA256), +- EVP_sha256(), EVP_MD_CTX_FLAG_PAD_X931, +- "RSA SHA256 X931")) +- goto err; +- if (!fips_pkey_signature_test(pk, kat_tbs, sizeof(kat_tbs) - 1, +- kat_RSA_X931_SHA384, sizeof(kat_RSA_X931_SHA384), +- EVP_sha384(), EVP_MD_CTX_FLAG_PAD_X931, +- "RSA SHA384 X931")) +- goto err; +- if (!fips_pkey_signature_test(pk, kat_tbs, sizeof(kat_tbs) - 1, +- kat_RSA_X931_SHA512, sizeof(kat_RSA_X931_SHA512), +- EVP_sha512(), EVP_MD_CTX_FLAG_PAD_X931, +- "RSA SHA512 X931")) + if (!fips_rsa_encrypt_test(key, kat_tbs, sizeof(kat_tbs) - 1)) -+ goto err; + goto err; +- ret = 1; err: @@ -1129,9 +977,9 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e RSA_free(key); return ret; } -diff -up openssl-1.0.1e/crypto/fips/Makefile.fips-reqs openssl-1.0.1e/crypto/fips/Makefile ---- openssl-1.0.1e/crypto/fips/Makefile.fips-reqs 2013-12-18 12:17:20.000000000 
+0100 -+++ openssl-1.0.1e/crypto/fips/Makefile 2013-12-18 17:14:20.348337362 +0100 +diff -up openssl-1.0.1i/crypto/fips/Makefile.fips-reqs openssl-1.0.1i/crypto/fips/Makefile +--- openssl-1.0.1i/crypto/fips/Makefile.fips-reqs 2014-08-13 19:58:06.809832370 +0200 ++++ openssl-1.0.1i/crypto/fips/Makefile 2014-08-13 19:58:06.820832624 +0200 @@ -24,13 +24,15 @@ LIBSRC=fips_aes_selftest.c fips_des_self fips_rsa_selftest.c fips_sha_selftest.c fips.c fips_dsa_selftest.c fips_rand.c \ fips_rsa_x931g.c fips_post.c fips_drbg_ctr.c fips_drbg_hash.c fips_drbg_hmac.c \ @@ -1150,10 +998,10 @@ diff -up openssl-1.0.1e/crypto/fips/Makefile.fips-reqs openssl-1.0.1e/crypto/fip LIBCRYPTO=-L.. -lcrypto -diff -up openssl-1.0.1e/crypto/modes/gcm128.c.fips-reqs openssl-1.0.1e/crypto/modes/gcm128.c ---- openssl-1.0.1e/crypto/modes/gcm128.c.fips-reqs 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/modes/gcm128.c 2013-12-18 12:17:09.800637730 +0100 -@@ -898,6 +898,10 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT +diff -up openssl-1.0.1i/crypto/modes/gcm128.c.fips-reqs openssl-1.0.1i/crypto/modes/gcm128.c +--- openssl-1.0.1i/crypto/modes/gcm128.c.fips-reqs 2014-08-13 19:58:06.740830781 +0200 ++++ openssl-1.0.1i/crypto/modes/gcm128.c 2014-08-13 19:58:06.820832624 +0200 +@@ -931,6 +931,10 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT # endif #endif @@ -1164,7 +1012,7 @@ diff -up openssl-1.0.1e/crypto/modes/gcm128.c.fips-reqs openssl-1.0.1e/crypto/mo #if 0 n = (unsigned int)mlen%16; /* alternative to ctx->mres */ #endif -@@ -1200,6 +1204,10 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_C +@@ -1294,6 +1298,10 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_C # endif #endif @@ -1175,10 +1023,10 @@ diff -up openssl-1.0.1e/crypto/modes/gcm128.c.fips-reqs openssl-1.0.1e/crypto/mo mlen += len; if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen 0); - -- if (!do_not_lock) CRYPTO_w_unlock(CRYPTO_LOCK_RAND); -+ if (locked) -+ private_RAND_lock(0); - - EVP_MD_CTX_init(&m); - for (i=0; i 0) - { -@@ 
-515,10 +494,11 @@ static int ssleay_rand_bytes(unsigned ch - MD_Init(&m); - MD_Update(&m,(unsigned char *)&(md_c[0]),sizeof(md_c)); - MD_Update(&m,local_md,MD_DIGEST_LENGTH); -- CRYPTO_w_lock(CRYPTO_LOCK_RAND); -+ locked = private_RAND_lock(1); - MD_Update(&m,md,MD_DIGEST_LENGTH); - MD_Final(&m,md); -- CRYPTO_w_unlock(CRYPTO_LOCK_RAND); -+ if (locked) -+ private_RAND_lock(0); - - EVP_MD_CTX_cleanup(&m); - if (ok) -@@ -548,32 +528,10 @@ static int ssleay_rand_pseudo_bytes(unsi - - static int ssleay_rand_status(void) - { -- CRYPTO_THREADID cur; - int ret; -- int do_not_lock; -+ int locked; - -- CRYPTO_THREADID_current(&cur); -- /* check if we already have the lock -- * (could happen if a RAND_poll() implementation calls RAND_status()) */ -- if (crypto_lock_rand) -- { -- CRYPTO_r_lock(CRYPTO_LOCK_RAND2); -- do_not_lock = !CRYPTO_THREADID_cmp(&locking_threadid, &cur); -- CRYPTO_r_unlock(CRYPTO_LOCK_RAND2); -- } -- else -- do_not_lock = 0; -- -- if (!do_not_lock) -- { -- CRYPTO_w_lock(CRYPTO_LOCK_RAND); -- -- /* prevent ssleay_rand_bytes() from trying to obtain the lock again */ -- CRYPTO_w_lock(CRYPTO_LOCK_RAND2); -- CRYPTO_THREADID_cpy(&locking_threadid, &cur); -- CRYPTO_w_unlock(CRYPTO_LOCK_RAND2); -- crypto_lock_rand = 1; -- } -+ locked = private_RAND_lock(1); - - if (!initialized) - { -@@ -583,13 +541,8 @@ static int ssleay_rand_status(void) - - ret = entropy >= ENTROPY_NEEDED; - -- if (!do_not_lock) -- { -- /* before unlocking, we must clear 'crypto_lock_rand' */ -- crypto_lock_rand = 0; -- -- CRYPTO_w_unlock(CRYPTO_LOCK_RAND); -- } -+ if (locked) -+ private_RAND_lock(0); - - return ret; - } -diff -up openssl-1.0.1e/crypto/rand/rand.h.fips-reqs openssl-1.0.1e/crypto/rand/rand.h ---- openssl-1.0.1e/crypto/rand/rand.h.fips-reqs 2013-12-18 12:17:09.764636958 +0100 -+++ openssl-1.0.1e/crypto/rand/rand.h 2013-12-18 12:17:09.800637730 +0100 -@@ -124,6 +124,8 @@ void RAND_set_fips_drbg_type(int type, i - int RAND_init_fips(void); - #endif - -+int private_RAND_lock(int 
lock); -+ - /* BEGIN ERROR CODES */ - /* The following lines are auto generated by the script mkerr.pl. Any changes - * made after this point may be overwritten when the script is next run. -diff -up openssl-1.0.1e/crypto/rand/rand_lcl.h.fips-reqs openssl-1.0.1e/crypto/rand/rand_lcl.h ---- openssl-1.0.1e/crypto/rand/rand_lcl.h.fips-reqs 2013-12-18 12:17:09.507631447 +0100 -+++ openssl-1.0.1e/crypto/rand/rand_lcl.h 2013-12-18 12:17:09.800637730 +0100 +diff -up openssl-1.0.1i/crypto/rand/rand_lcl.h.fips-reqs openssl-1.0.1i/crypto/rand/rand_lcl.h +--- openssl-1.0.1i/crypto/rand/rand_lcl.h.fips-reqs 2014-08-13 19:58:06.525825829 +0200 ++++ openssl-1.0.1i/crypto/rand/rand_lcl.h 2014-08-13 19:58:06.820832624 +0200 @@ -112,7 +112,7 @@ #ifndef HEADER_RAND_LCL_H #define HEADER_RAND_LCL_H @@ -1385,57 +1046,19 @@ diff -up openssl-1.0.1e/crypto/rand/rand_lcl.h.fips-reqs openssl-1.0.1e/crypto/r #if !defined(USE_MD5_RAND) && !defined(USE_SHA1_RAND) && !defined(USE_MDC2_RAND) && !defined(USE_MD2_RAND) -diff -up openssl-1.0.1e/crypto/rand/rand_lib.c.fips-reqs openssl-1.0.1e/crypto/rand/rand_lib.c ---- openssl-1.0.1e/crypto/rand/rand_lib.c.fips-reqs 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/crypto/rand/rand_lib.c 2013-12-18 18:16:45.625850730 +0100 -@@ -181,6 +181,41 @@ int RAND_status(void) - return 0; - } - -+int private_RAND_lock(int lock) -+ { -+ static int crypto_lock_rand; -+ static CRYPTO_THREADID locking_threadid; -+ int do_lock; -+ -+ if (!lock) -+ { -+ crypto_lock_rand = 0; -+ CRYPTO_w_unlock(CRYPTO_LOCK_RAND); -+ return 0; -+ } -+ -+ /* check if we already have the lock */ -+ if (crypto_lock_rand) -+ { -+ CRYPTO_THREADID cur; -+ CRYPTO_THREADID_current(&cur); -+ CRYPTO_r_lock(CRYPTO_LOCK_RAND2); -+ do_lock = !!CRYPTO_THREADID_cmp(&locking_threadid, &cur); -+ CRYPTO_r_unlock(CRYPTO_LOCK_RAND2); -+ } -+ else -+ do_lock = 1; -+ if (do_lock) -+ { -+ CRYPTO_w_lock(CRYPTO_LOCK_RAND); -+ crypto_lock_rand = 1; -+ CRYPTO_w_lock(CRYPTO_LOCK_RAND2); -+ 
CRYPTO_THREADID_current(&locking_threadid); -+ CRYPTO_w_unlock(CRYPTO_LOCK_RAND2); -+ } -+ return do_lock; -+ } -+ - #ifdef OPENSSL_FIPS - - /* FIPS DRBG initialisation code. This sets up the DRBG for use by the -@@ -239,12 +274,16 @@ static int drbg_rand_add(DRBG_CTX *ctx, +diff -up openssl-1.0.1i/crypto/rand/rand_lib.c.fips-reqs openssl-1.0.1i/crypto/rand/rand_lib.c +--- openssl-1.0.1i/crypto/rand/rand_lib.c.fips-reqs 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/crypto/rand/rand_lib.c 2014-08-13 19:58:06.820832624 +0200 +@@ -240,12 +240,24 @@ static int drbg_rand_add(DRBG_CTX *ctx, double entropy) { RAND_SSLeay()->add(in, inlen, entropy); + if (FIPS_rand_status()) ++ { ++ CRYPTO_w_lock(CRYPTO_LOCK_RAND); + FIPS_drbg_reseed(ctx, NULL, 0); ++ CRYPTO_w_unlock(CRYPTO_LOCK_RAND); ++ } return 1; } @@ -1443,13 +1066,17 @@ diff -up openssl-1.0.1e/crypto/rand/rand_lib.c.fips-reqs openssl-1.0.1e/crypto/r { RAND_SSLeay()->seed(in, inlen); + if (FIPS_rand_status()) ++ { ++ CRYPTO_w_lock(CRYPTO_LOCK_RAND); + FIPS_drbg_reseed(ctx, NULL, 0); ++ CRYPTO_w_unlock(CRYPTO_LOCK_RAND); ++ } return 1; } -diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1e/crypto/rsa/rsa_gen.c ---- openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs 2013-12-18 12:17:09.764636958 +0100 -+++ openssl-1.0.1e/crypto/rsa/rsa_gen.c 2013-12-19 17:40:58.483154314 +0100 +diff -up openssl-1.0.1i/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1i/crypto/rsa/rsa_gen.c +--- openssl-1.0.1i/crypto/rsa/rsa_gen.c.fips-reqs 2014-08-13 19:58:06.782831748 +0200 ++++ openssl-1.0.1i/crypto/rsa/rsa_gen.c 2014-08-13 19:58:06.821832646 +0200 @@ -1,5 +1,6 @@ /* crypto/rsa/rsa_gen.c */ /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) @@ -1480,7 +1107,7 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1e/crypto/rsa + return 0; + } + -+ if (bits != 2048 && bits != 3072) ++ if ((pbits & 0xFF) || (getenv("OPENSSL_ENFORCE_MODULUS_BITS") && bits != 2048 && bits != 3072)) + { + 
FIPSerr(FIPS_F_RSA_BUILTIN_KEYGEN, FIPS_R_INVALID_KEY_LENGTH); + return 0; @@ -1680,7 +1307,7 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1e/crypto/rsa static int rsa_builtin_keygen(RSA *rsa, int bits, BIGNUM *e_value, BN_GENCB *cb) { BIGNUM *r0=NULL,*r1=NULL,*r2=NULL,*r3=NULL,*tmp; -@@ -176,17 +393,7 @@ static int rsa_builtin_keygen(RSA *rsa, +@@ -176,17 +393,12 @@ static int rsa_builtin_keygen(RSA *rsa, #ifdef OPENSSL_FIPS if (FIPS_module_mode()) { @@ -1690,16 +1317,16 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1e/crypto/rsa - return 0; - } - -- if (bits < OPENSSL_RSA_FIPS_MIN_MODULUS_BITS) -- { -- FIPSerr(FIPS_F_RSA_BUILTIN_KEYGEN,FIPS_R_KEY_TOO_SHORT); -- return 0; -- } + if (bits < OPENSSL_RSA_FIPS_MIN_MODULUS_BITS) + { + FIPSerr(FIPS_F_RSA_BUILTIN_KEYGEN,FIPS_R_KEY_TOO_SHORT); + return 0; + } + return FIPS_rsa_builtin_keygen(rsa, bits, e_value, cb); } #endif -@@ -301,17 +508,6 @@ static int rsa_builtin_keygen(RSA *rsa, +@@ -301,17 +513,6 @@ static int rsa_builtin_keygen(RSA *rsa, p = rsa->p; if (!BN_mod_inverse(rsa->iqmp,rsa->q,p,ctx)) goto err; @@ -1717,9 +1344,9 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1e/crypto/rsa ok=1; err: if (ok == -1) -diff -up openssl-1.0.1e/ssl/t1_enc.c.fips-reqs openssl-1.0.1e/ssl/t1_enc.c ---- openssl-1.0.1e/ssl/t1_enc.c.fips-reqs 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/ssl/t1_enc.c 2013-12-18 12:17:09.801637751 +0100 +diff -up openssl-1.0.1i/ssl/t1_enc.c.fips-reqs openssl-1.0.1i/ssl/t1_enc.c +--- openssl-1.0.1i/ssl/t1_enc.c.fips-reqs 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/ssl/t1_enc.c 2014-08-13 19:58:06.821832646 +0200 @@ -291,6 +291,27 @@ static int tls1_PRF(long digest_mask, err: return ret; diff --git a/openssl-1.0.1i-ppc-asm-update.patch b/openssl-1.0.1i-ppc-asm-update.patch new file mode 100644 index 0000000..cbf220c --- /dev/null +++ b/openssl-1.0.1i-ppc-asm-update.patch @@ -0,0 +1,6636 @@ +diff -up 
openssl-1.0.1i/config.ppc-asm openssl-1.0.1i/config +--- openssl-1.0.1i/config.ppc-asm 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/config 2014-08-13 19:46:21.092578104 +0200 +@@ -587,13 +587,20 @@ case "$GUESSOS" in + fi + ;; + ppc64-*-linux2) +- echo "WARNING! If you wish to build 64-bit library, then you have to" +- echo " invoke './Configure linux-ppc64' *manually*." +- if [ "$TEST" = "false" -a -t 1 ]; then +- echo " You have about 5 seconds to press Ctrl-C to abort." +- (trap "stty `stty -g`" 2 0; stty -icanon min 0 time 50; read waste) <&1 ++ if [ -z "$KERNEL_BITS" ]; then ++ echo "WARNING! If you wish to build 64-bit library, then you have to" ++ echo " invoke './Configure linux-ppc64' *manually*." ++ if [ "$TEST" = "false" -a -t 1 ]; then ++ echo " You have about 5 seconds to press Ctrl-C to abort." ++ (trap "stty `stty -g`" 2 0; stty -icanon min 0 time 50; read waste) <&1 ++ fi ++ fi ++ if [ "$KERNEL_BITS" = "64" ]; then ++ OUT="linux-ppc64" ++ else ++ OUT="linux-ppc" ++ (echo "__LP64__" | gcc -E -x c - 2>/dev/null | grep "^__LP64__" 2>&1 > /dev/null) || options="$options -m32" + fi +- OUT="linux-ppc" + ;; + ppc-*-linux2) OUT="linux-ppc" ;; + ppc60x-*-vxworks*) OUT="vxworks-ppc60x" ;; +diff -up openssl-1.0.1i/Configure.ppc-asm openssl-1.0.1i/Configure +--- openssl-1.0.1i/Configure.ppc-asm 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/Configure 2014-08-13 19:46:21.092578104 +0200 +@@ -139,8 +139,8 @@ my $s390x_asm="s390xcap.o s390xcpuid.o:b + my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o::void"; + my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::32"; + my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o 
sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::64"; +-my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::"; +-my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::"; ++my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o aesp8-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o:::::::ghashp8-ppc.o:"; ++my $ppc32_asm=$ppc64_asm; + my $no_asm=":::::::::::::::void"; + + # As for $BSDthreads. Idea is to maintain "collective" set of flags, +@@ -357,6 +357,7 @@ my %table=( + #### + "linux-generic64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", + "linux-ppc64", "gcc:-m64 -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", ++"linux-ppc64le","gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:$ppc64_asm:linux64le:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::", + "linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", + "linux-ia64-ecc","ecc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", + "linux-ia64-icc","icc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 
DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +@@ -462,8 +463,8 @@ my %table=( + + #### IBM's AIX. + "aix3-cc", "cc:-O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::BN_LLONG RC4_CHAR:::", +-"aix-gcc", "gcc:-O -DB_ENDIAN::-pthread:AIX::BN_LLONG RC4_CHAR:${ppc32_asm}:aix32:dlfcn:aix-shared::-shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X32", +-"aix64-gcc","gcc:-maix64 -O -DB_ENDIAN::-pthread:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR:${ppc64_asm}:aix64:dlfcn:aix-shared::-maix64 -shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X64", ++"aix-gcc", "gcc:-O -DB_ENDIAN::-pthread:AIX::BN_LLONG RC4_CHAR:$ppc32_asm:aix32:dlfcn:aix-shared::-shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X32", ++"aix64-gcc","gcc:-maix64 -O -DB_ENDIAN::-pthread:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR:$ppc64_asm:aix64:dlfcn:aix-shared::-maix64 -shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X64", + # Below targets assume AIX 5. Idea is to effectively disregard $OBJECT_MODE + # at build time. $OBJECT_MODE is respected at ./config stage! + "aix-cc", "cc:-q32 -O -DB_ENDIAN -qmaxmem=16384 -qro -qroconst::-qthreaded -D_THREAD_SAFE:AIX::BN_LLONG RC4_CHAR:${ppc32_asm}:aix32:dlfcn:aix-shared::-q32 -G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 32", +@@ -1526,7 +1527,7 @@ else { + $wp_obj="wp_block.o"; + } + $cmll_obj=$cmll_enc unless ($cmll_obj =~ /.o$/); +-if ($modes_obj =~ /ghash/) ++if ($modes_obj =~ /ghash\-/) + { + $cflags.=" -DGHASH_ASM"; + } +diff -up openssl-1.0.1i/crypto/aes/asm/aes-ppc.pl.ppc-asm openssl-1.0.1i/crypto/aes/asm/aes-ppc.pl +--- openssl-1.0.1i/crypto/aes/asm/aes-ppc.pl.ppc-asm 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/crypto/aes/asm/aes-ppc.pl 2014-08-13 19:46:21.092578104 +0200 +@@ -45,6 +45,8 @@ if ($flavour =~ /64/) { + $PUSH ="stw"; + } else { die "nonsense $flavour"; } + ++$LITTLE_ENDIAN = ($flavour=~/le$/) ? 
$SIZE_T : 0; ++ + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +@@ -68,7 +70,7 @@ $key="r5"; + $Tbl0="r3"; + $Tbl1="r6"; + $Tbl2="r7"; +-$Tbl3="r2"; ++$Tbl3=$out; # stay away from "r2"; $out is offloaded to stack + + $s0="r8"; + $s1="r9"; +@@ -76,7 +78,7 @@ $s2="r10"; + $s3="r11"; + + $t0="r12"; +-$t1="r13"; ++$t1="r0"; # stay away from "r13"; + $t2="r14"; + $t3="r15"; + +@@ -100,9 +102,6 @@ $acc13="r29"; + $acc14="r30"; + $acc15="r31"; + +-# stay away from TLS pointer +-if ($SIZE_T==8) { die if ($t1 ne "r13"); $t1="r0"; } +-else { die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0"; } + $mask80=$Tbl2; + $mask1b=$Tbl3; + +@@ -337,8 +336,7 @@ $code.=<<___; + $STU $sp,-$FRAME($sp) + mflr r0 + +- $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) +- $PUSH r13,`$FRAME-$SIZE_T*19`($sp) ++ $PUSH $out,`$FRAME-$SIZE_T*19`($sp) + $PUSH r14,`$FRAME-$SIZE_T*18`($sp) + $PUSH r15,`$FRAME-$SIZE_T*17`($sp) + $PUSH r16,`$FRAME-$SIZE_T*16`($sp) +@@ -365,16 +363,61 @@ $code.=<<___; + bne Lenc_unaligned + + Lenc_unaligned_ok: ++___ ++$code.=<<___ if (!$LITTLE_ENDIAN); + lwz $s0,0($inp) + lwz $s1,4($inp) + lwz $s2,8($inp) + lwz $s3,12($inp) ++___ ++$code.=<<___ if ($LITTLE_ENDIAN); ++ lwz $t0,0($inp) ++ lwz $t1,4($inp) ++ lwz $t2,8($inp) ++ lwz $t3,12($inp) ++ rotlwi $s0,$t0,8 ++ rotlwi $s1,$t1,8 ++ rotlwi $s2,$t2,8 ++ rotlwi $s3,$t3,8 ++ rlwimi $s0,$t0,24,0,7 ++ rlwimi $s1,$t1,24,0,7 ++ rlwimi $s2,$t2,24,0,7 ++ rlwimi $s3,$t3,24,0,7 ++ rlwimi $s0,$t0,24,16,23 ++ rlwimi $s1,$t1,24,16,23 ++ rlwimi $s2,$t2,24,16,23 ++ rlwimi $s3,$t3,24,16,23 ++___ ++$code.=<<___; + bl LAES_Te + bl Lppc_AES_encrypt_compact ++ $POP $out,`$FRAME-$SIZE_T*19`($sp) ++___ ++$code.=<<___ if ($LITTLE_ENDIAN); ++ rotlwi $t0,$s0,8 ++ rotlwi $t1,$s1,8 ++ rotlwi $t2,$s2,8 ++ rotlwi $t3,$s3,8 ++ rlwimi $t0,$s0,24,0,7 ++ rlwimi $t1,$s1,24,0,7 ++ rlwimi $t2,$s2,24,0,7 ++ rlwimi $t3,$s3,24,0,7 ++ rlwimi $t0,$s0,24,16,23 ++ rlwimi 
$t1,$s1,24,16,23 ++ rlwimi $t2,$s2,24,16,23 ++ rlwimi $t3,$s3,24,16,23 ++ stw $t0,0($out) ++ stw $t1,4($out) ++ stw $t2,8($out) ++ stw $t3,12($out) ++___ ++$code.=<<___ if (!$LITTLE_ENDIAN); + stw $s0,0($out) + stw $s1,4($out) + stw $s2,8($out) + stw $s3,12($out) ++___ ++$code.=<<___; + b Lenc_done + + Lenc_unaligned: +@@ -417,6 +460,7 @@ Lenc_xpage: + + bl LAES_Te + bl Lppc_AES_encrypt_compact ++ $POP $out,`$FRAME-$SIZE_T*19`($sp) + + extrwi $acc00,$s0,8,0 + extrwi $acc01,$s0,8,8 +@@ -449,8 +493,6 @@ Lenc_xpage: + + Lenc_done: + $POP r0,`$FRAME+$LRSAVE`($sp) +- $POP $toc,`$FRAME-$SIZE_T*20`($sp) +- $POP r13,`$FRAME-$SIZE_T*19`($sp) + $POP r14,`$FRAME-$SIZE_T*18`($sp) + $POP r15,`$FRAME-$SIZE_T*17`($sp) + $POP r16,`$FRAME-$SIZE_T*16`($sp) +@@ -764,6 +806,7 @@ Lenc_compact_done: + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ++.size .AES_encrypt,.-.AES_encrypt + + .globl .AES_decrypt + .align 7 +@@ -771,8 +814,7 @@ Lenc_compact_done: + $STU $sp,-$FRAME($sp) + mflr r0 + +- $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) +- $PUSH r13,`$FRAME-$SIZE_T*19`($sp) ++ $PUSH $out,`$FRAME-$SIZE_T*19`($sp) + $PUSH r14,`$FRAME-$SIZE_T*18`($sp) + $PUSH r15,`$FRAME-$SIZE_T*17`($sp) + $PUSH r16,`$FRAME-$SIZE_T*16`($sp) +@@ -799,16 +841,61 @@ Lenc_compact_done: + bne Ldec_unaligned + + Ldec_unaligned_ok: ++___ ++$code.=<<___ if (!$LITTLE_ENDIAN); + lwz $s0,0($inp) + lwz $s1,4($inp) + lwz $s2,8($inp) + lwz $s3,12($inp) ++___ ++$code.=<<___ if ($LITTLE_ENDIAN); ++ lwz $t0,0($inp) ++ lwz $t1,4($inp) ++ lwz $t2,8($inp) ++ lwz $t3,12($inp) ++ rotlwi $s0,$t0,8 ++ rotlwi $s1,$t1,8 ++ rotlwi $s2,$t2,8 ++ rotlwi $s3,$t3,8 ++ rlwimi $s0,$t0,24,0,7 ++ rlwimi $s1,$t1,24,0,7 ++ rlwimi $s2,$t2,24,0,7 ++ rlwimi $s3,$t3,24,0,7 ++ rlwimi $s0,$t0,24,16,23 ++ rlwimi $s1,$t1,24,16,23 ++ rlwimi $s2,$t2,24,16,23 ++ rlwimi $s3,$t3,24,16,23 ++___ ++$code.=<<___; + bl LAES_Td + bl Lppc_AES_decrypt_compact ++ $POP $out,`$FRAME-$SIZE_T*19`($sp) ++___ ++$code.=<<___ if ($LITTLE_ENDIAN); ++ rotlwi $t0,$s0,8 ++ rotlwi 
$t1,$s1,8 ++ rotlwi $t2,$s2,8 ++ rotlwi $t3,$s3,8 ++ rlwimi $t0,$s0,24,0,7 ++ rlwimi $t1,$s1,24,0,7 ++ rlwimi $t2,$s2,24,0,7 ++ rlwimi $t3,$s3,24,0,7 ++ rlwimi $t0,$s0,24,16,23 ++ rlwimi $t1,$s1,24,16,23 ++ rlwimi $t2,$s2,24,16,23 ++ rlwimi $t3,$s3,24,16,23 ++ stw $t0,0($out) ++ stw $t1,4($out) ++ stw $t2,8($out) ++ stw $t3,12($out) ++___ ++$code.=<<___ if (!$LITTLE_ENDIAN); + stw $s0,0($out) + stw $s1,4($out) + stw $s2,8($out) + stw $s3,12($out) ++___ ++$code.=<<___; + b Ldec_done + + Ldec_unaligned: +@@ -851,6 +938,7 @@ Ldec_xpage: + + bl LAES_Td + bl Lppc_AES_decrypt_compact ++ $POP $out,`$FRAME-$SIZE_T*19`($sp) + + extrwi $acc00,$s0,8,0 + extrwi $acc01,$s0,8,8 +@@ -883,8 +971,6 @@ Ldec_xpage: + + Ldec_done: + $POP r0,`$FRAME+$LRSAVE`($sp) +- $POP $toc,`$FRAME-$SIZE_T*20`($sp) +- $POP r13,`$FRAME-$SIZE_T*19`($sp) + $POP r14,`$FRAME-$SIZE_T*18`($sp) + $POP r15,`$FRAME-$SIZE_T*17`($sp) + $POP r16,`$FRAME-$SIZE_T*16`($sp) +@@ -1355,6 +1441,7 @@ Ldec_compact_done: + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ++.size .AES_decrypt,.-.AES_decrypt + + .asciz "AES for PPC, CRYPTOGAMS by " + .align 7 +diff -up openssl-1.0.1i/crypto/aes/asm/aesp8-ppc.pl.ppc-asm openssl-1.0.1i/crypto/aes/asm/aesp8-ppc.pl +--- openssl-1.0.1i/crypto/aes/asm/aesp8-ppc.pl.ppc-asm 2014-08-13 19:46:21.093578128 +0200 ++++ openssl-1.0.1i/crypto/aes/asm/aesp8-ppc.pl 2014-08-13 19:46:21.093578128 +0200 +@@ -0,0 +1,1940 @@ ++#!/usr/bin/env perl ++# ++# ==================================================================== ++# Written by Andy Polyakov for the OpenSSL ++# project. The module is, however, dual licensed under OpenSSL and ++# CRYPTOGAMS licenses depending on where you obtain it. For further ++# details see http://www.openssl.org/~appro/cryptogams/. ++# ==================================================================== ++# ++# This module implements support for AES instructions as per PowerISA ++# specification version 2.07, first implemented by POWER8 processor. 
++# The module is endian-agnostic in sense that it supports both big- ++# and little-endian cases. Data alignment in parallelizable modes is ++# handled with VSX loads and stores, which implies MSR.VSX flag being ++# set. It should also be noted that ISA specification doesn't prohibit ++# alignment exceptions for these instructions on page boundaries. ++# Initially alignment was handled in pure AltiVec/VMX way [when data ++# is aligned programmatically, which in turn guarantees exception- ++# free execution], but it turned to hamper performance when vcipher ++# instructions are interleaved. It's reckoned that eventual ++# misalignment penalties at page boundaries are in average lower ++# than additional overhead in pure AltiVec approach. ++ ++$flavour = shift; ++ ++if ($flavour =~ /64/) { ++ $SIZE_T =8; ++ $LRSAVE =2*$SIZE_T; ++ $STU ="stdu"; ++ $POP ="ld"; ++ $PUSH ="std"; ++ $UCMP ="cmpld"; ++ $SHL ="sldi"; ++} elsif ($flavour =~ /32/) { ++ $SIZE_T =4; ++ $LRSAVE =$SIZE_T; ++ $STU ="stwu"; ++ $POP ="lwz"; ++ $PUSH ="stw"; ++ $UCMP ="cmplw"; ++ $SHL ="slwi"; ++} else { die "nonsense $flavour"; } ++ ++$LITTLE_ENDIAN = ($flavour=~/le$/) ? 
$SIZE_T : 0; ++ ++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ++( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or ++( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or ++die "can't locate ppc-xlate.pl"; ++ ++open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; ++ ++$FRAME=8*$SIZE_T; ++$prefix="aes_p8"; ++ ++$sp="r1"; ++$vrsave="r12"; ++ ++######################################################################### ++{{{ # Key setup procedures # ++my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8)); ++my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6)); ++my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11)); ++ ++$code.=<<___; ++.machine "any" ++ ++.text ++ ++.align 7 ++rcon: ++.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev ++.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev ++.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev ++.long 0,0,0,0 ?asis ++Lconsts: ++ mflr r0 ++ bcl 20,31,\$+4 ++ mflr $ptr #vvvvv "distance between . and rcon ++ addi $ptr,$ptr,-0x48 ++ mtlr r0 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++.asciz "AES for PowerISA 2.07, CRYPTOGAMS by " ++ ++.globl .${prefix}_set_encrypt_key ++.align 5 ++.${prefix}_set_encrypt_key: ++Lset_encrypt_key: ++ mflr r11 ++ $PUSH r11,$LRSAVE($sp) ++ ++ li $ptr,-1 ++ ${UCMP}i $inp,0 ++ beq- Lenc_key_abort # if ($inp==0) return -1; ++ ${UCMP}i $out,0 ++ beq- Lenc_key_abort # if ($out==0) return -1; ++ li $ptr,-2 ++ cmpwi $bits,128 ++ blt- Lenc_key_abort ++ cmpwi $bits,256 ++ bgt- Lenc_key_abort ++ andi. 
r0,$bits,0x3f ++ bne- Lenc_key_abort ++ ++ lis r0,0xfff0 ++ mfspr $vrsave,256 ++ mtspr 256,r0 ++ ++ bl Lconsts ++ mtlr r11 ++ ++ neg r9,$inp ++ lvx $in0,0,$inp ++ addi $inp,$inp,15 # 15 is not typo ++ lvsr $key,0,r9 # borrow $key ++ li r8,0x20 ++ cmpwi $bits,192 ++ lvx $in1,0,$inp ++ le?vspltisb $mask,0x0f # borrow $mask ++ lvx $rcon,0,$ptr ++ le?vxor $key,$key,$mask # adjust for byte swap ++ lvx $mask,r8,$ptr ++ addi $ptr,$ptr,0x10 ++ vperm $in0,$in0,$in1,$key # align [and byte swap in LE] ++ li $cnt,8 ++ vxor $zero,$zero,$zero ++ mtctr $cnt ++ ++ ?lvsr $outperm,0,$out ++ vspltisb $outmask,-1 ++ lvx $outhead,0,$out ++ ?vperm $outmask,$zero,$outmask,$outperm ++ ++ blt Loop128 ++ addi $inp,$inp,8 ++ beq L192 ++ addi $inp,$inp,8 ++ b L256 ++ ++.align 4 ++Loop128: ++ vperm $key,$in0,$in0,$mask # rotate-n-splat ++ vsldoi $tmp,$zero,$in0,12 # >>32 ++ vperm $outtail,$in0,$in0,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ vcipherlast $key,$key,$rcon ++ stvx $stage,0,$out ++ addi $out,$out,16 ++ ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vadduwm $rcon,$rcon,$rcon ++ vxor $in0,$in0,$key ++ bdnz Loop128 ++ ++ lvx $rcon,0,$ptr # last two round keys ++ ++ vperm $key,$in0,$in0,$mask # rotate-n-splat ++ vsldoi $tmp,$zero,$in0,12 # >>32 ++ vperm $outtail,$in0,$in0,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ vcipherlast $key,$key,$rcon ++ stvx $stage,0,$out ++ addi $out,$out,16 ++ ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vadduwm $rcon,$rcon,$rcon ++ vxor $in0,$in0,$key ++ ++ vperm $key,$in0,$in0,$mask # rotate-n-splat ++ vsldoi $tmp,$zero,$in0,12 # >>32 ++ vperm $outtail,$in0,$in0,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ vcipherlast $key,$key,$rcon 
++ stvx $stage,0,$out ++ addi $out,$out,16 ++ ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vxor $in0,$in0,$key ++ vperm $outtail,$in0,$in0,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ stvx $stage,0,$out ++ ++ addi $inp,$out,15 # 15 is not typo ++ addi $out,$out,0x50 ++ ++ li $rounds,10 ++ b Ldone ++ ++.align 4 ++L192: ++ lvx $tmp,0,$inp ++ li $cnt,4 ++ vperm $outtail,$in0,$in0,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ stvx $stage,0,$out ++ addi $out,$out,16 ++ vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] ++ vspltisb $key,8 # borrow $key ++ mtctr $cnt ++ vsububm $mask,$mask,$key # adjust the mask ++ ++Loop192: ++ vperm $key,$in1,$in1,$mask # roate-n-splat ++ vsldoi $tmp,$zero,$in0,12 # >>32 ++ vcipherlast $key,$key,$rcon ++ ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ ++ vsldoi $stage,$zero,$in1,8 ++ vspltw $tmp,$in0,3 ++ vxor $tmp,$tmp,$in1 ++ vsldoi $in1,$zero,$in1,12 # >>32 ++ vadduwm $rcon,$rcon,$rcon ++ vxor $in1,$in1,$tmp ++ vxor $in0,$in0,$key ++ vxor $in1,$in1,$key ++ vsldoi $stage,$stage,$in0,8 ++ ++ vperm $key,$in1,$in1,$mask # rotate-n-splat ++ vsldoi $tmp,$zero,$in0,12 # >>32 ++ vperm $outtail,$stage,$stage,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ vcipherlast $key,$key,$rcon ++ stvx $stage,0,$out ++ addi $out,$out,16 ++ ++ vsldoi $stage,$in0,$in1,8 ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vperm $outtail,$stage,$stage,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ stvx $stage,0,$out ++ addi $out,$out,16 ++ ++ vspltw $tmp,$in0,3 ++ vxor $tmp,$tmp,$in1 ++ vsldoi 
$in1,$zero,$in1,12 # >>32 ++ vadduwm $rcon,$rcon,$rcon ++ vxor $in1,$in1,$tmp ++ vxor $in0,$in0,$key ++ vxor $in1,$in1,$key ++ vperm $outtail,$in0,$in0,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ stvx $stage,0,$out ++ addi $inp,$out,15 # 15 is not typo ++ addi $out,$out,16 ++ bdnz Loop192 ++ ++ li $rounds,12 ++ addi $out,$out,0x20 ++ b Ldone ++ ++.align 4 ++L256: ++ lvx $tmp,0,$inp ++ li $cnt,7 ++ li $rounds,14 ++ vperm $outtail,$in0,$in0,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ stvx $stage,0,$out ++ addi $out,$out,16 ++ vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] ++ mtctr $cnt ++ ++Loop256: ++ vperm $key,$in1,$in1,$mask # rotate-n-splat ++ vsldoi $tmp,$zero,$in0,12 # >>32 ++ vperm $outtail,$in1,$in1,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ vcipherlast $key,$key,$rcon ++ stvx $stage,0,$out ++ addi $out,$out,16 ++ ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vadduwm $rcon,$rcon,$rcon ++ vxor $in0,$in0,$key ++ vperm $outtail,$in0,$in0,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ stvx $stage,0,$out ++ addi $inp,$out,15 # 15 is not typo ++ addi $out,$out,16 ++ bdz Ldone ++ ++ vspltw $key,$in0,3 # just splat ++ vsldoi $tmp,$zero,$in1,12 # >>32 ++ vsbox $key,$key ++ ++ vxor $in1,$in1,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in1,$in1,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in1,$in1,$tmp ++ ++ vxor $in1,$in1,$key ++ b Loop256 ++ ++.align 4 ++Ldone: ++ lvx $in1,0,$inp # redundant in aligned case ++ vsel $in1,$outhead,$in1,$outmask ++ stvx $in1,0,$inp ++ li $ptr,0 ++ mtspr 256,$vrsave ++ stw $rounds,0($out) ++ ++Lenc_key_abort: ++ mr r3,$ptr ++ blr ++ .long 0 ++ .byte 0,12,0x14,1,0,0,3,0 ++ .long 0 ++.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key ++ 
++.globl .${prefix}_set_decrypt_key ++.align 5 ++.${prefix}_set_decrypt_key: ++ $STU $sp,-$FRAME($sp) ++ mflr r10 ++ $PUSH r10,$FRAME+$LRSAVE($sp) ++ bl Lset_encrypt_key ++ mtlr r10 ++ ++ cmpwi r3,0 ++ bne- Ldec_key_abort ++ ++ slwi $cnt,$rounds,4 ++ subi $inp,$out,240 # first round key ++ srwi $rounds,$rounds,1 ++ add $out,$inp,$cnt # last round key ++ mtctr $rounds ++ ++Ldeckey: ++ lwz r0, 0($inp) ++ lwz r6, 4($inp) ++ lwz r7, 8($inp) ++ lwz r8, 12($inp) ++ addi $inp,$inp,16 ++ lwz r9, 0($out) ++ lwz r10,4($out) ++ lwz r11,8($out) ++ lwz r12,12($out) ++ stw r0, 0($out) ++ stw r6, 4($out) ++ stw r7, 8($out) ++ stw r8, 12($out) ++ subi $out,$out,16 ++ stw r9, -16($inp) ++ stw r10,-12($inp) ++ stw r11,-8($inp) ++ stw r12,-4($inp) ++ bdnz Ldeckey ++ ++ xor r3,r3,r3 # return value ++Ldec_key_abort: ++ addi $sp,$sp,$FRAME ++ blr ++ .long 0 ++ .byte 0,12,4,1,0x80,0,3,0 ++ .long 0 ++.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key ++___ ++}}} ++######################################################################### ++{{{ # Single block en- and decrypt procedures # ++sub gen_block () { ++my $dir = shift; ++my $n = $dir eq "de" ? 
"n" : ""; ++my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); ++ ++$code.=<<___; ++.globl .${prefix}_${dir}crypt ++.align 5 ++.${prefix}_${dir}crypt: ++ lwz $rounds,240($key) ++ lis r0,0xfc00 ++ mfspr $vrsave,256 ++ li $idx,15 # 15 is not typo ++ mtspr 256,r0 ++ ++ lvx v0,0,$inp ++ neg r11,$out ++ lvx v1,$idx,$inp ++ lvsl v2,0,$inp # inpperm ++ le?vspltisb v4,0x0f ++ ?lvsl v3,0,r11 # outperm ++ le?vxor v2,v2,v4 ++ li $idx,16 ++ vperm v0,v0,v1,v2 # align [and byte swap in LE] ++ lvx v1,0,$key ++ ?lvsl v5,0,$key # keyperm ++ srwi $rounds,$rounds,1 ++ lvx v2,$idx,$key ++ addi $idx,$idx,16 ++ subi $rounds,$rounds,1 ++ ?vperm v1,v1,v2,v5 # align round key ++ ++ vxor v0,v0,v1 ++ lvx v1,$idx,$key ++ addi $idx,$idx,16 ++ mtctr $rounds ++ ++Loop_${dir}c: ++ ?vperm v2,v2,v1,v5 ++ v${n}cipher v0,v0,v2 ++ lvx v2,$idx,$key ++ addi $idx,$idx,16 ++ ?vperm v1,v1,v2,v5 ++ v${n}cipher v0,v0,v1 ++ lvx v1,$idx,$key ++ addi $idx,$idx,16 ++ bdnz Loop_${dir}c ++ ++ ?vperm v2,v2,v1,v5 ++ v${n}cipher v0,v0,v2 ++ lvx v2,$idx,$key ++ ?vperm v1,v1,v2,v5 ++ v${n}cipherlast v0,v0,v1 ++ ++ vspltisb v2,-1 ++ vxor v1,v1,v1 ++ li $idx,15 # 15 is not typo ++ ?vperm v2,v1,v2,v3 # outmask ++ le?vxor v3,v3,v4 ++ lvx v1,0,$out # outhead ++ vperm v0,v0,v0,v3 # rotate [and byte swap in LE] ++ vsel v1,v1,v0,v2 ++ lvx v4,$idx,$out ++ stvx v1,0,$out ++ vsel v0,v0,v4,v2 ++ stvx v0,$idx,$out ++ ++ mtspr 256,$vrsave ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,3,0 ++ .long 0 ++.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt ++___ ++} ++&gen_block("en"); ++&gen_block("de"); ++}}} ++######################################################################### ++{{{ # CBC en- and decrypt procedures # ++my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10)); ++my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); ++my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)= ++ map("v$_",(4..10)); ++$code.=<<___; ++.globl .${prefix}_cbc_encrypt ++.align 5 ++.${prefix}_cbc_encrypt: ++ ${UCMP}i 
$len,16 ++ bltlr- ++ ++ cmpwi $enc,0 # test direction ++ lis r0,0xffe0 ++ mfspr $vrsave,256 ++ mtspr 256,r0 ++ ++ li $idx,15 ++ vxor $rndkey0,$rndkey0,$rndkey0 ++ le?vspltisb $tmp,0x0f ++ ++ lvx $ivec,0,$ivp # load [unaligned] iv ++ lvsl $inpperm,0,$ivp ++ lvx $inptail,$idx,$ivp ++ le?vxor $inpperm,$inpperm,$tmp ++ vperm $ivec,$ivec,$inptail,$inpperm ++ ++ neg r11,$inp ++ ?lvsl $keyperm,0,$key # prepare for unaligned key ++ lwz $rounds,240($key) ++ ++ lvsr $inpperm,0,r11 # prepare for unaligned load ++ lvx $inptail,0,$inp ++ addi $inp,$inp,15 # 15 is not typo ++ le?vxor $inpperm,$inpperm,$tmp ++ ++ ?lvsr $outperm,0,$out # prepare for unaligned store ++ vspltisb $outmask,-1 ++ lvx $outhead,0,$out ++ ?vperm $outmask,$rndkey0,$outmask,$outperm ++ le?vxor $outperm,$outperm,$tmp ++ ++ srwi $rounds,$rounds,1 ++ li $idx,16 ++ subi $rounds,$rounds,1 ++ beq Lcbc_dec ++ ++Lcbc_enc: ++ vmr $inout,$inptail ++ lvx $inptail,0,$inp ++ addi $inp,$inp,16 ++ mtctr $rounds ++ subi $len,$len,16 # len-=16 ++ ++ lvx $rndkey0,0,$key ++ vperm $inout,$inout,$inptail,$inpperm ++ lvx $rndkey1,$idx,$key ++ addi $idx,$idx,16 ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vxor $inout,$inout,$rndkey0 ++ lvx $rndkey0,$idx,$key ++ addi $idx,$idx,16 ++ vxor $inout,$inout,$ivec ++ ++Loop_cbc_enc: ++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm ++ vcipher $inout,$inout,$rndkey1 ++ lvx $rndkey1,$idx,$key ++ addi $idx,$idx,16 ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vcipher $inout,$inout,$rndkey0 ++ lvx $rndkey0,$idx,$key ++ addi $idx,$idx,16 ++ bdnz Loop_cbc_enc ++ ++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm ++ vcipher $inout,$inout,$rndkey1 ++ lvx $rndkey1,$idx,$key ++ li $idx,16 ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vcipherlast $ivec,$inout,$rndkey0 ++ ${UCMP}i $len,16 ++ ++ vperm $tmp,$ivec,$ivec,$outperm ++ vsel $inout,$outhead,$tmp,$outmask ++ vmr $outhead,$tmp ++ stvx $inout,0,$out ++ addi $out,$out,16 ++ bge Lcbc_enc ++ ++ b Lcbc_done ++ ++.align 4 ++Lcbc_dec: ++ ${UCMP}i 
$len,128 ++ bge _aesp8_cbc_decrypt8x ++ vmr $tmp,$inptail ++ lvx $inptail,0,$inp ++ addi $inp,$inp,16 ++ mtctr $rounds ++ subi $len,$len,16 # len-=16 ++ ++ lvx $rndkey0,0,$key ++ vperm $tmp,$tmp,$inptail,$inpperm ++ lvx $rndkey1,$idx,$key ++ addi $idx,$idx,16 ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vxor $inout,$tmp,$rndkey0 ++ lvx $rndkey0,$idx,$key ++ addi $idx,$idx,16 ++ ++Loop_cbc_dec: ++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm ++ vncipher $inout,$inout,$rndkey1 ++ lvx $rndkey1,$idx,$key ++ addi $idx,$idx,16 ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vncipher $inout,$inout,$rndkey0 ++ lvx $rndkey0,$idx,$key ++ addi $idx,$idx,16 ++ bdnz Loop_cbc_dec ++ ++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm ++ vncipher $inout,$inout,$rndkey1 ++ lvx $rndkey1,$idx,$key ++ li $idx,16 ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vncipherlast $inout,$inout,$rndkey0 ++ ${UCMP}i $len,16 ++ ++ vxor $inout,$inout,$ivec ++ vmr $ivec,$tmp ++ vperm $tmp,$inout,$inout,$outperm ++ vsel $inout,$outhead,$tmp,$outmask ++ vmr $outhead,$tmp ++ stvx $inout,0,$out ++ addi $out,$out,16 ++ bge Lcbc_dec ++ ++Lcbc_done: ++ addi $out,$out,-1 ++ lvx $inout,0,$out # redundant in aligned case ++ vsel $inout,$outhead,$inout,$outmask ++ stvx $inout,0,$out ++ ++ neg $enc,$ivp # write [unaligned] iv ++ li $idx,15 # 15 is not typo ++ vxor $rndkey0,$rndkey0,$rndkey0 ++ vspltisb $outmask,-1 ++ le?vspltisb $tmp,0x0f ++ ?lvsl $outperm,0,$enc ++ ?vperm $outmask,$rndkey0,$outmask,$outperm ++ le?vxor $outperm,$outperm,$tmp ++ lvx $outhead,0,$ivp ++ vperm $ivec,$ivec,$ivec,$outperm ++ vsel $inout,$outhead,$ivec,$outmask ++ lvx $inptail,$idx,$ivp ++ stvx $inout,0,$ivp ++ vsel $inout,$ivec,$inptail,$outmask ++ stvx $inout,$idx,$ivp ++ ++ mtspr 256,$vrsave ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,6,0 ++ .long 0 ++___ ++######################################################################### ++{{ # Optimized CBC decrypt procedure # ++my $key_="r11"; ++my 
($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); ++my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13)); ++my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21)); ++my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys ++ # v26-v31 last 6 round keys ++my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment ++ ++$code.=<<___; ++.align 5 ++_aesp8_cbc_decrypt8x: ++ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) ++ li r10,`$FRAME+8*16+15` ++ li r11,`$FRAME+8*16+31` ++ stvx v20,r10,$sp # ABI says so ++ addi r10,r10,32 ++ stvx v21,r11,$sp ++ addi r11,r11,32 ++ stvx v22,r10,$sp ++ addi r10,r10,32 ++ stvx v23,r11,$sp ++ addi r11,r11,32 ++ stvx v24,r10,$sp ++ addi r10,r10,32 ++ stvx v25,r11,$sp ++ addi r11,r11,32 ++ stvx v26,r10,$sp ++ addi r10,r10,32 ++ stvx v27,r11,$sp ++ addi r11,r11,32 ++ stvx v28,r10,$sp ++ addi r10,r10,32 ++ stvx v29,r11,$sp ++ addi r11,r11,32 ++ stvx v30,r10,$sp ++ stvx v31,r11,$sp ++ li r0,-1 ++ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave ++ li $x10,0x10 ++ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) ++ li $x20,0x20 ++ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) ++ li $x30,0x30 ++ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) ++ li $x40,0x40 ++ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) ++ li $x50,0x50 ++ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) ++ li $x60,0x60 ++ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) ++ li $x70,0x70 ++ mtspr 256,r0 ++ ++ subi $rounds,$rounds,3 # -4 in total ++ subi $len,$len,128 # bias ++ ++ lvx $rndkey0,$x00,$key # load key schedule ++ lvx v30,$x10,$key ++ addi $key,$key,0x20 ++ lvx v31,$x00,$key ++ ?vperm $rndkey0,$rndkey0,v30,$keyperm ++ addi $key_,$sp,$FRAME+15 ++ mtctr $rounds ++ ++Load_cbc_dec_key: ++ ?vperm v24,v30,v31,$keyperm ++ lvx v30,$x10,$key ++ addi $key,$key,0x20 ++ stvx v24,$x00,$key_ # off-load round[1] ++ ?vperm v25,v31,v30,$keyperm ++ lvx v31,$x00,$key ++ stvx v25,$x10,$key_ # off-load round[2] ++ addi $key_,$key_,0x20 ++ bdnz 
Load_cbc_dec_key ++ ++ lvx v26,$x10,$key ++ ?vperm v24,v30,v31,$keyperm ++ lvx v27,$x20,$key ++ stvx v24,$x00,$key_ # off-load round[3] ++ ?vperm v25,v31,v26,$keyperm ++ lvx v28,$x30,$key ++ stvx v25,$x10,$key_ # off-load round[4] ++ addi $key_,$sp,$FRAME+15 # rewind $key_ ++ ?vperm v26,v26,v27,$keyperm ++ lvx v29,$x40,$key ++ ?vperm v27,v27,v28,$keyperm ++ lvx v30,$x50,$key ++ ?vperm v28,v28,v29,$keyperm ++ lvx v31,$x60,$key ++ ?vperm v29,v29,v30,$keyperm ++ lvx $out0,$x70,$key # borrow $out0 ++ ?vperm v30,v30,v31,$keyperm ++ lvx v24,$x00,$key_ # pre-load round[1] ++ ?vperm v31,v31,$out0,$keyperm ++ lvx v25,$x10,$key_ # pre-load round[2] ++ ++ #lvx $inptail,0,$inp # "caller" already did this ++ #addi $inp,$inp,15 # 15 is not typo ++ subi $inp,$inp,15 # undo "caller" ++ ++ le?li $idx,8 ++ lvx_u $in0,$x00,$inp # load first 8 "words" ++ le?lvsl $inpperm,0,$idx ++ le?vspltisb $tmp,0x0f ++ lvx_u $in1,$x10,$inp ++ le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u ++ lvx_u $in2,$x20,$inp ++ le?vperm $in0,$in0,$in0,$inpperm ++ lvx_u $in3,$x30,$inp ++ le?vperm $in1,$in1,$in1,$inpperm ++ lvx_u $in4,$x40,$inp ++ le?vperm $in2,$in2,$in2,$inpperm ++ vxor $out0,$in0,$rndkey0 ++ lvx_u $in5,$x50,$inp ++ le?vperm $in3,$in3,$in3,$inpperm ++ vxor $out1,$in1,$rndkey0 ++ lvx_u $in6,$x60,$inp ++ le?vperm $in4,$in4,$in4,$inpperm ++ vxor $out2,$in2,$rndkey0 ++ lvx_u $in7,$x70,$inp ++ addi $inp,$inp,0x80 ++ le?vperm $in5,$in5,$in5,$inpperm ++ vxor $out3,$in3,$rndkey0 ++ le?vperm $in6,$in6,$in6,$inpperm ++ vxor $out4,$in4,$rndkey0 ++ le?vperm $in7,$in7,$in7,$inpperm ++ vxor $out5,$in5,$rndkey0 ++ vxor $out6,$in6,$rndkey0 ++ vxor $out7,$in7,$rndkey0 ++ ++ mtctr $rounds ++ b Loop_cbc_dec8x ++.align 5 ++Loop_cbc_dec8x: ++ vncipher $out0,$out0,v24 ++ vncipher $out1,$out1,v24 ++ vncipher $out2,$out2,v24 ++ vncipher $out3,$out3,v24 ++ vncipher $out4,$out4,v24 ++ vncipher $out5,$out5,v24 ++ vncipher $out6,$out6,v24 ++ vncipher $out7,$out7,v24 ++ lvx v24,$x20,$key_ # round[3] ++ addi 
$key_,$key_,0x20 ++ ++ vncipher $out0,$out0,v25 ++ vncipher $out1,$out1,v25 ++ vncipher $out2,$out2,v25 ++ vncipher $out3,$out3,v25 ++ vncipher $out4,$out4,v25 ++ vncipher $out5,$out5,v25 ++ vncipher $out6,$out6,v25 ++ vncipher $out7,$out7,v25 ++ lvx v25,$x10,$key_ # round[4] ++ bdnz Loop_cbc_dec8x ++ ++ subic $len,$len,128 # $len-=128 ++ vncipher $out0,$out0,v24 ++ vncipher $out1,$out1,v24 ++ vncipher $out2,$out2,v24 ++ vncipher $out3,$out3,v24 ++ vncipher $out4,$out4,v24 ++ vncipher $out5,$out5,v24 ++ vncipher $out6,$out6,v24 ++ vncipher $out7,$out7,v24 ++ ++ subfe. r0,r0,r0 # borrow?-1:0 ++ vncipher $out0,$out0,v25 ++ vncipher $out1,$out1,v25 ++ vncipher $out2,$out2,v25 ++ vncipher $out3,$out3,v25 ++ vncipher $out4,$out4,v25 ++ vncipher $out5,$out5,v25 ++ vncipher $out6,$out6,v25 ++ vncipher $out7,$out7,v25 ++ ++ and r0,r0,$len ++ vncipher $out0,$out0,v26 ++ vncipher $out1,$out1,v26 ++ vncipher $out2,$out2,v26 ++ vncipher $out3,$out3,v26 ++ vncipher $out4,$out4,v26 ++ vncipher $out5,$out5,v26 ++ vncipher $out6,$out6,v26 ++ vncipher $out7,$out7,v26 ++ ++ add $inp,$inp,r0 # $inp is adjusted in such ++ # way that at exit from the ++ # loop inX-in7 are loaded ++ # with last "words" ++ vncipher $out0,$out0,v27 ++ vncipher $out1,$out1,v27 ++ vncipher $out2,$out2,v27 ++ vncipher $out3,$out3,v27 ++ vncipher $out4,$out4,v27 ++ vncipher $out5,$out5,v27 ++ vncipher $out6,$out6,v27 ++ vncipher $out7,$out7,v27 ++ ++ addi $key_,$sp,$FRAME+15 # rewind $key_ ++ vncipher $out0,$out0,v28 ++ vncipher $out1,$out1,v28 ++ vncipher $out2,$out2,v28 ++ vncipher $out3,$out3,v28 ++ vncipher $out4,$out4,v28 ++ vncipher $out5,$out5,v28 ++ vncipher $out6,$out6,v28 ++ vncipher $out7,$out7,v28 ++ lvx v24,$x00,$key_ # re-pre-load round[1] ++ ++ vncipher $out0,$out0,v29 ++ vncipher $out1,$out1,v29 ++ vncipher $out2,$out2,v29 ++ vncipher $out3,$out3,v29 ++ vncipher $out4,$out4,v29 ++ vncipher $out5,$out5,v29 ++ vncipher $out6,$out6,v29 ++ vncipher $out7,$out7,v29 ++ lvx v25,$x10,$key_ # 
re-pre-load round[2] ++ ++ vncipher $out0,$out0,v30 ++ vxor $ivec,$ivec,v31 # xor with last round key ++ vncipher $out1,$out1,v30 ++ vxor $in0,$in0,v31 ++ vncipher $out2,$out2,v30 ++ vxor $in1,$in1,v31 ++ vncipher $out3,$out3,v30 ++ vxor $in2,$in2,v31 ++ vncipher $out4,$out4,v30 ++ vxor $in3,$in3,v31 ++ vncipher $out5,$out5,v30 ++ vxor $in4,$in4,v31 ++ vncipher $out6,$out6,v30 ++ vxor $in5,$in5,v31 ++ vncipher $out7,$out7,v30 ++ vxor $in6,$in6,v31 ++ ++ vncipherlast $out0,$out0,$ivec ++ vncipherlast $out1,$out1,$in0 ++ lvx_u $in0,$x00,$inp # load next input block ++ vncipherlast $out2,$out2,$in1 ++ lvx_u $in1,$x10,$inp ++ vncipherlast $out3,$out3,$in2 ++ le?vperm $in0,$in0,$in0,$inpperm ++ lvx_u $in2,$x20,$inp ++ vncipherlast $out4,$out4,$in3 ++ le?vperm $in1,$in1,$in1,$inpperm ++ lvx_u $in3,$x30,$inp ++ vncipherlast $out5,$out5,$in4 ++ le?vperm $in2,$in2,$in2,$inpperm ++ lvx_u $in4,$x40,$inp ++ vncipherlast $out6,$out6,$in5 ++ le?vperm $in3,$in3,$in3,$inpperm ++ lvx_u $in5,$x50,$inp ++ vncipherlast $out7,$out7,$in6 ++ le?vperm $in4,$in4,$in4,$inpperm ++ lvx_u $in6,$x60,$inp ++ vmr $ivec,$in7 ++ le?vperm $in5,$in5,$in5,$inpperm ++ lvx_u $in7,$x70,$inp ++ addi $inp,$inp,0x80 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ le?vperm $out1,$out1,$out1,$inpperm ++ stvx_u $out0,$x00,$out ++ le?vperm $in6,$in6,$in6,$inpperm ++ vxor $out0,$in0,$rndkey0 ++ le?vperm $out2,$out2,$out2,$inpperm ++ stvx_u $out1,$x10,$out ++ le?vperm $in7,$in7,$in7,$inpperm ++ vxor $out1,$in1,$rndkey0 ++ le?vperm $out3,$out3,$out3,$inpperm ++ stvx_u $out2,$x20,$out ++ vxor $out2,$in2,$rndkey0 ++ le?vperm $out4,$out4,$out4,$inpperm ++ stvx_u $out3,$x30,$out ++ vxor $out3,$in3,$rndkey0 ++ le?vperm $out5,$out5,$out5,$inpperm ++ stvx_u $out4,$x40,$out ++ vxor $out4,$in4,$rndkey0 ++ le?vperm $out6,$out6,$out6,$inpperm ++ stvx_u $out5,$x50,$out ++ vxor $out5,$in5,$rndkey0 ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out6,$x60,$out ++ vxor $out6,$in6,$rndkey0 ++ stvx_u $out7,$x70,$out ++ addi 
$out,$out,0x80 ++ vxor $out7,$in7,$rndkey0 ++ ++ mtctr $rounds ++ beq Loop_cbc_dec8x # did $len-=128 borrow? ++ ++ addic. $len,$len,128 ++ beq Lcbc_dec8x_done ++ nop ++ nop ++ ++Loop_cbc_dec8x_tail: # up to 7 "words" tail... ++ vncipher $out1,$out1,v24 ++ vncipher $out2,$out2,v24 ++ vncipher $out3,$out3,v24 ++ vncipher $out4,$out4,v24 ++ vncipher $out5,$out5,v24 ++ vncipher $out6,$out6,v24 ++ vncipher $out7,$out7,v24 ++ lvx v24,$x20,$key_ # round[3] ++ addi $key_,$key_,0x20 ++ ++ vncipher $out1,$out1,v25 ++ vncipher $out2,$out2,v25 ++ vncipher $out3,$out3,v25 ++ vncipher $out4,$out4,v25 ++ vncipher $out5,$out5,v25 ++ vncipher $out6,$out6,v25 ++ vncipher $out7,$out7,v25 ++ lvx v25,$x10,$key_ # round[4] ++ bdnz Loop_cbc_dec8x_tail ++ ++ vncipher $out1,$out1,v24 ++ vncipher $out2,$out2,v24 ++ vncipher $out3,$out3,v24 ++ vncipher $out4,$out4,v24 ++ vncipher $out5,$out5,v24 ++ vncipher $out6,$out6,v24 ++ vncipher $out7,$out7,v24 ++ ++ vncipher $out1,$out1,v25 ++ vncipher $out2,$out2,v25 ++ vncipher $out3,$out3,v25 ++ vncipher $out4,$out4,v25 ++ vncipher $out5,$out5,v25 ++ vncipher $out6,$out6,v25 ++ vncipher $out7,$out7,v25 ++ ++ vncipher $out1,$out1,v26 ++ vncipher $out2,$out2,v26 ++ vncipher $out3,$out3,v26 ++ vncipher $out4,$out4,v26 ++ vncipher $out5,$out5,v26 ++ vncipher $out6,$out6,v26 ++ vncipher $out7,$out7,v26 ++ ++ vncipher $out1,$out1,v27 ++ vncipher $out2,$out2,v27 ++ vncipher $out3,$out3,v27 ++ vncipher $out4,$out4,v27 ++ vncipher $out5,$out5,v27 ++ vncipher $out6,$out6,v27 ++ vncipher $out7,$out7,v27 ++ ++ vncipher $out1,$out1,v28 ++ vncipher $out2,$out2,v28 ++ vncipher $out3,$out3,v28 ++ vncipher $out4,$out4,v28 ++ vncipher $out5,$out5,v28 ++ vncipher $out6,$out6,v28 ++ vncipher $out7,$out7,v28 ++ ++ vncipher $out1,$out1,v29 ++ vncipher $out2,$out2,v29 ++ vncipher $out3,$out3,v29 ++ vncipher $out4,$out4,v29 ++ vncipher $out5,$out5,v29 ++ vncipher $out6,$out6,v29 ++ vncipher $out7,$out7,v29 ++ ++ vncipher $out1,$out1,v30 ++ vxor $ivec,$ivec,v31 # last 
round key ++ vncipher $out2,$out2,v30 ++ vxor $in1,$in1,v31 ++ vncipher $out3,$out3,v30 ++ vxor $in2,$in2,v31 ++ vncipher $out4,$out4,v30 ++ vxor $in3,$in3,v31 ++ vncipher $out5,$out5,v30 ++ vxor $in4,$in4,v31 ++ vncipher $out6,$out6,v30 ++ vxor $in5,$in5,v31 ++ vncipher $out7,$out7,v30 ++ vxor $in6,$in6,v31 ++ ++ cmplwi $len,32 # switch($len) ++ blt Lcbc_dec8x_one ++ nop ++ beq Lcbc_dec8x_two ++ cmplwi $len,64 ++ blt Lcbc_dec8x_three ++ nop ++ beq Lcbc_dec8x_four ++ cmplwi $len,96 ++ blt Lcbc_dec8x_five ++ nop ++ beq Lcbc_dec8x_six ++ ++Lcbc_dec8x_seven: ++ vncipherlast $out1,$out1,$ivec ++ vncipherlast $out2,$out2,$in1 ++ vncipherlast $out3,$out3,$in2 ++ vncipherlast $out4,$out4,$in3 ++ vncipherlast $out5,$out5,$in4 ++ vncipherlast $out6,$out6,$in5 ++ vncipherlast $out7,$out7,$in6 ++ vmr $ivec,$in7 ++ ++ le?vperm $out1,$out1,$out1,$inpperm ++ le?vperm $out2,$out2,$out2,$inpperm ++ stvx_u $out1,$x00,$out ++ le?vperm $out3,$out3,$out3,$inpperm ++ stvx_u $out2,$x10,$out ++ le?vperm $out4,$out4,$out4,$inpperm ++ stvx_u $out3,$x20,$out ++ le?vperm $out5,$out5,$out5,$inpperm ++ stvx_u $out4,$x30,$out ++ le?vperm $out6,$out6,$out6,$inpperm ++ stvx_u $out5,$x40,$out ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out6,$x50,$out ++ stvx_u $out7,$x60,$out ++ addi $out,$out,0x70 ++ b Lcbc_dec8x_done ++ ++.align 5 ++Lcbc_dec8x_six: ++ vncipherlast $out2,$out2,$ivec ++ vncipherlast $out3,$out3,$in2 ++ vncipherlast $out4,$out4,$in3 ++ vncipherlast $out5,$out5,$in4 ++ vncipherlast $out6,$out6,$in5 ++ vncipherlast $out7,$out7,$in6 ++ vmr $ivec,$in7 ++ ++ le?vperm $out2,$out2,$out2,$inpperm ++ le?vperm $out3,$out3,$out3,$inpperm ++ stvx_u $out2,$x00,$out ++ le?vperm $out4,$out4,$out4,$inpperm ++ stvx_u $out3,$x10,$out ++ le?vperm $out5,$out5,$out5,$inpperm ++ stvx_u $out4,$x20,$out ++ le?vperm $out6,$out6,$out6,$inpperm ++ stvx_u $out5,$x30,$out ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out6,$x40,$out ++ stvx_u $out7,$x50,$out ++ addi $out,$out,0x60 ++ b Lcbc_dec8x_done 
++ ++.align 5 ++Lcbc_dec8x_five: ++ vncipherlast $out3,$out3,$ivec ++ vncipherlast $out4,$out4,$in3 ++ vncipherlast $out5,$out5,$in4 ++ vncipherlast $out6,$out6,$in5 ++ vncipherlast $out7,$out7,$in6 ++ vmr $ivec,$in7 ++ ++ le?vperm $out3,$out3,$out3,$inpperm ++ le?vperm $out4,$out4,$out4,$inpperm ++ stvx_u $out3,$x00,$out ++ le?vperm $out5,$out5,$out5,$inpperm ++ stvx_u $out4,$x10,$out ++ le?vperm $out6,$out6,$out6,$inpperm ++ stvx_u $out5,$x20,$out ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out6,$x30,$out ++ stvx_u $out7,$x40,$out ++ addi $out,$out,0x50 ++ b Lcbc_dec8x_done ++ ++.align 5 ++Lcbc_dec8x_four: ++ vncipherlast $out4,$out4,$ivec ++ vncipherlast $out5,$out5,$in4 ++ vncipherlast $out6,$out6,$in5 ++ vncipherlast $out7,$out7,$in6 ++ vmr $ivec,$in7 ++ ++ le?vperm $out4,$out4,$out4,$inpperm ++ le?vperm $out5,$out5,$out5,$inpperm ++ stvx_u $out4,$x00,$out ++ le?vperm $out6,$out6,$out6,$inpperm ++ stvx_u $out5,$x10,$out ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out6,$x20,$out ++ stvx_u $out7,$x30,$out ++ addi $out,$out,0x40 ++ b Lcbc_dec8x_done ++ ++.align 5 ++Lcbc_dec8x_three: ++ vncipherlast $out5,$out5,$ivec ++ vncipherlast $out6,$out6,$in5 ++ vncipherlast $out7,$out7,$in6 ++ vmr $ivec,$in7 ++ ++ le?vperm $out5,$out5,$out5,$inpperm ++ le?vperm $out6,$out6,$out6,$inpperm ++ stvx_u $out5,$x00,$out ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out6,$x10,$out ++ stvx_u $out7,$x20,$out ++ addi $out,$out,0x30 ++ b Lcbc_dec8x_done ++ ++.align 5 ++Lcbc_dec8x_two: ++ vncipherlast $out6,$out6,$ivec ++ vncipherlast $out7,$out7,$in6 ++ vmr $ivec,$in7 ++ ++ le?vperm $out6,$out6,$out6,$inpperm ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out6,$x00,$out ++ stvx_u $out7,$x10,$out ++ addi $out,$out,0x20 ++ b Lcbc_dec8x_done ++ ++.align 5 ++Lcbc_dec8x_one: ++ vncipherlast $out7,$out7,$ivec ++ vmr $ivec,$in7 ++ ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out7,0,$out ++ addi $out,$out,0x10 ++ ++Lcbc_dec8x_done: ++ le?vperm 
$ivec,$ivec,$ivec,$inpperm ++ stvx_u $ivec,0,$ivp # write [unaligned] iv ++ ++ li r10,`$FRAME+15` ++ li r11,`$FRAME+31` ++ stvx $inpperm,r10,$sp # wipe copies of round keys ++ addi r10,r10,32 ++ stvx $inpperm,r11,$sp ++ addi r11,r11,32 ++ stvx $inpperm,r10,$sp ++ addi r10,r10,32 ++ stvx $inpperm,r11,$sp ++ addi r11,r11,32 ++ stvx $inpperm,r10,$sp ++ addi r10,r10,32 ++ stvx $inpperm,r11,$sp ++ addi r11,r11,32 ++ stvx $inpperm,r10,$sp ++ addi r10,r10,32 ++ stvx $inpperm,r11,$sp ++ addi r11,r11,32 ++ ++ mtspr 256,$vrsave ++ lvx v20,r10,$sp # ABI says so ++ addi r10,r10,32 ++ lvx v21,r11,$sp ++ addi r11,r11,32 ++ lvx v22,r10,$sp ++ addi r10,r10,32 ++ lvx v23,r11,$sp ++ addi r11,r11,32 ++ lvx v24,r10,$sp ++ addi r10,r10,32 ++ lvx v25,r11,$sp ++ addi r11,r11,32 ++ lvx v26,r10,$sp ++ addi r10,r10,32 ++ lvx v27,r11,$sp ++ addi r11,r11,32 ++ lvx v28,r10,$sp ++ addi r10,r10,32 ++ lvx v29,r11,$sp ++ addi r11,r11,32 ++ lvx v30,r10,$sp ++ lvx v31,r11,$sp ++ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) ++ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) ++ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) ++ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) ++ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) ++ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) ++ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0x80,6,6,0 ++ .long 0 ++.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt ++___ ++}} }}} ++ ++######################################################################### ++{{{ # CTR procedure[s] # ++my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10)); ++my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); ++my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)= ++ map("v$_",(4..11)); ++my $dat=$tmp; ++ ++$code.=<<___; ++.globl .${prefix}_ctr32_encrypt_blocks ++.align 5 ++.${prefix}_ctr32_encrypt_blocks: ++ ${UCMP}i $len,1 ++ bltlr- ++ ++ lis r0,0xfff0 ++ mfspr $vrsave,256 ++ mtspr 256,r0 ++ ++ li $idx,15 ++ vxor $rndkey0,$rndkey0,$rndkey0 ++ le?vspltisb $tmp,0x0f ++ 
++ lvx $ivec,0,$ivp # load [unaligned] iv ++ lvsl $inpperm,0,$ivp ++ lvx $inptail,$idx,$ivp ++ vspltisb $one,1 ++ le?vxor $inpperm,$inpperm,$tmp ++ vperm $ivec,$ivec,$inptail,$inpperm ++ vsldoi $one,$rndkey0,$one,1 ++ ++ neg r11,$inp ++ ?lvsl $keyperm,0,$key # prepare for unaligned key ++ lwz $rounds,240($key) ++ ++ lvsr $inpperm,0,r11 # prepare for unaligned load ++ lvx $inptail,0,$inp ++ addi $inp,$inp,15 # 15 is not typo ++ le?vxor $inpperm,$inpperm,$tmp ++ ++ srwi $rounds,$rounds,1 ++ li $idx,16 ++ subi $rounds,$rounds,1 ++ ++ ${UCMP}i $len,8 ++ bge _aesp8_ctr32_encrypt8x ++ ++ ?lvsr $outperm,0,$out # prepare for unaligned store ++ vspltisb $outmask,-1 ++ lvx $outhead,0,$out ++ ?vperm $outmask,$rndkey0,$outmask,$outperm ++ le?vxor $outperm,$outperm,$tmp ++ ++ lvx $rndkey0,0,$key ++ mtctr $rounds ++ lvx $rndkey1,$idx,$key ++ addi $idx,$idx,16 ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vxor $inout,$ivec,$rndkey0 ++ lvx $rndkey0,$idx,$key ++ addi $idx,$idx,16 ++ b Loop_ctr32_enc ++ ++.align 5 ++Loop_ctr32_enc: ++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm ++ vcipher $inout,$inout,$rndkey1 ++ lvx $rndkey1,$idx,$key ++ addi $idx,$idx,16 ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vcipher $inout,$inout,$rndkey0 ++ lvx $rndkey0,$idx,$key ++ addi $idx,$idx,16 ++ bdnz Loop_ctr32_enc ++ ++ vadduwm $ivec,$ivec,$one ++ vmr $dat,$inptail ++ lvx $inptail,0,$inp ++ addi $inp,$inp,16 ++ subic. 
$len,$len,1 # blocks-- ++ ++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm ++ vcipher $inout,$inout,$rndkey1 ++ lvx $rndkey1,$idx,$key ++ vperm $dat,$dat,$inptail,$inpperm ++ li $idx,16 ++ ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm ++ lvx $rndkey0,0,$key ++ vxor $dat,$dat,$rndkey1 # last round key ++ vcipherlast $inout,$inout,$dat ++ ++ lvx $rndkey1,$idx,$key ++ addi $idx,$idx,16 ++ vperm $inout,$inout,$inout,$outperm ++ vsel $dat,$outhead,$inout,$outmask ++ mtctr $rounds ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vmr $outhead,$inout ++ vxor $inout,$ivec,$rndkey0 ++ lvx $rndkey0,$idx,$key ++ addi $idx,$idx,16 ++ stvx $dat,0,$out ++ addi $out,$out,16 ++ bne Loop_ctr32_enc ++ ++ addi $out,$out,-1 ++ lvx $inout,0,$out # redundant in aligned case ++ vsel $inout,$outhead,$inout,$outmask ++ stvx $inout,0,$out ++ ++ mtspr 256,$vrsave ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,6,0 ++ .long 0 ++___ ++######################################################################### ++{{ # Optimized CTR procedure # ++my $key_="r11"; ++my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); ++my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14)); ++my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22)); ++my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys ++ # v26-v31 last 6 round keys ++my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment ++my ($two,$three,$four)=($outhead,$outperm,$outmask); ++ ++$code.=<<___; ++.align 5 ++_aesp8_ctr32_encrypt8x: ++ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) ++ li r10,`$FRAME+8*16+15` ++ li r11,`$FRAME+8*16+31` ++ stvx v20,r10,$sp # ABI says so ++ addi r10,r10,32 ++ stvx v21,r11,$sp ++ addi r11,r11,32 ++ stvx v22,r10,$sp ++ addi r10,r10,32 ++ stvx v23,r11,$sp ++ addi r11,r11,32 ++ stvx v24,r10,$sp ++ addi r10,r10,32 ++ stvx v25,r11,$sp ++ addi r11,r11,32 ++ stvx v26,r10,$sp ++ addi r10,r10,32 ++ stvx v27,r11,$sp ++ addi r11,r11,32 ++ stvx v28,r10,$sp 
++ addi r10,r10,32 ++ stvx v29,r11,$sp ++ addi r11,r11,32 ++ stvx v30,r10,$sp ++ stvx v31,r11,$sp ++ li r0,-1 ++ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave ++ li $x10,0x10 ++ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) ++ li $x20,0x20 ++ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) ++ li $x30,0x30 ++ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) ++ li $x40,0x40 ++ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) ++ li $x50,0x50 ++ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) ++ li $x60,0x60 ++ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) ++ li $x70,0x70 ++ mtspr 256,r0 ++ ++ subi $rounds,$rounds,3 # -4 in total ++ ++ lvx $rndkey0,$x00,$key # load key schedule ++ lvx v30,$x10,$key ++ addi $key,$key,0x20 ++ lvx v31,$x00,$key ++ ?vperm $rndkey0,$rndkey0,v30,$keyperm ++ addi $key_,$sp,$FRAME+15 ++ mtctr $rounds ++ ++Load_ctr32_enc_key: ++ ?vperm v24,v30,v31,$keyperm ++ lvx v30,$x10,$key ++ addi $key,$key,0x20 ++ stvx v24,$x00,$key_ # off-load round[1] ++ ?vperm v25,v31,v30,$keyperm ++ lvx v31,$x00,$key ++ stvx v25,$x10,$key_ # off-load round[2] ++ addi $key_,$key_,0x20 ++ bdnz Load_ctr32_enc_key ++ ++ lvx v26,$x10,$key ++ ?vperm v24,v30,v31,$keyperm ++ lvx v27,$x20,$key ++ stvx v24,$x00,$key_ # off-load round[3] ++ ?vperm v25,v31,v26,$keyperm ++ lvx v28,$x30,$key ++ stvx v25,$x10,$key_ # off-load round[4] ++ addi $key_,$sp,$FRAME+15 # rewind $key_ ++ ?vperm v26,v26,v27,$keyperm ++ lvx v29,$x40,$key ++ ?vperm v27,v27,v28,$keyperm ++ lvx v30,$x50,$key ++ ?vperm v28,v28,v29,$keyperm ++ lvx v31,$x60,$key ++ ?vperm v29,v29,v30,$keyperm ++ lvx $out0,$x70,$key # borrow $out0 ++ ?vperm v30,v30,v31,$keyperm ++ lvx v24,$x00,$key_ # pre-load round[1] ++ ?vperm v31,v31,$out0,$keyperm ++ lvx v25,$x10,$key_ # pre-load round[2] ++ ++ vadduwm $two,$one,$one ++ subi $inp,$inp,15 # undo "caller" ++ $SHL $len,$len,4 ++ ++ vadduwm $out1,$ivec,$one # counter values ... ++ vadduwm $out2,$ivec,$two ++ vxor $out0,$ivec,$rndkey0 # ... 
xored with rndkey[0] ++ le?li $idx,8 ++ vadduwm $out3,$out1,$two ++ vxor $out1,$out1,$rndkey0 ++ le?lvsl $inpperm,0,$idx ++ vadduwm $out4,$out2,$two ++ vxor $out2,$out2,$rndkey0 ++ le?vspltisb $tmp,0x0f ++ vadduwm $out5,$out3,$two ++ vxor $out3,$out3,$rndkey0 ++ le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u ++ vadduwm $out6,$out4,$two ++ vxor $out4,$out4,$rndkey0 ++ vadduwm $out7,$out5,$two ++ vxor $out5,$out5,$rndkey0 ++ vadduwm $ivec,$out6,$two # next counter value ++ vxor $out6,$out6,$rndkey0 ++ vxor $out7,$out7,$rndkey0 ++ ++ mtctr $rounds ++ b Loop_ctr32_enc8x ++.align 5 ++Loop_ctr32_enc8x: ++ vcipher $out0,$out0,v24 ++ vcipher $out1,$out1,v24 ++ vcipher $out2,$out2,v24 ++ vcipher $out3,$out3,v24 ++ vcipher $out4,$out4,v24 ++ vcipher $out5,$out5,v24 ++ vcipher $out6,$out6,v24 ++ vcipher $out7,$out7,v24 ++Loop_ctr32_enc8x_middle: ++ lvx v24,$x20,$key_ # round[3] ++ addi $key_,$key_,0x20 ++ ++ vcipher $out0,$out0,v25 ++ vcipher $out1,$out1,v25 ++ vcipher $out2,$out2,v25 ++ vcipher $out3,$out3,v25 ++ vcipher $out4,$out4,v25 ++ vcipher $out5,$out5,v25 ++ vcipher $out6,$out6,v25 ++ vcipher $out7,$out7,v25 ++ lvx v25,$x10,$key_ # round[4] ++ bdnz Loop_ctr32_enc8x ++ ++ subic r11,$len,256 # $len-256, borrow $key_ ++ vcipher $out0,$out0,v24 ++ vcipher $out1,$out1,v24 ++ vcipher $out2,$out2,v24 ++ vcipher $out3,$out3,v24 ++ vcipher $out4,$out4,v24 ++ vcipher $out5,$out5,v24 ++ vcipher $out6,$out6,v24 ++ vcipher $out7,$out7,v24 ++ ++ subfe r0,r0,r0 # borrow?-1:0 ++ vcipher $out0,$out0,v25 ++ vcipher $out1,$out1,v25 ++ vcipher $out2,$out2,v25 ++ vcipher $out3,$out3,v25 ++ vcipher $out4,$out4,v25 ++ vcipher $out5,$out5,v25 ++ vcipher $out6,$out6,v25 ++ vcipher $out7,$out7,v25 ++ ++ and r0,r0,r11 ++ addi $key_,$sp,$FRAME+15 # rewind $key_ ++ vcipher $out0,$out0,v26 ++ vcipher $out1,$out1,v26 ++ vcipher $out2,$out2,v26 ++ vcipher $out3,$out3,v26 ++ vcipher $out4,$out4,v26 ++ vcipher $out5,$out5,v26 ++ vcipher $out6,$out6,v26 ++ vcipher $out7,$out7,v26 ++ lvx 
v24,$x00,$key_ # re-pre-load round[1] ++ ++ subic $len,$len,129 # $len-=129 ++ vcipher $out0,$out0,v27 ++ addi $len,$len,1 # $len-=128 really ++ vcipher $out1,$out1,v27 ++ vcipher $out2,$out2,v27 ++ vcipher $out3,$out3,v27 ++ vcipher $out4,$out4,v27 ++ vcipher $out5,$out5,v27 ++ vcipher $out6,$out6,v27 ++ vcipher $out7,$out7,v27 ++ lvx v25,$x10,$key_ # re-pre-load round[2] ++ ++ vcipher $out0,$out0,v28 ++ lvx_u $in0,$x00,$inp # load input ++ vcipher $out1,$out1,v28 ++ lvx_u $in1,$x10,$inp ++ vcipher $out2,$out2,v28 ++ lvx_u $in2,$x20,$inp ++ vcipher $out3,$out3,v28 ++ lvx_u $in3,$x30,$inp ++ vcipher $out4,$out4,v28 ++ lvx_u $in4,$x40,$inp ++ vcipher $out5,$out5,v28 ++ lvx_u $in5,$x50,$inp ++ vcipher $out6,$out6,v28 ++ lvx_u $in6,$x60,$inp ++ vcipher $out7,$out7,v28 ++ lvx_u $in7,$x70,$inp ++ addi $inp,$inp,0x80 ++ ++ vcipher $out0,$out0,v29 ++ le?vperm $in0,$in0,$in0,$inpperm ++ vcipher $out1,$out1,v29 ++ le?vperm $in1,$in1,$in1,$inpperm ++ vcipher $out2,$out2,v29 ++ le?vperm $in2,$in2,$in2,$inpperm ++ vcipher $out3,$out3,v29 ++ le?vperm $in3,$in3,$in3,$inpperm ++ vcipher $out4,$out4,v29 ++ le?vperm $in4,$in4,$in4,$inpperm ++ vcipher $out5,$out5,v29 ++ le?vperm $in5,$in5,$in5,$inpperm ++ vcipher $out6,$out6,v29 ++ le?vperm $in6,$in6,$in6,$inpperm ++ vcipher $out7,$out7,v29 ++ le?vperm $in7,$in7,$in7,$inpperm ++ ++ add $inp,$inp,r0 # $inp is adjusted in such ++ # way that at exit from the ++ # loop inX-in7 are loaded ++ # with last "words" ++ subfe. r0,r0,r0 # borrow?-1:0 ++ vcipher $out0,$out0,v30 ++ vxor $in0,$in0,v31 # xor with last round key ++ vcipher $out1,$out1,v30 ++ vxor $in1,$in1,v31 ++ vcipher $out2,$out2,v30 ++ vxor $in2,$in2,v31 ++ vcipher $out3,$out3,v30 ++ vxor $in3,$in3,v31 ++ vcipher $out4,$out4,v30 ++ vxor $in4,$in4,v31 ++ vcipher $out5,$out5,v30 ++ vxor $in5,$in5,v31 ++ vcipher $out6,$out6,v30 ++ vxor $in6,$in6,v31 ++ vcipher $out7,$out7,v30 ++ vxor $in7,$in7,v31 ++ ++ bne Lctr32_enc8x_break # did $len-129 borrow? 
++ ++ vcipherlast $in0,$out0,$in0 ++ vcipherlast $in1,$out1,$in1 ++ vadduwm $out1,$ivec,$one # counter values ... ++ vcipherlast $in2,$out2,$in2 ++ vadduwm $out2,$ivec,$two ++ vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] ++ vcipherlast $in3,$out3,$in3 ++ vadduwm $out3,$out1,$two ++ vxor $out1,$out1,$rndkey0 ++ vcipherlast $in4,$out4,$in4 ++ vadduwm $out4,$out2,$two ++ vxor $out2,$out2,$rndkey0 ++ vcipherlast $in5,$out5,$in5 ++ vadduwm $out5,$out3,$two ++ vxor $out3,$out3,$rndkey0 ++ vcipherlast $in6,$out6,$in6 ++ vadduwm $out6,$out4,$two ++ vxor $out4,$out4,$rndkey0 ++ vcipherlast $in7,$out7,$in7 ++ vadduwm $out7,$out5,$two ++ vxor $out5,$out5,$rndkey0 ++ le?vperm $in0,$in0,$in0,$inpperm ++ vadduwm $ivec,$out6,$two # next counter value ++ vxor $out6,$out6,$rndkey0 ++ le?vperm $in1,$in1,$in1,$inpperm ++ vxor $out7,$out7,$rndkey0 ++ mtctr $rounds ++ ++ vcipher $out0,$out0,v24 ++ stvx_u $in0,$x00,$out ++ le?vperm $in2,$in2,$in2,$inpperm ++ vcipher $out1,$out1,v24 ++ stvx_u $in1,$x10,$out ++ le?vperm $in3,$in3,$in3,$inpperm ++ vcipher $out2,$out2,v24 ++ stvx_u $in2,$x20,$out ++ le?vperm $in4,$in4,$in4,$inpperm ++ vcipher $out3,$out3,v24 ++ stvx_u $in3,$x30,$out ++ le?vperm $in5,$in5,$in5,$inpperm ++ vcipher $out4,$out4,v24 ++ stvx_u $in4,$x40,$out ++ le?vperm $in6,$in6,$in6,$inpperm ++ vcipher $out5,$out5,v24 ++ stvx_u $in5,$x50,$out ++ le?vperm $in7,$in7,$in7,$inpperm ++ vcipher $out6,$out6,v24 ++ stvx_u $in6,$x60,$out ++ vcipher $out7,$out7,v24 ++ stvx_u $in7,$x70,$out ++ addi $out,$out,0x80 ++ ++ b Loop_ctr32_enc8x_middle ++ ++.align 5 ++Lctr32_enc8x_break: ++ cmpwi $len,-0x60 ++ blt Lctr32_enc8x_one ++ nop ++ beq Lctr32_enc8x_two ++ cmpwi $len,-0x40 ++ blt Lctr32_enc8x_three ++ nop ++ beq Lctr32_enc8x_four ++ cmpwi $len,-0x20 ++ blt Lctr32_enc8x_five ++ nop ++ beq Lctr32_enc8x_six ++ cmpwi $len,0x00 ++ blt Lctr32_enc8x_seven ++ ++Lctr32_enc8x_eight: ++ vcipherlast $out0,$out0,$in0 ++ vcipherlast $out1,$out1,$in1 ++ vcipherlast $out2,$out2,$in2 ++ 
vcipherlast $out3,$out3,$in3 ++ vcipherlast $out4,$out4,$in4 ++ vcipherlast $out5,$out5,$in5 ++ vcipherlast $out6,$out6,$in6 ++ vcipherlast $out7,$out7,$in7 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ le?vperm $out1,$out1,$out1,$inpperm ++ stvx_u $out0,$x00,$out ++ le?vperm $out2,$out2,$out2,$inpperm ++ stvx_u $out1,$x10,$out ++ le?vperm $out3,$out3,$out3,$inpperm ++ stvx_u $out2,$x20,$out ++ le?vperm $out4,$out4,$out4,$inpperm ++ stvx_u $out3,$x30,$out ++ le?vperm $out5,$out5,$out5,$inpperm ++ stvx_u $out4,$x40,$out ++ le?vperm $out6,$out6,$out6,$inpperm ++ stvx_u $out5,$x50,$out ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out6,$x60,$out ++ stvx_u $out7,$x70,$out ++ addi $out,$out,0x80 ++ b Lctr32_enc8x_done ++ ++.align 5 ++Lctr32_enc8x_seven: ++ vcipherlast $out0,$out0,$in1 ++ vcipherlast $out1,$out1,$in2 ++ vcipherlast $out2,$out2,$in3 ++ vcipherlast $out3,$out3,$in4 ++ vcipherlast $out4,$out4,$in5 ++ vcipherlast $out5,$out5,$in6 ++ vcipherlast $out6,$out6,$in7 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ le?vperm $out1,$out1,$out1,$inpperm ++ stvx_u $out0,$x00,$out ++ le?vperm $out2,$out2,$out2,$inpperm ++ stvx_u $out1,$x10,$out ++ le?vperm $out3,$out3,$out3,$inpperm ++ stvx_u $out2,$x20,$out ++ le?vperm $out4,$out4,$out4,$inpperm ++ stvx_u $out3,$x30,$out ++ le?vperm $out5,$out5,$out5,$inpperm ++ stvx_u $out4,$x40,$out ++ le?vperm $out6,$out6,$out6,$inpperm ++ stvx_u $out5,$x50,$out ++ stvx_u $out6,$x60,$out ++ addi $out,$out,0x70 ++ b Lctr32_enc8x_done ++ ++.align 5 ++Lctr32_enc8x_six: ++ vcipherlast $out0,$out0,$in2 ++ vcipherlast $out1,$out1,$in3 ++ vcipherlast $out2,$out2,$in4 ++ vcipherlast $out3,$out3,$in5 ++ vcipherlast $out4,$out4,$in6 ++ vcipherlast $out5,$out5,$in7 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ le?vperm $out1,$out1,$out1,$inpperm ++ stvx_u $out0,$x00,$out ++ le?vperm $out2,$out2,$out2,$inpperm ++ stvx_u $out1,$x10,$out ++ le?vperm $out3,$out3,$out3,$inpperm ++ stvx_u $out2,$x20,$out ++ le?vperm $out4,$out4,$out4,$inpperm ++ 
stvx_u $out3,$x30,$out ++ le?vperm $out5,$out5,$out5,$inpperm ++ stvx_u $out4,$x40,$out ++ stvx_u $out5,$x50,$out ++ addi $out,$out,0x60 ++ b Lctr32_enc8x_done ++ ++.align 5 ++Lctr32_enc8x_five: ++ vcipherlast $out0,$out0,$in3 ++ vcipherlast $out1,$out1,$in4 ++ vcipherlast $out2,$out2,$in5 ++ vcipherlast $out3,$out3,$in6 ++ vcipherlast $out4,$out4,$in7 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ le?vperm $out1,$out1,$out1,$inpperm ++ stvx_u $out0,$x00,$out ++ le?vperm $out2,$out2,$out2,$inpperm ++ stvx_u $out1,$x10,$out ++ le?vperm $out3,$out3,$out3,$inpperm ++ stvx_u $out2,$x20,$out ++ le?vperm $out4,$out4,$out4,$inpperm ++ stvx_u $out3,$x30,$out ++ stvx_u $out4,$x40,$out ++ addi $out,$out,0x50 ++ b Lctr32_enc8x_done ++ ++.align 5 ++Lctr32_enc8x_four: ++ vcipherlast $out0,$out0,$in4 ++ vcipherlast $out1,$out1,$in5 ++ vcipherlast $out2,$out2,$in6 ++ vcipherlast $out3,$out3,$in7 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ le?vperm $out1,$out1,$out1,$inpperm ++ stvx_u $out0,$x00,$out ++ le?vperm $out2,$out2,$out2,$inpperm ++ stvx_u $out1,$x10,$out ++ le?vperm $out3,$out3,$out3,$inpperm ++ stvx_u $out2,$x20,$out ++ stvx_u $out3,$x30,$out ++ addi $out,$out,0x40 ++ b Lctr32_enc8x_done ++ ++.align 5 ++Lctr32_enc8x_three: ++ vcipherlast $out0,$out0,$in5 ++ vcipherlast $out1,$out1,$in6 ++ vcipherlast $out2,$out2,$in7 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ le?vperm $out1,$out1,$out1,$inpperm ++ stvx_u $out0,$x00,$out ++ le?vperm $out2,$out2,$out2,$inpperm ++ stvx_u $out1,$x10,$out ++ stvx_u $out2,$x20,$out ++ addi $out,$out,0x30 ++ b Lctr32_enc8x_done # CTR epilogue - the CBC one also stores the iv vector through ivp ++ ++.align 5 ++Lctr32_enc8x_two: ++ vcipherlast $out0,$out0,$in6 ++ vcipherlast $out1,$out1,$in7 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ le?vperm $out1,$out1,$out1,$inpperm ++ stvx_u $out0,$x00,$out ++ stvx_u $out1,$x10,$out ++ addi $out,$out,0x20 ++ b Lctr32_enc8x_done # CTR epilogue - the CBC one also stores the iv vector through ivp ++ ++.align 5 ++Lctr32_enc8x_one: ++ vcipherlast $out0,$out0,$in7 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ stvx_u $out0,0,$out ++ addi 
$out,$out,0x10 ++ ++Lctr32_enc8x_done: ++ li r10,`$FRAME+15` ++ li r11,`$FRAME+31` ++ stvx $inpperm,r10,$sp # wipe copies of round keys ++ addi r10,r10,32 ++ stvx $inpperm,r11,$sp ++ addi r11,r11,32 ++ stvx $inpperm,r10,$sp ++ addi r10,r10,32 ++ stvx $inpperm,r11,$sp ++ addi r11,r11,32 ++ stvx $inpperm,r10,$sp ++ addi r10,r10,32 ++ stvx $inpperm,r11,$sp ++ addi r11,r11,32 ++ stvx $inpperm,r10,$sp ++ addi r10,r10,32 ++ stvx $inpperm,r11,$sp ++ addi r11,r11,32 ++ ++ mtspr 256,$vrsave ++ lvx v20,r10,$sp # ABI says so ++ addi r10,r10,32 ++ lvx v21,r11,$sp ++ addi r11,r11,32 ++ lvx v22,r10,$sp ++ addi r10,r10,32 ++ lvx v23,r11,$sp ++ addi r11,r11,32 ++ lvx v24,r10,$sp ++ addi r10,r10,32 ++ lvx v25,r11,$sp ++ addi r11,r11,32 ++ lvx v26,r10,$sp ++ addi r10,r10,32 ++ lvx v27,r11,$sp ++ addi r11,r11,32 ++ lvx v28,r10,$sp ++ addi r10,r10,32 ++ lvx v29,r11,$sp ++ addi r11,r11,32 ++ lvx v30,r10,$sp ++ lvx v31,r11,$sp ++ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) ++ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) ++ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) ++ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) ++ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) ++ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) ++ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0x80,6,6,0 ++ .long 0 ++.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks ++___ ++}} }}} ++ ++my $consts=1; ++foreach(split("\n",$code)) { ++ s/\`([^\`]*)\`/eval($1)/geo; ++ ++ # constants table endian-specific conversion ++ if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) { ++ my $conv=$3; ++ my @bytes=(); ++ ++ # convert to endian-agnostic format ++ if ($1 eq "long") { ++ foreach (split(/,\s*/,$2)) { ++ my $l = /^0/?oct:int; ++ push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; ++ } ++ } else { ++ @bytes = map(/^0/?oct:int,split(/,\s*/,$2)); ++ } ++ ++ # little-endian conversion ++ if ($flavour =~ /le$/o) { ++ SWITCH: for($conv) { ++ /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; ++ /\?rev/ && 
do { @bytes=reverse(@bytes); last; }; ++ } ++ } ++ ++ #emit ++ print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n"; ++ next; ++ } ++ $consts=0 if (m/Lconsts:/o); # end of table ++ ++ # instructions prefixed with '?' are endian-specific and need ++ # to be adjusted accordingly... ++ if ($flavour =~ /le$/o) { # little-endian ++ s/le\?//o or ++ s/be\?/#be#/o or ++ s/\?lvsr/lvsl/o or ++ s/\?lvsl/lvsr/o or ++ s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or ++ s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or ++ s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o; ++ } else { # big-endian ++ s/le\?/#le#/o or ++ s/be\?//o or ++ s/\?([a-z]+)/$1/o; ++ } ++ ++ print $_,"\n"; ++} ++ ++close STDOUT or die "error closing STDOUT: $!"; # do not let a failed flush/close of the generated output pass silently +diff -up openssl-1.0.1i/crypto/aes/asm/vpaes-ppc.pl.ppc-asm openssl-1.0.1i/crypto/aes/asm/vpaes-ppc.pl +--- openssl-1.0.1i/crypto/aes/asm/vpaes-ppc.pl.ppc-asm 2014-08-13 19:46:21.093578128 +0200 ++++ openssl-1.0.1i/crypto/aes/asm/vpaes-ppc.pl 2014-08-13 19:46:21.093578128 +0200 +@@ -0,0 +1,1512 @@ ++#!/usr/bin/env perl ++ ++###################################################################### ++## Constant-time SSSE3 AES core implementation. ++## version 0.1 ++## ++## By Mike Hamburg (Stanford University), 2009 ++## Public domain. ++## ++## For details see http://shiftleft.org/papers/vector_aes/ and ++## http://crypto.stanford.edu/vpaes/. ++ ++# CBC encrypt/decrypt performance in cycles per byte processed with ++# 128-bit key. ++# ++# aes-ppc.pl this ++# G4e 35.5/52.1/(23.8) 11.9(*)/15.4 ++# POWER6 42.7/54.3/(28.2) 63.0/92.8(**) ++# POWER7 32.3/42.9/(18.4) 18.5/23.3 ++# ++# (*) This is ~10% worse than reported in paper. The reason is ++# twofold. This module doesn't make any assumption about ++# key schedule (or data for that matter) alignment and handles ++# it in-line. Secondly it, being transliterated from ++# vpaes-x86_64.pl, relies on "nested inversion" better suited ++# for Intel CPUs. 
++# (**) Inadequate POWER6 performance is due to astronomic AltiVec ++# latency, 9 cycles per simple logical operation. ++ ++$flavour = shift; ++ ++if ($flavour =~ /64/) { ++ $SIZE_T =8; ++ $LRSAVE =2*$SIZE_T; ++ $STU ="stdu"; ++ $POP ="ld"; ++ $PUSH ="std"; ++ $UCMP ="cmpld"; ++} elsif ($flavour =~ /32/) { ++ $SIZE_T =4; ++ $LRSAVE =$SIZE_T; ++ $STU ="stwu"; ++ $POP ="lwz"; ++ $PUSH ="stw"; ++ $UCMP ="cmplw"; ++} else { die "nonsense $flavour"; } ++ ++$sp="r1"; ++$FRAME=6*$SIZE_T+13*16; # 13*16 is for v20-v31 offload ++ ++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ++( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or ++( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or ++die "can't locate ppc-xlate.pl"; ++ ++open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!"; # 'or', not '||': '||' binds to the (always true) command string, so die could never fire ++ ++$code.=<<___; ++.machine "any" ++ ++.text ++ ++.align 7 # totally strategic alignment ++_vpaes_consts: ++Lk_mc_forward: # mc_forward ++ .long 0x01020300, 0x05060704, 0x090a0b08, 0x0d0e0f0c ?inv ++ .long 0x05060704, 0x090a0b08, 0x0d0e0f0c, 0x01020300 ?inv ++ .long 0x090a0b08, 0x0d0e0f0c, 0x01020300, 0x05060704 ?inv ++ .long 0x0d0e0f0c, 0x01020300, 0x05060704, 0x090a0b08 ?inv ++Lk_mc_backward: # mc_backward ++ .long 0x03000102, 0x07040506, 0x0b08090a, 0x0f0c0d0e ?inv ++ .long 0x0f0c0d0e, 0x03000102, 0x07040506, 0x0b08090a ?inv ++ .long 0x0b08090a, 0x0f0c0d0e, 0x03000102, 0x07040506 ?inv ++ .long 0x07040506, 0x0b08090a, 0x0f0c0d0e, 0x03000102 ?inv ++Lk_sr: # sr ++ .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f ?inv ++ .long 0x00050a0f, 0x04090e03, 0x080d0207, 0x0c01060b ?inv ++ .long 0x0009020b, 0x040d060f, 0x08010a03, 0x0c050e07 ?inv ++ .long 0x000d0a07, 0x04010e0b, 0x0805020f, 0x0c090603 ?inv ++ ++## ++## "Hot" constants ++## ++Lk_inv: # inv, inva ++ .long 0xf001080d, 0x0f06050e, 0x020c0b0a, 0x09030704 ?rev ++ .long 0xf0070b0f, 0x060a0401, 0x09080502, 0x0c0e0d03 ?rev ++Lk_ipt: # input transform (lo, hi) ++ .long 0x00702a5a, 0x98e8b2c2, 0x08782252, 0x90e0baca ?rev ++ .long 
0x004d7c31, 0x7d30014c, 0x81ccfdb0, 0xfcb180cd ?rev ++Lk_sbo: # sbou, sbot ++ .long 0x00c7bd6f, 0x176dd2d0, 0x78a802c5, 0x7abfaa15 ?rev ++ .long 0x006abb5f, 0xa574e4cf, 0xfa352b41, 0xd1901e8e ?rev ++Lk_sb1: # sb1u, sb1t ++ .long 0x0023e2fa, 0x15d41836, 0xefd92e0d, 0xc1ccf73b ?rev ++ .long 0x003e50cb, 0x8fe19bb1, 0x44f52a14, 0x6e7adfa5 ?rev ++Lk_sb2: # sb2u, sb2t ++ .long 0x0029e10a, 0x4088eb69, 0x4a2382ab, 0xc863a1c2 ?rev ++ .long 0x0024710b, 0xc6937ae2, 0xcd2f98bc, 0x55e9b75e ?rev ++ ++## ++## Decryption stuff ++## ++Lk_dipt: # decryption input transform ++ .long 0x005f540b, 0x045b500f, 0x1a454e11, 0x1e414a15 ?rev ++ .long 0x00650560, 0xe683e386, 0x94f191f4, 0x72177712 ?rev ++Lk_dsbo: # decryption sbox final output ++ .long 0x0040f97e, 0x53ea8713, 0x2d3e94d4, 0xb96daac7 ?rev ++ .long 0x001d4493, 0x0f56d712, 0x9c8ec5d8, 0x59814bca ?rev ++Lk_dsb9: # decryption sbox output *9*u, *9*t ++ .long 0x00d6869a, 0x53031c85, 0xc94c994f, 0x501fd5ca ?rev ++ .long 0x0049d7ec, 0x89173bc0, 0x65a5fbb2, 0x9e2c5e72 ?rev ++Lk_dsbd: # decryption sbox output *D*u, *D*t ++ .long 0x00a2b1e6, 0xdfcc577d, 0x39442a88, 0x139b6ef5 ?rev ++ .long 0x00cbc624, 0xf7fae23c, 0xd3efde15, 0x0d183129 ?rev ++Lk_dsbb: # decryption sbox output *B*u, *B*t ++ .long 0x0042b496, 0x926422d0, 0x04d4f2b0, 0xf6462660 ?rev ++ .long 0x006759cd, 0xa69894c1, 0x6baa5532, 0x3e0cfff3 ?rev ++Lk_dsbe: # decryption sbox output *E*u, *E*t ++ .long 0x00d0d426, 0x9692f246, 0xb0f6b464, 0x04604222 ?rev ++ .long 0x00c1aaff, 0xcda6550c, 0x323e5998, 0x6bf36794 ?rev ++ ++## ++## Key schedule constants ++## ++Lk_dksd: # decryption key schedule: invskew x*D ++ .long 0x0047e4a3, 0x5d1ab9fe, 0xf9be1d5a, 0xa4e34007 ?rev ++ .long 0x008336b5, 0xf477c241, 0x1e9d28ab, 0xea69dc5f ?rev ++Lk_dksb: # decryption key schedule: invskew x*B ++ .long 0x00d55085, 0x1fca4f9a, 0x994cc91c, 0x8653d603 ?rev ++ .long 0x004afcb6, 0xa7ed5b11, 0xc882347e, 0x6f2593d9 ?rev ++Lk_dkse: # decryption key schedule: invskew x*E + 0x63 ++ .long 0x00d6c91f, 0xca1c03d5, 
0x86504f99, 0x4c9a8553 ?rev ++ .long 0xe87bdc4f, 0x059631a2, 0x8714b320, 0x6af95ecd ?rev ++Lk_dks9: # decryption key schedule: invskew x*9 ++ .long 0x00a7d97e, 0xc86f11b6, 0xfc5b2582, 0x3493ed4a ?rev ++ .long 0x00331427, 0x62517645, 0xcefddae9, 0xac9fb88b ?rev ++ ++Lk_rcon: # rcon ++ .long 0xb6ee9daf, 0xb991831f, 0x817d7c4d, 0x08982a70 ?asis ++Lk_s63: ++ .long 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b ?asis ++ ++Lk_opt: # output transform ++ .long 0x0060b6d6, 0x29499fff, 0x0868bede, 0x214197f7 ?rev ++ .long 0x00ecbc50, 0x51bded01, 0xe00c5cb0, 0xb15d0de1 ?rev ++Lk_deskew: # deskew tables: inverts the sbox's "skew" ++ .long 0x00e3a447, 0x40a3e407, 0x1af9be5d, 0x5ab9fe1d ?rev ++ .long 0x0069ea83, 0xdcb5365f, 0x771e9df4, 0xabc24128 ?rev ++.align 5 ++Lconsts: ++ mflr r0 ++ bcl 20,31,\$+4 ++ mflr r12 #vvvvv "distance between . and _vpaes_consts ++ addi r12,r12,-0x308 ++ mtlr r0 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++.asciz "Vector Permutation AES for AltiVec, Mike Hamburg (Stanford University)" ++.align 6 ++___ ++ ++my ($inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm) = map("v$_",(26..31)); ++{ ++my ($inp,$out,$key) = map("r$_",(3..5)); ++ ++my ($invlo,$invhi,$iptlo,$ipthi,$sbou,$sbot) = map("v$_",(10..15)); ++my ($sb1u,$sb1t,$sb2u,$sb2t) = map("v$_",(16..19)); ++my ($sb9u,$sb9t,$sbdu,$sbdt,$sbbu,$sbbt,$sbeu,$sbet)=map("v$_",(16..23)); ++ ++$code.=<<___; ++## ++## _aes_preheat ++## ++## Fills register %r10 -> .aes_consts (so you can -fPIC) ++## and %xmm9-%xmm15 as specified below. 
++## ++.align 4 ++_vpaes_encrypt_preheat: ++ mflr r8 ++ bl Lconsts ++ mtlr r8 ++ li r11, 0xc0 # Lk_inv ++ li r10, 0xd0 ++ li r9, 0xe0 # Lk_ipt ++ li r8, 0xf0 ++ vxor v7, v7, v7 # 0x00..00 ++ vspltisb v8,4 # 0x04..04 ++ vspltisb v9,0x0f # 0x0f..0f ++ lvx $invlo, r12, r11 ++ li r11, 0x100 ++ lvx $invhi, r12, r10 ++ li r10, 0x110 ++ lvx $iptlo, r12, r9 ++ li r9, 0x120 ++ lvx $ipthi, r12, r8 ++ li r8, 0x130 ++ lvx $sbou, r12, r11 ++ li r11, 0x140 ++ lvx $sbot, r12, r10 ++ li r10, 0x150 ++ lvx $sb1u, r12, r9 ++ lvx $sb1t, r12, r8 ++ lvx $sb2u, r12, r11 ++ lvx $sb2t, r12, r10 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++## ++## _aes_encrypt_core ++## ++## AES-encrypt %xmm0. ++## ++## Inputs: ++## %xmm0 = input ++## %xmm9-%xmm15 as in _vpaes_preheat ++## (%rdx) = scheduled keys ++## ++## Output in %xmm0 ++## Clobbers %xmm1-%xmm6, %r9, %r10, %r11, %rax ++## ++## ++.align 5 ++_vpaes_encrypt_core: ++ lwz r8, 240($key) # pull rounds ++ li r9, 16 ++ lvx v5, 0, $key # vmovdqu (%r9), %xmm5 # round0 key ++ li r11, 0x10 ++ lvx v6, r9, $key ++ addi r9, r9, 16 ++ ?vperm v5, v5, v6, $keyperm # align round key ++ addi r10, r11, 0x40 ++ vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 ++ vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm1 ++ vperm v1, $ipthi, $ipthi, v1 # vpshufb %xmm0, %xmm3, %xmm2 ++ vxor v0, v0, v5 # vpxor %xmm5, %xmm1, %xmm0 ++ vxor v0, v0, v1 # vpxor %xmm2, %xmm0, %xmm0 ++ mtctr r8 ++ b Lenc_entry ++ ++.align 4 ++Lenc_loop: ++ # middle of middle round ++ vperm v4, $sb1t, v7, v2 # vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u ++ lvx v1, r12, r11 # vmovdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[] ++ addi r11, r11, 16 ++ vperm v0, $sb1u, v7, v3 # vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t ++ vxor v4, v4, v5 # vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k ++ andi. r11, r11, 0x30 # and \$0x30, %r11 # ... 
mod 4 ++ vperm v5, $sb2t, v7, v2 # vpshufb %xmm2, %xmm15, %xmm5 # 4 = sb2u ++ vxor v0, v0, v4 # vpxor %xmm4, %xmm0, %xmm0 # 0 = A ++ vperm v2, $sb2u, v7, v3 # vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t ++ lvx v4, r12, r10 # vmovdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[] ++ addi r10, r11, 0x40 ++ vperm v3, v0, v7, v1 # vpshufb %xmm1, %xmm0, %xmm3 # 0 = B ++ vxor v2, v2, v5 # vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A ++ vperm v0, v0, v7, v4 # vpshufb %xmm4, %xmm0, %xmm0 # 3 = D ++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 0 = 2A+B ++ vperm v4, v3, v7, v1 # vpshufb %xmm1, %xmm3, %xmm4 # 0 = 2B+C ++ vxor v0, v0, v3 # vpxor %xmm3, %xmm0, %xmm0 # 3 = 2A+B+D ++ vxor v0, v0, v4 # vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D ++ ++Lenc_entry: ++ # top of round ++ vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i ++ vperm v5, $invhi, $invhi, v0 # vpshufb %xmm1, %xmm11, %xmm5 # 2 = a/k ++ vxor v0, v0, v1 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j ++ vperm v3, $invlo, $invlo, v1 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i ++ vperm v4, $invlo, $invlo, v0 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j ++ vand v0, v0, v9 ++ vxor v3, v3, v5 # vpxor %xmm5, %xmm3, %xmm3 # 3 = iak = 1/i + a/k ++ vxor v4, v4, v5 # vpxor %xmm5, %xmm4, %xmm4 # 4 = jak = 1/j + a/k ++ vperm v2, $invlo, v7, v3 # vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak ++ vmr v5, v6 ++ lvx v6, r9, $key # vmovdqu (%r9), %xmm5 ++ vperm v3, $invlo, v7, v4 # vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak ++ addi r9, r9, 16 ++ vxor v2, v2, v0 # vpxor %xmm1, %xmm2, %xmm2 # 2 = io ++ ?vperm v5, v5, v6, $keyperm # align round key ++ vxor v3, v3, v1 # vpxor %xmm0, %xmm3, %xmm3 # 3 = jo ++ bdnz Lenc_loop ++ ++ # middle of last round ++ addi r10, r11, 0x80 ++ # vmovdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo ++ # vmovdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16 ++ vperm v4, $sbou, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou ++ lvx v1, r12, r10 # vmovdqa 0x40(%r11,%r10), %xmm1 # .Lk_sr[] ++ vperm v0, $sbot, v7, v3 # vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t ++ vxor 
v4, v4, v5 # vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k ++ vxor v0, v0, v4 # vpxor %xmm4, %xmm0, %xmm0 # 0 = A ++ vperm v0, v0, v7, v1 # vpshufb %xmm1, %xmm0, %xmm0 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++.globl .vpaes_encrypt ++.align 5 ++.vpaes_encrypt: ++ $STU $sp,-$FRAME($sp) ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mflr r6 ++ mfspr r7, 256 # save vrsave ++ stvx v20,r10,$sp ++ addi r10,r10,32 ++ stvx v21,r11,$sp ++ addi r11,r11,32 ++ stvx v22,r10,$sp ++ addi r10,r10,32 ++ stvx v23,r11,$sp ++ addi r11,r11,32 ++ stvx v24,r10,$sp ++ addi r10,r10,32 ++ stvx v25,r11,$sp ++ addi r11,r11,32 ++ stvx v26,r10,$sp ++ addi r10,r10,32 ++ stvx v27,r11,$sp ++ addi r11,r11,32 ++ stvx v28,r10,$sp ++ addi r10,r10,32 ++ stvx v29,r11,$sp ++ addi r11,r11,32 ++ stvx v30,r10,$sp ++ stvx v31,r11,$sp ++ stw r7,`$FRAME-4`($sp) # save vrsave ++ li r0, -1 ++ $PUSH r6,`$FRAME+$LRSAVE`($sp) ++ mtspr 256, r0 # preserve all AltiVec registers ++ ++ bl _vpaes_encrypt_preheat ++ ++ ?lvsl $inpperm, 0, $inp # prepare for unaligned access ++ lvx v0, 0, $inp ++ addi $inp, $inp, 15 # 15 is not a typo ++ ?lvsr $outperm, 0, $out ++ ?lvsl $keyperm, 0, $key # prepare for unaligned access ++ vnor $outmask, v7, v7 # 0xff..ff ++ lvx $inptail, 0, $inp # redundant in aligned case ++ ?vperm $outmask, v7, $outmask, $outperm ++ lvx $outhead, 0, $out ++ ?vperm v0, v0, $inptail, $inpperm ++ ++ bl _vpaes_encrypt_core ++ ++ vperm v0, v0, v0, $outperm # rotate right/left ++ vsel v1, $outhead, v0, $outmask ++ vmr $outhead, v0 ++ stvx v1, 0, $out ++ addi $out, $out, 15 # 15 is not a typo ++ ######## ++ ++ lvx v1, 0, $out # redundant in aligned case ++ vsel v1, $outhead, v1, $outmask ++ stvx v1, 0, $out ++ ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mtlr r6 ++ mtspr 256, r7 # restore vrsave ++ lvx v20,r10,$sp ++ addi r10,r10,32 ++ lvx v21,r11,$sp ++ addi r11,r11,32 ++ lvx v22,r10,$sp ++ addi r10,r10,32 ++ lvx v23,r11,$sp ++ addi r11,r11,32 ++ lvx v24,r10,$sp ++ addi r10,r10,32 ++ lvx 
v25,r11,$sp ++ addi r11,r11,32 ++ lvx v26,r10,$sp ++ addi r10,r10,32 ++ lvx v27,r11,$sp ++ addi r11,r11,32 ++ lvx v28,r10,$sp ++ addi r10,r10,32 ++ lvx v29,r11,$sp ++ addi r11,r11,32 ++ lvx v30,r10,$sp ++ lvx v31,r11,$sp ++ addi $sp,$sp,$FRAME ++ blr ++ .long 0 ++ .byte 0,12,0x04,1,0x80,0,3,0 ++ .long 0 ++.size .vpaes_encrypt,.-.vpaes_encrypt ++ ++.align 4 ++_vpaes_decrypt_preheat: ++ mflr r8 ++ bl Lconsts ++ mtlr r8 ++ li r11, 0xc0 # Lk_inv ++ li r10, 0xd0 ++ li r9, 0x160 # Ldipt ++ li r8, 0x170 ++ vxor v7, v7, v7 # 0x00..00 ++ vspltisb v8,4 # 0x04..04 ++ vspltisb v9,0x0f # 0x0f..0f ++ lvx $invlo, r12, r11 ++ li r11, 0x180 ++ lvx $invhi, r12, r10 ++ li r10, 0x190 ++ lvx $iptlo, r12, r9 ++ li r9, 0x1a0 ++ lvx $ipthi, r12, r8 ++ li r8, 0x1b0 ++ lvx $sbou, r12, r11 ++ li r11, 0x1c0 ++ lvx $sbot, r12, r10 ++ li r10, 0x1d0 ++ lvx $sb9u, r12, r9 ++ li r9, 0x1e0 ++ lvx $sb9t, r12, r8 ++ li r8, 0x1f0 ++ lvx $sbdu, r12, r11 ++ li r11, 0x200 ++ lvx $sbdt, r12, r10 ++ li r10, 0x210 ++ lvx $sbbu, r12, r9 ++ lvx $sbbt, r12, r8 ++ lvx $sbeu, r12, r11 ++ lvx $sbet, r12, r10 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++## ++## Decryption core ++## ++## Same API as encryption core. 
++## ++.align 4 ++_vpaes_decrypt_core: ++ lwz r8, 240($key) # pull rounds ++ li r9, 16 ++ lvx v5, 0, $key # vmovdqu (%r9), %xmm4 # round0 key ++ li r11, 0x30 ++ lvx v6, r9, $key ++ addi r9, r9, 16 ++ ?vperm v5, v5, v6, $keyperm # align round key ++ vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 ++ vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm2 ++ vperm v1, $ipthi, $ipthi, v1 # vpshufb %xmm0, %xmm1, %xmm0 ++ vxor v0, v0, v5 # vpxor %xmm4, %xmm2, %xmm2 ++ vxor v0, v0, v1 # vpxor %xmm2, %xmm0, %xmm0 ++ mtctr r8 ++ b Ldec_entry ++ ++.align 4 ++Ldec_loop: ++# ++# Inverse mix columns ++# ++ lvx v0, r12, r11 # v5 and v0 are flipped ++ # vmovdqa -0x20(%r10),%xmm4 # 4 : sb9u ++ # vmovdqa -0x10(%r10),%xmm1 # 0 : sb9t ++ vperm v4, $sb9u, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sb9u ++ subi r11, r11, 16 ++ vperm v1, $sb9t, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb9t ++ andi. r11, r11, 0x30 ++ vxor v5, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 ++ # vmovdqa 0x00(%r10),%xmm4 # 4 : sbdu ++ vxor v5, v5, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch ++ # vmovdqa 0x10(%r10),%xmm1 # 0 : sbdt ++ ++ vperm v4, $sbdu, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbdu ++ vperm v5, v5, v7, v0 # vpshufb %xmm5, %xmm0, %xmm0 # MC ch ++ vperm v1, $sbdt, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbdt ++ vxor v5, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 # 4 = ch ++ # vmovdqa 0x20(%r10), %xmm4 # 4 : sbbu ++ vxor v5, v5, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch ++ # vmovdqa 0x30(%r10), %xmm1 # 0 : sbbt ++ ++ vperm v4, $sbbu, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbbu ++ vperm v5, v5, v7, v0 # vpshufb %xmm5, %xmm0, %xmm0 # MC ch ++ vperm v1, $sbbt, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbbt ++ vxor v5, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 # 4 = ch ++ # vmovdqa 0x40(%r10), %xmm4 # 4 : sbeu ++ vxor v5, v5, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch ++ # vmovdqa 0x50(%r10), %xmm1 # 0 : sbet ++ ++ vperm v4, $sbeu, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbeu ++ vperm v5, v5, v7, v0 # vpshufb %xmm5, 
%xmm0, %xmm0 # MC ch ++ vperm v1, $sbet, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbet ++ vxor v0, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 # 4 = ch ++ vxor v0, v0, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch ++ ++Ldec_entry: ++ # top of round ++ vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i ++ vperm v2, $invhi, $invhi, v0 # vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k ++ vxor v0, v0, v1 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j ++ vperm v3, $invlo, $invlo, v1 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i ++ vperm v4, $invlo, $invlo, v0 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j ++ vand v0, v0, v9 ++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k ++ vxor v4, v4, v2 # vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k ++ vperm v2, $invlo, v7, v3 # vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak ++ vmr v5, v6 ++ lvx v6, r9, $key # vmovdqu (%r9), %xmm0 ++ vperm v3, $invlo, v7, v4 # vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak ++ addi r9, r9, 16 ++ vxor v2, v2, v0 # vpxor %xmm1, %xmm2, %xmm2 # 2 = io ++ ?vperm v5, v5, v6, $keyperm # align round key ++ vxor v3, v3, v1 # vpxor %xmm0, %xmm3, %xmm3 # 3 = jo ++ bdnz Ldec_loop ++ ++ # middle of last round ++ addi r10, r11, 0x80 ++ # vmovdqa 0x60(%r10), %xmm4 # 3 : sbou ++ vperm v4, $sbou, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou ++ # vmovdqa 0x70(%r10), %xmm1 # 0 : sbot ++ lvx v2, r12, r10 # vmovdqa -0x160(%r11), %xmm2 # .Lk_sr-.Lk_dsbd=-0x160 ++ vperm v1, $sbot, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb1t ++ vxor v4, v4, v5 # vpxor %xmm0, %xmm4, %xmm4 # 4 = sb1u + k ++ vxor v0, v1, v4 # vpxor %xmm4, %xmm1, %xmm0 # 0 = A ++ vperm v0, v0, v7, v2 # vpshufb %xmm2, %xmm0, %xmm0 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++.globl .vpaes_decrypt ++.align 5 ++.vpaes_decrypt: ++ $STU $sp,-$FRAME($sp) ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mflr r6 ++ mfspr r7, 256 # save vrsave ++ stvx v20,r10,$sp ++ addi r10,r10,32 ++ stvx v21,r11,$sp ++ addi r11,r11,32 ++ stvx v22,r10,$sp ++ addi r10,r10,32 ++ stvx v23,r11,$sp ++ 
addi r11,r11,32 ++ stvx v24,r10,$sp ++ addi r10,r10,32 ++ stvx v25,r11,$sp ++ addi r11,r11,32 ++ stvx v26,r10,$sp ++ addi r10,r10,32 ++ stvx v27,r11,$sp ++ addi r11,r11,32 ++ stvx v28,r10,$sp ++ addi r10,r10,32 ++ stvx v29,r11,$sp ++ addi r11,r11,32 ++ stvx v30,r10,$sp ++ stvx v31,r11,$sp ++ stw r7,`$FRAME-4`($sp) # save vrsave ++ li r0, -1 ++ $PUSH r6,`$FRAME+$LRSAVE`($sp) ++ mtspr 256, r0 # preserve all AltiVec registers ++ ++ bl _vpaes_decrypt_preheat ++ ++ ?lvsl $inpperm, 0, $inp # prepare for unaligned access ++ lvx v0, 0, $inp ++ addi $inp, $inp, 15 # 15 is not a typo ++ ?lvsr $outperm, 0, $out ++ ?lvsl $keyperm, 0, $key ++ vnor $outmask, v7, v7 # 0xff..ff ++ lvx $inptail, 0, $inp # redundant in aligned case ++ ?vperm $outmask, v7, $outmask, $outperm ++ lvx $outhead, 0, $out ++ ?vperm v0, v0, $inptail, $inpperm ++ ++ bl _vpaes_decrypt_core ++ ++ vperm v0, v0, v0, $outperm # rotate right/left ++ vsel v1, $outhead, v0, $outmask ++ vmr $outhead, v0 ++ stvx v1, 0, $out ++ addi $out, $out, 15 # 15 is not a typo ++ ######## ++ ++ lvx v1, 0, $out # redundant in aligned case ++ vsel v1, $outhead, v1, $outmask ++ stvx v1, 0, $out ++ ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mtlr r6 ++ mtspr 256, r7 # restore vrsave ++ lvx v20,r10,$sp ++ addi r10,r10,32 ++ lvx v21,r11,$sp ++ addi r11,r11,32 ++ lvx v22,r10,$sp ++ addi r10,r10,32 ++ lvx v23,r11,$sp ++ addi r11,r11,32 ++ lvx v24,r10,$sp ++ addi r10,r10,32 ++ lvx v25,r11,$sp ++ addi r11,r11,32 ++ lvx v26,r10,$sp ++ addi r10,r10,32 ++ lvx v27,r11,$sp ++ addi r11,r11,32 ++ lvx v28,r10,$sp ++ addi r10,r10,32 ++ lvx v29,r11,$sp ++ addi r11,r11,32 ++ lvx v30,r10,$sp ++ lvx v31,r11,$sp ++ addi $sp,$sp,$FRAME ++ blr ++ .long 0 ++ .byte 0,12,0x04,1,0x80,0,3,0 ++ .long 0 ++.size .vpaes_decrypt,.-.vpaes_decrypt ++ ++.globl .vpaes_cbc_encrypt ++.align 5 ++.vpaes_cbc_encrypt: ++ ${UCMP}i r5,16 ++ bltlr- ++ ++ $STU $sp,-`($FRAME+2*$SIZE_T)`($sp) ++ mflr r0 ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mfspr r12, 256 
++ stvx v20,r10,$sp ++ addi r10,r10,32 ++ stvx v21,r11,$sp ++ addi r11,r11,32 ++ stvx v22,r10,$sp ++ addi r10,r10,32 ++ stvx v23,r11,$sp ++ addi r11,r11,32 ++ stvx v24,r10,$sp ++ addi r10,r10,32 ++ stvx v25,r11,$sp ++ addi r11,r11,32 ++ stvx v26,r10,$sp ++ addi r10,r10,32 ++ stvx v27,r11,$sp ++ addi r11,r11,32 ++ stvx v28,r10,$sp ++ addi r10,r10,32 ++ stvx v29,r11,$sp ++ addi r11,r11,32 ++ stvx v30,r10,$sp ++ stvx v31,r11,$sp ++ stw r12,`$FRAME-4`($sp) # save vrsave ++ $PUSH r30,`$FRAME+$SIZE_T*0`($sp) ++ $PUSH r31,`$FRAME+$SIZE_T*1`($sp) ++ li r9, -16 ++ $PUSH r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp) ++ ++ and r30, r5, r9 # copy length&-16 ++ mr r5, r6 # copy pointer to key ++ mr r31, r7 # copy pointer to iv ++ blt Lcbc_abort ++ cmpwi r8, 0 # test direction ++ li r6, -1 ++ mr r7, r12 # copy vrsave ++ mtspr 256, r6 # preserve all AltiVec registers ++ ++ lvx v24, 0, r31 # load [potentially unaligned] iv ++ li r9, 15 ++ ?lvsl $inpperm, 0, r31 ++ lvx v25, r9, r31 ++ ?vperm v24, v24, v25, $inpperm ++ ++ neg r8, $inp # prepare for unaligned access ++ vxor v7, v7, v7 ++ ?lvsl $keyperm, 0, $key ++ ?lvsr $outperm, 0, $out ++ ?lvsr $inpperm, 0, r8 # -$inp ++ vnor $outmask, v7, v7 # 0xff..ff ++ lvx $inptail, 0, $inp ++ ?vperm $outmask, v7, $outmask, $outperm ++ addi $inp, $inp, 15 # 15 is not a typo ++ lvx $outhead, 0, $out ++ ++ beq Lcbc_decrypt ++ ++ bl _vpaes_encrypt_preheat ++ li r0, 16 ++ ++Lcbc_enc_loop: ++ vmr v0, $inptail ++ lvx $inptail, 0, $inp ++ addi $inp, $inp, 16 ++ ?vperm v0, v0, $inptail, $inpperm ++ vxor v0, v0, v24 # ^= iv ++ ++ bl _vpaes_encrypt_core ++ ++ vmr v24, v0 # put aside iv ++ sub. 
r30, r30, r0 # len -= 16 ++ vperm v0, v0, v0, $outperm # rotate right/left ++ vsel v1, $outhead, v0, $outmask ++ vmr $outhead, v0 ++ stvx v1, 0, $out ++ addi $out, $out, 16 ++ bne Lcbc_enc_loop ++ ++ b Lcbc_done ++ ++.align 5 ++Lcbc_decrypt: ++ bl _vpaes_decrypt_preheat ++ li r0, 16 ++ ++Lcbc_dec_loop: ++ vmr v0, $inptail ++ lvx $inptail, 0, $inp ++ addi $inp, $inp, 16 ++ ?vperm v0, v0, $inptail, $inpperm ++ vmr v25, v0 # put aside input ++ ++ bl _vpaes_decrypt_core ++ ++ vxor v0, v0, v24 # ^= iv ++ vmr v24, v25 ++ sub. r30, r30, r0 # len -= 16 ++ vperm v0, v0, v0, $outperm # rotate right/left ++ vsel v1, $outhead, v0, $outmask ++ vmr $outhead, v0 ++ stvx v1, 0, $out ++ addi $out, $out, 16 ++ bne Lcbc_dec_loop ++ ++Lcbc_done: ++ addi $out, $out, -1 ++ lvx v1, 0, $out # redundant in aligned case ++ vsel v1, $outhead, v1, $outmask ++ stvx v1, 0, $out ++ ++ neg r8, r31 # write [potentially unaligned] iv ++ ?lvsl $outperm, 0, r8 ++ li r6, 15 ++ vnor $outmask, v7, v7 # 0xff..ff ++ ?vperm $outmask, v7, $outmask, $outperm ++ lvx $outhead, 0, r31 ++ vperm v24, v24, v24, $outperm # rotate right/left ++ vsel v0, $outhead, v24, $outmask ++ lvx v1, r6, r31 ++ stvx v0, 0, r31 ++ vsel v1, v24, v1, $outmask ++ stvx v1, r6, r31 ++ ++ mtspr 256, r7 # restore vrsave ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ lvx v20,r10,$sp ++ addi r10,r10,32 ++ lvx v21,r11,$sp ++ addi r11,r11,32 ++ lvx v22,r10,$sp ++ addi r10,r10,32 ++ lvx v23,r11,$sp ++ addi r11,r11,32 ++ lvx v24,r10,$sp ++ addi r10,r10,32 ++ lvx v25,r11,$sp ++ addi r11,r11,32 ++ lvx v26,r10,$sp ++ addi r10,r10,32 ++ lvx v27,r11,$sp ++ addi r11,r11,32 ++ lvx v28,r10,$sp ++ addi r10,r10,32 ++ lvx v29,r11,$sp ++ addi r11,r11,32 ++ lvx v30,r10,$sp ++ lvx v31,r11,$sp ++Lcbc_abort: ++ $POP r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp) ++ $POP r30,`$FRAME+$SIZE_T*0`($sp) ++ $POP r31,`$FRAME+$SIZE_T*1`($sp) ++ mtlr r0 ++ addi $sp,$sp,`$FRAME+$SIZE_T*2` ++ blr ++ .long 0 ++ .byte 0,12,0x04,1,0x80,2,6,0 ++ .long 0 ++.size 
.vpaes_cbc_encrypt,.-.vpaes_cbc_encrypt ++___ ++} ++{ ++my ($inp,$bits,$out)=map("r$_",(3..5)); ++my $dir="cr1"; ++my ($invlo,$invhi,$iptlo,$ipthi,$rcon) = map("v$_",(10..13,24)); ++ ++$code.=<<___; ++######################################################## ++## ## ++## AES key schedule ## ++## ## ++######################################################## ++.align 4 ++_vpaes_key_preheat: ++ mflr r8 ++ bl Lconsts ++ mtlr r8 ++ li r11, 0xc0 # Lk_inv ++ li r10, 0xd0 ++ li r9, 0xe0 # L_ipt ++ li r8, 0xf0 ++ ++ vspltisb v8,4 # 0x04..04 ++ vxor v9,v9,v9 # 0x00..00 ++ lvx $invlo, r12, r11 # Lk_inv ++ li r11, 0x120 ++ lvx $invhi, r12, r10 ++ li r10, 0x130 ++ lvx $iptlo, r12, r9 # Lk_ipt ++ li r9, 0x220 ++ lvx $ipthi, r12, r8 ++ li r8, 0x230 ++ ++ lvx v14, r12, r11 # Lk_sb1 ++ li r11, 0x240 ++ lvx v15, r12, r10 ++ li r10, 0x250 ++ ++ lvx v16, r12, r9 # Lk_dksd ++ li r9, 0x260 ++ lvx v17, r12, r8 ++ li r8, 0x270 ++ lvx v18, r12, r11 # Lk_dksb ++ li r11, 0x280 ++ lvx v19, r12, r10 ++ li r10, 0x290 ++ lvx v20, r12, r9 # Lk_dkse ++ li r9, 0x2a0 ++ lvx v21, r12, r8 ++ li r8, 0x2b0 ++ lvx v22, r12, r11 # Lk_dks9 ++ lvx v23, r12, r10 ++ ++ lvx v24, r12, r9 # Lk_rcon ++ lvx v25, 0, r12 # Lk_mc_forward[0] ++ lvx v26, r12, r8 # Lks63 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++.align 4 ++_vpaes_schedule_core: ++ mflr r7 ++ ++ bl _vpaes_key_preheat # load the tables ++ ++ #lvx v0, 0, $inp # vmovdqu (%rdi), %xmm0 # load key (unaligned) ++ neg r8, $inp # prepare for unaligned access ++ lvx v0, 0, $inp ++ addi $inp, $inp, 15 # 15 is not typo ++ ?lvsr $inpperm, 0, r8 # -$inp ++ lvx v6, 0, $inp # v6 serves as inptail ++ addi $inp, $inp, 8 ++ ?vperm v0, v0, v6, $inpperm ++ ++ # input transform ++ vmr v3, v0 # vmovdqa %xmm0, %xmm3 ++ bl _vpaes_schedule_transform ++ vmr v7, v0 # vmovdqa %xmm0, %xmm7 ++ ++ bne $dir, Lschedule_am_decrypting ++ ++ # encrypting, output zeroth round key after transform ++ li r8, 0x30 # mov \$0x30,%r8d ++ addi r10, r12, 0x80 # lea .Lk_sr(%rip),%r10 ++ ++ 
?lvsr $outperm, 0, $out # prepare for unaligned access ++ vnor $outmask, v9, v9 # 0xff..ff ++ lvx $outhead, 0, $out ++ ?vperm $outmask, v9, $outmask, $outperm ++ ++ #stvx v0, 0, $out # vmovdqu %xmm0, (%rdx) ++ vperm v1, v0, v0, $outperm # rotate right/left ++ vsel v2, $outhead, v1, $outmask ++ vmr $outhead, v1 ++ stvx v2, 0, $out ++ b Lschedule_go ++ ++Lschedule_am_decrypting: ++ srwi r8, $bits, 1 # shr \$1,%r8d ++ andi. r8, r8, 32 # and \$32,%r8d ++ xori r8, r8, 32 # xor \$32,%r8d # nbits==192?0:32 ++ addi r10, r12, 0x80 # lea .Lk_sr(%rip),%r10 ++ # decrypting, output zeroth round key after shiftrows ++ lvx v1, r8, r10 # vmovdqa (%r8,%r10), %xmm1 ++ vperm v4, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3 ++ ++ neg r0, $out # prepare for unaligned access ++ ?lvsl $outperm, 0, r0 ++ addi $out, $out, 15 # 15 is not typo ++ vnor $outmask, v9, v9 # 0xff..ff ++ lvx $outhead, 0, $out ++ ?vperm $outmask, $outmask, v9, $outperm ++ ++ #stvx v4, 0, $out # vmovdqu %xmm3, (%rdx) ++ vperm v4, v4, v4, $outperm # rotate right/left ++ vsel v2, $outhead, v4, $outmask ++ vmr $outhead, v4 ++ stvx v2, 0, $out ++ xori r8, r8, 0x30 # xor \$0x30, %r8 ++ ++Lschedule_go: ++ cmplwi $bits, 192 # cmp \$192, %esi ++ bgt Lschedule_256 ++ beq Lschedule_192 ++ # 128: fall though ++ ++## ++## .schedule_128 ++## ++## 128-bit specific part of key schedule. ++## ++## This schedule is really simple, because all its parts ++## are accomplished by the subroutines. ++## ++Lschedule_128: ++ li r0, 10 # mov \$10, %esi ++ mtctr r0 ++ ++Loop_schedule_128: ++ bl _vpaes_schedule_round ++ bdz Lschedule_mangle_last # dec %esi ++ bl _vpaes_schedule_mangle # write output ++ b Loop_schedule_128 ++ ++## ++## .aes_schedule_192 ++## ++## 192-bit specific part of key schedule. ++## ++## The main body of this schedule is the same as the 128-bit ++## schedule, but with more smearing. The long, high side is ++## stored in %xmm7 as before, and the short, low side is in ++## the high bits of %xmm6. 
++## ++## This schedule is somewhat nastier, however, because each ++## round produces 192 bits of key material, or 1.5 round keys. ++## Therefore, on each cycle we do 2 rounds and produce 3 round ++## keys. ++## ++.align 4 ++Lschedule_192: ++ li r0, 4 # mov \$4, %esi ++ lvx v0, 0, $inp ++ ?vperm v0, v6, v0, $inpperm ++ ?vsldoi v0, v3, v0, 8 # vmovdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned) ++ bl _vpaes_schedule_transform # input transform ++ ?vsldoi v6, v0, v9, 8 ++ ?vsldoi v6, v9, v6, 8 # clobber "low" side with zeros ++ mtctr r0 ++ ++Loop_schedule_192: ++ bl _vpaes_schedule_round ++ ?vsldoi v0, v6, v0, 8 # vpalignr \$8,%xmm6,%xmm0,%xmm0 ++ bl _vpaes_schedule_mangle # save key n ++ bl _vpaes_schedule_192_smear ++ bl _vpaes_schedule_mangle # save key n+1 ++ bl _vpaes_schedule_round ++ bdz Lschedule_mangle_last # dec %esi ++ bl _vpaes_schedule_mangle # save key n+2 ++ bl _vpaes_schedule_192_smear ++ b Loop_schedule_192 ++ ++## ++## .aes_schedule_256 ++## ++## 256-bit specific part of key schedule. ++## ++## The structure here is very similar to the 128-bit ++## schedule, but with an additional "low side" in ++## %xmm6. The low side's rounds are the same as the ++## high side's, except no rcon and no rotation. ++## ++.align 4 ++Lschedule_256: ++ li r0, 7 # mov \$7, %esi ++ addi $inp, $inp, 8 ++ lvx v0, 0, $inp # vmovdqu 16(%rdi),%xmm0 # load key part 2 (unaligned) ++ ?vperm v0, v6, v0, $inpperm ++ bl _vpaes_schedule_transform # input transform ++ mtctr r0 ++ ++Loop_schedule_256: ++ bl _vpaes_schedule_mangle # output low result ++ vmr v6, v0 # vmovdqa %xmm0, %xmm6 # save cur_lo in xmm6 ++ ++ # high round ++ bl _vpaes_schedule_round ++ bdz Lschedule_mangle_last # dec %esi ++ bl _vpaes_schedule_mangle ++ ++ # low round. 
swap xmm7 and xmm6 ++ ?vspltw v0, v0, 3 # vpshufd \$0xFF, %xmm0, %xmm0 ++ vmr v5, v7 # vmovdqa %xmm7, %xmm5 ++ vmr v7, v6 # vmovdqa %xmm6, %xmm7 ++ bl _vpaes_schedule_low_round ++ vmr v7, v5 # vmovdqa %xmm5, %xmm7 ++ ++ b Loop_schedule_256 ++## ++## .aes_schedule_mangle_last ++## ++## Mangler for last round of key schedule ++## Mangles %xmm0 ++## when encrypting, outputs out(%xmm0) ^ 63 ++## when decrypting, outputs unskew(%xmm0) ++## ++## Always called right before return... jumps to cleanup and exits ++## ++.align 4 ++Lschedule_mangle_last: ++ # schedule last round key from xmm0 ++ li r11, 0x2e0 # lea .Lk_deskew(%rip),%r11 ++ li r9, 0x2f0 ++ bne $dir, Lschedule_mangle_last_dec ++ ++ # encrypting ++ lvx v1, r8, r10 # vmovdqa (%r8,%r10),%xmm1 ++ li r11, 0x2c0 # lea .Lk_opt(%rip), %r11 # prepare to output transform ++ li r9, 0x2d0 # prepare to output transform ++ vperm v0, v0, v0, v1 # vpshufb %xmm1, %xmm0, %xmm0 # output permute ++ ++ lvx $iptlo, r11, r12 # reload $ipt ++ lvx $ipthi, r9, r12 ++ addi $out, $out, 16 # add \$16, %rdx ++ vxor v0, v0, v26 # vpxor .Lk_s63(%rip), %xmm0, %xmm0 ++ bl _vpaes_schedule_transform # output transform ++ ++ #stvx v0, r0, $out # vmovdqu %xmm0, (%rdx) # save last key ++ vperm v0, v0, v0, $outperm # rotate right/left ++ vsel v2, $outhead, v0, $outmask ++ vmr $outhead, v0 ++ stvx v2, 0, $out ++ ++ addi $out, $out, 15 # 15 is not typo ++ lvx v1, 0, $out # redundant in aligned case ++ vsel v1, $outhead, v1, $outmask ++ stvx v1, 0, $out ++ b Lschedule_mangle_done ++ ++.align 4 ++Lschedule_mangle_last_dec: ++ lvx $iptlo, r11, r12 # reload $ipt ++ lvx $ipthi, r9, r12 ++ addi $out, $out, -16 # add \$-16, %rdx ++ vxor v0, v0, v26 # vpxor .Lk_s63(%rip), %xmm0, %xmm0 ++ bl _vpaes_schedule_transform # output transform ++ ++ #stvx v0, r0, $out # vmovdqu %xmm0, (%rdx) # save last key ++ vperm v0, v0, v0, $outperm # rotate right/left ++ vsel v2, $outhead, v0, $outmask ++ vmr $outhead, v0 ++ stvx v2, 0, $out ++ ++ addi $out, $out, -15 # -15 is not 
typo ++ lvx v1, 0, $out # redundant in aligned case ++ vsel v1, $outhead, v1, $outmask ++ stvx v1, 0, $out ++ ++Lschedule_mangle_done: ++ mtlr r7 ++ # cleanup ++ vxor v0, v0, v0 # vpxor %xmm0, %xmm0, %xmm0 ++ vxor v1, v1, v1 # vpxor %xmm1, %xmm1, %xmm1 ++ vxor v2, v2, v2 # vpxor %xmm2, %xmm2, %xmm2 ++ vxor v3, v3, v3 # vpxor %xmm3, %xmm3, %xmm3 ++ vxor v4, v4, v4 # vpxor %xmm4, %xmm4, %xmm4 ++ vxor v5, v5, v5 # vpxor %xmm5, %xmm5, %xmm5 ++ vxor v6, v6, v6 # vpxor %xmm6, %xmm6, %xmm6 ++ vxor v7, v7, v7 # vpxor %xmm7, %xmm7, %xmm7 ++ ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++## ++## .aes_schedule_192_smear ++## ++## Smear the short, low side in the 192-bit key schedule. ++## ++## Inputs: ++## %xmm7: high side, b a x y ++## %xmm6: low side, d c 0 0 ++## %xmm13: 0 ++## ++## Outputs: ++## %xmm6: b+c+d b+c 0 0 ++## %xmm0: b+c+d b+c b a ++## ++.align 4 ++_vpaes_schedule_192_smear: ++ ?vspltw v0, v7, 3 ++ ?vsldoi v1, v9, v6, 12 # vpshufd \$0x80, %xmm6, %xmm1 # d c 0 0 -> c 0 0 0 ++ ?vsldoi v0, v7, v0, 8 # vpshufd \$0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a ++ vxor v6, v6, v1 # vpxor %xmm1, %xmm6, %xmm6 # -> c+d c 0 0 ++ vxor v6, v6, v0 # vpxor %xmm0, %xmm6, %xmm6 # -> b+c+d b+c b a ++ vmr v0, v6 ++ ?vsldoi v6, v6, v9, 8 ++ ?vsldoi v6, v9, v6, 8 # clobber low side with zeros ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++## ++## .aes_schedule_round ++## ++## Runs one main round of the key schedule on %xmm0, %xmm7 ++## ++## Specifically, runs subbytes on the high dword of %xmm0 ++## then rotates it by one byte and xors into the low dword of ++## %xmm7. ++## ++## Adds rcon from low byte of %xmm8, then rotates %xmm8 for ++## next rcon. ++## ++## Smears the dwords of %xmm7 by xoring the low into the ++## second low, result into third, result into highest. ++## ++## Returns results in %xmm7 = %xmm0. ++## Clobbers %xmm1-%xmm4, %r11. 
++## ++.align 4 ++_vpaes_schedule_round: ++ # extract rcon from xmm8 ++ #vxor v4, v4, v4 # vpxor %xmm4, %xmm4, %xmm4 ++ ?vsldoi v1, $rcon, v9, 15 # vpalignr \$15, %xmm8, %xmm4, %xmm1 ++ ?vsldoi $rcon, $rcon, $rcon, 15 # vpalignr \$15, %xmm8, %xmm8, %xmm8 ++ vxor v7, v7, v1 # vpxor %xmm1, %xmm7, %xmm7 ++ ++ # rotate ++ ?vspltw v0, v0, 3 # vpshufd \$0xFF, %xmm0, %xmm0 ++ ?vsldoi v0, v0, v0, 1 # vpalignr \$1, %xmm0, %xmm0, %xmm0 ++ ++ # fall through... ++ ++ # low round: same as high round, but no rotation and no rcon. ++_vpaes_schedule_low_round: ++ # smear xmm7 ++ ?vsldoi v1, v9, v7, 12 # vpslldq \$4, %xmm7, %xmm1 ++ vxor v7, v7, v1 # vpxor %xmm1, %xmm7, %xmm7 ++ vspltisb v1, 0x0f # 0x0f..0f ++ ?vsldoi v4, v9, v7, 8 # vpslldq \$8, %xmm7, %xmm4 ++ ++ # subbytes ++ vand v1, v1, v0 # vpand %xmm9, %xmm0, %xmm1 # 0 = k ++ vsrb v0, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i ++ vxor v7, v7, v4 # vpxor %xmm4, %xmm7, %xmm7 ++ vperm v2, $invhi, v9, v1 # vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k ++ vxor v1, v1, v0 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j ++ vperm v3, $invlo, v9, v0 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i ++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k ++ vperm v4, $invlo, v9, v1 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j ++ vxor v7, v7, v26 # vpxor .Lk_s63(%rip), %xmm7, %xmm7 ++ vperm v3, $invlo, v9, v3 # vpshufb %xmm3, %xmm10, %xmm3 # 2 = 1/iak ++ vxor v4, v4, v2 # vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k ++ vperm v2, $invlo, v9, v4 # vpshufb %xmm4, %xmm10, %xmm2 # 3 = 1/jak ++ vxor v3, v3, v1 # vpxor %xmm1, %xmm3, %xmm3 # 2 = io ++ vxor v2, v2, v0 # vpxor %xmm0, %xmm2, %xmm2 # 3 = jo ++ vperm v4, v15, v9, v3 # vpshufb %xmm3, %xmm13, %xmm4 # 4 = sbou ++ vperm v1, v14, v9, v2 # vpshufb %xmm2, %xmm12, %xmm1 # 0 = sb1t ++ vxor v1, v1, v4 # vpxor %xmm4, %xmm1, %xmm1 # 0 = sbox output ++ ++ # add in smeared stuff ++ vxor v0, v1, v7 # vpxor %xmm7, %xmm1, %xmm0 ++ vxor v7, v1, v7 # vmovdqa %xmm0, %xmm7 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 
++ ++## ++## .aes_schedule_transform ++## ++## Linear-transform %xmm0 according to tables at (%r11) ++## ++## Requires that %xmm9 = 0x0F0F... as in preheat ++## Output in %xmm0 ++## Clobbers %xmm2 ++## ++.align 4 ++_vpaes_schedule_transform: ++ #vand v1, v0, v9 # vpand %xmm9, %xmm0, %xmm1 ++ vsrb v2, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 ++ # vmovdqa (%r11), %xmm2 # lo ++ vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm2 ++ # vmovdqa 16(%r11), %xmm1 # hi ++ vperm v2, $ipthi, $ipthi, v2 # vpshufb %xmm0, %xmm1, %xmm0 ++ vxor v0, v0, v2 # vpxor %xmm2, %xmm0, %xmm0 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++## ++## .aes_schedule_mangle ++## ++## Mangle xmm0 from (basis-transformed) standard version ++## to our version. ++## ++## On encrypt, ++## xor with 0x63 ++## multiply by circulant 0,1,1,1 ++## apply shiftrows transform ++## ++## On decrypt, ++## xor with 0x63 ++## multiply by "inverse mixcolumns" circulant E,B,D,9 ++## deskew ++## apply shiftrows transform ++## ++## ++## Writes out to (%rdx), and increments or decrements it ++## Keeps track of round number mod 4 in %r8 ++## Preserves xmm0 ++## Clobbers xmm1-xmm5 ++## ++.align 4 ++_vpaes_schedule_mangle: ++ #vmr v4, v0 # vmovdqa %xmm0, %xmm4 # save xmm0 for later ++ # vmovdqa .Lk_mc_forward(%rip),%xmm5 ++ bne $dir, Lschedule_mangle_dec ++ ++ # encrypting ++ vxor v4, v0, v26 # vpxor .Lk_s63(%rip), %xmm0, %xmm4 ++ addi $out, $out, 16 # add \$16, %rdx ++ vperm v4, v4, v4, v25 # vpshufb %xmm5, %xmm4, %xmm4 ++ vperm v1, v4, v4, v25 # vpshufb %xmm5, %xmm4, %xmm1 ++ vperm v3, v1, v1, v25 # vpshufb %xmm5, %xmm1, %xmm3 ++ vxor v4, v4, v1 # vpxor %xmm1, %xmm4, %xmm4 ++ lvx v1, r8, r10 # vmovdqa (%r8,%r10), %xmm1 ++ vxor v3, v3, v4 # vpxor %xmm4, %xmm3, %xmm3 ++ ++ vperm v3, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3 ++ addi r8, r8, -16 # add \$-16, %r8 ++ andi. 
r8, r8, 0x30 # and \$0x30, %r8 ++ ++ #stvx v3, 0, $out # vmovdqu %xmm3, (%rdx) ++ vperm v1, v3, v3, $outperm # rotate right/left ++ vsel v2, $outhead, v1, $outmask ++ vmr $outhead, v1 ++ stvx v2, 0, $out ++ blr ++ ++.align 4 ++Lschedule_mangle_dec: ++ # inverse mix columns ++ # lea .Lk_dksd(%rip),%r11 ++ vsrb v1, v0, v8 # vpsrlb \$4, %xmm4, %xmm1 # 1 = hi ++ #and v4, v0, v9 # vpand %xmm9, %xmm4, %xmm4 # 4 = lo ++ ++ # vmovdqa 0x00(%r11), %xmm2 ++ vperm v2, v16, v16, v0 # vpshufb %xmm4, %xmm2, %xmm2 ++ # vmovdqa 0x10(%r11), %xmm3 ++ vperm v3, v17, v17, v1 # vpshufb %xmm1, %xmm3, %xmm3 ++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 ++ vperm v3, v3, v9, v25 # vpshufb %xmm5, %xmm3, %xmm3 ++ ++ # vmovdqa 0x20(%r11), %xmm2 ++ vperm v2, v18, v18, v0 # vpshufb %xmm4, %xmm2, %xmm2 ++ vxor v2, v2, v3 # vpxor %xmm3, %xmm2, %xmm2 ++ # vmovdqa 0x30(%r11), %xmm3 ++ vperm v3, v19, v19, v1 # vpshufb %xmm1, %xmm3, %xmm3 ++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 ++ vperm v3, v3, v9, v25 # vpshufb %xmm5, %xmm3, %xmm3 ++ ++ # vmovdqa 0x40(%r11), %xmm2 ++ vperm v2, v20, v20, v0 # vpshufb %xmm4, %xmm2, %xmm2 ++ vxor v2, v2, v3 # vpxor %xmm3, %xmm2, %xmm2 ++ # vmovdqa 0x50(%r11), %xmm3 ++ vperm v3, v21, v21, v1 # vpshufb %xmm1, %xmm3, %xmm3 ++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 ++ ++ # vmovdqa 0x60(%r11), %xmm2 ++ vperm v2, v22, v22, v0 # vpshufb %xmm4, %xmm2, %xmm2 ++ vperm v3, v3, v9, v25 # vpshufb %xmm5, %xmm3, %xmm3 ++ # vmovdqa 0x70(%r11), %xmm4 ++ vperm v4, v23, v23, v1 # vpshufb %xmm1, %xmm4, %xmm4 ++ lvx v1, r8, r10 # vmovdqa (%r8,%r10), %xmm1 ++ vxor v2, v2, v3 # vpxor %xmm3, %xmm2, %xmm2 ++ vxor v3, v4, v2 # vpxor %xmm2, %xmm4, %xmm3 ++ ++ addi $out, $out, -16 # add \$-16, %rdx ++ ++ vperm v3, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3 ++ addi r8, r8, -16 # add \$-16, %r8 ++ andi. 
r8, r8, 0x30 # and \$0x30, %r8 ++ ++ #stvx v3, 0, $out # vmovdqu %xmm3, (%rdx) ++ vperm v1, v3, v3, $outperm # rotate right/left ++ vsel v2, $outhead, v1, $outmask ++ vmr $outhead, v1 ++ stvx v2, 0, $out ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++.globl .vpaes_set_encrypt_key ++.align 5 ++.vpaes_set_encrypt_key: ++ $STU $sp,-$FRAME($sp) ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mflr r0 ++ mfspr r6, 256 # save vrsave ++ stvx v20,r10,$sp ++ addi r10,r10,32 ++ stvx v21,r11,$sp ++ addi r11,r11,32 ++ stvx v22,r10,$sp ++ addi r10,r10,32 ++ stvx v23,r11,$sp ++ addi r11,r11,32 ++ stvx v24,r10,$sp ++ addi r10,r10,32 ++ stvx v25,r11,$sp ++ addi r11,r11,32 ++ stvx v26,r10,$sp ++ addi r10,r10,32 ++ stvx v27,r11,$sp ++ addi r11,r11,32 ++ stvx v28,r10,$sp ++ addi r10,r10,32 ++ stvx v29,r11,$sp ++ addi r11,r11,32 ++ stvx v30,r10,$sp ++ stvx v31,r11,$sp ++ stw r6,`$FRAME-4`($sp) # save vrsave ++ li r7, -1 ++ $PUSH r0, `$FRAME+$LRSAVE`($sp) ++ mtspr 256, r7 # preserve all AltiVec registers ++ ++ srwi r9, $bits, 5 # shr \$5,%eax ++ addi r9, r9, 6 # add \$5,%eax ++ stw r9, 240($out) # mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; ++ ++ cmplw $dir, $bits, $bits # set encrypt direction ++ li r8, 0x30 # mov \$0x30,%r8d ++ bl _vpaes_schedule_core ++ ++ $POP r0, `$FRAME+$LRSAVE`($sp) ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mtspr 256, r6 # restore vrsave ++ mtlr r0 ++ xor r3, r3, r3 ++ lvx v20,r10,$sp ++ addi r10,r10,32 ++ lvx v21,r11,$sp ++ addi r11,r11,32 ++ lvx v22,r10,$sp ++ addi r10,r10,32 ++ lvx v23,r11,$sp ++ addi r11,r11,32 ++ lvx v24,r10,$sp ++ addi r10,r10,32 ++ lvx v25,r11,$sp ++ addi r11,r11,32 ++ lvx v26,r10,$sp ++ addi r10,r10,32 ++ lvx v27,r11,$sp ++ addi r11,r11,32 ++ lvx v28,r10,$sp ++ addi r10,r10,32 ++ lvx v29,r11,$sp ++ addi r11,r11,32 ++ lvx v30,r10,$sp ++ lvx v31,r11,$sp ++ addi $sp,$sp,$FRAME ++ blr ++ .long 0 ++ .byte 0,12,0x04,1,0x80,0,3,0 ++ .long 0 ++.size .vpaes_set_encrypt_key,.-.vpaes_set_encrypt_key ++ ++.globl 
.vpaes_set_decrypt_key ++.align 4 ++.vpaes_set_decrypt_key: ++ $STU $sp,-$FRAME($sp) ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mflr r0 ++ mfspr r6, 256 # save vrsave ++ stvx v20,r10,$sp ++ addi r10,r10,32 ++ stvx v21,r11,$sp ++ addi r11,r11,32 ++ stvx v22,r10,$sp ++ addi r10,r10,32 ++ stvx v23,r11,$sp ++ addi r11,r11,32 ++ stvx v24,r10,$sp ++ addi r10,r10,32 ++ stvx v25,r11,$sp ++ addi r11,r11,32 ++ stvx v26,r10,$sp ++ addi r10,r10,32 ++ stvx v27,r11,$sp ++ addi r11,r11,32 ++ stvx v28,r10,$sp ++ addi r10,r10,32 ++ stvx v29,r11,$sp ++ addi r11,r11,32 ++ stvx v30,r10,$sp ++ stvx v31,r11,$sp ++ stw r6,`$FRAME-4`($sp) # save vrsave ++ li r7, -1 ++ $PUSH r0, `$FRAME+$LRSAVE`($sp) ++ mtspr 256, r7 # preserve all AltiVec registers ++ ++ srwi r9, $bits, 5 # shr \$5,%eax ++ addi r9, r9, 6 # add \$5,%eax ++ stw r9, 240($out) # mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; ++ ++ slwi r9, r9, 4 # shl \$4,%eax ++ add $out, $out, r9 # lea (%rdx,%rax),%rdx ++ ++ cmplwi $dir, $bits, 0 # set decrypt direction ++ srwi r8, $bits, 1 # shr \$1,%r8d ++ andi. 
r8, r8, 32 # and \$32,%r8d ++ xori r8, r8, 32 # xor \$32,%r8d # nbits==192?0:32 ++ bl _vpaes_schedule_core ++ ++ $POP r0, `$FRAME+$LRSAVE`($sp) ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mtspr 256, r6 # restore vrsave ++ mtlr r0 ++ xor r3, r3, r3 ++ lvx v20,r10,$sp ++ addi r10,r10,32 ++ lvx v21,r11,$sp ++ addi r11,r11,32 ++ lvx v22,r10,$sp ++ addi r10,r10,32 ++ lvx v23,r11,$sp ++ addi r11,r11,32 ++ lvx v24,r10,$sp ++ addi r10,r10,32 ++ lvx v25,r11,$sp ++ addi r11,r11,32 ++ lvx v26,r10,$sp ++ addi r10,r10,32 ++ lvx v27,r11,$sp ++ addi r11,r11,32 ++ lvx v28,r10,$sp ++ addi r10,r10,32 ++ lvx v29,r11,$sp ++ addi r11,r11,32 ++ lvx v30,r10,$sp ++ lvx v31,r11,$sp ++ addi $sp,$sp,$FRAME ++ blr ++ .long 0 ++ .byte 0,12,0x04,1,0x80,0,3,0 ++ .long 0 ++.size .vpaes_set_decrypt_key,.-.vpaes_set_decrypt_key ++___ ++} ++ ++my $consts=1; ++foreach (split("\n",$code)) { ++ s/\`([^\`]*)\`/eval $1/geo; ++ ++ # constants table endian-specific conversion ++ if ($consts && m/\.long\s+(.+)\s+(\?[a-z]*)$/o) { ++ my $conv=$2; ++ my @bytes=(); ++ ++ # convert to endian-agnostic format ++ foreach (split(/,\s+/,$1)) { ++ my $l = /^0/?oct:int; ++ push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; ++ } ++ ++ # little-endian conversion ++ if ($flavour =~ /le$/o) { ++ SWITCH: for($conv) { ++ /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; ++ /\?rev/ && do { @bytes=reverse(@bytes); last; }; ++ } ++ } ++ ++ #emit ++ print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n"; ++ next; ++ } ++ $consts=0 if (m/Lconsts:/o); # end of table ++ ++ # instructions prefixed with '?' are endian-specific and need ++ # to be adjusted accordingly... 
++ if ($flavour =~ /le$/o) { # little-endian ++ s/\?lvsr/lvsl/o or ++ s/\?lvsl/lvsr/o or ++ s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or ++ s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or ++ s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o; ++ } else { # big-endian ++ s/\?([a-z]+)/$1/o; ++ } ++ ++ print $_,"\n"; ++} ++ ++close STDOUT; +diff -up openssl-1.0.1i/crypto/aes/Makefile.ppc-asm openssl-1.0.1i/crypto/aes/Makefile +--- openssl-1.0.1i/crypto/aes/Makefile.ppc-asm 2014-08-06 23:18:31.000000000 +0200 ++++ openssl-1.0.1i/crypto/aes/Makefile 2014-08-13 19:46:21.092578104 +0200 +@@ -71,6 +71,10 @@ aes-sparcv9.s: asm/aes-sparcv9.pl + + aes-ppc.s: asm/aes-ppc.pl + $(PERL) asm/aes-ppc.pl $(PERLASM_SCHEME) $@ ++vpaes-ppc.s: asm/vpaes-ppc.pl ++ $(PERL) asm/vpaes-ppc.pl $(PERLASM_SCHEME) $@ ++aesp8-ppc.s: asm/aesp8-ppc.pl ++ $(PERL) asm/aesp8-ppc.pl $(PERLASM_SCHEME) $@ + + aes-parisc.s: asm/aes-parisc.pl + $(PERL) asm/aes-parisc.pl $(PERLASM_SCHEME) $@ +diff -up openssl-1.0.1i/crypto/bn/asm/ppc-mont.pl.ppc-asm openssl-1.0.1i/crypto/bn/asm/ppc-mont.pl +--- openssl-1.0.1i/crypto/bn/asm/ppc-mont.pl.ppc-asm 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/crypto/bn/asm/ppc-mont.pl 2014-08-13 19:46:21.093578128 +0200 +@@ -325,6 +325,7 @@ Lcopy: ; copy or in-place refresh + .long 0 + .byte 0,12,4,0,0x80,12,6,0 + .long 0 ++.size .bn_mul_mont_int,.-.bn_mul_mont_int + + .asciz "Montgomery Multiplication for PPC, CRYPTOGAMS by " + ___ +diff -up openssl-1.0.1i/crypto/bn/asm/ppc.pl.ppc-asm openssl-1.0.1i/crypto/bn/asm/ppc.pl +--- openssl-1.0.1i/crypto/bn/asm/ppc.pl.ppc-asm 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/crypto/bn/asm/ppc.pl 2014-08-13 19:46:21.094578151 +0200 +@@ -392,6 +392,7 @@ $data=< for the OpenSSL ++# Written by Andy Polyakov for the OpenSSL + # project. The module is, however, dual licensed under OpenSSL and + # CRYPTOGAMS licenses depending on where you obtain it. 
For further + # details see http://www.openssl.org/~appro/cryptogams/. +@@ -65,6 +65,14 @@ + # others alternative would be to break dependence on upper halves of + # GPRs by sticking to 32-bit integer operations... + ++# December 2012 ++ ++# Remove above mentioned dependence on GPRs' upper halves in 32-bit ++# build. No signal masking overhead, but integer instructions are ++# *more* numerous... It's still "universally" faster than 32-bit ++# ppc-mont.pl, but improvement coefficient is not as impressive ++# for longer keys... ++ + $flavour = shift; + + if ($flavour =~ /32/) { +@@ -110,6 +118,9 @@ $tp="r10"; + $j="r11"; + $i="r12"; + # non-volatile registers ++$c1="r19"; ++$n1="r20"; ++$a1="r21"; + $nap_d="r22"; # interleaved ap and np in double format + $a0="r23"; # ap[0] + $t0="r24"; # temporary registers +@@ -180,8 +191,8 @@ $T3a="f30"; $T3b="f31"; + # . . + # +-------------------------------+ + # . . +-# -12*size_t +-------------------------------+ +-# | 10 saved gpr, r22-r31 | ++# -13*size_t +-------------------------------+ ++# | 13 saved gpr, r19-r31 | + # . . + # . . 
+ # -12*8 +-------------------------------+ +@@ -215,6 +226,9 @@ $code=<<___; + mr $i,$sp + $STUX $sp,$sp,$tp ; alloca + ++ $PUSH r19,`-12*8-13*$SIZE_T`($i) ++ $PUSH r20,`-12*8-12*$SIZE_T`($i) ++ $PUSH r21,`-12*8-11*$SIZE_T`($i) + $PUSH r22,`-12*8-10*$SIZE_T`($i) + $PUSH r23,`-12*8-9*$SIZE_T`($i) + $PUSH r24,`-12*8-8*$SIZE_T`($i) +@@ -237,40 +251,26 @@ $code=<<___; + stfd f29,`-3*8`($i) + stfd f30,`-2*8`($i) + stfd f31,`-1*8`($i) +-___ +-$code.=<<___ if ($SIZE_T==8); +- ld $a0,0($ap) ; pull ap[0] value +- ld $n0,0($n0) ; pull n0[0] value +- ld $t3,0($bp) ; bp[0] +-___ +-$code.=<<___ if ($SIZE_T==4); +- mr $t1,$n0 +- lwz $a0,0($ap) ; pull ap[0,1] value +- lwz $t0,4($ap) +- lwz $n0,0($t1) ; pull n0[0,1] value +- lwz $t1,4($t1) +- lwz $t3,0($bp) ; bp[0,1] +- lwz $t2,4($bp) +- insrdi $a0,$t0,32,0 +- insrdi $n0,$t1,32,0 +- insrdi $t3,$t2,32,0 +-___ +-$code.=<<___; ++ + addi $tp,$sp,`$FRAME+$TRANSFER+8+64` + li $i,-64 + add $nap_d,$tp,$num + and $nap_d,$nap_d,$i ; align to 64 bytes +- +- mulld $t7,$a0,$t3 ; ap[0]*bp[0] + ; nap_d is off by 1, because it's used with stfdu/lfdu + addi $nap_d,$nap_d,-8 + srwi $j,$num,`3+1` ; counter register, num/2 +- mulld $t7,$t7,$n0 ; tp[0]*n0 + addi $j,$j,-1 + addi $tp,$sp,`$FRAME+$TRANSFER-8` + li $carry,0 + mtctr $j ++___ ++ ++$code.=<<___ if ($SIZE_T==8); ++ ld $a0,0($ap) ; pull ap[0] value ++ ld $t3,0($bp) ; bp[0] ++ ld $n0,0($n0) ; pull n0[0] value + ++ mulld $t7,$a0,$t3 ; ap[0]*bp[0] + ; transfer bp[0] to FPU as 4x16-bit values + extrdi $t0,$t3,16,48 + extrdi $t1,$t3,16,32 +@@ -280,6 +280,8 @@ $code.=<<___; + std $t1,`$FRAME+8`($sp) + std $t2,`$FRAME+16`($sp) + std $t3,`$FRAME+24`($sp) ++ ++ mulld $t7,$t7,$n0 ; tp[0]*n0 + ; transfer (ap[0]*bp[0])*n0 to FPU as 4x16-bit values + extrdi $t4,$t7,16,48 + extrdi $t5,$t7,16,32 +@@ -289,21 +291,61 @@ $code.=<<___; + std $t5,`$FRAME+40`($sp) + std $t6,`$FRAME+48`($sp) + std $t7,`$FRAME+56`($sp) +-___ +-$code.=<<___ if ($SIZE_T==8); +- lwz $t0,4($ap) ; load a[j] as 32-bit word pair +- lwz 
$t1,0($ap) +- lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair ++ ++ extrdi $t0,$a0,32,32 ; lwz $t0,4($ap) ++ extrdi $t1,$a0,32,0 ; lwz $t1,0($ap) ++ lwz $t2,12($ap) ; load a[1] as 32-bit word pair + lwz $t3,8($ap) +- lwz $t4,4($np) ; load n[j] as 32-bit word pair ++ lwz $t4,4($np) ; load n[0] as 32-bit word pair + lwz $t5,0($np) +- lwz $t6,12($np) ; load n[j+1] as 32-bit word pair ++ lwz $t6,12($np) ; load n[1] as 32-bit word pair + lwz $t7,8($np) + ___ + $code.=<<___ if ($SIZE_T==4); +- lwz $t0,0($ap) ; load a[j..j+3] as 32-bit word pairs +- lwz $t1,4($ap) +- lwz $t2,8($ap) ++ lwz $a0,0($ap) ; pull ap[0,1] value ++ mr $n1,$n0 ++ lwz $a1,4($ap) ++ li $c1,0 ++ lwz $t1,0($bp) ; bp[0,1] ++ lwz $t3,4($bp) ++ lwz $n0,0($n1) ; pull n0[0,1] value ++ lwz $n1,4($n1) ++ ++ mullw $t4,$a0,$t1 ; mulld ap[0]*bp[0] ++ mulhwu $t5,$a0,$t1 ++ mullw $t6,$a1,$t1 ++ mullw $t7,$a0,$t3 ++ add $t5,$t5,$t6 ++ add $t5,$t5,$t7 ++ ; transfer bp[0] to FPU as 4x16-bit values ++ extrwi $t0,$t1,16,16 ++ extrwi $t1,$t1,16,0 ++ extrwi $t2,$t3,16,16 ++ extrwi $t3,$t3,16,0 ++ std $t0,`$FRAME+0`($sp) ; yes, std in 32-bit build ++ std $t1,`$FRAME+8`($sp) ++ std $t2,`$FRAME+16`($sp) ++ std $t3,`$FRAME+24`($sp) ++ ++ mullw $t0,$t4,$n0 ; mulld tp[0]*n0 ++ mulhwu $t1,$t4,$n0 ++ mullw $t2,$t5,$n0 ++ mullw $t3,$t4,$n1 ++ add $t1,$t1,$t2 ++ add $t1,$t1,$t3 ++ ; transfer (ap[0]*bp[0])*n0 to FPU as 4x16-bit values ++ extrwi $t4,$t0,16,16 ++ extrwi $t5,$t0,16,0 ++ extrwi $t6,$t1,16,16 ++ extrwi $t7,$t1,16,0 ++ std $t4,`$FRAME+32`($sp) ; yes, std in 32-bit build ++ std $t5,`$FRAME+40`($sp) ++ std $t6,`$FRAME+48`($sp) ++ std $t7,`$FRAME+56`($sp) ++ ++ mr $t0,$a0 ; lwz $t0,0($ap) ++ mr $t1,$a1 ; lwz $t1,4($ap) ++ lwz $t2,8($ap) ; load a[j..j+3] as 32-bit word pairs + lwz $t3,12($ap) + lwz $t4,0($np) ; load n[j..j+3] as 32-bit word pairs + lwz $t5,4($np) +@@ -319,7 +361,7 @@ $code.=<<___; + lfd $nb,`$FRAME+40`($sp) + lfd $nc,`$FRAME+48`($sp) + lfd $nd,`$FRAME+56`($sp) +- std $t0,`$FRAME+64`($sp) ++ std 
$t0,`$FRAME+64`($sp) ; yes, std even in 32-bit build + std $t1,`$FRAME+72`($sp) + std $t2,`$FRAME+80`($sp) + std $t3,`$FRAME+88`($sp) +@@ -441,7 +483,7 @@ $code.=<<___ if ($SIZE_T==4); + lwz $t7,12($np) + ___ + $code.=<<___; +- std $t0,`$FRAME+64`($sp) ++ std $t0,`$FRAME+64`($sp) ; yes, std even in 32-bit build + std $t1,`$FRAME+72`($sp) + std $t2,`$FRAME+80`($sp) + std $t3,`$FRAME+88`($sp) +@@ -449,6 +491,9 @@ $code.=<<___; + std $t5,`$FRAME+104`($sp) + std $t6,`$FRAME+112`($sp) + std $t7,`$FRAME+120`($sp) ++___ ++if ($SIZE_T==8 or $flavour =~ /osx/) { ++$code.=<<___; + ld $t0,`$FRAME+0`($sp) + ld $t1,`$FRAME+8`($sp) + ld $t2,`$FRAME+16`($sp) +@@ -457,6 +502,20 @@ $code.=<<___; + ld $t5,`$FRAME+40`($sp) + ld $t6,`$FRAME+48`($sp) + ld $t7,`$FRAME+56`($sp) ++___ ++} else { ++$code.=<<___; ++ lwz $t1,`$FRAME+0`($sp) ++ lwz $t0,`$FRAME+4`($sp) ++ lwz $t3,`$FRAME+8`($sp) ++ lwz $t2,`$FRAME+12`($sp) ++ lwz $t5,`$FRAME+16`($sp) ++ lwz $t4,`$FRAME+20`($sp) ++ lwz $t7,`$FRAME+24`($sp) ++ lwz $t6,`$FRAME+28`($sp) ++___ ++} ++$code.=<<___; + lfd $A0,`$FRAME+64`($sp) + lfd $A1,`$FRAME+72`($sp) + lfd $A2,`$FRAME+80`($sp) +@@ -488,7 +547,9 @@ $code.=<<___; + fmadd $T0b,$A0,$bb,$dotb + stfd $A2,24($nap_d) ; save a[j+1] in double format + stfd $A3,32($nap_d) +- ++___ ++if ($SIZE_T==8 or $flavour =~ /osx/) { ++$code.=<<___; + fmadd $T1a,$A0,$bc,$T1a + fmadd $T1b,$A0,$bd,$T1b + fmadd $T2a,$A1,$bc,$T2a +@@ -561,11 +622,123 @@ $code.=<<___; + stfd $T3b,`$FRAME+56`($sp) + std $t0,8($tp) ; tp[j-1] + stdu $t4,16($tp) ; tp[j] ++___ ++} else { ++$code.=<<___; ++ fmadd $T1a,$A0,$bc,$T1a ++ fmadd $T1b,$A0,$bd,$T1b ++ addc $t0,$t0,$carry ++ adde $t1,$t1,$c1 ++ srwi $carry,$t0,16 ++ fmadd $T2a,$A1,$bc,$T2a ++ fmadd $T2b,$A1,$bd,$T2b ++ stfd $N0,40($nap_d) ; save n[j] in double format ++ stfd $N1,48($nap_d) ++ srwi $c1,$t1,16 ++ insrwi $carry,$t1,16,0 ++ fmadd $T3a,$A2,$bc,$T3a ++ fmadd $T3b,$A2,$bd,$T3b ++ addc $t2,$t2,$carry ++ adde $t3,$t3,$c1 ++ srwi $carry,$t2,16 ++ fmul $dota,$A3,$bc ++ 
fmul $dotb,$A3,$bd ++ stfd $N2,56($nap_d) ; save n[j+1] in double format ++ stfdu $N3,64($nap_d) ++ insrwi $t0,$t2,16,0 ; 0..31 bits ++ srwi $c1,$t3,16 ++ insrwi $carry,$t3,16,0 ++ ++ fmadd $T1a,$N1,$na,$T1a ++ fmadd $T1b,$N1,$nb,$T1b ++ lwz $t3,`$FRAME+32`($sp) ; permuted $t1 ++ lwz $t2,`$FRAME+36`($sp) ; permuted $t0 ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ srwi $carry,$t4,16 ++ fmadd $T2a,$N2,$na,$T2a ++ fmadd $T2b,$N2,$nb,$T2b ++ srwi $c1,$t5,16 ++ insrwi $carry,$t5,16,0 ++ fmadd $T3a,$N3,$na,$T3a ++ fmadd $T3b,$N3,$nb,$T3b ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ fmadd $T0a,$N0,$na,$T0a ++ fmadd $T0b,$N0,$nb,$T0b ++ insrwi $t4,$t6,16,0 ; 32..63 bits ++ srwi $c1,$t7,16 ++ insrwi $carry,$t7,16,0 ++ ++ fmadd $T1a,$N0,$nc,$T1a ++ fmadd $T1b,$N0,$nd,$T1b ++ lwz $t7,`$FRAME+40`($sp) ; permuted $t3 ++ lwz $t6,`$FRAME+44`($sp) ; permuted $t2 ++ addc $t2,$t2,$carry ++ adde $t3,$t3,$c1 ++ srwi $carry,$t2,16 ++ fmadd $T2a,$N1,$nc,$T2a ++ fmadd $T2b,$N1,$nd,$T2b ++ stw $t0,12($tp) ; tp[j-1] ++ stw $t4,8($tp) ++ srwi $c1,$t3,16 ++ insrwi $carry,$t3,16,0 ++ fmadd $T3a,$N2,$nc,$T3a ++ fmadd $T3b,$N2,$nd,$T3b ++ lwz $t1,`$FRAME+48`($sp) ; permuted $t5 ++ lwz $t0,`$FRAME+52`($sp) ; permuted $t4 ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ fmadd $dota,$N3,$nc,$dota ++ fmadd $dotb,$N3,$nd,$dotb ++ insrwi $t2,$t6,16,0 ; 64..95 bits ++ srwi $c1,$t7,16 ++ insrwi $carry,$t7,16,0 ++ ++ fctid $T0a,$T0a ++ fctid $T0b,$T0b ++ lwz $t5,`$FRAME+56`($sp) ; permuted $t7 ++ lwz $t4,`$FRAME+60`($sp) ; permuted $t6 ++ addc $t0,$t0,$carry ++ adde $t1,$t1,$c1 ++ srwi $carry,$t0,16 ++ fctid $T1a,$T1a ++ fctid $T1b,$T1b ++ srwi $c1,$t1,16 ++ insrwi $carry,$t1,16,0 ++ fctid $T2a,$T2a ++ fctid $T2b,$T2b ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ srwi $carry,$t4,16 ++ fctid $T3a,$T3a ++ fctid $T3b,$T3b ++ insrwi $t0,$t4,16,0 ; 96..127 bits ++ srwi $c1,$t5,16 ++ insrwi $carry,$t5,16,0 ++ ++ stfd $T0a,`$FRAME+0`($sp) ++ stfd $T0b,`$FRAME+8`($sp) 
++ stfd $T1a,`$FRAME+16`($sp) ++ stfd $T1b,`$FRAME+24`($sp) ++ stfd $T2a,`$FRAME+32`($sp) ++ stfd $T2b,`$FRAME+40`($sp) ++ stfd $T3a,`$FRAME+48`($sp) ++ stfd $T3b,`$FRAME+56`($sp) ++ stw $t2,20($tp) ; tp[j] ++ stwu $t0,16($tp) ++___ ++} ++$code.=<<___; + bdnz- L1st + + fctid $dota,$dota + fctid $dotb,$dotb +- ++___ ++if ($SIZE_T==8 or $flavour =~ /osx/) { ++$code.=<<___; + ld $t0,`$FRAME+0`($sp) + ld $t1,`$FRAME+8`($sp) + ld $t2,`$FRAME+16`($sp) +@@ -611,33 +784,117 @@ $code.=<<___; + insrdi $t6,$t7,48,0 + srdi $ovf,$t7,48 + std $t6,8($tp) ; tp[num-1] ++___ ++} else { ++$code.=<<___; ++ lwz $t1,`$FRAME+0`($sp) ++ lwz $t0,`$FRAME+4`($sp) ++ lwz $t3,`$FRAME+8`($sp) ++ lwz $t2,`$FRAME+12`($sp) ++ lwz $t5,`$FRAME+16`($sp) ++ lwz $t4,`$FRAME+20`($sp) ++ lwz $t7,`$FRAME+24`($sp) ++ lwz $t6,`$FRAME+28`($sp) ++ stfd $dota,`$FRAME+64`($sp) ++ stfd $dotb,`$FRAME+72`($sp) + ++ addc $t0,$t0,$carry ++ adde $t1,$t1,$c1 ++ srwi $carry,$t0,16 ++ insrwi $carry,$t1,16,0 ++ srwi $c1,$t1,16 ++ addc $t2,$t2,$carry ++ adde $t3,$t3,$c1 ++ srwi $carry,$t2,16 ++ insrwi $t0,$t2,16,0 ; 0..31 bits ++ insrwi $carry,$t3,16,0 ++ srwi $c1,$t3,16 ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ srwi $carry,$t4,16 ++ insrwi $carry,$t5,16,0 ++ srwi $c1,$t5,16 ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ insrwi $t4,$t6,16,0 ; 32..63 bits ++ insrwi $carry,$t7,16,0 ++ srwi $c1,$t7,16 ++ stw $t0,12($tp) ; tp[j-1] ++ stw $t4,8($tp) ++ ++ lwz $t3,`$FRAME+32`($sp) ; permuted $t1 ++ lwz $t2,`$FRAME+36`($sp) ; permuted $t0 ++ lwz $t7,`$FRAME+40`($sp) ; permuted $t3 ++ lwz $t6,`$FRAME+44`($sp) ; permuted $t2 ++ lwz $t1,`$FRAME+48`($sp) ; permuted $t5 ++ lwz $t0,`$FRAME+52`($sp) ; permuted $t4 ++ lwz $t5,`$FRAME+56`($sp) ; permuted $t7 ++ lwz $t4,`$FRAME+60`($sp) ; permuted $t6 ++ ++ addc $t2,$t2,$carry ++ adde $t3,$t3,$c1 ++ srwi $carry,$t2,16 ++ insrwi $carry,$t3,16,0 ++ srwi $c1,$t3,16 ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ insrwi $t2,$t6,16,0 ; 64..95 bits 
++ insrwi $carry,$t7,16,0 ++ srwi $c1,$t7,16 ++ addc $t0,$t0,$carry ++ adde $t1,$t1,$c1 ++ srwi $carry,$t0,16 ++ insrwi $carry,$t1,16,0 ++ srwi $c1,$t1,16 ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ srwi $carry,$t4,16 ++ insrwi $t0,$t4,16,0 ; 96..127 bits ++ insrwi $carry,$t5,16,0 ++ srwi $c1,$t5,16 ++ stw $t2,20($tp) ; tp[j] ++ stwu $t0,16($tp) ++ ++ lwz $t7,`$FRAME+64`($sp) ++ lwz $t6,`$FRAME+68`($sp) ++ lwz $t5,`$FRAME+72`($sp) ++ lwz $t4,`$FRAME+76`($sp) ++ ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ insrwi $carry,$t7,16,0 ++ srwi $c1,$t7,16 ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ ++ insrwi $t6,$t4,16,0 ++ srwi $t4,$t4,16 ++ insrwi $t4,$t5,16,0 ++ srwi $ovf,$t5,16 ++ stw $t6,12($tp) ; tp[num-1] ++ stw $t4,8($tp) ++___ ++} ++$code.=<<___; + slwi $t7,$num,2 + subf $nap_d,$t7,$nap_d ; rewind pointer + + li $i,8 ; i=1 + .align 5 + Louter: +-___ +-$code.=<<___ if ($SIZE_T==8); +- ldx $t3,$bp,$i ; bp[i] +-___ +-$code.=<<___ if ($SIZE_T==4); +- add $t0,$bp,$i +- lwz $t3,0($t0) ; bp[i,i+1] +- lwz $t0,4($t0) +- insrdi $t3,$t0,32,0 +-___ +-$code.=<<___; +- ld $t6,`$FRAME+$TRANSFER+8`($sp) ; tp[0] +- mulld $t7,$a0,$t3 ; ap[0]*bp[i] +- + addi $tp,$sp,`$FRAME+$TRANSFER` +- add $t7,$t7,$t6 ; ap[0]*bp[i]+tp[0] + li $carry,0 +- mulld $t7,$t7,$n0 ; tp[0]*n0 + mtctr $j ++___ ++$code.=<<___ if ($SIZE_T==8); ++ ldx $t3,$bp,$i ; bp[i] + ++ ld $t6,`$FRAME+$TRANSFER+8`($sp) ; tp[0] ++ mulld $t7,$a0,$t3 ; ap[0]*bp[i] ++ add $t7,$t7,$t6 ; ap[0]*bp[i]+tp[0] + ; transfer bp[i] to FPU as 4x16-bit values + extrdi $t0,$t3,16,48 + extrdi $t1,$t3,16,32 +@@ -647,6 +904,8 @@ $code.=<<___; + std $t1,`$FRAME+8`($sp) + std $t2,`$FRAME+16`($sp) + std $t3,`$FRAME+24`($sp) ++ ++ mulld $t7,$t7,$n0 ; tp[0]*n0 + ; transfer (ap[0]*bp[i]+tp[0])*n0 to FPU as 4x16-bit values + extrdi $t4,$t7,16,48 + extrdi $t5,$t7,16,32 +@@ -656,7 +915,50 @@ $code.=<<___; + std $t5,`$FRAME+40`($sp) + std $t6,`$FRAME+48`($sp) + std $t7,`$FRAME+56`($sp) ++___ ++$code.=<<___ if ($SIZE_T==4); ++ 
add $t0,$bp,$i ++ li $c1,0 ++ lwz $t1,0($t0) ; bp[i,i+1] ++ lwz $t3,4($t0) ++ ++ mullw $t4,$a0,$t1 ; ap[0]*bp[i] ++ lwz $t0,`$FRAME+$TRANSFER+8+4`($sp) ; tp[0] ++ mulhwu $t5,$a0,$t1 ++ lwz $t2,`$FRAME+$TRANSFER+8`($sp) ; tp[0] ++ mullw $t6,$a1,$t1 ++ mullw $t7,$a0,$t3 ++ add $t5,$t5,$t6 ++ add $t5,$t5,$t7 ++ addc $t4,$t4,$t0 ; ap[0]*bp[i]+tp[0] ++ adde $t5,$t5,$t2 ++ ; transfer bp[i] to FPU as 4x16-bit values ++ extrwi $t0,$t1,16,16 ++ extrwi $t1,$t1,16,0 ++ extrwi $t2,$t3,16,16 ++ extrwi $t3,$t3,16,0 ++ std $t0,`$FRAME+0`($sp) ; yes, std in 32-bit build ++ std $t1,`$FRAME+8`($sp) ++ std $t2,`$FRAME+16`($sp) ++ std $t3,`$FRAME+24`($sp) + ++ mullw $t0,$t4,$n0 ; mulld tp[0]*n0 ++ mulhwu $t1,$t4,$n0 ++ mullw $t2,$t5,$n0 ++ mullw $t3,$t4,$n1 ++ add $t1,$t1,$t2 ++ add $t1,$t1,$t3 ++ ; transfer (ap[0]*bp[i]+tp[0])*n0 to FPU as 4x16-bit values ++ extrwi $t4,$t0,16,16 ++ extrwi $t5,$t0,16,0 ++ extrwi $t6,$t1,16,16 ++ extrwi $t7,$t1,16,0 ++ std $t4,`$FRAME+32`($sp) ; yes, std in 32-bit build ++ std $t5,`$FRAME+40`($sp) ++ std $t6,`$FRAME+48`($sp) ++ std $t7,`$FRAME+56`($sp) ++___ ++$code.=<<___; + lfd $A0,8($nap_d) ; load a[j] in double format + lfd $A1,16($nap_d) + lfd $A2,24($nap_d) ; load a[j+1] in double format +@@ -769,7 +1071,9 @@ Linner: + fmul $dotb,$A3,$bd + lfd $A2,24($nap_d) ; load a[j+1] in double format + lfd $A3,32($nap_d) +- ++___ ++if ($SIZE_T==8 or $flavour =~ /osx/) { ++$code.=<<___; + fmadd $T1a,$N1,$na,$T1a + fmadd $T1b,$N1,$nb,$T1b + ld $t0,`$FRAME+0`($sp) +@@ -856,10 +1160,131 @@ $code.=<<___; + addze $carry,$carry + std $t3,-16($tp) ; tp[j-1] + std $t5,-8($tp) ; tp[j] ++___ ++} else { ++$code.=<<___; ++ fmadd $T1a,$N1,$na,$T1a ++ fmadd $T1b,$N1,$nb,$T1b ++ lwz $t1,`$FRAME+0`($sp) ++ lwz $t0,`$FRAME+4`($sp) ++ fmadd $T2a,$N2,$na,$T2a ++ fmadd $T2b,$N2,$nb,$T2b ++ lwz $t3,`$FRAME+8`($sp) ++ lwz $t2,`$FRAME+12`($sp) ++ fmadd $T3a,$N3,$na,$T3a ++ fmadd $T3b,$N3,$nb,$T3b ++ lwz $t5,`$FRAME+16`($sp) ++ lwz $t4,`$FRAME+20`($sp) ++ addc $t0,$t0,$carry ++ adde 
$t1,$t1,$c1 ++ srwi $carry,$t0,16 ++ fmadd $T0a,$N0,$na,$T0a ++ fmadd $T0b,$N0,$nb,$T0b ++ lwz $t7,`$FRAME+24`($sp) ++ lwz $t6,`$FRAME+28`($sp) ++ srwi $c1,$t1,16 ++ insrwi $carry,$t1,16,0 ++ ++ fmadd $T1a,$N0,$nc,$T1a ++ fmadd $T1b,$N0,$nd,$T1b ++ addc $t2,$t2,$carry ++ adde $t3,$t3,$c1 ++ srwi $carry,$t2,16 ++ fmadd $T2a,$N1,$nc,$T2a ++ fmadd $T2b,$N1,$nd,$T2b ++ insrwi $t0,$t2,16,0 ; 0..31 bits ++ srwi $c1,$t3,16 ++ insrwi $carry,$t3,16,0 ++ fmadd $T3a,$N2,$nc,$T3a ++ fmadd $T3b,$N2,$nd,$T3b ++ lwz $t2,12($tp) ; tp[j] ++ lwz $t3,8($tp) ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ srwi $carry,$t4,16 ++ fmadd $dota,$N3,$nc,$dota ++ fmadd $dotb,$N3,$nd,$dotb ++ srwi $c1,$t5,16 ++ insrwi $carry,$t5,16,0 ++ ++ fctid $T0a,$T0a ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ fctid $T0b,$T0b ++ insrwi $t4,$t6,16,0 ; 32..63 bits ++ srwi $c1,$t7,16 ++ insrwi $carry,$t7,16,0 ++ fctid $T1a,$T1a ++ addc $t0,$t0,$t2 ++ adde $t4,$t4,$t3 ++ lwz $t3,`$FRAME+32`($sp) ; permuted $t1 ++ lwz $t2,`$FRAME+36`($sp) ; permuted $t0 ++ fctid $T1b,$T1b ++ addze $carry,$carry ++ addze $c1,$c1 ++ stw $t0,4($tp) ; tp[j-1] ++ stw $t4,0($tp) ++ fctid $T2a,$T2a ++ addc $t2,$t2,$carry ++ adde $t3,$t3,$c1 ++ srwi $carry,$t2,16 ++ lwz $t7,`$FRAME+40`($sp) ; permuted $t3 ++ lwz $t6,`$FRAME+44`($sp) ; permuted $t2 ++ fctid $T2b,$T2b ++ srwi $c1,$t3,16 ++ insrwi $carry,$t3,16,0 ++ lwz $t1,`$FRAME+48`($sp) ; permuted $t5 ++ lwz $t0,`$FRAME+52`($sp) ; permuted $t4 ++ fctid $T3a,$T3a ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ lwz $t5,`$FRAME+56`($sp) ; permuted $t7 ++ lwz $t4,`$FRAME+60`($sp) ; permuted $t6 ++ fctid $T3b,$T3b ++ ++ insrwi $t2,$t6,16,0 ; 64..95 bits ++ insrwi $carry,$t7,16,0 ++ srwi $c1,$t7,16 ++ lwz $t6,20($tp) ++ lwzu $t7,16($tp) ++ addc $t0,$t0,$carry ++ stfd $T0a,`$FRAME+0`($sp) ++ adde $t1,$t1,$c1 ++ srwi $carry,$t0,16 ++ stfd $T0b,`$FRAME+8`($sp) ++ insrwi $carry,$t1,16,0 ++ srwi $c1,$t1,16 ++ addc $t4,$t4,$carry ++ stfd 
$T1a,`$FRAME+16`($sp) ++ adde $t5,$t5,$c1 ++ srwi $carry,$t4,16 ++ insrwi $t0,$t4,16,0 ; 96..127 bits ++ stfd $T1b,`$FRAME+24`($sp) ++ insrwi $carry,$t5,16,0 ++ srwi $c1,$t5,16 ++ ++ addc $t2,$t2,$t6 ++ stfd $T2a,`$FRAME+32`($sp) ++ adde $t0,$t0,$t7 ++ stfd $T2b,`$FRAME+40`($sp) ++ addze $carry,$carry ++ stfd $T3a,`$FRAME+48`($sp) ++ addze $c1,$c1 ++ stfd $T3b,`$FRAME+56`($sp) ++ stw $t2,-4($tp) ; tp[j] ++ stw $t0,-8($tp) ++___ ++} ++$code.=<<___; + bdnz- Linner + + fctid $dota,$dota + fctid $dotb,$dotb ++___ ++if ($SIZE_T==8 or $flavour =~ /osx/) { ++$code.=<<___; + ld $t0,`$FRAME+0`($sp) + ld $t1,`$FRAME+8`($sp) + ld $t2,`$FRAME+16`($sp) +@@ -926,7 +1351,116 @@ $code.=<<___; + insrdi $t6,$t7,48,0 + srdi $ovf,$t7,48 + std $t6,0($tp) ; tp[num-1] ++___ ++} else { ++$code.=<<___; ++ lwz $t1,`$FRAME+0`($sp) ++ lwz $t0,`$FRAME+4`($sp) ++ lwz $t3,`$FRAME+8`($sp) ++ lwz $t2,`$FRAME+12`($sp) ++ lwz $t5,`$FRAME+16`($sp) ++ lwz $t4,`$FRAME+20`($sp) ++ lwz $t7,`$FRAME+24`($sp) ++ lwz $t6,`$FRAME+28`($sp) ++ stfd $dota,`$FRAME+64`($sp) ++ stfd $dotb,`$FRAME+72`($sp) + ++ addc $t0,$t0,$carry ++ adde $t1,$t1,$c1 ++ srwi $carry,$t0,16 ++ insrwi $carry,$t1,16,0 ++ srwi $c1,$t1,16 ++ addc $t2,$t2,$carry ++ adde $t3,$t3,$c1 ++ srwi $carry,$t2,16 ++ insrwi $t0,$t2,16,0 ; 0..31 bits ++ lwz $t2,12($tp) ; tp[j] ++ insrwi $carry,$t3,16,0 ++ srwi $c1,$t3,16 ++ lwz $t3,8($tp) ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ srwi $carry,$t4,16 ++ insrwi $carry,$t5,16,0 ++ srwi $c1,$t5,16 ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ insrwi $t4,$t6,16,0 ; 32..63 bits ++ insrwi $carry,$t7,16,0 ++ srwi $c1,$t7,16 ++ ++ addc $t0,$t0,$t2 ++ adde $t4,$t4,$t3 ++ addze $carry,$carry ++ addze $c1,$c1 ++ stw $t0,4($tp) ; tp[j-1] ++ stw $t4,0($tp) ++ ++ lwz $t3,`$FRAME+32`($sp) ; permuted $t1 ++ lwz $t2,`$FRAME+36`($sp) ; permuted $t0 ++ lwz $t7,`$FRAME+40`($sp) ; permuted $t3 ++ lwz $t6,`$FRAME+44`($sp) ; permuted $t2 ++ lwz $t1,`$FRAME+48`($sp) ; permuted $t5 ++ lwz 
$t0,`$FRAME+52`($sp) ; permuted $t4 ++ lwz $t5,`$FRAME+56`($sp) ; permuted $t7 ++ lwz $t4,`$FRAME+60`($sp) ; permuted $t6 ++ ++ addc $t2,$t2,$carry ++ adde $t3,$t3,$c1 ++ srwi $carry,$t2,16 ++ insrwi $carry,$t3,16,0 ++ srwi $c1,$t3,16 ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ insrwi $t2,$t6,16,0 ; 64..95 bits ++ lwz $t6,20($tp) ++ insrwi $carry,$t7,16,0 ++ srwi $c1,$t7,16 ++ lwzu $t7,16($tp) ++ addc $t0,$t0,$carry ++ adde $t1,$t1,$c1 ++ srwi $carry,$t0,16 ++ insrwi $carry,$t1,16,0 ++ srwi $c1,$t1,16 ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ srwi $carry,$t4,16 ++ insrwi $t0,$t4,16,0 ; 96..127 bits ++ insrwi $carry,$t5,16,0 ++ srwi $c1,$t5,16 ++ ++ addc $t2,$t2,$t6 ++ adde $t0,$t0,$t7 ++ lwz $t7,`$FRAME+64`($sp) ++ lwz $t6,`$FRAME+68`($sp) ++ addze $carry,$carry ++ addze $c1,$c1 ++ lwz $t5,`$FRAME+72`($sp) ++ lwz $t4,`$FRAME+76`($sp) ++ ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ stw $t2,-4($tp) ; tp[j] ++ stw $t0,-8($tp) ++ addc $t6,$t6,$ovf ++ addze $t7,$t7 ++ srwi $carry,$t6,16 ++ insrwi $carry,$t7,16,0 ++ srwi $c1,$t7,16 ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ ++ insrwi $t6,$t4,16,0 ++ srwi $t4,$t4,16 ++ insrwi $t4,$t5,16,0 ++ srwi $ovf,$t5,16 ++ stw $t6,4($tp) ; tp[num-1] ++ stw $t4,0($tp) ++___ ++} ++$code.=<<___; + slwi $t7,$num,2 + addi $i,$i,8 + subf $nap_d,$t7,$nap_d ; rewind pointer +@@ -994,14 +1528,14 @@ $code.=<<___ if ($SIZE_T==4); + mtctr $j + + .align 4 +-Lsub: ld $t0,8($tp) ; load tp[j..j+3] in 64-bit word order +- ldu $t2,16($tp) ++Lsub: lwz $t0,12($tp) ; load tp[j..j+3] in 64-bit word order ++ lwz $t1,8($tp) ++ lwz $t2,20($tp) ++ lwzu $t3,16($tp) + lwz $t4,4($np) ; load np[j..j+3] in 32-bit word order + lwz $t5,8($np) + lwz $t6,12($np) + lwzu $t7,16($np) +- extrdi $t1,$t0,32,0 +- extrdi $t3,$t2,32,0 + subfe $t4,$t4,$t0 ; tp[j]-np[j] + stw $t0,4($ap) ; save tp[j..j+3] in 32-bit word order + subfe $t5,$t5,$t1 ; tp[j+1]-np[j+1] +@@ -1052,6 +1586,9 @@ ___ + $code.=<<___; + $POP $i,0($sp) + li r3,1 ; signal 
"handled" ++ $POP r19,`-12*8-13*$SIZE_T`($i) ++ $POP r20,`-12*8-12*$SIZE_T`($i) ++ $POP r21,`-12*8-11*$SIZE_T`($i) + $POP r22,`-12*8-10*$SIZE_T`($i) + $POP r23,`-12*8-9*$SIZE_T`($i) + $POP r24,`-12*8-8*$SIZE_T`($i) +@@ -1077,8 +1614,9 @@ $code.=<<___; + mr $sp,$i + blr + .long 0 +- .byte 0,12,4,0,0x8c,10,6,0 ++ .byte 0,12,4,0,0x8c,13,6,0 + .long 0 ++.size .$fname,.-.$fname + + .asciz "Montgomery Multiplication for PPC64, CRYPTOGAMS by " + ___ +diff -up openssl-1.0.1i/crypto/evp/e_aes.c.ppc-asm openssl-1.0.1i/crypto/evp/e_aes.c +--- openssl-1.0.1i/crypto/evp/e_aes.c.ppc-asm 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/crypto/evp/e_aes.c 2014-08-13 19:46:21.094578151 +0200 +@@ -153,6 +153,20 @@ void AES_xts_decrypt(const char *inp,cha + const unsigned char iv[16]); + #endif + ++#if defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC)) ++# include "ppc_arch.h" ++# ifdef VPAES_ASM ++# define VPAES_CAPABLE (OPENSSL_ppccap_P & PPC_ALTIVEC) ++# endif ++# define HWAES_CAPABLE (OPENSSL_ppccap_P & PPC_CRYPTO207) ++# define HWAES_set_encrypt_key aes_p8_set_encrypt_key ++# define HWAES_set_decrypt_key aes_p8_set_decrypt_key ++# define HWAES_encrypt aes_p8_encrypt ++# define HWAES_decrypt aes_p8_decrypt ++# define HWAES_cbc_encrypt aes_p8_cbc_encrypt ++# define HWAES_ctr32_encrypt_blocks aes_p8_ctr32_encrypt_blocks ++#endif ++ + #if defined(AES_ASM) && !defined(I386_ONLY) && ( \ + ((defined(__i386) || defined(__i386__) || \ + defined(_M_IX86)) && defined(OPENSSL_IA32_SSE2))|| \ +diff -up openssl-1.0.1i/crypto/modes/asm/ghashp8-ppc.pl.ppc-asm openssl-1.0.1i/crypto/modes/asm/ghashp8-ppc.pl +--- openssl-1.0.1i/crypto/modes/asm/ghashp8-ppc.pl.ppc-asm 2014-08-13 19:46:21.095578174 +0200 ++++ openssl-1.0.1i/crypto/modes/asm/ghashp8-ppc.pl 2014-08-13 19:46:21.095578174 +0200 +@@ -0,0 +1,234 @@ ++#!/usr/bin/env perl ++# ++# ==================================================================== ++# Written by Andy Polyakov for the 
OpenSSL ++# project. The module is, however, dual licensed under OpenSSL and ++# CRYPTOGAMS licenses depending on where you obtain it. For further ++# details see http://www.openssl.org/~appro/cryptogams/. ++# ==================================================================== ++# ++# GHASH for PowerISA v2.07. ++# ++# July 2014 ++# ++# Accurate performance measurements are problematic, because it's ++# always virtualized setup with possibly throttled processor. ++# Relative comparison is therefore more informative. This initial ++# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x ++# faster than "4-bit" integer-only compiler-generated 64-bit code. ++# "Initial version" means that there is room for further improvement. ++ ++$flavour=shift; ++$output =shift; ++ ++if ($flavour =~ /64/) { ++ $SIZE_T=8; ++ $LRSAVE=2*$SIZE_T; ++ $STU="stdu"; ++ $POP="ld"; ++ $PUSH="std"; ++} elsif ($flavour =~ /32/) { ++ $SIZE_T=4; ++ $LRSAVE=$SIZE_T; ++ $STU="stwu"; ++ $POP="lwz"; ++ $PUSH="stw"; ++} else { die "nonsense $flavour"; } ++ ++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ++( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or ++( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or ++die "can't locate ppc-xlate.pl"; ++ ++open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!"; ++ ++my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block ++ ++my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3)); ++my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12)); ++my $vrsave="r12"; ++ ++$code=<<___; ++.machine "any" ++ ++.text ++ ++.globl .gcm_init_p8 ++.align 5 ++.gcm_init_p8: ++ lis r0,0xfff0 ++ li r8,0x10 ++ mfspr $vrsave,256 ++ li r9,0x20 ++ mtspr 256,r0 ++ li r10,0x30 ++ lvx_u $H,0,r4 # load H ++ ++ vspltisb $xC2,-16 # 0xf0 ++ vspltisb $t0,1 # one ++ vaddubm $xC2,$xC2,$xC2 # 0xe0 ++ vxor $zero,$zero,$zero ++ vor $xC2,$xC2,$t0 # 0xe1 ++ vsldoi $xC2,$xC2,$zero,15 # 0xe1... ++ vsldoi $t1,$zero,$t0,1 # ...1 ++ vaddubm $xC2,$xC2,$xC2 # 0xc2... 
++ vspltisb $t2,7 ++ vor $xC2,$xC2,$t1 # 0xc2....01 ++ vspltb $t1,$H,0 # most significant byte ++ vsl $H,$H,$t0 # H<<=1 ++ vsrab $t1,$t1,$t2 # broadcast carry bit ++ vand $t1,$t1,$xC2 ++ vxor $H,$H,$t1 # twisted H ++ ++ vsldoi $H,$H,$H,8 # twist even more ... ++ vsldoi $xC2,$zero,$xC2,8 # 0xc2.0 ++ vsldoi $Hl,$zero,$H,8 # ... and split ++ vsldoi $Hh,$H,$zero,8 ++ ++ stvx_u $xC2,0,r3 # save pre-computed table ++ stvx_u $Hl,r8,r3 ++ stvx_u $H, r9,r3 ++ stvx_u $Hh,r10,r3 ++ ++ mtspr 256,$vrsave ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,2,0 ++ .long 0 ++.size .gcm_init_p8,.-.gcm_init_p8 ++ ++.globl .gcm_gmult_p8 ++.align 5 ++.gcm_gmult_p8: ++ lis r0,0xfff8 ++ li r8,0x10 ++ mfspr $vrsave,256 ++ li r9,0x20 ++ mtspr 256,r0 ++ li r10,0x30 ++ lvx_u $IN,0,$Xip # load Xi ++ ++ lvx_u $Hl,r8,$Htbl # load pre-computed table ++ le?lvsl $lemask,r0,r0 ++ lvx_u $H, r9,$Htbl ++ le?vspltisb $t0,0x07 ++ lvx_u $Hh,r10,$Htbl ++ le?vxor $lemask,$lemask,$t0 ++ lvx_u $xC2,0,$Htbl ++ le?vperm $IN,$IN,$IN,$lemask ++ vxor $zero,$zero,$zero ++ ++ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo ++ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi ++ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi ++ ++ vpmsumd $t2,$Xl,$xC2 # 1st phase ++ ++ vsldoi $t0,$Xm,$zero,8 ++ vsldoi $t1,$zero,$Xm,8 ++ vxor $Xl,$Xl,$t0 ++ vxor $Xh,$Xh,$t1 ++ ++ vsldoi $Xl,$Xl,$Xl,8 ++ vxor $Xl,$Xl,$t2 ++ ++ vsldoi $t1,$Xl,$Xl,8 # 2nd phase ++ vpmsumd $Xl,$Xl,$xC2 ++ vxor $t1,$t1,$Xh ++ vxor $Xl,$Xl,$t1 ++ ++ le?vperm $Xl,$Xl,$Xl,$lemask ++ stvx_u $Xl,0,$Xip # write out Xi ++ ++ mtspr 256,$vrsave ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,2,0 ++ .long 0 ++.size .gcm_gmult_p8,.-.gcm_gmult_p8 ++ ++.globl .gcm_ghash_p8 ++.align 5 ++.gcm_ghash_p8: ++ lis r0,0xfff8 ++ li r8,0x10 ++ mfspr $vrsave,256 ++ li r9,0x20 ++ mtspr 256,r0 ++ li r10,0x30 ++ lvx_u $Xl,0,$Xip # load Xi ++ ++ lvx_u $Hl,r8,$Htbl # load pre-computed table ++ le?lvsl $lemask,r0,r0 ++ lvx_u $H, r9,$Htbl ++ le?vspltisb $t0,0x07 ++ lvx_u $Hh,r10,$Htbl ++ le?vxor $lemask,$lemask,$t0 ++ lvx_u 
$xC2,0,$Htbl ++ le?vperm $Xl,$Xl,$Xl,$lemask ++ vxor $zero,$zero,$zero ++ ++ lvx_u $IN,0,$inp ++ addi $inp,$inp,16 ++ subi $len,$len,16 ++ le?vperm $IN,$IN,$IN,$lemask ++ vxor $IN,$IN,$Xl ++ b Loop ++ ++.align 5 ++Loop: ++ subic $len,$len,16 ++ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo ++ subfe. r0,r0,r0 # borrow?-1:0 ++ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi ++ and r0,r0,$len ++ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi ++ add $inp,$inp,r0 ++ ++ vpmsumd $t2,$Xl,$xC2 # 1st phase ++ ++ vsldoi $t0,$Xm,$zero,8 ++ vsldoi $t1,$zero,$Xm,8 ++ vxor $Xl,$Xl,$t0 ++ vxor $Xh,$Xh,$t1 ++ ++ vsldoi $Xl,$Xl,$Xl,8 ++ vxor $Xl,$Xl,$t2 ++ lvx_u $IN,0,$inp ++ addi $inp,$inp,16 ++ ++ vsldoi $t1,$Xl,$Xl,8 # 2nd phase ++ vpmsumd $Xl,$Xl,$xC2 ++ le?vperm $IN,$IN,$IN,$lemask ++ vxor $t1,$t1,$Xh ++ vxor $IN,$IN,$t1 ++ vxor $IN,$IN,$Xl ++ beq Loop # did $len-=16 borrow? ++ ++ vxor $Xl,$Xl,$t1 ++ le?vperm $Xl,$Xl,$Xl,$lemask ++ stvx_u $Xl,0,$Xip # write out Xi ++ ++ mtspr 256,$vrsave ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,4,0 ++ .long 0 ++.size .gcm_ghash_p8,.-.gcm_ghash_p8 ++ ++.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by " ++.align 2 ++___ ++ ++foreach (split("\n",$code)) { ++ if ($flavour =~ /le$/o) { # little-endian ++ s/le\?//o or ++ s/be\?/#be#/o; ++ } else { ++ s/le\?/#le#/o or ++ s/be\?//o; ++ } ++ print $_,"\n"; ++} ++ ++close STDOUT; # enforce flush +diff -up openssl-1.0.1i/crypto/modes/gcm128.c.ppc-asm openssl-1.0.1i/crypto/modes/gcm128.c +--- openssl-1.0.1i/crypto/modes/gcm128.c.ppc-asm 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/crypto/modes/gcm128.c 2014-08-13 19:46:21.095578174 +0200 +@@ -671,6 +671,21 @@ void gcm_ghash_4bit_x86(u64 Xi[2],const + void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]); + void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); + # endif ++# elif defined(__sparc__) || defined(__sparc) ++# include "sparc_arch.h" ++# define GHASH_ASM_SPARC ++# define GCM_FUNCREF_4BIT ++extern unsigned int OPENSSL_sparcv9cap_P[]; ++void 
gcm_init_vis3(u128 Htable[16],const u64 Xi[2]); ++void gcm_gmult_vis3(u64 Xi[2],const u128 Htable[16]); ++void gcm_ghash_vis3(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); ++#elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC)) ++# include "ppc_arch.h" ++# define GHASH_ASM_PPC ++# define GCM_FUNCREF_4BIT ++void gcm_init_p8(u128 Htable[16],const u64 Xi[2]); ++void gcm_gmult_p8(u64 Xi[2],const u128 Htable[16]); ++void gcm_ghash_p8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); + # endif + #endif + +@@ -745,6 +760,16 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT * + } else { + gcm_init_4bit(ctx->Htable,ctx->H.u); + ctx->gmult = gcm_gmult_4bit; ++ ctx->ghash = gcm_ghash_4bit; ++ } ++# elif defined(GHASH_ASM_PPC) ++ if (OPENSSL_ppccap_P & PPC_CRYPTO207) { ++ gcm_init_p8(ctx->Htable,ctx->H.u); ++ ctx->gmult = gcm_gmult_p8; ++ ctx->ghash = gcm_ghash_p8; ++ } else { ++ gcm_init_4bit(ctx->Htable,ctx->H.u); ++ ctx->gmult = gcm_gmult_4bit; + ctx->ghash = gcm_ghash_4bit; + } + # else +diff -up openssl-1.0.1i/crypto/modes/Makefile.ppc-asm openssl-1.0.1i/crypto/modes/Makefile +--- openssl-1.0.1i/crypto/modes/Makefile.ppc-asm 2014-08-13 19:48:28.435511100 +0200 ++++ openssl-1.0.1i/crypto/modes/Makefile 2014-08-13 19:48:04.641963082 +0200 +@@ -59,6 +59,8 @@ ghash-alpha.s: asm/ghash-alpha.pl + + ghash-parisc.s: asm/ghash-parisc.pl + $(PERL) asm/ghash-parisc.pl $(PERLASM_SCHEME) $@ ++ghashp8-ppc.s: asm/ghashp8-ppc.pl ++ $(PERL) asm/ghashp8-ppc.pl $(PERLASM_SCHEME) $@ + + # GNU make "catch all" + ghash-%.S: asm/ghash-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@ +diff -up openssl-1.0.1i/crypto/perlasm/ppc-xlate.pl.ppc-asm openssl-1.0.1i/crypto/perlasm/ppc-xlate.pl +--- openssl-1.0.1i/crypto/perlasm/ppc-xlate.pl.ppc-asm 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/crypto/perlasm/ppc-xlate.pl 2014-08-13 19:46:21.095578174 +0200 +@@ -27,7 +27,8 @@ my $globl = sub { + /osx/ && do { $name = "_$name"; + last; + }; +- 
/linux.*32/ && do { $ret .= ".globl $name\n"; ++ /linux.*(32|64le)/ ++ && do { $ret .= ".globl $name\n"; + $ret .= ".type $name,\@function"; + last; + }; +@@ -37,7 +38,6 @@ my $globl = sub { + $ret .= ".align 3\n"; + $ret .= "$name:\n"; + $ret .= ".quad .$name,.TOC.\@tocbase,0\n"; +- $ret .= ".size $name,24\n"; + $ret .= ".previous\n"; + + $name = ".$name"; +@@ -50,7 +50,9 @@ my $globl = sub { + $ret; + }; + my $text = sub { +- ($flavour =~ /aix/) ? ".csect" : ".text"; ++ my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text"; ++ $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/); ++ $ret; + }; + my $machine = sub { + my $junk = shift; +@@ -62,9 +64,12 @@ my $machine = sub { + ".machine $arch"; + }; + my $size = sub { +- if ($flavour =~ /linux.*32/) ++ if ($flavour =~ /linux/) + { shift; +- ".size " . join(",",@_); ++ my $name = shift; $name =~ s|^[\.\_]||; ++ my $ret = ".size $name,.-".($flavour=~/64$/?".":"").$name; ++ $ret .= "\n.size .$name,.-.$name" if ($flavour=~/64$/); ++ $ret; + } + else + { ""; } +@@ -77,6 +82,25 @@ my $asciz = sub { + else + { ""; } + }; ++my $quad = sub { ++ shift; ++ my @ret; ++ my ($hi,$lo); ++ for (@_) { ++ if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io) ++ { $hi=$1?"0x$1":"0"; $lo="0x$2"; } ++ elsif (/^([0-9]+)$/o) ++ { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl ++ else ++ { $hi=undef; $lo=$_; } ++ ++ if (defined($hi)) ++ { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); } ++ else ++ { push(@ret,".quad $lo"); } ++ } ++ join("\n",@ret); ++}; + + ################################################################ + # simplified mnemonics not handled by at least one assembler +@@ -122,6 +146,46 @@ my $extrdi = sub { + $b = ($b+$n)&63; $n = 64-$n; + " rldicl $ra,$rs,$b,$n"; + }; ++my $vmr = sub { ++ my ($f,$vx,$vy) = @_; ++ " vor $vx,$vy,$vy"; ++}; ++ ++# PowerISA 2.06 stuff ++sub vsxmem_op { ++ my ($f, $vrt, $ra, $rb, $op) = @_; ++ " .long ".sprintf 
"0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1); ++} ++# made-up unaligned memory reference AltiVec/VMX instructions ++my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x ++my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x ++my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx ++my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx ++my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x ++my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x ++ ++# PowerISA 2.07 stuff ++sub vcrypto_op { ++ my ($f, $vrt, $vra, $vrb, $op) = @_; ++ " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op; ++} ++my $vcipher = sub { vcrypto_op(@_, 1288); }; ++my $vcipherlast = sub { vcrypto_op(@_, 1289); }; ++my $vncipher = sub { vcrypto_op(@_, 1352); }; ++my $vncipherlast= sub { vcrypto_op(@_, 1353); }; ++my $vsbox = sub { vcrypto_op(@_, 0, 1480); }; ++my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); }; ++my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); }; ++my $vpmsumb = sub { vcrypto_op(@_, 1032); }; ++my $vpmsumd = sub { vcrypto_op(@_, 1224); }; ++my $vpmsubh = sub { vcrypto_op(@_, 1096); }; ++my $vpmsumw = sub { vcrypto_op(@_, 1160); }; ++my $vaddudm = sub { vcrypto_op(@_, 192); }; ++ ++my $mtsle = sub { ++ my ($f, $arg) = @_; ++ " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2); ++}; + + while($line=<>) { + +@@ -138,7 +202,10 @@ while($line=<>) { + { + $line =~ s|(^[\.\w]+)\:\s*||; + my $label = $1; +- printf "%s:",($GLOBALS{$label} or $label) if ($label); ++ if ($label) { ++ printf "%s:",($GLOBALS{$label} or $label); ++ printf "\n.localentry\t$GLOBALS{$label},0" if ($GLOBALS{$label} && $flavour =~ /linux.*64le/); ++ } + } + + { +@@ -147,7 +214,7 @@ while($line=<>) { + my $mnemonic = $2; + my $f = $3; + my $opcode = eval("\$$mnemonic"); +- $line =~ s|\bc?[rf]([0-9]+)\b|$1|g if ($c ne "." and $flavour !~ /osx/); ++ $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." 
and $flavour !~ /osx/); + if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); } + elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; } + } +diff -up openssl-1.0.1i/crypto/ppc_arch.h.ppc-asm openssl-1.0.1i/crypto/ppc_arch.h +--- openssl-1.0.1i/crypto/ppc_arch.h.ppc-asm 2014-08-13 19:46:21.095578174 +0200 ++++ openssl-1.0.1i/crypto/ppc_arch.h 2014-08-13 19:46:21.095578174 +0200 +@@ -0,0 +1,10 @@ ++#ifndef __PPC_ARCH_H__ ++#define __PPC_ARCH_H__ ++ ++extern unsigned int OPENSSL_ppccap_P; ++ ++#define PPC_FPU64 (1<<0) ++#define PPC_ALTIVEC (1<<1) ++#define PPC_CRYPTO207 (1<<2) ++ ++#endif +diff -up openssl-1.0.1i/crypto/ppccap.c.ppc-asm openssl-1.0.1i/crypto/ppccap.c +--- openssl-1.0.1i/crypto/ppccap.c.ppc-asm 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/crypto/ppccap.c 2014-08-13 19:46:21.095578174 +0200 +@@ -4,13 +4,15 @@ + #include + #include + #include ++#if defined(__linux) || defined(_AIX) ++#include ++#endif + #include + #include + +-#define PPC_FPU64 (1<<0) +-#define PPC_ALTIVEC (1<<1) ++#include "ppc_arch.h" + +-static int OPENSSL_ppccap_P = 0; ++unsigned int OPENSSL_ppccap_P = 0; + + static sigset_t all_masked; + +@@ -22,7 +24,7 @@ int bn_mul_mont(BN_ULONG *rp, const BN_U + + if (sizeof(size_t)==4) + { +-#if (defined(__APPLE__) && defined(__MACH__)) ++#if 1 || (defined(__APPLE__) && defined(__MACH__)) + if (num>=8 && (num&3)==0 && (OPENSSL_ppccap_P&PPC_FPU64)) + return bn_mul_mont_fpu64(rp,ap,bp,np,n0,num); + #else +@@ -50,11 +52,28 @@ int bn_mul_mont(BN_ULONG *rp, const BN_U + } + #endif + ++void sha256_block_p8(void *ctx,const void *inp,size_t len); ++void sha256_block_ppc(void *ctx,const void *inp,size_t len); ++void sha256_block_data_order(void *ctx,const void *inp,size_t len) ++ { ++ OPENSSL_ppccap_P&PPC_CRYPTO207? 
sha256_block_p8(ctx,inp,len): ++ sha256_block_ppc(ctx,inp,len); ++ } ++ ++void sha512_block_p8(void *ctx,const void *inp,size_t len); ++void sha512_block_ppc(void *ctx,const void *inp,size_t len); ++void sha512_block_data_order(void *ctx,const void *inp,size_t len) ++ { ++ OPENSSL_ppccap_P&PPC_CRYPTO207? sha512_block_p8(ctx,inp,len): ++ sha512_block_ppc(ctx,inp,len); ++ } ++ + static sigjmp_buf ill_jmp; + static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); } + + void OPENSSL_ppc64_probe(void); + void OPENSSL_altivec_probe(void); ++void OPENSSL_crypto207_probe(void); + + void OPENSSL_cpuid_setup(void) + { +@@ -85,12 +104,14 @@ void OPENSSL_cpuid_setup(void) + OPENSSL_ppccap_P = 0; + + #if defined(_AIX) +- if (sizeof(size_t)==4 ++ if (sizeof(size_t)==4) ++ { ++ struct utsname uts; + # if defined(_SC_AIX_KERNEL_BITMODE) +- && sysconf(_SC_AIX_KERNEL_BITMODE)!=64 ++ if (sysconf(_SC_AIX_KERNEL_BITMODE)!=64) return; + # endif +- ) +- return; ++ if (uname(&uts)!=0 || atoi(uts.version)<6) return; ++ } + #endif + + memset(&ill_act,0,sizeof(ill_act)); +@@ -102,6 +123,10 @@ void OPENSSL_cpuid_setup(void) + + if (sizeof(size_t)==4) + { ++#ifdef __linux ++ struct utsname uts; ++ if (uname(&uts)==0 && strcmp(uts.machine,"ppc64")==0) ++#endif + if (sigsetjmp(ill_jmp,1) == 0) + { + OPENSSL_ppc64_probe(); +@@ -119,6 +144,11 @@ void OPENSSL_cpuid_setup(void) + { + OPENSSL_altivec_probe(); + OPENSSL_ppccap_P |= PPC_ALTIVEC; ++ if (sigsetjmp(ill_jmp,1) == 0) ++ { ++ OPENSSL_crypto207_probe(); ++ OPENSSL_ppccap_P |= PPC_CRYPTO207; ++ } + } + + sigaction (SIGILL,&ill_oact,NULL); +diff -up openssl-1.0.1i/crypto/ppccpuid.pl.ppc-asm openssl-1.0.1i/crypto/ppccpuid.pl +--- openssl-1.0.1i/crypto/ppccpuid.pl.ppc-asm 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/crypto/ppccpuid.pl 2014-08-13 19:46:21.096578196 +0200 +@@ -31,6 +31,7 @@ $code=<<___; + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ++.size .OPENSSL_ppc64_probe,.-.OPENSSL_ppc64_probe + + .globl .OPENSSL_altivec_probe + 
.align 4 +@@ -39,6 +40,17 @@ $code=<<___; + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ++.size .OPENSSL_altivec_probe,.-..OPENSSL_altivec_probe ++ ++.globl .OPENSSL_crypto207_probe ++.align 4 ++.OPENSSL_crypto207_probe: ++ lvx_u v0,0,r1 ++ vcipher v0,v0,v0 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++.size .OPENSSL_crypto207_probe,.-.OPENSSL_crypto207_probe + + .globl .OPENSSL_wipe_cpu + .align 4 +@@ -71,6 +83,7 @@ $code=<<___; + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ++.size .OPENSSL_wipe_cpu,.-.OPENSSL_wipe_cpu + + .globl .OPENSSL_atomic_add + .align 4 +@@ -84,6 +97,7 @@ Ladd: lwarx r5,0,r3 + .long 0 + .byte 0,12,0x14,0,0,0,2,0 + .long 0 ++.size .OPENSSL_atomic_add,.-.OPENSSL_atomic_add + + .globl .OPENSSL_rdtsc + .align 4 +@@ -93,6 +107,7 @@ Ladd: lwarx r5,0,r3 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ++.size .OPENSSL_rdtsc,.-.OPENSSL_rdtsc + + .globl .OPENSSL_cleanse + .align 4 +@@ -125,7 +140,99 @@ Laligned: + .long 0 + .byte 0,12,0x14,0,0,0,2,0 + .long 0 ++.size .OPENSSL_cleanse,.-.OPENSSL_cleanse ++___ ++{ ++my ($out,$cnt,$max)=("r3","r4","r5"); ++my ($tick,$lasttick)=("r6","r7"); ++my ($diff,$lastdiff)=("r8","r9"); ++ ++$code.=<<___; ++.globl .OPENSSL_instrument_bus ++.align 4 ++.OPENSSL_instrument_bus: ++ mtctr $cnt ++ ++ mftb $lasttick # collect 1st tick ++ li $diff,0 ++ ++ dcbf 0,$out # flush cache line ++ lwarx $tick,0,$out # load and lock ++ add $tick,$tick,$diff ++ stwcx. $tick,0,$out ++ stwx $tick,0,$out ++ ++Loop: mftb $tick ++ sub $diff,$tick,$lasttick ++ mr $lasttick,$tick ++ dcbf 0,$out # flush cache line ++ lwarx $tick,0,$out # load and lock ++ add $tick,$tick,$diff ++ stwcx. 
$tick,0,$out ++ stwx $tick,0,$out ++ addi $out,$out,4 # ++$out ++ bdnz Loop ++ ++ mr r3,$cnt ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,2,0 ++ .long 0 ++.size .OPENSSL_instrument_bus,.-.OPENSSL_instrument_bus ++ ++.globl .OPENSSL_instrument_bus2 ++.align 4 ++.OPENSSL_instrument_bus2: ++ mr r0,$cnt ++ slwi $cnt,$cnt,2 ++ ++ mftb $lasttick # collect 1st tick ++ li $diff,0 ++ ++ dcbf 0,$out # flush cache line ++ lwarx $tick,0,$out # load and lock ++ add $tick,$tick,$diff ++ stwcx. $tick,0,$out ++ stwx $tick,0,$out ++ ++ mftb $tick # collect 1st diff ++ sub $diff,$tick,$lasttick ++ mr $lasttick,$tick ++ mr $lastdiff,$diff ++Loop2: ++ dcbf 0,$out # flush cache line ++ lwarx $tick,0,$out # load and lock ++ add $tick,$tick,$diff ++ stwcx. $tick,0,$out ++ stwx $tick,0,$out ++ ++ addic. $max,$max,-1 ++ beq Ldone2 ++ ++ mftb $tick ++ sub $diff,$tick,$lasttick ++ mr $lasttick,$tick ++ cmplw 7,$diff,$lastdiff ++ mr $lastdiff,$diff ++ ++ mfcr $tick # pull cr ++ not $tick,$tick # flip bits ++ rlwinm $tick,$tick,1,29,29 # isolate flipped eq bit and scale ++ ++ sub. $cnt,$cnt,$tick # conditional --$cnt ++ add $out,$out,$tick # conditional ++$out ++ bne Loop2 ++ ++Ldone2: ++ srwi $cnt,$cnt,2 ++ sub r3,r0,$cnt ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,3,0 ++ .long 0 ++.size .OPENSSL_instrument_bus2,.-.OPENSSL_instrument_bus2 + ___ ++} + + $code =~ s/\`([^\`]*)\`/eval $1/gem; + print $code; +diff -up openssl-1.0.1i/crypto/sha/asm/sha1-ppc.pl.ppc-asm openssl-1.0.1i/crypto/sha/asm/sha1-ppc.pl +--- openssl-1.0.1i/crypto/sha/asm/sha1-ppc.pl.ppc-asm 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/crypto/sha/asm/sha1-ppc.pl 2014-08-13 19:46:21.096578196 +0200 +@@ -9,8 +9,7 @@ + + # I let hardware handle unaligned input(*), except on page boundaries + # (see below for details). Otherwise straightforward implementation +-# with X vector in register bank. The module is big-endian [which is +-# not big deal as there're no little-endian targets left around]. 
++# with X vector in register bank. + # + # (*) this means that this module is inappropriate for PPC403? Does + # anybody know if pre-POWER3 can sustain unaligned load? +@@ -38,6 +37,10 @@ if ($flavour =~ /64/) { + $PUSH ="stw"; + } else { die "nonsense $flavour"; } + ++# Define endianness based on flavour ++# e.g.: linux64le ++$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; ++ + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +@@ -68,14 +71,28 @@ $T ="r12"; + @X=("r16","r17","r18","r19","r20","r21","r22","r23", + "r24","r25","r26","r27","r28","r29","r30","r31"); + ++sub loadbe { ++my ($dst, $src, $temp_reg) = @_; ++$code.=<<___ if (!$LITTLE_ENDIAN); ++ lwz $dst,$src ++___ ++$code.=<<___ if ($LITTLE_ENDIAN); ++ lwz $temp_reg,$src ++ rotlwi $dst,$temp_reg,8 ++ rlwimi $dst,$temp_reg,24,0,7 ++ rlwimi $dst,$temp_reg,24,16,23 ++___ ++} ++ + sub BODY_00_19 { + my ($i,$a,$b,$c,$d,$e,$f)=@_; + my $j=$i+1; +-$code.=<<___ if ($i==0); +- lwz @X[$i],`$i*4`($inp) +-___ ++ ++ # Since the last value of $f is discarded, we can use ++ # it as a temp reg to swap byte-order when needed. 
++ loadbe("@X[$i]","`$i*4`($inp)",$f) if ($i==0); ++ loadbe("@X[$j]","`$j*4`($inp)",$f) if ($i<15); + $code.=<<___ if ($i<15); +- lwz @X[$j],`$j*4`($inp) + add $f,$K,$e + rotlwi $e,$a,5 + add $f,$f,@X[$i] +@@ -108,31 +125,31 @@ my ($i,$a,$b,$c,$d,$e,$f)=@_; + my $j=$i+1; + $code.=<<___ if ($i<79); + add $f,$K,$e ++ xor $t0,$b,$d + rotlwi $e,$a,5 + xor @X[$j%16],@X[$j%16],@X[($j+2)%16] + add $f,$f,@X[$i%16] +- xor $t0,$b,$c ++ xor $t0,$t0,$c + xor @X[$j%16],@X[$j%16],@X[($j+8)%16] +- add $f,$f,$e ++ add $f,$f,$t0 + rotlwi $b,$b,30 +- xor $t0,$t0,$d + xor @X[$j%16],@X[$j%16],@X[($j+13)%16] +- add $f,$f,$t0 ++ add $f,$f,$e + rotlwi @X[$j%16],@X[$j%16],1 + ___ + $code.=<<___ if ($i==79); + add $f,$K,$e ++ xor $t0,$b,$d + rotlwi $e,$a,5 + lwz r16,0($ctx) + add $f,$f,@X[$i%16] +- xor $t0,$b,$c ++ xor $t0,$t0,$c + lwz r17,4($ctx) +- add $f,$f,$e ++ add $f,$f,$t0 + rotlwi $b,$b,30 + lwz r18,8($ctx) +- xor $t0,$t0,$d + lwz r19,12($ctx) +- add $f,$f,$t0 ++ add $f,$f,$e + lwz r20,16($ctx) + ___ + } +@@ -316,6 +333,7 @@ $code.=<<___; + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ++.size .sha1_block_data_order,.-.sha1_block_data_order + ___ + $code.=<<___; + .asciz "SHA1 block transform for PPC, CRYPTOGAMS by " +diff -up openssl-1.0.1i/crypto/sha/asm/sha512-ppc.pl.ppc-asm openssl-1.0.1i/crypto/sha/asm/sha512-ppc.pl +--- openssl-1.0.1i/crypto/sha/asm/sha512-ppc.pl.ppc-asm 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/crypto/sha/asm/sha512-ppc.pl 2014-08-13 19:46:21.096578196 +0200 +@@ -1,7 +1,7 @@ + #!/usr/bin/env perl + + # ==================================================================== +-# Written by Andy Polyakov for the OpenSSL ++# Written by Andy Polyakov for the OpenSSL + # project. The module is, however, dual licensed under OpenSSL and + # CRYPTOGAMS licenses depending on where you obtain it. For further + # details see http://www.openssl.org/~appro/cryptogams/. 
+@@ -9,8 +9,7 @@ + + # I let hardware handle unaligned input, except on page boundaries + # (see below for details). Otherwise straightforward implementation +-# with X vector in register bank. The module is big-endian [which is +-# not big deal as there're no little-endian targets left around]. ++# with X vector in register bank. + + # sha256 | sha512 + # -m64 -m32 | -m64 -m32 +@@ -56,6 +55,8 @@ if ($flavour =~ /64/) { + $PUSH="stw"; + } else { die "nonsense $flavour"; } + ++$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; ++ + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +@@ -64,7 +65,7 @@ die "can't locate ppc-xlate.pl"; + open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!"; + + if ($output =~ /512/) { +- $func="sha512_block_data_order"; ++ $func="sha512_block_ppc"; + $SZ=8; + @Sigma0=(28,34,39); + @Sigma1=(14,18,41); +@@ -76,7 +77,7 @@ if ($output =~ /512/) { + $ROR="rotrdi"; + $SHR="srdi"; + } else { +- $func="sha256_block_data_order"; ++ $func="sha256_block_ppc"; + $SZ=4; + @Sigma0=( 2,13,22); + @Sigma1=( 6,11,25); +@@ -110,7 +111,7 @@ $B ="r9"; + $C ="r10"; + $D ="r11"; + $E ="r12"; +-$F ="r13"; $F="r2" if ($SIZE_T==8);# reassigned to exempt TLS pointer ++$F =$t1; $t1 = "r0"; # stay away from "r13"; + $G ="r14"; + $H ="r15"; + +@@ -118,24 +119,23 @@ $H ="r15"; + @X=("r16","r17","r18","r19","r20","r21","r22","r23", + "r24","r25","r26","r27","r28","r29","r30","r31"); + +-$inp="r31"; # reassigned $inp! aliases with @X[15] ++$inp="r31" if($SZ==4 || $SIZE_T==8); # reassigned $inp! 
aliases with @X[15] + + sub ROUND_00_15 { + my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; + $code.=<<___; +- $LD $T,`$i*$SZ`($Tbl) + $ROR $a0,$e,$Sigma1[0] + $ROR $a1,$e,$Sigma1[1] + and $t0,$f,$e +- andc $t1,$g,$e +- add $T,$T,$h + xor $a0,$a0,$a1 ++ add $h,$h,$t1 ++ andc $t1,$g,$e + $ROR $a1,$a1,`$Sigma1[2]-$Sigma1[1]` + or $t0,$t0,$t1 ; Ch(e,f,g) +- add $T,$T,@X[$i] ++ add $h,$h,@X[$i%16] + xor $a0,$a0,$a1 ; Sigma1(e) +- add $T,$T,$t0 +- add $T,$T,$a0 ++ add $h,$h,$t0 ++ add $h,$h,$a0 + + $ROR $a0,$a,$Sigma0[0] + $ROR $a1,$a,$Sigma0[1] +@@ -146,9 +146,14 @@ $code.=<<___; + xor $t0,$t0,$t1 + and $t1,$b,$c + xor $a0,$a0,$a1 ; Sigma0(a) +- add $d,$d,$T ++ add $d,$d,$h + xor $t0,$t0,$t1 ; Maj(a,b,c) +- add $h,$T,$a0 ++___ ++$code.=<<___ if ($i<15); ++ $LD $t1,`($i+1)*$SZ`($Tbl) ++___ ++$code.=<<___; ++ add $h,$h,$a0 + add $h,$h,$t0 + + ___ +@@ -169,10 +174,11 @@ $code.=<<___; + add @X[$i],@X[$i],@X[($i+9)%16] + xor $a0,$a0,$a1 ; sigma0(X[(i+1)&0x0f]) + xor $t0,$t0,$t1 ; sigma1(X[(i+14)&0x0f]) ++ $LD $t1,`$i*$SZ`($Tbl) + add @X[$i],@X[$i],$a0 + add @X[$i],@X[$i],$t0 + ___ +-&ROUND_00_15($i,$a,$b,$c,$d,$e,$f,$g,$h); ++&ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h); + } + + $code=<<___; +@@ -188,8 +194,6 @@ $func: + + $PUSH $ctx,`$FRAME-$SIZE_T*22`($sp) + +- $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) +- $PUSH r13,`$FRAME-$SIZE_T*19`($sp) + $PUSH r14,`$FRAME-$SIZE_T*18`($sp) + $PUSH r15,`$FRAME-$SIZE_T*17`($sp) + $PUSH r16,`$FRAME-$SIZE_T*16`($sp) +@@ -209,7 +213,10 @@ $func: + $PUSH r30,`$FRAME-$SIZE_T*2`($sp) + $PUSH r31,`$FRAME-$SIZE_T*1`($sp) + $PUSH r0,`$FRAME+$LRSAVE`($sp) ++___ + ++if ($SZ==4 || $SIZE_T==8) { ++$code.=<<___; + $LD $A,`0*$SZ`($ctx) + mr $inp,r4 ; incarnate $inp + $LD $B,`1*$SZ`($ctx) +@@ -219,7 +226,16 @@ $func: + $LD $F,`5*$SZ`($ctx) + $LD $G,`6*$SZ`($ctx) + $LD $H,`7*$SZ`($ctx) ++___ ++} else { ++ for ($i=16;$i<32;$i++) { ++ $code.=<<___; ++ lwz r$i,`$LITTLE_ENDIAN^(4*($i-16))`($ctx) ++___ ++ } ++} + ++$code.=<<___; + bl LPICmeup + LPICedup: + andi. 
r0,$inp,3 +@@ -255,6 +271,9 @@ Lunaligned: + Lcross_page: + li $t1,`16*$SZ/4` + mtctr $t1 ++___ ++if ($SZ==4 || $SIZE_T==8) { ++$code.=<<___; + addi r20,$sp,$LOCALS ; aligned spot below the frame + Lmemcpy: + lbz r16,0($inp) +@@ -268,7 +287,26 @@ Lmemcpy: + stb r19,3(r20) + addi r20,r20,4 + bdnz Lmemcpy ++___ ++} else { ++$code.=<<___; ++ addi r12,$sp,$LOCALS ; aligned spot below the frame ++Lmemcpy: ++ lbz r8,0($inp) ++ lbz r9,1($inp) ++ lbz r10,2($inp) ++ lbz r11,3($inp) ++ addi $inp,$inp,4 ++ stb r8,0(r12) ++ stb r9,1(r12) ++ stb r10,2(r12) ++ stb r11,3(r12) ++ addi r12,r12,4 ++ bdnz Lmemcpy ++___ ++} + ++$code.=<<___; + $PUSH $inp,`$FRAME-$SIZE_T*26`($sp) ; save real inp + addi $t1,$sp,`$LOCALS+16*$SZ` ; fictitious end pointer + addi $inp,$sp,$LOCALS ; fictitious inp pointer +@@ -283,8 +321,6 @@ Lmemcpy: + + Ldone: + $POP r0,`$FRAME+$LRSAVE`($sp) +- $POP $toc,`$FRAME-$SIZE_T*20`($sp) +- $POP r13,`$FRAME-$SIZE_T*19`($sp) + $POP r14,`$FRAME-$SIZE_T*18`($sp) + $POP r15,`$FRAME-$SIZE_T*17`($sp) + $POP r16,`$FRAME-$SIZE_T*16`($sp) +@@ -309,27 +345,48 @@ Ldone: + .long 0 + .byte 0,12,4,1,0x80,18,3,0 + .long 0 ++___ + ++if ($SZ==4 || $SIZE_T==8) { ++$code.=<<___; + .align 4 + Lsha2_block_private: ++ $LD $t1,0($Tbl) + ___ + for($i=0;$i<16;$i++) { +-$code.=<<___ if ($SZ==4); ++$code.=<<___ if ($SZ==4 && !$LITTLE_ENDIAN); + lwz @X[$i],`$i*$SZ`($inp) + ___ ++$code.=<<___ if ($SZ==4 && $LITTLE_ENDIAN); ++ lwz $a0,`$i*$SZ`($inp) ++ rotlwi @X[$i],$a0,8 ++ rlwimi @X[$i],$a0,24,0,7 ++ rlwimi @X[$i],$a0,24,16,23 ++___ + # 64-bit loads are split to 2x32-bit ones, as CPU can't handle + # unaligned 64-bit loads, only 32-bit ones... 
+-$code.=<<___ if ($SZ==8); ++$code.=<<___ if ($SZ==8 && !$LITTLE_ENDIAN); + lwz $t0,`$i*$SZ`($inp) + lwz @X[$i],`$i*$SZ+4`($inp) + insrdi @X[$i],$t0,32,0 + ___ ++$code.=<<___ if ($SZ==8 && $LITTLE_ENDIAN); ++ lwz $a0,`$i*$SZ`($inp) ++ lwz $a1,`$i*$SZ+4`($inp) ++ rotlwi $t0,$a0,8 ++ rotlwi @X[$i],$a1,8 ++ rlwimi $t0,$a0,24,0,7 ++ rlwimi @X[$i],$a1,24,0,7 ++ rlwimi $t0,$a0,24,16,23 ++ rlwimi @X[$i],$a1,24,16,23 ++ insrdi @X[$i],$t0,32,0 ++___ + &ROUND_00_15($i,@V); + unshift(@V,pop(@V)); + } + $code.=<<___; +- li $T,`$rounds/16-1` +- mtctr $T ++ li $t0,`$rounds/16-1` ++ mtctr $t0 + .align 4 + Lrounds: + addi $Tbl,$Tbl,`16*$SZ` +@@ -377,7 +434,282 @@ $code.=<<___; + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ++.size $func,.-$func ++___ ++} else { ++######################################################################## ++# SHA512 for PPC32, X vector is off-loaded to stack... ++# ++# | sha512 ++# | -m32 ++# ----------------------+----------------------- ++# PPC74x0,gcc-4.0.1 | +48% ++# POWER6,gcc-4.4.6 | +124%(*) ++# POWER7,gcc-4.4.6 | +79%(*) ++# e300,gcc-4.1.0 | +167% ++# ++# (*) ~1/3 of -m64 result [and ~20% better than -m32 code generated ++# by xlc-12.1] ++ ++my $XOFF=$LOCALS; ++ ++my @V=map("r$_",(16..31)); # A..H ++ ++my ($s0,$s1,$t0,$t1,$t2,$t3,$a0,$a1,$a2,$a3)=map("r$_",(0,5,6,8..12,14,15)); ++my ($x0,$x1)=("r3","r4"); # zaps $ctx and $inp ++ ++sub ROUND_00_15_ppc32 { ++my ($i, $ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo, ++ $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo)=@_; ++ ++$code.=<<___; ++ lwz $t2,`$SZ*($i%16)+($LITTLE_ENDIAN^4)`($Tbl) ++ xor $a0,$flo,$glo ++ lwz $t3,`$SZ*($i%16)+($LITTLE_ENDIAN^0)`($Tbl) ++ xor $a1,$fhi,$ghi ++ addc $hlo,$hlo,$t0 ; h+=x[i] ++ stw $t0,`$XOFF+0+$SZ*($i%16)`($sp) ; save x[i] ++ ++ srwi $s0,$elo,$Sigma1[0] ++ srwi $s1,$ehi,$Sigma1[0] ++ and $a0,$a0,$elo ++ adde $hhi,$hhi,$t1 ++ and $a1,$a1,$ehi ++ stw $t1,`$XOFF+4+$SZ*($i%16)`($sp) ++ srwi $t0,$elo,$Sigma1[1] ++ srwi $t1,$ehi,$Sigma1[1] ++ addc $hlo,$hlo,$t2 ; h+=K512[i] ++ insrwi 
$s0,$ehi,$Sigma1[0],0 ++ insrwi $s1,$elo,$Sigma1[0],0 ++ xor $a0,$a0,$glo ; Ch(e,f,g) ++ adde $hhi,$hhi,$t3 ++ xor $a1,$a1,$ghi ++ insrwi $t0,$ehi,$Sigma1[1],0 ++ insrwi $t1,$elo,$Sigma1[1],0 ++ addc $hlo,$hlo,$a0 ; h+=Ch(e,f,g) ++ srwi $t2,$ehi,$Sigma1[2]-32 ++ srwi $t3,$elo,$Sigma1[2]-32 ++ xor $s0,$s0,$t0 ++ xor $s1,$s1,$t1 ++ insrwi $t2,$elo,$Sigma1[2]-32,0 ++ insrwi $t3,$ehi,$Sigma1[2]-32,0 ++ xor $a0,$alo,$blo ; a^b, b^c in next round ++ adde $hhi,$hhi,$a1 ++ xor $a1,$ahi,$bhi ++ xor $s0,$s0,$t2 ; Sigma1(e) ++ xor $s1,$s1,$t3 ++ ++ srwi $t0,$alo,$Sigma0[0] ++ and $a2,$a2,$a0 ++ addc $hlo,$hlo,$s0 ; h+=Sigma1(e) ++ and $a3,$a3,$a1 ++ srwi $t1,$ahi,$Sigma0[0] ++ srwi $s0,$ahi,$Sigma0[1]-32 ++ adde $hhi,$hhi,$s1 ++ srwi $s1,$alo,$Sigma0[1]-32 ++ insrwi $t0,$ahi,$Sigma0[0],0 ++ insrwi $t1,$alo,$Sigma0[0],0 ++ xor $a2,$a2,$blo ; Maj(a,b,c) ++ addc $dlo,$dlo,$hlo ; d+=h ++ xor $a3,$a3,$bhi ++ insrwi $s0,$alo,$Sigma0[1]-32,0 ++ insrwi $s1,$ahi,$Sigma0[1]-32,0 ++ adde $dhi,$dhi,$hhi ++ srwi $t2,$ahi,$Sigma0[2]-32 ++ srwi $t3,$alo,$Sigma0[2]-32 ++ xor $s0,$s0,$t0 ++ addc $hlo,$hlo,$a2 ; h+=Maj(a,b,c) ++ xor $s1,$s1,$t1 ++ insrwi $t2,$alo,$Sigma0[2]-32,0 ++ insrwi $t3,$ahi,$Sigma0[2]-32,0 ++ adde $hhi,$hhi,$a3 ++___ ++$code.=<<___ if ($i>=15); ++ lwz $t0,`$XOFF+0+$SZ*(($i+2)%16)`($sp) ++ lwz $t1,`$XOFF+4+$SZ*(($i+2)%16)`($sp) ++___ ++$code.=<<___ if ($i<15 && !$LITTLE_ENDIAN); ++ lwz $t1,`$SZ*($i+1)+0`($inp) ++ lwz $t0,`$SZ*($i+1)+4`($inp) + ___ ++$code.=<<___ if ($i<15 && $LITTLE_ENDIAN); ++ lwz $a2,`$SZ*($i+1)+0`($inp) ++ lwz $a3,`$SZ*($i+1)+4`($inp) ++ rotlwi $t1,$a2,8 ++ rotlwi $t0,$a3,8 ++ rlwimi $t1,$a2,24,0,7 ++ rlwimi $t0,$a3,24,0,7 ++ rlwimi $t1,$a2,24,16,23 ++ rlwimi $t0,$a3,24,16,23 ++___ ++$code.=<<___; ++ xor $s0,$s0,$t2 ; Sigma0(a) ++ xor $s1,$s1,$t3 ++ addc $hlo,$hlo,$s0 ; h+=Sigma0(a) ++ adde $hhi,$hhi,$s1 ++___ ++$code.=<<___ if ($i==15); ++ lwz $x0,`$XOFF+0+$SZ*(($i+1)%16)`($sp) ++ lwz $x1,`$XOFF+4+$SZ*(($i+1)%16)`($sp) ++___ ++} ++sub 
ROUND_16_xx_ppc32 { ++my ($i, $ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo, ++ $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo)=@_; ++ ++$code.=<<___; ++ srwi $s0,$t0,$sigma0[0] ++ srwi $s1,$t1,$sigma0[0] ++ srwi $t2,$t0,$sigma0[1] ++ srwi $t3,$t1,$sigma0[1] ++ insrwi $s0,$t1,$sigma0[0],0 ++ insrwi $s1,$t0,$sigma0[0],0 ++ srwi $a0,$t0,$sigma0[2] ++ insrwi $t2,$t1,$sigma0[1],0 ++ insrwi $t3,$t0,$sigma0[1],0 ++ insrwi $a0,$t1,$sigma0[2],0 ++ xor $s0,$s0,$t2 ++ lwz $t2,`$XOFF+0+$SZ*(($i+14)%16)`($sp) ++ srwi $a1,$t1,$sigma0[2] ++ xor $s1,$s1,$t3 ++ lwz $t3,`$XOFF+4+$SZ*(($i+14)%16)`($sp) ++ xor $a0,$a0,$s0 ++ srwi $s0,$t2,$sigma1[0] ++ xor $a1,$a1,$s1 ++ srwi $s1,$t3,$sigma1[0] ++ addc $x0,$x0,$a0 ; x[i]+=sigma0(x[i+1]) ++ srwi $a0,$t3,$sigma1[1]-32 ++ insrwi $s0,$t3,$sigma1[0],0 ++ insrwi $s1,$t2,$sigma1[0],0 ++ adde $x1,$x1,$a1 ++ srwi $a1,$t2,$sigma1[1]-32 ++ ++ insrwi $a0,$t2,$sigma1[1]-32,0 ++ srwi $t2,$t2,$sigma1[2] ++ insrwi $a1,$t3,$sigma1[1]-32,0 ++ insrwi $t2,$t3,$sigma1[2],0 ++ xor $s0,$s0,$a0 ++ lwz $a0,`$XOFF+0+$SZ*(($i+9)%16)`($sp) ++ srwi $t3,$t3,$sigma1[2] ++ xor $s1,$s1,$a1 ++ lwz $a1,`$XOFF+4+$SZ*(($i+9)%16)`($sp) ++ xor $s0,$s0,$t2 ++ addc $x0,$x0,$a0 ; x[i]+=x[i+9] ++ xor $s1,$s1,$t3 ++ adde $x1,$x1,$a1 ++ addc $x0,$x0,$s0 ; x[i]+=sigma1(x[i+14]) ++ adde $x1,$x1,$s1 ++___ ++ ($t0,$t1,$x0,$x1) = ($x0,$x1,$t0,$t1); ++ &ROUND_00_15_ppc32(@_); ++} ++ ++$code.=<<___; ++.align 4 ++Lsha2_block_private: ++___ ++$code.=<<___ if (!$LITTLE_ENDIAN); ++ lwz $t1,0($inp) ++ xor $a2,@V[3],@V[5] ; B^C, magic seed ++ lwz $t0,4($inp) ++ xor $a3,@V[2],@V[4] ++___ ++$code.=<<___ if ($LITTLE_ENDIAN); ++ lwz $a1,0($inp) ++ xor $a2,@V[3],@V[5] ; B^C, magic seed ++ lwz $a0,4($inp) ++ xor $a3,@V[2],@V[4] ++ rotlwi $t1,$a1,8 ++ rotlwi $t0,$a0,8 ++ rlwimi $t1,$a1,24,0,7 ++ rlwimi $t0,$a0,24,0,7 ++ rlwimi $t1,$a1,24,16,23 ++ rlwimi $t0,$a0,24,16,23 ++___ ++for($i=0;$i<16;$i++) { ++ &ROUND_00_15_ppc32($i,@V); ++ unshift(@V,pop(@V)); unshift(@V,pop(@V)); ++ ($a0,$a1,$a2,$a3) = 
($a2,$a3,$a0,$a1); ++} ++$code.=<<___; ++ li $a0,`$rounds/16-1` ++ mtctr $a0 ++.align 4 ++Lrounds: ++ addi $Tbl,$Tbl,`16*$SZ` ++___ ++for(;$i<32;$i++) { ++ &ROUND_16_xx_ppc32($i,@V); ++ unshift(@V,pop(@V)); unshift(@V,pop(@V)); ++ ($a0,$a1,$a2,$a3) = ($a2,$a3,$a0,$a1); ++} ++$code.=<<___; ++ bdnz- Lrounds ++ ++ $POP $ctx,`$FRAME-$SIZE_T*22`($sp) ++ $POP $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer ++ $POP $num,`$FRAME-$SIZE_T*24`($sp) ; end pointer ++ subi $Tbl,$Tbl,`($rounds-16)*$SZ` ; rewind Tbl ++ ++ lwz $t0,`$LITTLE_ENDIAN^0`($ctx) ++ lwz $t1,`$LITTLE_ENDIAN^4`($ctx) ++ lwz $t2,`$LITTLE_ENDIAN^8`($ctx) ++ lwz $t3,`$LITTLE_ENDIAN^12`($ctx) ++ lwz $a0,`$LITTLE_ENDIAN^16`($ctx) ++ lwz $a1,`$LITTLE_ENDIAN^20`($ctx) ++ lwz $a2,`$LITTLE_ENDIAN^24`($ctx) ++ addc @V[1],@V[1],$t1 ++ lwz $a3,`$LITTLE_ENDIAN^28`($ctx) ++ adde @V[0],@V[0],$t0 ++ lwz $t0,`$LITTLE_ENDIAN^32`($ctx) ++ addc @V[3],@V[3],$t3 ++ lwz $t1,`$LITTLE_ENDIAN^36`($ctx) ++ adde @V[2],@V[2],$t2 ++ lwz $t2,`$LITTLE_ENDIAN^40`($ctx) ++ addc @V[5],@V[5],$a1 ++ lwz $t3,`$LITTLE_ENDIAN^44`($ctx) ++ adde @V[4],@V[4],$a0 ++ lwz $a0,`$LITTLE_ENDIAN^48`($ctx) ++ addc @V[7],@V[7],$a3 ++ lwz $a1,`$LITTLE_ENDIAN^52`($ctx) ++ adde @V[6],@V[6],$a2 ++ lwz $a2,`$LITTLE_ENDIAN^56`($ctx) ++ addc @V[9],@V[9],$t1 ++ lwz $a3,`$LITTLE_ENDIAN^60`($ctx) ++ adde @V[8],@V[8],$t0 ++ stw @V[0],`$LITTLE_ENDIAN^0`($ctx) ++ stw @V[1],`$LITTLE_ENDIAN^4`($ctx) ++ addc @V[11],@V[11],$t3 ++ stw @V[2],`$LITTLE_ENDIAN^8`($ctx) ++ stw @V[3],`$LITTLE_ENDIAN^12`($ctx) ++ adde @V[10],@V[10],$t2 ++ stw @V[4],`$LITTLE_ENDIAN^16`($ctx) ++ stw @V[5],`$LITTLE_ENDIAN^20`($ctx) ++ addc @V[13],@V[13],$a1 ++ stw @V[6],`$LITTLE_ENDIAN^24`($ctx) ++ stw @V[7],`$LITTLE_ENDIAN^28`($ctx) ++ adde @V[12],@V[12],$a0 ++ stw @V[8],`$LITTLE_ENDIAN^32`($ctx) ++ stw @V[9],`$LITTLE_ENDIAN^36`($ctx) ++ addc @V[15],@V[15],$a3 ++ stw @V[10],`$LITTLE_ENDIAN^40`($ctx) ++ stw @V[11],`$LITTLE_ENDIAN^44`($ctx) ++ adde @V[14],@V[14],$a2 ++ stw @V[12],`$LITTLE_ENDIAN^48`($ctx) ++ 
stw @V[13],`$LITTLE_ENDIAN^52`($ctx) ++ stw @V[14],`$LITTLE_ENDIAN^56`($ctx) ++ stw @V[15],`$LITTLE_ENDIAN^60`($ctx) ++ ++ addi $inp,$inp,`16*$SZ` ; advance inp ++ $PUSH $inp,`$FRAME-$SIZE_T*23`($sp) ++ $UCMP $inp,$num ++ bne Lsha2_block_private ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++.size $func,.-$func ++___ ++} + + # Ugly hack here, because PPC assembler syntax seem to vary too + # much from platforms to platform... +@@ -395,46 +727,46 @@ LPICmeup: + .space `64-9*4` + ___ + $code.=<<___ if ($SZ==8); +- .long 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd +- .long 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc +- .long 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019 +- .long 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118 +- .long 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe +- .long 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2 +- .long 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1 +- .long 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694 +- .long 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3 +- .long 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65 +- .long 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483 +- .long 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5 +- .long 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210 +- .long 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4 +- .long 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725 +- .long 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70 +- .long 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926 +- .long 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df +- .long 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8 +- .long 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b +- .long 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001 +- .long 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30 +- .long 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910 +- .long 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8 +- .long 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53 +- .long 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8 +- .long 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb +- .long 
0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3 +- .long 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60 +- .long 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec +- .long 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9 +- .long 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b +- .long 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207 +- .long 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178 +- .long 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6 +- .long 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b +- .long 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493 +- .long 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c +- .long 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a +- .long 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817 ++ .quad 0x428a2f98d728ae22,0x7137449123ef65cd ++ .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc ++ .quad 0x3956c25bf348b538,0x59f111f1b605d019 ++ .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 ++ .quad 0xd807aa98a3030242,0x12835b0145706fbe ++ .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 ++ .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 ++ .quad 0x9bdc06a725c71235,0xc19bf174cf692694 ++ .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 ++ .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 ++ .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 ++ .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 ++ .quad 0x983e5152ee66dfab,0xa831c66d2db43210 ++ .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 ++ .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 ++ .quad 0x06ca6351e003826f,0x142929670a0e6e70 ++ .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 ++ .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df ++ .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 ++ .quad 0x81c2c92e47edaee6,0x92722c851482353b ++ .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 ++ .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 ++ .quad 0xd192e819d6ef5218,0xd69906245565a910 ++ .quad 0xf40e35855771202a,0x106aa07032bbd1b8 ++ .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 ++ .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 ++ .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb ++ .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 ++ 
.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 ++ .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec ++ .quad 0x90befffa23631e28,0xa4506cebde82bde9 ++ .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b ++ .quad 0xca273eceea26619c,0xd186b8c721c0c207 ++ .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 ++ .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 ++ .quad 0x113f9804bef90dae,0x1b710b35131c471b ++ .quad 0x28db77f523047d84,0x32caab7b40c72493 ++ .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c ++ .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a ++ .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + ___ + $code.=<<___ if ($SZ==4); + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +diff -up openssl-1.0.1i/crypto/sha/asm/sha512p8-ppc.pl.ppc-asm openssl-1.0.1i/crypto/sha/asm/sha512p8-ppc.pl +--- openssl-1.0.1i/crypto/sha/asm/sha512p8-ppc.pl.ppc-asm 2014-08-13 19:46:21.096578196 +0200 ++++ openssl-1.0.1i/crypto/sha/asm/sha512p8-ppc.pl 2014-08-13 19:46:21.096578196 +0200 +@@ -0,0 +1,423 @@ ++#!/usr/bin/env perl ++ ++# ==================================================================== ++# Written by Andy Polyakov for the OpenSSL ++# project. The module is, however, dual licensed under OpenSSL and ++# CRYPTOGAMS licenses depending on where you obtain it. For further ++# details see http://www.openssl.org/~appro/cryptogams/. ++# ==================================================================== ++ ++# SHA256/512 for PowerISA v2.07. ++# ++# Accurate performance measurements are problematic, because it's ++# always virtualized setup with possibly throttled processor. ++# Relative comparison is therefore more informative. This module is ++# ~60% faster than integer-only sha512-ppc.pl. To anchor to something ++# else, SHA256 is 24% slower than sha1-ppc.pl and 2.5x slower than ++# hardware-assisted aes-128-cbc encrypt. SHA512 is 20% faster than ++# sha1-ppc.pl and 1.6x slower than aes-128-cbc. Another interesting ++# result is degree of computational resources' utilization. 
POWER8 is ++# "massively multi-threaded chip" and difference between single- and ++# maximum multi-process benchmark results tells that utilization is ++# whopping 94%. For sha512-ppc.pl we get [not unimpressive] 84% and ++# for sha1-ppc.pl - 73%. 100% means that multi-process result equals ++# to single-process one, given that all threads end up on the same ++# physical core. ++ ++$flavour=shift; ++$output =shift; ++ ++if ($flavour =~ /64/) { ++ $SIZE_T=8; ++ $LRSAVE=2*$SIZE_T; ++ $STU="stdu"; ++ $POP="ld"; ++ $PUSH="std"; ++} elsif ($flavour =~ /32/) { ++ $SIZE_T=4; ++ $LRSAVE=$SIZE_T; ++ $STU="stwu"; ++ $POP="lwz"; ++ $PUSH="stw"; ++} else { die "nonsense $flavour"; } ++ ++$LENDIAN=($flavour=~/le/); ++ ++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ++( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or ++( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or ++die "can't locate ppc-xlate.pl"; ++ ++open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!"; # 'or' (not '||') so die applies to open's result, not to the always-true command string ++ ++if ($output =~ /512/) { ++ $bits=512; ++ $SZ=8; ++ $sz="d"; ++ $rounds=80; ++} else { ++ $bits=256; ++ $SZ=4; ++ $sz="w"; ++ $rounds=64; ++} ++ ++$func="sha${bits}_block_p8"; ++$FRAME=8*$SIZE_T; ++ ++$sp ="r1"; ++$toc="r2"; ++$ctx="r3"; ++$inp="r4"; ++$num="r5"; ++$Tbl="r6"; ++$idx="r7"; ++$lrsave="r8"; ++$offload="r11"; ++$vrsave="r12"; ++($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,10,26..31)); ++ ++@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("v$_",(0..7)); ++@X=map("v$_",(8..23)); ++($Ki,$Func,$S0,$S1,$s0,$s1,$lemask)=map("v$_",(24..31)); ++ ++sub ROUND { ++my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; ++my $j=($i+1)%16; ++ ++$code.=<<___ if ($i<15 && ($i%(16/$SZ))==(16/$SZ-1)); ++ lvx_u @X[$i+1],0,$inp ; load X[i] in advance ++ addi $inp,$inp,16 ++___ ++$code.=<<___ if ($i<16 && ($i%(16/$SZ))); ++ vsldoi @X[$i],@X[$i-1],@X[$i-1],$SZ ++___ ++$code.=<<___ if ($LENDIAN && $i<16 && ($i%(16/$SZ))==0); ++ vperm @X[$i],@X[$i],@X[$i],$lemask ++___ ++$code.=<<___; ++ `"vshasigma${sz} $s0,@X[($j+1)%16],0,0" if
($i>=15)` ++ vsel $Func,$g,$f,$e ; Ch(e,f,g) ++ vshasigma${sz} $S1,$e,1,15 ; Sigma1(e) ++ vaddu${sz}m $h,$h,@X[$i%16] ; h+=X[i] ++ vshasigma${sz} $S0,$a,1,0 ; Sigma0(a) ++ `"vshasigma${sz} $s1,@X[($j+14)%16],0,15" if ($i>=15)` ++ vaddu${sz}m $h,$h,$Func ; h+=Ch(e,f,g) ++ vxor $Func,$a,$b ++ `"vaddu${sz}m @X[$j],@X[$j],@X[($j+9)%16]" if ($i>=15)` ++ vaddu${sz}m $h,$h,$S1 ; h+=Sigma1(e) ++ vsel $Func,$b,$c,$Func ; Maj(a,b,c) ++ vaddu${sz}m $g,$g,$Ki ; future h+=K[i] ++ vaddu${sz}m $d,$d,$h ; d+=h ++ vaddu${sz}m $S0,$S0,$Func ; Sigma0(a)+Maj(a,b,c) ++ `"vaddu${sz}m @X[$j],@X[$j],$s0" if ($i>=15)` ++ lvx $Ki,$idx,$Tbl ; load next K[i] ++ addi $idx,$idx,16 ++ vaddu${sz}m $h,$h,$S0 ; h+=Sigma0(a)+Maj(a,b,c) ++ `"vaddu${sz}m @X[$j],@X[$j],$s1" if ($i>=15)` ++___ ++} ++ ++$code=<<___; ++.machine "any" ++.text ++ ++.globl $func ++.align 6 ++$func: ++ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) ++ mflr $lrsave ++ li r10,`$FRAME+8*16+15` ++ li r11,`$FRAME+8*16+31` ++ stvx v20,r10,$sp # ABI says so ++ addi r10,r10,32 ++ mfspr $vrsave,256 ++ stvx v21,r11,$sp ++ addi r11,r11,32 ++ stvx v22,r10,$sp ++ addi r10,r10,32 ++ stvx v23,r11,$sp ++ addi r11,r11,32 ++ stvx v24,r10,$sp ++ addi r10,r10,32 ++ stvx v25,r11,$sp ++ addi r11,r11,32 ++ stvx v26,r10,$sp ++ addi r10,r10,32 ++ stvx v27,r11,$sp ++ addi r11,r11,32 ++ stvx v28,r10,$sp ++ addi r10,r10,32 ++ stvx v29,r11,$sp ++ addi r11,r11,32 ++ stvx v30,r10,$sp ++ stvx v31,r11,$sp ++ li r11,-1 ++ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave ++ li $x10,0x10 ++ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) ++ li $x20,0x20 ++ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) ++ li $x30,0x30 ++ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) ++ li $x40,0x40 ++ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) ++ li $x50,0x50 ++ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) ++ li $x60,0x60 ++ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) ++ li $x70,0x70 ++ $PUSH $lrsave,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) ++ mtspr 256,r11 ++ ++ bl LPICmeup ++ addi $offload,$sp,$FRAME+15 ++___ 
++$code.=<<___ if ($LENDIAN); ++ li $idx,8 ++ lvsl $lemask,0,$idx ++ vspltisb $Ki,0x0f ++ vxor $lemask,$lemask,$Ki ++___ ++$code.=<<___ if ($SZ==4); ++ lvx_4w $A,$x00,$ctx ++ lvx_4w $E,$x10,$ctx ++ vsldoi $B,$A,$A,4 # unpack ++ vsldoi $C,$A,$A,8 ++ vsldoi $D,$A,$A,12 ++ vsldoi $F,$E,$E,4 ++ vsldoi $G,$E,$E,8 ++ vsldoi $H,$E,$E,12 ++___ ++$code.=<<___ if ($SZ==8); ++ lvx_u $A,$x00,$ctx ++ lvx_u $C,$x10,$ctx ++ lvx_u $E,$x20,$ctx ++ vsldoi $B,$A,$A,8 # unpack ++ lvx_u $G,$x30,$ctx ++ vsldoi $D,$C,$C,8 ++ vsldoi $F,$E,$E,8 ++ vsldoi $H,$G,$G,8 ++___ ++$code.=<<___; ++ li r0,`($rounds-16)/16` # inner loop counter ++ b Loop ++.align 5 ++Loop: ++ lvx $Ki,$x00,$Tbl ++ li $idx,16 ++ lvx_u @X[0],0,$inp ++ addi $inp,$inp,16 ++ stvx $A,$x00,$offload # offload $A-$H ++ stvx $B,$x10,$offload ++ stvx $C,$x20,$offload ++ stvx $D,$x30,$offload ++ stvx $E,$x40,$offload ++ stvx $F,$x50,$offload ++ stvx $G,$x60,$offload ++ stvx $H,$x70,$offload ++ vaddu${sz}m $H,$H,$Ki # h+K[i] ++ lvx $Ki,$idx,$Tbl ++ addi $idx,$idx,16 ++___ ++for ($i=0;$i<16;$i++) { &ROUND($i,@V); unshift(@V,pop(@V)); } ++$code.=<<___; ++ mtctr r0 ++ b L16_xx ++.align 5 ++L16_xx: ++___ ++for (;$i<32;$i++) { &ROUND($i,@V); unshift(@V,pop(@V)); } ++$code.=<<___; ++ bdnz L16_xx ++ ++ lvx @X[2],$x00,$offload ++ subic. 
$num,$num,1 ++ lvx @X[3],$x10,$offload ++ vaddu${sz}m $A,$A,@X[2] ++ lvx @X[4],$x20,$offload ++ vaddu${sz}m $B,$B,@X[3] ++ lvx @X[5],$x30,$offload ++ vaddu${sz}m $C,$C,@X[4] ++ lvx @X[6],$x40,$offload ++ vaddu${sz}m $D,$D,@X[5] ++ lvx @X[7],$x50,$offload ++ vaddu${sz}m $E,$E,@X[6] ++ lvx @X[8],$x60,$offload ++ vaddu${sz}m $F,$F,@X[7] ++ lvx @X[9],$x70,$offload ++ vaddu${sz}m $G,$G,@X[8] ++ vaddu${sz}m $H,$H,@X[9] ++ bne Loop ++___ ++$code.=<<___ if ($SZ==4); ++ lvx @X[0],$idx,$Tbl ++ addi $idx,$idx,16 ++ vperm $A,$A,$B,$Ki # pack the answer ++ lvx @X[1],$idx,$Tbl ++ vperm $E,$E,$F,$Ki ++ vperm $A,$A,$C,@X[0] ++ vperm $E,$E,$G,@X[0] ++ vperm $A,$A,$D,@X[1] ++ vperm $E,$E,$H,@X[1] ++ stvx_4w $A,$x00,$ctx ++ stvx_4w $E,$x10,$ctx ++___ ++$code.=<<___ if ($SZ==8); ++ vperm $A,$A,$B,$Ki # pack the answer ++ vperm $C,$C,$D,$Ki ++ vperm $E,$E,$F,$Ki ++ vperm $G,$G,$H,$Ki ++ stvx_u $A,$x00,$ctx ++ stvx_u $C,$x10,$ctx ++ stvx_u $E,$x20,$ctx ++ stvx_u $G,$x30,$ctx ++___ ++$code.=<<___; ++ li r10,`$FRAME+8*16+15` ++ mtlr $lrsave ++ li r11,`$FRAME+8*16+31` ++ mtspr 256,$vrsave ++ lvx v20,r10,$sp # ABI says so ++ addi r10,r10,32 ++ lvx v21,r11,$sp ++ addi r11,r11,32 ++ lvx v22,r10,$sp ++ addi r10,r10,32 ++ lvx v23,r11,$sp ++ addi r11,r11,32 ++ lvx v24,r10,$sp ++ addi r10,r10,32 ++ lvx v25,r11,$sp ++ addi r11,r11,32 ++ lvx v26,r10,$sp ++ addi r10,r10,32 ++ lvx v27,r11,$sp ++ addi r11,r11,32 ++ lvx v28,r10,$sp ++ addi r10,r10,32 ++ lvx v29,r11,$sp ++ addi r11,r11,32 ++ lvx v30,r10,$sp ++ lvx v31,r11,$sp ++ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) ++ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) ++ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) ++ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) ++ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) ++ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) ++ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` ++ blr ++ .long 0 ++ .byte 0,12,4,1,0x80,6,3,0 ++ .long 0 ++.size $func,.-$func ++___ ++ ++# Ugly hack here, because PPC assembler syntax seem to vary too ++# much from platforms to platform... 
++$code.=<<___; ++.align 6 ++LPICmeup: ++ mflr r0 ++ bcl 20,31,\$+4 ++ mflr $Tbl ; vvvvvv "distance" between . and 1st data entry ++ addi $Tbl,$Tbl,`64-8` ++ mtlr r0 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ .space `64-9*4` ++___ ++ ++if ($SZ==8) { ++ local *table = sub { ++ foreach(@_) { $code.=".quad $_,$_\n"; } ++ }; ++ table( ++ "0x428a2f98d728ae22","0x7137449123ef65cd", ++ "0xb5c0fbcfec4d3b2f","0xe9b5dba58189dbbc", ++ "0x3956c25bf348b538","0x59f111f1b605d019", ++ "0x923f82a4af194f9b","0xab1c5ed5da6d8118", ++ "0xd807aa98a3030242","0x12835b0145706fbe", ++ "0x243185be4ee4b28c","0x550c7dc3d5ffb4e2", ++ "0x72be5d74f27b896f","0x80deb1fe3b1696b1", ++ "0x9bdc06a725c71235","0xc19bf174cf692694", ++ "0xe49b69c19ef14ad2","0xefbe4786384f25e3", ++ "0x0fc19dc68b8cd5b5","0x240ca1cc77ac9c65", ++ "0x2de92c6f592b0275","0x4a7484aa6ea6e483", ++ "0x5cb0a9dcbd41fbd4","0x76f988da831153b5", ++ "0x983e5152ee66dfab","0xa831c66d2db43210", ++ "0xb00327c898fb213f","0xbf597fc7beef0ee4", ++ "0xc6e00bf33da88fc2","0xd5a79147930aa725", ++ "0x06ca6351e003826f","0x142929670a0e6e70", ++ "0x27b70a8546d22ffc","0x2e1b21385c26c926", ++ "0x4d2c6dfc5ac42aed","0x53380d139d95b3df", ++ "0x650a73548baf63de","0x766a0abb3c77b2a8", ++ "0x81c2c92e47edaee6","0x92722c851482353b", ++ "0xa2bfe8a14cf10364","0xa81a664bbc423001", ++ "0xc24b8b70d0f89791","0xc76c51a30654be30", ++ "0xd192e819d6ef5218","0xd69906245565a910", ++ "0xf40e35855771202a","0x106aa07032bbd1b8", ++ "0x19a4c116b8d2d0c8","0x1e376c085141ab53", ++ "0x2748774cdf8eeb99","0x34b0bcb5e19b48a8", ++ "0x391c0cb3c5c95a63","0x4ed8aa4ae3418acb", ++ "0x5b9cca4f7763e373","0x682e6ff3d6b2b8a3", ++ "0x748f82ee5defb2fc","0x78a5636f43172f60", ++ "0x84c87814a1f0ab72","0x8cc702081a6439ec", ++ "0x90befffa23631e28","0xa4506cebde82bde9", ++ "0xbef9a3f7b2c67915","0xc67178f2e372532b", ++ "0xca273eceea26619c","0xd186b8c721c0c207", ++ "0xeada7dd6cde0eb1e","0xf57d4f7fee6ed178", ++ "0x06f067aa72176fba","0x0a637dc5a2c898a6", ++ "0x113f9804bef90dae","0x1b710b35131c471b", ++ 
"0x28db77f523047d84","0x32caab7b40c72493", ++ "0x3c9ebe0a15c9bebc","0x431d67c49c100d4c", ++ "0x4cc5d4becb3e42b6","0x597f299cfc657e2a", ++ "0x5fcb6fab3ad6faec","0x6c44198c4a475817","0"); ++$code.=<<___ if (!$LENDIAN); ++.quad 0x0001020304050607,0x1011121314151617 ++___ ++$code.=<<___ if ($LENDIAN); # quad-swapped ++.quad 0x1011121314151617,0x0001020304050607 ++___ ++} else { ++ local *table = sub { ++ foreach(@_) { $code.=".long $_,$_,$_,$_\n"; } ++ }; ++ table( ++ "0x428a2f98","0x71374491","0xb5c0fbcf","0xe9b5dba5", ++ "0x3956c25b","0x59f111f1","0x923f82a4","0xab1c5ed5", ++ "0xd807aa98","0x12835b01","0x243185be","0x550c7dc3", ++ "0x72be5d74","0x80deb1fe","0x9bdc06a7","0xc19bf174", ++ "0xe49b69c1","0xefbe4786","0x0fc19dc6","0x240ca1cc", ++ "0x2de92c6f","0x4a7484aa","0x5cb0a9dc","0x76f988da", ++ "0x983e5152","0xa831c66d","0xb00327c8","0xbf597fc7", ++ "0xc6e00bf3","0xd5a79147","0x06ca6351","0x14292967", ++ "0x27b70a85","0x2e1b2138","0x4d2c6dfc","0x53380d13", ++ "0x650a7354","0x766a0abb","0x81c2c92e","0x92722c85", ++ "0xa2bfe8a1","0xa81a664b","0xc24b8b70","0xc76c51a3", ++ "0xd192e819","0xd6990624","0xf40e3585","0x106aa070", ++ "0x19a4c116","0x1e376c08","0x2748774c","0x34b0bcb5", ++ "0x391c0cb3","0x4ed8aa4a","0x5b9cca4f","0x682e6ff3", ++ "0x748f82ee","0x78a5636f","0x84c87814","0x8cc70208", ++ "0x90befffa","0xa4506ceb","0xbef9a3f7","0xc67178f2","0"); ++$code.=<<___ if (!$LENDIAN); ++.long 0x00010203,0x10111213,0x10111213,0x10111213 ++.long 0x00010203,0x04050607,0x10111213,0x10111213 ++.long 0x00010203,0x04050607,0x08090a0b,0x10111213 ++___ ++$code.=<<___ if ($LENDIAN); # word-swapped ++.long 0x10111213,0x10111213,0x10111213,0x00010203 ++.long 0x10111213,0x10111213,0x04050607,0x00010203 ++.long 0x10111213,0x08090a0b,0x04050607,0x00010203 ++___ ++} ++$code.=<<___; ++.asciz "SHA${bits} for PowerISA 2.07, CRYPTOGAMS by " ++.align 2 ++___ ++ ++$code =~ s/\`([^\`]*)\`/eval $1/gem; ++print $code; ++close STDOUT; +diff -up openssl-1.0.1i/crypto/sha/Makefile.ppc-asm 
openssl-1.0.1i/crypto/sha/Makefile +--- openssl-1.0.1i/crypto/sha/Makefile.ppc-asm 2014-08-06 23:18:30.000000000 +0200 ++++ openssl-1.0.1i/crypto/sha/Makefile 2014-08-13 19:46:21.096578196 +0200 +@@ -75,6 +75,8 @@ sha512-sparcv9.s:asm/sha512-sparcv9.pl; + sha1-ppc.s: asm/sha1-ppc.pl; $(PERL) asm/sha1-ppc.pl $(PERLASM_SCHEME) $@ + sha256-ppc.s: asm/sha512-ppc.pl; $(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@ + sha512-ppc.s: asm/sha512-ppc.pl; $(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@ ++sha256p8-ppc.s: asm/sha512p8-ppc.pl; $(PERL) asm/sha512p8-ppc.pl $(PERLASM_SCHEME) $@ ++sha512p8-ppc.s: asm/sha512p8-ppc.pl; $(PERL) asm/sha512p8-ppc.pl $(PERLASM_SCHEME) $@ + + sha1-parisc.s: asm/sha1-parisc.pl; $(PERL) asm/sha1-parisc.pl $(PERLASM_SCHEME) $@ + sha256-parisc.s:asm/sha512-parisc.pl; $(PERL) asm/sha512-parisc.pl $(PERLASM_SCHEME) $@ diff --git a/openssl-1.0.1e-trusted-first.patch b/openssl-1.0.1i-trusted-first.patch similarity index 67% rename from openssl-1.0.1e-trusted-first.patch rename to openssl-1.0.1i-trusted-first.patch index 08ab639..f11f36d 100644 --- a/openssl-1.0.1e-trusted-first.patch +++ b/openssl-1.0.1i-trusted-first.patch @@ -1,7 +1,7 @@ -diff -up openssl-1.0.1e/apps/apps.c.trusted-first openssl-1.0.1e/apps/apps.c ---- openssl-1.0.1e/apps/apps.c.trusted-first 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/apps/apps.c 2013-08-16 15:42:39.920534769 +0200 -@@ -2361,6 +2361,8 @@ int args_verify(char ***pargs, int *parg +diff -up openssl-1.0.1i/apps/apps.c.trusted-first openssl-1.0.1i/apps/apps.c +--- openssl-1.0.1i/apps/apps.c.trusted-first 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/apps/apps.c 2014-08-07 13:54:27.751103405 +0200 +@@ -2365,6 +2365,8 @@ int args_verify(char ***pargs, int *parg flags |= X509_V_FLAG_NOTIFY_POLICY; else if (!strcmp(arg, "-check_ss_sig")) flags |= X509_V_FLAG_CHECK_SS_SIGNATURE; @@ -10,9 +10,9 @@ diff -up openssl-1.0.1e/apps/apps.c.trusted-first openssl-1.0.1e/apps/apps.c else return 0; -diff -up 
openssl-1.0.1e/apps/cms.c.trusted-first openssl-1.0.1e/apps/cms.c ---- openssl-1.0.1e/apps/cms.c.trusted-first 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/apps/cms.c 2013-08-16 15:43:56.671213879 +0200 +diff -up openssl-1.0.1i/apps/cms.c.trusted-first openssl-1.0.1i/apps/cms.c +--- openssl-1.0.1i/apps/cms.c.trusted-first 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/apps/cms.c 2014-08-07 13:54:27.751103405 +0200 @@ -642,6 +642,7 @@ int MAIN(int argc, char **argv) BIO_printf (bio_err, "-text include or delete text MIME headers\n"); BIO_printf (bio_err, "-CApath dir trusted certificates directory\n"); @@ -21,10 +21,10 @@ diff -up openssl-1.0.1e/apps/cms.c.trusted-first openssl-1.0.1e/apps/cms.c BIO_printf (bio_err, "-crl_check check revocation status of signer's certificate using CRLs\n"); BIO_printf (bio_err, "-crl_check_all check revocation status of signer's certificate chain using CRLs\n"); #ifndef OPENSSL_NO_ENGINE -diff -up openssl-1.0.1e/apps/ocsp.c.trusted-first openssl-1.0.1e/apps/ocsp.c ---- openssl-1.0.1e/apps/ocsp.c.trusted-first 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/apps/ocsp.c 2013-08-16 15:49:47.477572414 +0200 -@@ -595,6 +595,7 @@ int MAIN(int argc, char **argv) +diff -up openssl-1.0.1i/apps/ocsp.c.trusted-first openssl-1.0.1i/apps/ocsp.c +--- openssl-1.0.1i/apps/ocsp.c.trusted-first 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/apps/ocsp.c 2014-08-07 13:54:27.752103409 +0200 +@@ -605,6 +605,7 @@ int MAIN(int argc, char **argv) BIO_printf (bio_err, "-path path to use in OCSP request\n"); BIO_printf (bio_err, "-CApath dir trusted certificates directory\n"); BIO_printf (bio_err, "-CAfile file trusted certificates file\n"); @@ -32,20 +32,20 @@ diff -up openssl-1.0.1e/apps/ocsp.c.trusted-first openssl-1.0.1e/apps/ocsp.c BIO_printf (bio_err, "-VAfile file validator certificates file\n"); BIO_printf (bio_err, "-validity_period n maximum validity discrepancy in seconds\n"); BIO_printf (bio_err, "-status_age n 
maximum status age in seconds\n"); -diff -up openssl-1.0.1e/apps/s_client.c.trusted-first openssl-1.0.1e/apps/s_client.c ---- openssl-1.0.1e/apps/s_client.c.trusted-first 2013-08-16 15:42:39.000000000 +0200 -+++ openssl-1.0.1e/apps/s_client.c 2013-08-16 15:49:00.727542994 +0200 -@@ -298,6 +298,7 @@ static void sc_usage(void) +diff -up openssl-1.0.1i/apps/s_client.c.trusted-first openssl-1.0.1i/apps/s_client.c +--- openssl-1.0.1i/apps/s_client.c.trusted-first 2014-08-07 13:54:27.752103409 +0200 ++++ openssl-1.0.1i/apps/s_client.c 2014-08-07 15:06:28.443918055 +0200 +@@ -299,6 +299,7 @@ static void sc_usage(void) BIO_printf(bio_err," -pass arg - private key file pass phrase source\n"); BIO_printf(bio_err," -CApath arg - PEM format directory of CA's\n"); BIO_printf(bio_err," -CAfile arg - PEM format file of CA's\n"); + BIO_printf(bio_err," -trusted_first - Use trusted CA's first when building the trust chain\n"); BIO_printf(bio_err," -reconnect - Drop and re-make the connection with the same Session-ID\n"); BIO_printf(bio_err," -pause - sleep(1) after each read(2) and write(2) system call\n"); - BIO_printf(bio_err," -showcerts - show all certificates in the chain\n"); -diff -up openssl-1.0.1e/apps/smime.c.trusted-first openssl-1.0.1e/apps/smime.c ---- openssl-1.0.1e/apps/smime.c.trusted-first 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/apps/smime.c 2013-08-16 15:46:44.024875150 +0200 + BIO_printf(bio_err," -prexit - print session information even on connection failure\n"); +diff -up openssl-1.0.1i/apps/smime.c.trusted-first openssl-1.0.1i/apps/smime.c +--- openssl-1.0.1i/apps/smime.c.trusted-first 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/apps/smime.c 2014-08-07 13:54:27.753103414 +0200 @@ -479,6 +479,7 @@ int MAIN(int argc, char **argv) BIO_printf (bio_err, "-text include or delete text MIME headers\n"); BIO_printf (bio_err, "-CApath dir trusted certificates directory\n"); @@ -54,10 +54,10 @@ diff -up 
openssl-1.0.1e/apps/smime.c.trusted-first openssl-1.0.1e/apps/smime.c BIO_printf (bio_err, "-crl_check check revocation status of signer's certificate using CRLs\n"); BIO_printf (bio_err, "-crl_check_all check revocation status of signer's certificate chain using CRLs\n"); #ifndef OPENSSL_NO_ENGINE -diff -up openssl-1.0.1e/apps/s_server.c.trusted-first openssl-1.0.1e/apps/s_server.c ---- openssl-1.0.1e/apps/s_server.c.trusted-first 2013-08-16 15:42:39.000000000 +0200 -+++ openssl-1.0.1e/apps/s_server.c 2013-08-16 15:48:19.469634430 +0200 -@@ -501,6 +501,7 @@ static void sv_usage(void) +diff -up openssl-1.0.1i/apps/s_server.c.trusted-first openssl-1.0.1i/apps/s_server.c +--- openssl-1.0.1i/apps/s_server.c.trusted-first 2014-08-07 13:54:27.718103241 +0200 ++++ openssl-1.0.1i/apps/s_server.c 2014-08-07 13:54:27.753103414 +0200 +@@ -502,6 +502,7 @@ static void sv_usage(void) BIO_printf(bio_err," -state - Print the SSL states\n"); BIO_printf(bio_err," -CApath arg - PEM format directory of CA's\n"); BIO_printf(bio_err," -CAfile arg - PEM format file of CA's\n"); @@ -65,9 +65,9 @@ diff -up openssl-1.0.1e/apps/s_server.c.trusted-first openssl-1.0.1e/apps/s_serv BIO_printf(bio_err," -nocert - Don't use any certificates (Anon-DH)\n"); BIO_printf(bio_err," -cipher arg - play with 'openssl ciphers' to see what goes here\n"); BIO_printf(bio_err," -serverpref - Use server's cipher preferences\n"); -diff -up openssl-1.0.1e/apps/s_time.c.trusted-first openssl-1.0.1e/apps/s_time.c ---- openssl-1.0.1e/apps/s_time.c.trusted-first 2013-08-16 15:42:39.000000000 +0200 -+++ openssl-1.0.1e/apps/s_time.c 2013-08-16 15:47:35.862674188 +0200 +diff -up openssl-1.0.1i/apps/s_time.c.trusted-first openssl-1.0.1i/apps/s_time.c +--- openssl-1.0.1i/apps/s_time.c.trusted-first 2014-08-07 13:54:27.432101823 +0200 ++++ openssl-1.0.1i/apps/s_time.c 2014-08-07 13:54:27.753103414 +0200 @@ -179,6 +179,7 @@ static void s_time_usage(void) file if not specified by this option\n\ -CApath arg - PEM format 
directory of CA's\n\ @@ -76,9 +76,9 @@ diff -up openssl-1.0.1e/apps/s_time.c.trusted-first openssl-1.0.1e/apps/s_time.c -cipher - preferred cipher to use, play with 'openssl ciphers'\n\n"; printf( "usage: s_time \n\n" ); -diff -up openssl-1.0.1e/apps/ts.c.trusted-first openssl-1.0.1e/apps/ts.c ---- openssl-1.0.1e/apps/ts.c.trusted-first 2013-08-16 15:42:39.000000000 +0200 -+++ openssl-1.0.1e/apps/ts.c 2013-08-16 15:45:27.766206812 +0200 +diff -up openssl-1.0.1i/apps/ts.c.trusted-first openssl-1.0.1i/apps/ts.c +--- openssl-1.0.1i/apps/ts.c.trusted-first 2014-08-07 13:54:27.707103186 +0200 ++++ openssl-1.0.1i/apps/ts.c 2014-08-07 13:54:27.753103414 +0200 @@ -383,7 +383,7 @@ int MAIN(int argc, char **argv) "ts -verify [-data file_to_hash] [-digest digest_bytes] " "[-queryfile request.tsq] " @@ -88,9 +88,9 @@ diff -up openssl-1.0.1e/apps/ts.c.trusted-first openssl-1.0.1e/apps/ts.c "-untrusted cert_file.pem\n"); cleanup: /* Clean up. */ -diff -up openssl-1.0.1e/apps/verify.c.trusted-first openssl-1.0.1e/apps/verify.c ---- openssl-1.0.1e/apps/verify.c.trusted-first 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/apps/verify.c 2013-08-16 15:46:09.720124654 +0200 +diff -up openssl-1.0.1i/apps/verify.c.trusted-first openssl-1.0.1i/apps/verify.c +--- openssl-1.0.1i/apps/verify.c.trusted-first 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/apps/verify.c 2014-08-07 13:54:27.754103419 +0200 @@ -237,7 +237,7 @@ int MAIN(int argc, char **argv) end: @@ -100,9 +100,9 @@ diff -up openssl-1.0.1e/apps/verify.c.trusted-first openssl-1.0.1e/apps/verify.c BIO_printf(bio_err," [-attime timestamp]"); #ifndef OPENSSL_NO_ENGINE BIO_printf(bio_err," [-engine e]"); -diff -up openssl-1.0.1e/crypto/x509/x509_vfy.c.trusted-first openssl-1.0.1e/crypto/x509/x509_vfy.c ---- openssl-1.0.1e/crypto/x509/x509_vfy.c.trusted-first 2013-08-16 15:42:39.864533545 +0200 -+++ openssl-1.0.1e/crypto/x509/x509_vfy.c 2013-08-16 15:42:39.921534791 +0200 +diff -up 
openssl-1.0.1i/crypto/x509/x509_vfy.c.trusted-first openssl-1.0.1i/crypto/x509/x509_vfy.c +--- openssl-1.0.1i/crypto/x509/x509_vfy.c.trusted-first 2014-08-07 13:54:27.716103231 +0200 ++++ openssl-1.0.1i/crypto/x509/x509_vfy.c 2014-08-07 13:54:27.754103419 +0200 @@ -207,6 +207,21 @@ int X509_verify_cert(X509_STORE_CTX *ctx /* If we are self signed, we break */ @@ -125,9 +125,9 @@ diff -up openssl-1.0.1e/crypto/x509/x509_vfy.c.trusted-first openssl-1.0.1e/cryp /* If we were passed a cert chain, use it first */ if (ctx->untrusted != NULL) -diff -up openssl-1.0.1e/crypto/x509/x509_vfy.h.trusted-first openssl-1.0.1e/crypto/x509/x509_vfy.h ---- openssl-1.0.1e/crypto/x509/x509_vfy.h.trusted-first 2013-08-16 15:42:39.356522432 +0200 -+++ openssl-1.0.1e/crypto/x509/x509_vfy.h 2013-08-16 15:42:39.922534813 +0200 +diff -up openssl-1.0.1i/crypto/x509/x509_vfy.h.trusted-first openssl-1.0.1i/crypto/x509/x509_vfy.h +--- openssl-1.0.1i/crypto/x509/x509_vfy.h.trusted-first 2014-08-07 13:54:27.360101466 +0200 ++++ openssl-1.0.1i/crypto/x509/x509_vfy.h 2014-08-07 13:54:27.754103419 +0200 @@ -389,6 +389,8 @@ void X509_STORE_CTX_set_depth(X509_STORE #define X509_V_FLAG_USE_DELTAS 0x2000 /* Check selfsigned CA signature */ @@ -137,9 +137,9 @@ diff -up openssl-1.0.1e/crypto/x509/x509_vfy.h.trusted-first openssl-1.0.1e/cryp #define X509_VP_FLAG_DEFAULT 0x1 -diff -up openssl-1.0.1e/doc/apps/cms.pod.trusted-first openssl-1.0.1e/doc/apps/cms.pod ---- openssl-1.0.1e/doc/apps/cms.pod.trusted-first 2013-08-16 15:42:39.000000000 +0200 -+++ openssl-1.0.1e/doc/apps/cms.pod 2013-08-16 15:50:48.723921117 +0200 +diff -up openssl-1.0.1i/doc/apps/cms.pod.trusted-first openssl-1.0.1i/doc/apps/cms.pod +--- openssl-1.0.1i/doc/apps/cms.pod.trusted-first 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/doc/apps/cms.pod 2014-08-07 13:54:27.754103419 +0200 @@ -35,6 +35,7 @@ B B [B<-print>] [B<-CAfile file>] @@ -148,7 +148,7 @@ diff -up openssl-1.0.1e/doc/apps/cms.pod.trusted-first 
openssl-1.0.1e/doc/apps/c [B<-md digest>] [B<-[cipher]>] [B<-nointern>] -@@ -238,6 +239,12 @@ B<-verify>. This directory must be a sta +@@ -243,6 +244,12 @@ B<-verify>. This directory must be a sta is a hash of each subject name (using B) should be linked to each certificate. @@ -161,9 +161,9 @@ diff -up openssl-1.0.1e/doc/apps/cms.pod.trusted-first openssl-1.0.1e/doc/apps/c =item B<-md digest> digest algorithm to use when signing or resigning. If not present then the -diff -up openssl-1.0.1e/doc/apps/ocsp.pod.trusted-first openssl-1.0.1e/doc/apps/ocsp.pod ---- openssl-1.0.1e/doc/apps/ocsp.pod.trusted-first 2013-08-16 15:42:39.000000000 +0200 -+++ openssl-1.0.1e/doc/apps/ocsp.pod 2013-08-16 15:52:20.106933403 +0200 +diff -up openssl-1.0.1i/doc/apps/ocsp.pod.trusted-first openssl-1.0.1i/doc/apps/ocsp.pod +--- openssl-1.0.1i/doc/apps/ocsp.pod.trusted-first 2014-08-07 13:54:27.708103191 +0200 ++++ openssl-1.0.1i/doc/apps/ocsp.pod 2014-08-07 13:54:27.755103424 +0200 @@ -29,6 +29,7 @@ B B [B<-path>] [B<-CApath dir>] @@ -186,10 +186,10 @@ diff -up openssl-1.0.1e/doc/apps/ocsp.pod.trusted-first openssl-1.0.1e/doc/apps/ =item B<-verify_other file> file containing additional certificates to search when attempting to locate -diff -up openssl-1.0.1e/doc/apps/s_client.pod.trusted-first openssl-1.0.1e/doc/apps/s_client.pod ---- openssl-1.0.1e/doc/apps/s_client.pod.trusted-first 2013-08-16 15:42:39.000000000 +0200 -+++ openssl-1.0.1e/doc/apps/s_client.pod 2013-08-16 15:53:17.364194159 +0200 -@@ -17,6 +17,7 @@ B B +diff -up openssl-1.0.1i/doc/apps/s_client.pod.trusted-first openssl-1.0.1i/doc/apps/s_client.pod +--- openssl-1.0.1i/doc/apps/s_client.pod.trusted-first 2014-08-07 13:54:27.726103281 +0200 ++++ openssl-1.0.1i/doc/apps/s_client.pod 2014-08-07 13:54:27.755103424 +0200 +@@ -19,6 +19,7 @@ B B [B<-pass arg>] [B<-CApath directory>] [B<-CAfile filename>] @@ -197,7 +197,7 @@ diff -up openssl-1.0.1e/doc/apps/s_client.pod.trusted-first openssl-1.0.1e/doc/a [B<-reconnect>] 
[B<-pause>] [B<-showcerts>] -@@ -107,7 +108,7 @@ also used when building the client certi +@@ -121,7 +122,7 @@ also used when building the client certi A file containing trusted certificates to use during server authentication and to use when attempting to build the client certificate chain. @@ -206,9 +206,9 @@ diff -up openssl-1.0.1e/doc/apps/s_client.pod.trusted-first openssl-1.0.1e/doc/a Set various certificate chain valiadition option. See the L|verify(1)> manual page for details. -diff -up openssl-1.0.1e/doc/apps/smime.pod.trusted-first openssl-1.0.1e/doc/apps/smime.pod ---- openssl-1.0.1e/doc/apps/smime.pod.trusted-first 2013-08-16 15:42:39.000000000 +0200 -+++ openssl-1.0.1e/doc/apps/smime.pod 2013-08-16 15:56:12.497050767 +0200 +diff -up openssl-1.0.1i/doc/apps/smime.pod.trusted-first openssl-1.0.1i/doc/apps/smime.pod +--- openssl-1.0.1i/doc/apps/smime.pod.trusted-first 2014-07-22 21:43:11.000000000 +0200 ++++ openssl-1.0.1i/doc/apps/smime.pod 2014-08-07 13:54:27.755103424 +0200 @@ -15,6 +15,9 @@ B B [B<-pk7out>] [B<-[cipher]>] @@ -232,9 +232,9 @@ diff -up openssl-1.0.1e/doc/apps/smime.pod.trusted-first openssl-1.0.1e/doc/apps =item B<-md digest> digest algorithm to use when signing or resigning. 
If not present then the -diff -up openssl-1.0.1e/doc/apps/s_server.pod.trusted-first openssl-1.0.1e/doc/apps/s_server.pod ---- openssl-1.0.1e/doc/apps/s_server.pod.trusted-first 2013-08-16 15:42:39.000000000 +0200 -+++ openssl-1.0.1e/doc/apps/s_server.pod 2013-08-16 15:54:33.609873214 +0200 +diff -up openssl-1.0.1i/doc/apps/s_server.pod.trusted-first openssl-1.0.1i/doc/apps/s_server.pod +--- openssl-1.0.1i/doc/apps/s_server.pod.trusted-first 2014-08-07 13:54:27.726103281 +0200 ++++ openssl-1.0.1i/doc/apps/s_server.pod 2014-08-07 15:07:12.315099577 +0200 @@ -33,6 +33,7 @@ B B [B<-state>] [B<-CApath directory>] @@ -242,8 +242,8 @@ diff -up openssl-1.0.1e/doc/apps/s_server.pod.trusted-first openssl-1.0.1e/doc/a +[B<-trusted_first>] [B<-nocert>] [B<-cipher cipherlist>] - [B<-quiet>] -@@ -168,6 +169,12 @@ and to use when attempting to build the + [B<-serverpref>] +@@ -178,6 +179,12 @@ and to use when attempting to build the is also used in the list of acceptable client CAs passed to the client when a certificate is requested. @@ -256,9 +256,9 @@ diff -up openssl-1.0.1e/doc/apps/s_server.pod.trusted-first openssl-1.0.1e/doc/a =item B<-state> prints out the SSL session states. -diff -up openssl-1.0.1e/doc/apps/s_time.pod.trusted-first openssl-1.0.1e/doc/apps/s_time.pod ---- openssl-1.0.1e/doc/apps/s_time.pod.trusted-first 2013-02-11 16:02:48.000000000 +0100 -+++ openssl-1.0.1e/doc/apps/s_time.pod 2013-08-16 15:55:12.651732938 +0200 +diff -up openssl-1.0.1i/doc/apps/s_time.pod.trusted-first openssl-1.0.1i/doc/apps/s_time.pod +--- openssl-1.0.1i/doc/apps/s_time.pod.trusted-first 2014-07-22 21:41:23.000000000 +0200 ++++ openssl-1.0.1i/doc/apps/s_time.pod 2014-08-07 13:54:27.755103424 +0200 @@ -14,6 +14,7 @@ B B [B<-key filename>] [B<-CApath directory>] @@ -280,9 +280,9 @@ diff -up openssl-1.0.1e/doc/apps/s_time.pod.trusted-first openssl-1.0.1e/doc/app =item B<-new> performs the timing test using a new session ID for each connection. 
-diff -up openssl-1.0.1e/doc/apps/ts.pod.trusted-first openssl-1.0.1e/doc/apps/ts.pod ---- openssl-1.0.1e/doc/apps/ts.pod.trusted-first 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/doc/apps/ts.pod 2013-08-16 15:57:17.399479957 +0200 +diff -up openssl-1.0.1i/doc/apps/ts.pod.trusted-first openssl-1.0.1i/doc/apps/ts.pod +--- openssl-1.0.1i/doc/apps/ts.pod.trusted-first 2014-07-22 21:41:23.000000000 +0200 ++++ openssl-1.0.1i/doc/apps/ts.pod 2014-08-07 13:54:27.756103429 +0200 @@ -46,6 +46,7 @@ B<-verify> [B<-token_in>] [B<-CApath> trusted_cert_path] @@ -304,9 +304,9 @@ diff -up openssl-1.0.1e/doc/apps/ts.pod.trusted-first openssl-1.0.1e/doc/apps/ts =item B<-untrusted> cert_file.pem Set of additional untrusted certificates in PEM format which may be -diff -up openssl-1.0.1e/doc/apps/verify.pod.trusted-first openssl-1.0.1e/doc/apps/verify.pod ---- openssl-1.0.1e/doc/apps/verify.pod.trusted-first 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/doc/apps/verify.pod 2013-08-16 15:58:00.267423925 +0200 +diff -up openssl-1.0.1i/doc/apps/verify.pod.trusted-first openssl-1.0.1i/doc/apps/verify.pod +--- openssl-1.0.1i/doc/apps/verify.pod.trusted-first 2014-08-06 23:10:56.000000000 +0200 ++++ openssl-1.0.1i/doc/apps/verify.pod 2014-08-07 13:54:27.756103429 +0200 @@ -9,6 +9,7 @@ verify - Utility to verify certificates. B B [B<-CApath directory>] diff --git a/openssl.git-96db902.patch b/openssl.git-96db902.patch deleted file mode 100644 index 6fed32a..0000000 --- a/openssl.git-96db902.patch +++ /dev/null @@ -1,108 +0,0 @@ -From: Dr. Stephen Henson -Date: Sat, 5 Apr 2014 23:51:06 +0000 (+0100) -Subject: Add heartbeat extension bounds check. -X-Git-Tag: OpenSSL_1_0_1g~3 -X-Git-Url: http://git.openssl.org/gitweb/?p=openssl.git;a=commitdiff_plain;h=96db902 - -Add heartbeat extension bounds check. - -A missing bounds check in the handling of the TLS heartbeat extension -can be used to reveal up to 64k of memory to a connected client or -server. 
- -Thanks for Neel Mehta of Google Security for discovering this bug and to -Adam Langley and Bodo Moeller for -preparing the fix (CVE-2014-0160) ---- - -diff --git a/ssl/d1_both.c b/ssl/d1_both.c -index 7a5596a..2e8cf68 100644 ---- a/ssl/d1_both.c -+++ b/ssl/d1_both.c -@@ -1459,26 +1459,36 @@ dtls1_process_heartbeat(SSL *s) - unsigned int payload; - unsigned int padding = 16; /* Use minimum padding */ - -- /* Read type and payload length first */ -- hbtype = *p++; -- n2s(p, payload); -- pl = p; -- - if (s->msg_callback) - s->msg_callback(0, s->version, TLS1_RT_HEARTBEAT, - &s->s3->rrec.data[0], s->s3->rrec.length, - s, s->msg_callback_arg); - -+ /* Read type and payload length first */ -+ if (1 + 2 + 16 > s->s3->rrec.length) -+ return 0; /* silently discard */ -+ hbtype = *p++; -+ n2s(p, payload); -+ if (1 + 2 + payload + 16 > s->s3->rrec.length) -+ return 0; /* silently discard per RFC 6520 sec. 4 */ -+ pl = p; -+ - if (hbtype == TLS1_HB_REQUEST) - { - unsigned char *buffer, *bp; -+ unsigned int write_length = 1 /* heartbeat type */ + -+ 2 /* heartbeat length */ + -+ payload + padding; - int r; - -+ if (write_length > SSL3_RT_MAX_PLAIN_LENGTH) -+ return 0; -+ - /* Allocate memory for the response, size is 1 byte - * message type, plus 2 bytes payload length, plus - * payload, plus padding - */ -- buffer = OPENSSL_malloc(1 + 2 + payload + padding); -+ buffer = OPENSSL_malloc(write_length); - bp = buffer; - - /* Enter response type, length and copy payload */ -@@ -1489,11 +1499,11 @@ dtls1_process_heartbeat(SSL *s) - /* Random padding */ - RAND_pseudo_bytes(bp, padding); - -- r = dtls1_write_bytes(s, TLS1_RT_HEARTBEAT, buffer, 3 + payload + padding); -+ r = dtls1_write_bytes(s, TLS1_RT_HEARTBEAT, buffer, write_length); - - if (r >= 0 && s->msg_callback) - s->msg_callback(1, s->version, TLS1_RT_HEARTBEAT, -- buffer, 3 + payload + padding, -+ buffer, write_length, - s, s->msg_callback_arg); - - OPENSSL_free(buffer); -diff --git a/ssl/t1_lib.c b/ssl/t1_lib.c -index 
b82fada..bddffd9 100644 ---- a/ssl/t1_lib.c -+++ b/ssl/t1_lib.c -@@ -2588,16 +2588,20 @@ tls1_process_heartbeat(SSL *s) - unsigned int payload; - unsigned int padding = 16; /* Use minimum padding */ - -- /* Read type and payload length first */ -- hbtype = *p++; -- n2s(p, payload); -- pl = p; -- - if (s->msg_callback) - s->msg_callback(0, s->version, TLS1_RT_HEARTBEAT, - &s->s3->rrec.data[0], s->s3->rrec.length, - s, s->msg_callback_arg); - -+ /* Read type and payload length first */ -+ if (1 + 2 + 16 > s->s3->rrec.length) -+ return 0; /* silently discard */ -+ hbtype = *p++; -+ n2s(p, payload); -+ if (1 + 2 + payload + 16 > s->s3->rrec.length) -+ return 0; /* silently discard per RFC 6520 sec. 4 */ -+ pl = p; -+ - if (hbtype == TLS1_HB_REQUEST) - { - unsigned char *buffer, *bp; diff --git a/sources b/sources index 556dcb4..b97a288 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -6115ae0bb61b481a9195baef72514c2e openssl-1.0.1e-hobbled.tar.xz +c152e5284765c3325301a62b01a48fc0 openssl-1.0.1i-hobbled.tar.xz