32489 lines
1.1 MiB
32489 lines
1.1 MiB
From 98550817e5afabb4b254cc838e1efe01ceea516e Mon Sep 17 00:00:00 2001
|
|
From: Gerd Hoffmann <kraxel@redhat.com>
|
|
Date: Mon, 7 Apr 2025 12:06:45 +0200
|
|
Subject: [PATCH] CryptoPkg/openssl: update generated files
|
|
|
|
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
|
|
---
|
|
.../AARCH64-GCC/crypto/aes/aesv8-armx.S | 6 +-
|
|
.../AARCH64-GCC/crypto/aes/bsaes-armv8.S | 26 +-
|
|
.../AARCH64-GCC/crypto/aes/vpaes-armv8.S | 53 +-
|
|
.../AARCH64-GCC/crypto/bn/armv8-mont.S | 1 +
|
|
.../crypto/ec/ecp_nistz256-armv8.S | 100 +-
|
|
.../AARCH64-GCC/crypto/md5/md5-aarch64.S | 128 +-
|
|
.../crypto/modes/aes-gcm-armv8_64.S | 1 +
|
|
.../AARCH64-GCC/crypto/modes/ghashv8-armx.S | 1 +
|
|
.../AARCH64-GCC/crypto/sha/keccak1600-armv8.S | 10 +-
|
|
.../AARCH64-GCC/crypto/sha/sha1-armv8.S | 6 +-
|
|
.../AARCH64-GCC/crypto/sha/sha256-armv8.S | 15 +-
|
|
.../AARCH64-GCC/crypto/sha/sha512-armv8.S | 12 +-
|
|
.../AARCH64-GCC/crypto/sm3/sm3-armv8.S | 15 +-
|
|
.../OpensslGen/IA32-GCC/crypto/aes/aes-586.S | 2 +-
|
|
.../IA32-GCC/crypto/aes/aesni-x86.S | 2 +-
|
|
.../OpensslGen/IA32-GCC/crypto/bn/bn-586.S | 2 +-
|
|
.../OpensslGen/IA32-GCC/crypto/bn/x86-gf2m.S | 2 +-
|
|
.../OpensslGen/IA32-GCC/crypto/bn/x86-mont.S | 2 +-
|
|
.../IA32-GCC/crypto/ec/ecp_nistz256-x86.S | 2 +-
|
|
.../OpensslGen/IA32-GCC/crypto/sha/sha1-586.S | 2 +-
|
|
.../IA32-GCC/crypto/sha/sha256-586.S | 2 +-
|
|
.../IA32-GCC/crypto/sha/sha512-586.S | 2 +-
|
|
.../OpensslGen/IA32-GCC/crypto/x86cpuid.S | 21 +-
|
|
.../IA32-MSFT/crypto/aes/aes-586.nasm | 2 +-
|
|
.../IA32-MSFT/crypto/aes/aesni-x86.nasm | 2 +-
|
|
.../IA32-MSFT/crypto/bn/bn-586.nasm | 2 +-
|
|
.../IA32-MSFT/crypto/bn/x86-gf2m.nasm | 2 +-
|
|
.../IA32-MSFT/crypto/bn/x86-mont.nasm | 2 +-
|
|
.../IA32-MSFT/crypto/ec/ecp_nistz256-x86.nasm | 2 +-
|
|
.../IA32-MSFT/crypto/sha/sha1-586.nasm | 2 +-
|
|
.../IA32-MSFT/crypto/sha/sha256-586.nasm | 2 +-
|
|
.../IA32-MSFT/crypto/sha/sha512-586.nasm | 2 +-
|
|
.../OpensslGen/IA32-MSFT/crypto/x86cpuid.nasm | 21 +-
|
|
.../X64-GCC/crypto/aes/aesni-xts-avx512.s | 8145 ++++++++++++++++
|
|
.../X64-GCC/crypto/bn/rsaz-2k-avxifma.s | 1167 +++
|
|
.../X64-GCC/crypto/bn/rsaz-3k-avxifma.s | 1768 ++++
|
|
.../X64-GCC/crypto/bn/rsaz-4k-avxifma.s | 1922 ++++
|
|
.../X64-GCC/crypto/md5/md5-x86_64.s | 32 +-
|
|
.../OpensslGen/X64-GCC/crypto/x86_64cpuid.s | 30 +-
|
|
.../X64-MSFT/crypto/aes/aesni-xts-avx512.nasm | 8350 +++++++++++++++++
|
|
.../X64-MSFT/crypto/bn/rsaz-2k-avxifma.nasm | 1276 +++
|
|
.../X64-MSFT/crypto/bn/rsaz-3k-avxifma.nasm | 1927 ++++
|
|
.../X64-MSFT/crypto/bn/rsaz-4k-avxifma.nasm | 2081 ++++
|
|
.../X64-MSFT/crypto/md5/md5-x86_64.nasm | 32 +-
|
|
.../X64-MSFT/crypto/x86_64cpuid.nasm | 30 +-
|
|
.../OpensslLib/OpensslGen/crypto/params_idx.c | 151 +-
|
|
.../OpensslGen/include/internal/param_names.h | 607 +-
|
|
.../OpensslGen/include/openssl/asn1.h | 31 +-
|
|
.../OpensslGen/include/openssl/bio.h | 47 +-
|
|
.../OpensslGen/include/openssl/cms.h | 5 +-
|
|
.../include/openssl/configuration-ec.h | 12 +
|
|
.../include/openssl/configuration-noec.h | 12 +
|
|
.../OpensslGen/include/openssl/core_names.h | 32 +-
|
|
.../OpensslGen/include/openssl/crmf.h | 30 +-
|
|
.../OpensslGen/include/openssl/crypto.h | 3 +
|
|
.../OpensslGen/include/openssl/opensslv.h | 12 +-
|
|
.../OpensslGen/include/openssl/ssl.h | 59 +-
|
|
.../OpensslGen/include/openssl/x509_acert.h | 31 +
|
|
.../OpensslGen/include/openssl/x509_vfy.h | 3 +-
|
|
.../OpensslGen/include/openssl/x509v3.h | 480 +-
|
|
.../providers/common/der/der_ml_dsa_gen.c | 37 +
|
|
.../providers/common/der/der_slh_dsa_gen.c | 100 +
|
|
.../common/include/prov/der_ml_dsa.h | 40 +
|
|
.../common/include/prov/der_slh_dsa.h | 103 +
|
|
CryptoPkg/Library/OpensslLib/OpensslLib.inf | 47 +-
|
|
.../Library/OpensslLib/OpensslLibAccel.inf | 143 +-
|
|
.../Library/OpensslLib/OpensslLibCrypto.inf | 44 +
|
|
.../Library/OpensslLib/OpensslLibFull.inf | 49 +-
|
|
.../OpensslLib/OpensslLibFullAccel.inf | 149 +-
|
|
69 files changed, 28877 insertions(+), 562 deletions(-)
|
|
create mode 100644 CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/aes/aesni-xts-avx512.s
|
|
create mode 100644 CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-2k-avxifma.s
|
|
create mode 100644 CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-3k-avxifma.s
|
|
create mode 100644 CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-4k-avxifma.s
|
|
create mode 100644 CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/aes/aesni-xts-avx512.nasm
|
|
create mode 100644 CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-2k-avxifma.nasm
|
|
create mode 100644 CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-3k-avxifma.nasm
|
|
create mode 100644 CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-4k-avxifma.nasm
|
|
create mode 100644 CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/der/der_ml_dsa_gen.c
|
|
create mode 100644 CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/der/der_slh_dsa_gen.c
|
|
create mode 100644 CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/include/prov/der_ml_dsa.h
|
|
create mode 100644 CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/include/prov/der_slh_dsa.h
|
|
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/aesv8-armx.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/aesv8-armx.S
|
|
index 96a66379e1..31c750f875 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/aesv8-armx.S
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/aesv8-armx.S
|
|
@@ -3,12 +3,13 @@
|
|
#if __ARM_MAX_ARCH__>=7
|
|
.arch armv8-a+crypto
|
|
.text
|
|
+.section .rodata
|
|
.align 5
|
|
.Lrcon:
|
|
.long 0x01,0x01,0x01,0x01
|
|
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
|
|
.long 0x1b,0x1b,0x1b,0x1b
|
|
-
|
|
+.previous
|
|
.globl aes_v8_set_encrypt_key
|
|
.type aes_v8_set_encrypt_key,%function
|
|
.align 5
|
|
@@ -31,7 +32,8 @@ aes_v8_set_encrypt_key:
|
|
tst w1,#0x3f
|
|
b.ne .Lenc_key_abort
|
|
|
|
- adr x3,.Lrcon
|
|
+ adrp x3,.Lrcon
|
|
+ add x3,x3,#:lo12:.Lrcon
|
|
cmp w1,#192
|
|
|
|
eor v0.16b,v0.16b,v0.16b
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/bsaes-armv8.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/bsaes-armv8.S
|
|
index 8a8668262a..bb05512c17 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/bsaes-armv8.S
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/bsaes-armv8.S
|
|
@@ -1,4 +1,4 @@
|
|
-// Copyright 2021-2024 The OpenSSL Project Authors. All Rights Reserved.
|
|
+// Copyright 2021-2025 The OpenSSL Project Authors. All Rights Reserved.
|
|
//
|
|
// Licensed under the OpenSSL license (the "License"). You may not use
|
|
// this file except in compliance with the License. You can obtain a copy
|
|
@@ -44,7 +44,8 @@
|
|
// other SIMD registers corrupted
|
|
_bsaes_decrypt8:
|
|
ldr q8, [x9], #16
|
|
- adr x11, .LM0ISR
|
|
+ adrp x11, .LM0ISR
|
|
+ add x11, x11, #:lo12:.LM0ISR
|
|
movi v9.16b, #0x55
|
|
ldr q10, [x11], #16
|
|
movi v16.16b, #0x33
|
|
@@ -460,9 +461,10 @@ _bsaes_decrypt8:
|
|
ret
|
|
.size _bsaes_decrypt8,.-_bsaes_decrypt8
|
|
|
|
-.type _bsaes_const,%object
|
|
+.section .rodata
|
|
+.type _bsaes_consts,%object
|
|
.align 6
|
|
-_bsaes_const:
|
|
+_bsaes_consts:
|
|
// InvShiftRows constants
|
|
// Used in _bsaes_decrypt8, which assumes contiguity
|
|
// .LM0ISR used with round 0 key
|
|
@@ -498,7 +500,9 @@ _bsaes_const:
|
|
.quad 0x090d01050c000408, 0x03070b0f060a0e02
|
|
|
|
.align 6
|
|
-.size _bsaes_const,.-_bsaes_const
|
|
+.size _bsaes_consts,.-_bsaes_consts
|
|
+
|
|
+.previous
|
|
|
|
.type _bsaes_encrypt8,%function
|
|
.align 4
|
|
@@ -514,7 +518,8 @@ _bsaes_const:
|
|
// other SIMD registers corrupted
|
|
_bsaes_encrypt8:
|
|
ldr q8, [x9], #16
|
|
- adr x11, .LM0SR
|
|
+ adrp x11, .LM0SR
|
|
+ add x11, x11, #:lo12:.LM0SR
|
|
ldr q9, [x11], #16
|
|
_bsaes_encrypt8_alt:
|
|
eor v0.16b, v0.16b, v8.16b
|
|
@@ -918,9 +923,11 @@ _bsaes_encrypt8_alt:
|
|
// other SIMD registers corrupted
|
|
_bsaes_key_convert:
|
|
#ifdef __AARCH64EL__
|
|
- adr x11, .LM0_littleendian
|
|
+ adrp x11, .LM0_littleendian
|
|
+ add x11, x11, #:lo12:.LM0_littleendian
|
|
#else
|
|
- adr x11, .LM0_bigendian
|
|
+ adrp x11, .LM0_bigendian
|
|
+ add x11, x11, #:lo12:.LM0_bigendian
|
|
#endif
|
|
ldr q0, [x9], #16 // load round 0 key
|
|
ldr q1, [x11] // .LM0
|
|
@@ -964,7 +971,8 @@ _bsaes_key_convert:
|
|
// don't save last round key
|
|
#ifdef __AARCH64EL__
|
|
rev32 v15.16b, v15.16b
|
|
- adr x11, .LM0_bigendian
|
|
+ adrp x11, .LM0_bigendian
|
|
+ add x11, x11, #:lo12:.LM0_bigendian
|
|
#endif
|
|
ret
|
|
.size _bsaes_key_convert,.-_bsaes_key_convert
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/vpaes-armv8.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/vpaes-armv8.S
|
|
index 9aef5acd86..e78961d334 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/vpaes-armv8.S
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/vpaes-armv8.S
|
|
@@ -1,6 +1,6 @@
|
|
#include "arm_arch.h"
|
|
|
|
-.text
|
|
+.section .rodata
|
|
|
|
.type _vpaes_consts,%object
|
|
.align 7 // totally strategic alignment
|
|
@@ -92,6 +92,9 @@ _vpaes_consts:
|
|
.align 2
|
|
.size _vpaes_consts,.-_vpaes_consts
|
|
.align 6
|
|
+
|
|
+.text
|
|
+
|
|
//
|
|
// _aes_preheat
|
|
//
|
|
@@ -101,7 +104,8 @@ _vpaes_consts:
|
|
.type _vpaes_encrypt_preheat,%function
|
|
.align 4
|
|
_vpaes_encrypt_preheat:
|
|
- adr x10, .Lk_inv
|
|
+ adrp x10, .Lk_inv
|
|
+ add x10, x10, #:lo12:.Lk_inv
|
|
movi v17.16b, #0x0f
|
|
ld1 {v18.2d,v19.2d}, [x10],#32 // .Lk_inv
|
|
ld1 {v20.2d,v21.2d,v22.2d,v23.2d}, [x10],#64 // .Lk_ipt, .Lk_sbo
|
|
@@ -129,7 +133,8 @@ _vpaes_encrypt_preheat:
|
|
_vpaes_encrypt_core:
|
|
mov x9, x2
|
|
ldr w8, [x2,#240] // pull rounds
|
|
- adr x11, .Lk_mc_forward+16
|
|
+ adrp x11, .Lk_mc_forward+16
|
|
+ add x11, x11, #:lo12:.Lk_mc_forward+16
|
|
// vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo
|
|
ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key
|
|
and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
|
|
@@ -216,7 +221,8 @@ vpaes_encrypt:
|
|
_vpaes_encrypt_2x:
|
|
mov x9, x2
|
|
ldr w8, [x2,#240] // pull rounds
|
|
- adr x11, .Lk_mc_forward+16
|
|
+ adrp x11, .Lk_mc_forward+16
|
|
+ add x11, x11, #:lo12:.Lk_mc_forward+16
|
|
// vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo
|
|
ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key
|
|
and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
|
|
@@ -319,9 +325,11 @@ _vpaes_encrypt_2x:
|
|
.type _vpaes_decrypt_preheat,%function
|
|
.align 4
|
|
_vpaes_decrypt_preheat:
|
|
- adr x10, .Lk_inv
|
|
+ adrp x10, .Lk_inv
|
|
+ add x10, x10, #:lo12:.Lk_inv
|
|
movi v17.16b, #0x0f
|
|
- adr x11, .Lk_dipt
|
|
+ adrp x11, .Lk_dipt
|
|
+ add x11, x11, #:lo12:.Lk_dipt
|
|
ld1 {v18.2d,v19.2d}, [x10],#32 // .Lk_inv
|
|
ld1 {v20.2d,v21.2d,v22.2d,v23.2d}, [x11],#64 // .Lk_dipt, .Lk_dsbo
|
|
ld1 {v24.2d,v25.2d,v26.2d,v27.2d}, [x11],#64 // .Lk_dsb9, .Lk_dsbd
|
|
@@ -343,10 +351,12 @@ _vpaes_decrypt_core:
|
|
// vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo
|
|
lsl x11, x8, #4 // mov %rax, %r11; shl $4, %r11
|
|
eor x11, x11, #0x30 // xor $0x30, %r11
|
|
- adr x10, .Lk_sr
|
|
+ adrp x10, .Lk_sr
|
|
+ add x10, x10, #:lo12:.Lk_sr
|
|
and x11, x11, #0x30 // and $0x30, %r11
|
|
add x11, x11, x10
|
|
- adr x10, .Lk_mc_forward+48
|
|
+ adrp x10, .Lk_mc_forward+48
|
|
+ add x10, x10, #:lo12:.Lk_mc_forward+48
|
|
|
|
ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key
|
|
and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
|
|
@@ -454,10 +464,12 @@ _vpaes_decrypt_2x:
|
|
// vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo
|
|
lsl x11, x8, #4 // mov %rax, %r11; shl $4, %r11
|
|
eor x11, x11, #0x30 // xor $0x30, %r11
|
|
- adr x10, .Lk_sr
|
|
+ adrp x10, .Lk_sr
|
|
+ add x10, x10, #:lo12:.Lk_sr
|
|
and x11, x11, #0x30 // and $0x30, %r11
|
|
add x11, x11, x10
|
|
- adr x10, .Lk_mc_forward+48
|
|
+ adrp x10, .Lk_mc_forward+48
|
|
+ add x10, x10, #:lo12:.Lk_mc_forward+48
|
|
|
|
ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key
|
|
and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
|
|
@@ -586,14 +598,18 @@ _vpaes_decrypt_2x:
|
|
.type _vpaes_key_preheat,%function
|
|
.align 4
|
|
_vpaes_key_preheat:
|
|
- adr x10, .Lk_inv
|
|
+ adrp x10, .Lk_inv
|
|
+ add x10, x10, #:lo12:.Lk_inv
|
|
movi v16.16b, #0x5b // .Lk_s63
|
|
- adr x11, .Lk_sb1
|
|
+ adrp x11, .Lk_sb1
|
|
+ add x11, x11, #:lo12:.Lk_sb1
|
|
movi v17.16b, #0x0f // .Lk_s0F
|
|
ld1 {v18.2d,v19.2d,v20.2d,v21.2d}, [x10] // .Lk_inv, .Lk_ipt
|
|
- adr x10, .Lk_dksd
|
|
+ adrp x10, .Lk_dksd
|
|
+ add x10, x10, #:lo12:.Lk_dksd
|
|
ld1 {v22.2d,v23.2d}, [x11] // .Lk_sb1
|
|
- adr x11, .Lk_mc_forward
|
|
+ adrp x11, .Lk_mc_forward
|
|
+ add x11, x11, #:lo12:.Lk_mc_forward
|
|
ld1 {v24.2d,v25.2d,v26.2d,v27.2d}, [x10],#64 // .Lk_dksd, .Lk_dksb
|
|
ld1 {v28.2d,v29.2d,v30.2d,v31.2d}, [x10],#64 // .Lk_dkse, .Lk_dks9
|
|
ld1 {v8.2d}, [x10] // .Lk_rcon
|
|
@@ -617,7 +633,8 @@ _vpaes_schedule_core:
|
|
bl _vpaes_schedule_transform
|
|
mov v7.16b, v0.16b // vmovdqa %xmm0, %xmm7
|
|
|
|
- adr x10, .Lk_sr // lea .Lk_sr(%rip),%r10
|
|
+ adrp x10, .Lk_sr
|
|
+ add x10, x10, #:lo12:.Lk_sr
|
|
add x8, x8, x10
|
|
cbnz w3, .Lschedule_am_decrypting
|
|
|
|
@@ -743,12 +760,14 @@ _vpaes_schedule_core:
|
|
.align 4
|
|
.Lschedule_mangle_last:
|
|
// schedule last round key from xmm0
|
|
- adr x11, .Lk_deskew // lea .Lk_deskew(%rip),%r11 # prepare to deskew
|
|
+ adrp x11, .Lk_deskew
|
|
+ add x11, x11, #:lo12:.Lk_deskew
|
|
cbnz w3, .Lschedule_mangle_last_dec
|
|
|
|
// encrypting
|
|
ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10),%xmm1
|
|
- adr x11, .Lk_opt // lea .Lk_opt(%rip), %r11 # prepare to output transform
|
|
+ adrp x11, .Lk_opt
|
|
+ add x11, x11, #:lo12:.Lk_opt
|
|
add x2, x2, #32 // add $32, %rdx
|
|
tbl v0.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm0 # output permute
|
|
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/bn/armv8-mont.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/bn/armv8-mont.S
|
|
index 111de65451..98c5b76576 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/bn/armv8-mont.S
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/bn/armv8-mont.S
|
|
@@ -2130,6 +2130,7 @@ __bn_mul4x_mont:
|
|
AARCH64_VALIDATE_LINK_REGISTER
|
|
ret
|
|
.size __bn_mul4x_mont,.-__bn_mul4x_mont
|
|
+.section .rodata
|
|
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
|
.align 2
|
|
.align 4
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/ec/ecp_nistz256-armv8.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/ec/ecp_nistz256-armv8.S
|
|
index 6fe86a4020..8c42109669 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/ec/ecp_nistz256-armv8.S
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/ec/ecp_nistz256-armv8.S
|
|
@@ -1,6 +1,6 @@
|
|
#include "arm_arch.h"
|
|
|
|
-.text
|
|
+.section .rodata
|
|
.globl ecp_nistz256_precomputed
|
|
.type ecp_nistz256_precomputed,%object
|
|
.align 12
|
|
@@ -2390,6 +2390,8 @@ ecp_nistz256_precomputed:
|
|
.byte 69,67,80,95,78,73,83,84,90,50,53,54,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
|
.align 2
|
|
|
|
+.text
|
|
+
|
|
// void ecp_nistz256_to_mont(BN_ULONG x0[4],const BN_ULONG x1[4]);
|
|
.globl ecp_nistz256_to_mont
|
|
.type ecp_nistz256_to_mont,%function
|
|
@@ -2400,12 +2402,16 @@ ecp_nistz256_to_mont:
|
|
add x29,sp,#0
|
|
stp x19,x20,[sp,#16]
|
|
|
|
- ldr x3,.LRR // bp[0]
|
|
+ adrp x3,.LRR
|
|
+ ldr x3,[x3,#:lo12:.LRR] // bp[0]
|
|
ldp x4,x5,[x1]
|
|
ldp x6,x7,[x1,#16]
|
|
- ldr x12,.Lpoly+8
|
|
- ldr x13,.Lpoly+24
|
|
- adr x2,.LRR // &bp[0]
|
|
+ adrp x13,.Lpoly
|
|
+ add x13,x13,#:lo12:.Lpoly
|
|
+ ldr x12,[x13,#8]
|
|
+ ldr x13,[x13,#24]
|
|
+ adrp x2,.LRR
|
|
+ add x2,x2,#:lo12:.LRR
|
|
|
|
bl __ecp_nistz256_mul_mont
|
|
|
|
@@ -2428,9 +2434,12 @@ ecp_nistz256_from_mont:
|
|
mov x3,#1 // bp[0]
|
|
ldp x4,x5,[x1]
|
|
ldp x6,x7,[x1,#16]
|
|
- ldr x12,.Lpoly+8
|
|
- ldr x13,.Lpoly+24
|
|
- adr x2,.Lone // &bp[0]
|
|
+ adrp x13,.Lpoly
|
|
+ add x13,x13,#:lo12:.Lpoly
|
|
+ ldr x12,[x13,#8]
|
|
+ ldr x13,[x13,#24]
|
|
+ adrp x2,.Lone
|
|
+ add x2,x2,#:lo12:.Lone
|
|
|
|
bl __ecp_nistz256_mul_mont
|
|
|
|
@@ -2454,8 +2463,10 @@ ecp_nistz256_mul_mont:
|
|
ldr x3,[x2] // bp[0]
|
|
ldp x4,x5,[x1]
|
|
ldp x6,x7,[x1,#16]
|
|
- ldr x12,.Lpoly+8
|
|
- ldr x13,.Lpoly+24
|
|
+ adrp x13,.Lpoly
|
|
+ add x13,x13,#:lo12:.Lpoly
|
|
+ ldr x12,[x13,#8]
|
|
+ ldr x13,[x13,#24]
|
|
|
|
bl __ecp_nistz256_mul_mont
|
|
|
|
@@ -2477,8 +2488,10 @@ ecp_nistz256_sqr_mont:
|
|
|
|
ldp x4,x5,[x1]
|
|
ldp x6,x7,[x1,#16]
|
|
- ldr x12,.Lpoly+8
|
|
- ldr x13,.Lpoly+24
|
|
+ adrp x13,.Lpoly
|
|
+ add x13,x13,#:lo12:.Lpoly
|
|
+ ldr x12,[x13,#8]
|
|
+ ldr x13,[x13,#24]
|
|
|
|
bl __ecp_nistz256_sqr_mont
|
|
|
|
@@ -2502,8 +2515,10 @@ ecp_nistz256_add:
|
|
ldp x8,x9,[x2]
|
|
ldp x16,x17,[x1,#16]
|
|
ldp x10,x11,[x2,#16]
|
|
- ldr x12,.Lpoly+8
|
|
- ldr x13,.Lpoly+24
|
|
+ adrp x13,.Lpoly
|
|
+ add x13,x13,#:lo12:.Lpoly
|
|
+ ldr x12,[x13,#8]
|
|
+ ldr x13,[x13,#24]
|
|
|
|
bl __ecp_nistz256_add
|
|
|
|
@@ -2523,8 +2538,10 @@ ecp_nistz256_div_by_2:
|
|
|
|
ldp x14,x15,[x1]
|
|
ldp x16,x17,[x1,#16]
|
|
- ldr x12,.Lpoly+8
|
|
- ldr x13,.Lpoly+24
|
|
+ adrp x13,.Lpoly
|
|
+ add x13,x13,#:lo12:.Lpoly
|
|
+ ldr x12,[x13,#8]
|
|
+ ldr x13,[x13,#24]
|
|
|
|
bl __ecp_nistz256_div_by_2
|
|
|
|
@@ -2544,8 +2561,10 @@ ecp_nistz256_mul_by_2:
|
|
|
|
ldp x14,x15,[x1]
|
|
ldp x16,x17,[x1,#16]
|
|
- ldr x12,.Lpoly+8
|
|
- ldr x13,.Lpoly+24
|
|
+ adrp x13,.Lpoly
|
|
+ add x13,x13,#:lo12:.Lpoly
|
|
+ ldr x12,[x13,#8]
|
|
+ ldr x13,[x13,#24]
|
|
mov x8,x14
|
|
mov x9,x15
|
|
mov x10,x16
|
|
@@ -2569,8 +2588,10 @@ ecp_nistz256_mul_by_3:
|
|
|
|
ldp x14,x15,[x1]
|
|
ldp x16,x17,[x1,#16]
|
|
- ldr x12,.Lpoly+8
|
|
- ldr x13,.Lpoly+24
|
|
+ adrp x13,.Lpoly
|
|
+ add x13,x13,#:lo12:.Lpoly
|
|
+ ldr x12,[x13,#8]
|
|
+ ldr x13,[x13,#24]
|
|
mov x8,x14
|
|
mov x9,x15
|
|
mov x10,x16
|
|
@@ -2606,8 +2627,10 @@ ecp_nistz256_sub:
|
|
|
|
ldp x14,x15,[x1]
|
|
ldp x16,x17,[x1,#16]
|
|
- ldr x12,.Lpoly+8
|
|
- ldr x13,.Lpoly+24
|
|
+ adrp x13,.Lpoly
|
|
+ add x13,x13,#:lo12:.Lpoly
|
|
+ ldr x12,[x13,#8]
|
|
+ ldr x13,[x13,#24]
|
|
|
|
bl __ecp_nistz256_sub_from
|
|
|
|
@@ -2630,8 +2653,10 @@ ecp_nistz256_neg:
|
|
mov x15,xzr
|
|
mov x16,xzr
|
|
mov x17,xzr
|
|
- ldr x12,.Lpoly+8
|
|
- ldr x13,.Lpoly+24
|
|
+ adrp x13,.Lpoly
|
|
+ add x13,x13,#:lo12:.Lpoly
|
|
+ ldr x12,[x13,#8]
|
|
+ ldr x13,[x13,#24]
|
|
|
|
bl __ecp_nistz256_sub_from
|
|
|
|
@@ -3026,9 +3051,11 @@ ecp_nistz256_point_double:
|
|
mov x21,x0
|
|
ldp x16,x17,[x1,#48]
|
|
mov x22,x1
|
|
- ldr x12,.Lpoly+8
|
|
+ adrp x13,.Lpoly
|
|
+ add x13,x13,#:lo12:.Lpoly
|
|
+ ldr x12,[x13,#8]
|
|
mov x8,x14
|
|
- ldr x13,.Lpoly+24
|
|
+ ldr x13,[x13,#24]
|
|
mov x9,x15
|
|
ldp x4,x5,[x22,#64] // forward load for p256_sqr_mont
|
|
mov x10,x16
|
|
@@ -3171,8 +3198,10 @@ ecp_nistz256_point_add:
|
|
mov x21,x0
|
|
mov x22,x1
|
|
mov x23,x2
|
|
- ldr x12,.Lpoly+8
|
|
- ldr x13,.Lpoly+24
|
|
+ adrp x13,.Lpoly
|
|
+ add x13,x13,#:lo12:.Lpoly
|
|
+ ldr x12,[x13,#8]
|
|
+ ldr x13,[x13,#24]
|
|
orr x8,x4,x5
|
|
orr x10,x6,x7
|
|
orr x25,x8,x10
|
|
@@ -3422,8 +3451,10 @@ ecp_nistz256_point_add_affine:
|
|
mov x21,x0
|
|
mov x22,x1
|
|
mov x23,x2
|
|
- ldr x12,.Lpoly+8
|
|
- ldr x13,.Lpoly+24
|
|
+ adrp x13,.Lpoly
|
|
+ add x13,x13,#:lo12:.Lpoly
|
|
+ ldr x12,[x13,#8]
|
|
+ ldr x13,[x13,#24]
|
|
|
|
ldp x4,x5,[x1,#64] // in1_z
|
|
ldp x6,x7,[x1,#64+16]
|
|
@@ -3569,7 +3600,8 @@ ecp_nistz256_point_add_affine:
|
|
ldp x10,x11,[x23,#0+48]
|
|
stp x14,x15,[x21,#0]
|
|
stp x16,x17,[x21,#0+16]
|
|
- adr x23,.Lone_mont-64
|
|
+ adrp x23,.Lone_mont-64
|
|
+ add x23,x23,#:lo12:.Lone_mont-64
|
|
ldp x14,x15,[x22,#32] // in1
|
|
cmp x24,#0 // ~, remember?
|
|
ldp x16,x17,[x22,#32+16]
|
|
@@ -3627,7 +3659,8 @@ ecp_nistz256_ord_mul_mont:
|
|
stp x21,x22,[sp,#32]
|
|
stp x23,x24,[sp,#48]
|
|
|
|
- adr x23,.Lord
|
|
+ adrp x23,.Lord
|
|
+ add x23,x23,#:lo12:.Lord
|
|
ldr x3,[x2] // bp[0]
|
|
ldp x4,x5,[x1]
|
|
ldp x6,x7,[x1,#16]
|
|
@@ -3837,7 +3870,8 @@ ecp_nistz256_ord_sqr_mont:
|
|
stp x21,x22,[sp,#32]
|
|
stp x23,x24,[sp,#48]
|
|
|
|
- adr x23,.Lord
|
|
+ adrp x23,.Lord
|
|
+ add x23,x23,#:lo12:.Lord
|
|
ldp x4,x5,[x1]
|
|
ldp x6,x7,[x1,#16]
|
|
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/md5/md5-aarch64.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/md5/md5-aarch64.S
|
|
index 7045e31f18..76e4de346a 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/md5/md5-aarch64.S
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/md5/md5-aarch64.S
|
|
@@ -207,165 +207,165 @@ ossl_md5_blocks_loop:
|
|
add w9, w9, w13 // Add constant 0x49b40821
|
|
add w9, w9, w6 // Add aux function result
|
|
ror w9, w9, #10 // Rotate left s=22 bits
|
|
- bic x6, x8, x17 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
+ bic x6, x8, x17 // Aux function round 2 (~z & y)
|
|
add w9, w8, w9 // Add X parameter round 1 B=FF(B, C, D, A, 0x49b40821, s=22, M[15])
|
|
- and x13, x9, x17 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
movz x13, #0x2562 // .Load lower half of constant 0xf61e2562
|
|
movk x13, #0xf61e, lsl #16 // .Load upper half of constant 0xf61e2562
|
|
add w4, w4, w20 // Add dest value
|
|
add w4, w4, w13 // Add constant 0xf61e2562
|
|
- add w4, w4, w6 // Add aux function result
|
|
+ and x13, x9, x17 // Aux function round 2 (x & z)
|
|
+ add w4, w4, w6 // Add (~z & y)
|
|
+ add w4, w4, w13 // Add (x & z)
|
|
ror w4, w4, #27 // Rotate left s=5 bits
|
|
- bic x6, x9, x8 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
+ bic x6, x9, x8 // Aux function round 2 (~z & y)
|
|
add w4, w9, w4 // Add X parameter round 2 A=GG(A, B, C, D, 0xf61e2562, s=5, M[1])
|
|
- and x13, x4, x8 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
movz x13, #0xb340 // .Load lower half of constant 0xc040b340
|
|
movk x13, #0xc040, lsl #16 // .Load upper half of constant 0xc040b340
|
|
add w17, w17, w7 // Add dest value
|
|
add w17, w17, w13 // Add constant 0xc040b340
|
|
- add w17, w17, w6 // Add aux function result
|
|
+ and x13, x4, x8 // Aux function round 2 (x & z)
|
|
+ add w17, w17, w6 // Add (~z & y)
|
|
+ add w17, w17, w13 // Add (x & z)
|
|
ror w17, w17, #23 // Rotate left s=9 bits
|
|
- bic x6, x4, x9 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
+ bic x6, x4, x9 // Aux function round 2 (~z & y)
|
|
add w17, w4, w17 // Add X parameter round 2 D=GG(D, A, B, C, 0xc040b340, s=9, M[6])
|
|
- and x13, x17, x9 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
movz x13, #0x5a51 // .Load lower half of constant 0x265e5a51
|
|
movk x13, #0x265e, lsl #16 // .Load upper half of constant 0x265e5a51
|
|
add w8, w8, w25 // Add dest value
|
|
add w8, w8, w13 // Add constant 0x265e5a51
|
|
- add w8, w8, w6 // Add aux function result
|
|
+ and x13, x17, x9 // Aux function round 2 (x & z)
|
|
+ add w8, w8, w6 // Add (~z & y)
|
|
+ add w8, w8, w13 // Add (x & z)
|
|
ror w8, w8, #18 // Rotate left s=14 bits
|
|
- bic x6, x17, x4 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
+ bic x6, x17, x4 // Aux function round 2 (~z & y)
|
|
add w8, w17, w8 // Add X parameter round 2 C=GG(C, D, A, B, 0x265e5a51, s=14, M[11])
|
|
- and x13, x8, x4 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
movz x13, #0xc7aa // .Load lower half of constant 0xe9b6c7aa
|
|
movk x13, #0xe9b6, lsl #16 // .Load upper half of constant 0xe9b6c7aa
|
|
add w9, w9, w15 // Add dest value
|
|
add w9, w9, w13 // Add constant 0xe9b6c7aa
|
|
- add w9, w9, w6 // Add aux function result
|
|
+ and x13, x8, x4 // Aux function round 2 (x & z)
|
|
+ add w9, w9, w6 // Add (~z & y)
|
|
+ add w9, w9, w13 // Add (x & z)
|
|
ror w9, w9, #12 // Rotate left s=20 bits
|
|
- bic x6, x8, x17 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
+ bic x6, x8, x17 // Aux function round 2 (~z & y)
|
|
add w9, w8, w9 // Add X parameter round 2 B=GG(B, C, D, A, 0xe9b6c7aa, s=20, M[0])
|
|
- and x13, x9, x17 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
movz x13, #0x105d // .Load lower half of constant 0xd62f105d
|
|
movk x13, #0xd62f, lsl #16 // .Load upper half of constant 0xd62f105d
|
|
add w4, w4, w22 // Add dest value
|
|
add w4, w4, w13 // Add constant 0xd62f105d
|
|
- add w4, w4, w6 // Add aux function result
|
|
+ and x13, x9, x17 // Aux function round 2 (x & z)
|
|
+ add w4, w4, w6 // Add (~z & y)
|
|
+ add w4, w4, w13 // Add (x & z)
|
|
ror w4, w4, #27 // Rotate left s=5 bits
|
|
- bic x6, x9, x8 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
+ bic x6, x9, x8 // Aux function round 2 (~z & y)
|
|
add w4, w9, w4 // Add X parameter round 2 A=GG(A, B, C, D, 0xd62f105d, s=5, M[5])
|
|
- and x13, x4, x8 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
movz x13, #0x1453 // .Load lower half of constant 0x2441453
|
|
movk x13, #0x244, lsl #16 // .Load upper half of constant 0x2441453
|
|
add w17, w17, w16 // Add dest value
|
|
add w17, w17, w13 // Add constant 0x2441453
|
|
- add w17, w17, w6 // Add aux function result
|
|
+ and x13, x4, x8 // Aux function round 2 (x & z)
|
|
+ add w17, w17, w6 // Add (~z & y)
|
|
+ add w17, w17, w13 // Add (x & z)
|
|
ror w17, w17, #23 // Rotate left s=9 bits
|
|
- bic x6, x4, x9 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
+ bic x6, x4, x9 // Aux function round 2 (~z & y)
|
|
add w17, w4, w17 // Add X parameter round 2 D=GG(D, A, B, C, 0x2441453, s=9, M[10])
|
|
- and x13, x17, x9 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
movz x13, #0xe681 // .Load lower half of constant 0xd8a1e681
|
|
movk x13, #0xd8a1, lsl #16 // .Load upper half of constant 0xd8a1e681
|
|
add w8, w8, w27 // Add dest value
|
|
add w8, w8, w13 // Add constant 0xd8a1e681
|
|
- add w8, w8, w6 // Add aux function result
|
|
+ and x13, x17, x9 // Aux function round 2 (x & z)
|
|
+ add w8, w8, w6 // Add (~z & y)
|
|
+ add w8, w8, w13 // Add (x & z)
|
|
ror w8, w8, #18 // Rotate left s=14 bits
|
|
- bic x6, x17, x4 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
+ bic x6, x17, x4 // Aux function round 2 (~z & y)
|
|
add w8, w17, w8 // Add X parameter round 2 C=GG(C, D, A, B, 0xd8a1e681, s=14, M[15])
|
|
- and x13, x8, x4 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
movz x13, #0xfbc8 // .Load lower half of constant 0xe7d3fbc8
|
|
movk x13, #0xe7d3, lsl #16 // .Load upper half of constant 0xe7d3fbc8
|
|
add w9, w9, w14 // Add dest value
|
|
add w9, w9, w13 // Add constant 0xe7d3fbc8
|
|
- add w9, w9, w6 // Add aux function result
|
|
+ and x13, x8, x4 // Aux function round 2 (x & z)
|
|
+ add w9, w9, w6 // Add (~z & y)
|
|
+ add w9, w9, w13 // Add (x & z)
|
|
ror w9, w9, #12 // Rotate left s=20 bits
|
|
- bic x6, x8, x17 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
+ bic x6, x8, x17 // Aux function round 2 (~z & y)
|
|
add w9, w8, w9 // Add X parameter round 2 B=GG(B, C, D, A, 0xe7d3fbc8, s=20, M[4])
|
|
- and x13, x9, x17 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
movz x13, #0xcde6 // .Load lower half of constant 0x21e1cde6
|
|
movk x13, #0x21e1, lsl #16 // .Load upper half of constant 0x21e1cde6
|
|
add w4, w4, w24 // Add dest value
|
|
add w4, w4, w13 // Add constant 0x21e1cde6
|
|
- add w4, w4, w6 // Add aux function result
|
|
+ and x13, x9, x17 // Aux function round 2 (x & z)
|
|
+ add w4, w4, w6 // Add (~z & y)
|
|
+ add w4, w4, w13 // Add (x & z)
|
|
ror w4, w4, #27 // Rotate left s=5 bits
|
|
- bic x6, x9, x8 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
+ bic x6, x9, x8 // Aux function round 2 (~z & y)
|
|
add w4, w9, w4 // Add X parameter round 2 A=GG(A, B, C, D, 0x21e1cde6, s=5, M[9])
|
|
- and x13, x4, x8 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
movz x13, #0x7d6 // .Load lower half of constant 0xc33707d6
|
|
movk x13, #0xc337, lsl #16 // .Load upper half of constant 0xc33707d6
|
|
add w17, w17, w12 // Add dest value
|
|
add w17, w17, w13 // Add constant 0xc33707d6
|
|
- add w17, w17, w6 // Add aux function result
|
|
+ and x13, x4, x8 // Aux function round 2 (x & z)
|
|
+ add w17, w17, w6 // Add (~z & y)
|
|
+ add w17, w17, w13 // Add (x & z)
|
|
ror w17, w17, #23 // Rotate left s=9 bits
|
|
- bic x6, x4, x9 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
+ bic x6, x4, x9 // Aux function round 2 (~z & y)
|
|
add w17, w4, w17 // Add X parameter round 2 D=GG(D, A, B, C, 0xc33707d6, s=9, M[14])
|
|
- and x13, x17, x9 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
movz x13, #0xd87 // .Load lower half of constant 0xf4d50d87
|
|
movk x13, #0xf4d5, lsl #16 // .Load upper half of constant 0xf4d50d87
|
|
add w8, w8, w21 // Add dest value
|
|
add w8, w8, w13 // Add constant 0xf4d50d87
|
|
- add w8, w8, w6 // Add aux function result
|
|
+ and x13, x17, x9 // Aux function round 2 (x & z)
|
|
+ add w8, w8, w6 // Add (~z & y)
|
|
+ add w8, w8, w13 // Add (x & z)
|
|
ror w8, w8, #18 // Rotate left s=14 bits
|
|
- bic x6, x17, x4 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
+ bic x6, x17, x4 // Aux function round 2 (~z & y)
|
|
add w8, w17, w8 // Add X parameter round 2 C=GG(C, D, A, B, 0xf4d50d87, s=14, M[3])
|
|
- and x13, x8, x4 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
movz x13, #0x14ed // .Load lower half of constant 0x455a14ed
|
|
movk x13, #0x455a, lsl #16 // .Load upper half of constant 0x455a14ed
|
|
add w9, w9, w5 // Add dest value
|
|
add w9, w9, w13 // Add constant 0x455a14ed
|
|
- add w9, w9, w6 // Add aux function result
|
|
+ and x13, x8, x4 // Aux function round 2 (x & z)
|
|
+ add w9, w9, w6 // Add (~z & y)
|
|
+ add w9, w9, w13 // Add (x & z)
|
|
ror w9, w9, #12 // Rotate left s=20 bits
|
|
- bic x6, x8, x17 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
+ bic x6, x8, x17 // Aux function round 2 (~z & y)
|
|
add w9, w8, w9 // Add X parameter round 2 B=GG(B, C, D, A, 0x455a14ed, s=20, M[8])
|
|
- and x13, x9, x17 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
movz x13, #0xe905 // .Load lower half of constant 0xa9e3e905
|
|
movk x13, #0xa9e3, lsl #16 // .Load upper half of constant 0xa9e3e905
|
|
add w4, w4, w26 // Add dest value
|
|
add w4, w4, w13 // Add constant 0xa9e3e905
|
|
- add w4, w4, w6 // Add aux function result
|
|
+ and x13, x9, x17 // Aux function round 2 (x & z)
|
|
+ add w4, w4, w6 // Add (~z & y)
|
|
+ add w4, w4, w13 // Add (x & z)
|
|
ror w4, w4, #27 // Rotate left s=5 bits
|
|
- bic x6, x9, x8 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
+ bic x6, x9, x8 // Aux function round 2 (~z & y)
|
|
add w4, w9, w4 // Add X parameter round 2 A=GG(A, B, C, D, 0xa9e3e905, s=5, M[13])
|
|
- and x13, x4, x8 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
movz x13, #0xa3f8 // .Load lower half of constant 0xfcefa3f8
|
|
movk x13, #0xfcef, lsl #16 // .Load upper half of constant 0xfcefa3f8
|
|
add w17, w17, w3 // Add dest value
|
|
add w17, w17, w13 // Add constant 0xfcefa3f8
|
|
- add w17, w17, w6 // Add aux function result
|
|
+ and x13, x4, x8 // Aux function round 2 (x & z)
|
|
+ add w17, w17, w6 // Add (~z & y)
|
|
+ add w17, w17, w13 // Add (x & z)
|
|
ror w17, w17, #23 // Rotate left s=9 bits
|
|
- bic x6, x4, x9 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
+ bic x6, x4, x9 // Aux function round 2 (~z & y)
|
|
add w17, w4, w17 // Add X parameter round 2 D=GG(D, A, B, C, 0xfcefa3f8, s=9, M[2])
|
|
- and x13, x17, x9 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
movz x13, #0x2d9 // .Load lower half of constant 0x676f02d9
|
|
movk x13, #0x676f, lsl #16 // .Load upper half of constant 0x676f02d9
|
|
add w8, w8, w23 // Add dest value
|
|
add w8, w8, w13 // Add constant 0x676f02d9
|
|
- add w8, w8, w6 // Add aux function result
|
|
+ and x13, x17, x9 // Aux function round 2 (x & z)
|
|
+ add w8, w8, w6 // Add (~z & y)
|
|
+ add w8, w8, w13 // Add (x & z)
|
|
ror w8, w8, #18 // Rotate left s=14 bits
|
|
- bic x6, x17, x4 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
+ bic x6, x17, x4 // Aux function round 2 (~z & y)
|
|
add w8, w17, w8 // Add X parameter round 2 C=GG(C, D, A, B, 0x676f02d9, s=14, M[7])
|
|
- and x13, x8, x4 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
- orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y))
|
|
movz x13, #0x4c8a // .Load lower half of constant 0x8d2a4c8a
|
|
movk x13, #0x8d2a, lsl #16 // .Load upper half of constant 0x8d2a4c8a
|
|
add w9, w9, w11 // Add dest value
|
|
add w9, w9, w13 // Add constant 0x8d2a4c8a
|
|
- add w9, w9, w6 // Add aux function result
|
|
+ and x13, x8, x4 // Aux function round 2 (x & z)
|
|
+ add w9, w9, w6 // Add (~z & y)
|
|
+ add w9, w9, w13 // Add (x & z)
|
|
eor x6, x8, x17 // Begin aux function round 3 H(x,y,z)=(x^y^z)
|
|
ror w9, w9, #12 // Rotate left s=20 bits
|
|
movz x10, #0x3942 // .Load lower half of constant 0xfffa3942
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/aes-gcm-armv8_64.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/aes-gcm-armv8_64.S
|
|
index 75e29532bc..117c52ebc0 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/aes-gcm-armv8_64.S
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/aes-gcm-armv8_64.S
|
|
@@ -6389,6 +6389,7 @@ aes_gcm_dec_256_kernel:
|
|
mov w0, #0x0
|
|
ret
|
|
.size aes_gcm_dec_256_kernel,.-aes_gcm_dec_256_kernel
|
|
+.section .rodata
|
|
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
|
.align 2
|
|
.align 2
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/ghashv8-armx.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/ghashv8-armx.S
|
|
index 9553790361..0b50e2af31 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/ghashv8-armx.S
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/ghashv8-armx.S
|
|
@@ -637,6 +637,7 @@ gcm_ghash_v8_4x:
|
|
|
|
ret
|
|
.size gcm_ghash_v8_4x,.-gcm_ghash_v8_4x
|
|
+.section .rodata
|
|
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
|
.align 2
|
|
.align 2
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/keccak1600-armv8.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/keccak1600-armv8.S
|
|
index a6f0f603a0..a4f0f0f7d0 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/keccak1600-armv8.S
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/keccak1600-armv8.S
|
|
@@ -1,6 +1,6 @@
|
|
#include "arm_arch.h"
|
|
|
|
-.text
|
|
+.section .rodata
|
|
|
|
.align 8 // strategic alignment and padding that allows to use
|
|
// address value as loop termination condition...
|
|
@@ -32,11 +32,14 @@ iotas:
|
|
.quad 0x0000000080000001
|
|
.quad 0x8000000080008008
|
|
.size iotas,.-iotas
|
|
+.text
|
|
+
|
|
.type KeccakF1600_int,%function
|
|
.align 5
|
|
KeccakF1600_int:
|
|
AARCH64_SIGN_LINK_REGISTER
|
|
- adr x28,iotas
|
|
+ adrp x28,iotas
|
|
+ add x28,x28,#:lo12:iotas
|
|
stp x28,x30,[sp,#16] // 32 bytes on top are mine
|
|
b .Loop
|
|
.align 4
|
|
@@ -578,7 +581,8 @@ SHA3_squeeze:
|
|
.align 5
|
|
KeccakF1600_ce:
|
|
mov x9,#24
|
|
- adr x10,iotas
|
|
+ adrp x10,iotas
|
|
+ add x10,x10,#:lo12:iotas
|
|
b .Loop_ce
|
|
.align 4
|
|
.Loop_ce:
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha1-armv8.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha1-armv8.S
|
|
index 42fc0a74c1..507262b368 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha1-armv8.S
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha1-armv8.S
|
|
@@ -1080,7 +1080,8 @@ sha1_block_armv8:
|
|
stp x29,x30,[sp,#-16]!
|
|
add x29,sp,#0
|
|
|
|
- adr x4,.Lconst
|
|
+ adrp x4,.Lconst
|
|
+ add x4,x4,#:lo12:.Lconst
|
|
eor v1.16b,v1.16b,v1.16b
|
|
ld1 {v0.4s},[x0],#16
|
|
ld1 {v1.s}[0],[x0]
|
|
@@ -1203,6 +1204,9 @@ sha1_block_armv8:
|
|
ldr x29,[sp],#16
|
|
ret
|
|
.size sha1_block_armv8,.-sha1_block_armv8
|
|
+
|
|
+.section .rodata
|
|
+
|
|
.align 6
|
|
.Lconst:
|
|
.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha256-armv8.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha256-armv8.S
|
|
index 9d397fae8f..b0644b849d 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha256-armv8.S
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha256-armv8.S
|
|
@@ -1,4 +1,4 @@
|
|
-// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
|
|
+// Copyright 2014-2025 The OpenSSL Project Authors. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License 2.0 (the "License"). You may not use
|
|
// this file except in compliance with the License. You can obtain a copy
|
|
@@ -92,7 +92,8 @@ sha256_block_data_order:
|
|
ldp w24,w25,[x0,#4*4]
|
|
add x2,x1,x2,lsl#6 // end of input
|
|
ldp w26,w27,[x0,#6*4]
|
|
- adr x30,.LK256
|
|
+ adrp x30,.LK256
|
|
+ add x30,x30,#:lo12:.LK256
|
|
stp x0,x2,[x29,#96]
|
|
|
|
.Loop:
|
|
@@ -1040,6 +1041,8 @@ sha256_block_data_order:
|
|
ret
|
|
.size sha256_block_data_order,.-sha256_block_data_order
|
|
|
|
+.section .rodata
|
|
+
|
|
.align 6
|
|
.type .LK256,%object
|
|
.LK256:
|
|
@@ -1064,6 +1067,8 @@ sha256_block_data_order:
|
|
.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
|
.align 2
|
|
.align 2
|
|
+
|
|
+.text
|
|
#ifndef __KERNEL__
|
|
.type sha256_block_armv8,%function
|
|
.align 6
|
|
@@ -1074,7 +1079,8 @@ sha256_block_armv8:
|
|
add x29,sp,#0
|
|
|
|
ld1 {v0.4s,v1.4s},[x0]
|
|
- adr x3,.LK256
|
|
+ adrp x3,.LK256
|
|
+ add x3,x3,#:lo12:.LK256
|
|
|
|
.Loop_hw:
|
|
ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64
|
|
@@ -1218,7 +1224,8 @@ sha256_block_neon:
|
|
mov x29, sp
|
|
sub sp,sp,#16*4
|
|
|
|
- adr x16,.LK256
|
|
+ adrp x16,.LK256
|
|
+ add x16,x16,#:lo12:.LK256
|
|
add x2,x1,x2,lsl#6 // len to point at the end of inp
|
|
|
|
ld1 {v0.16b},[x1], #16
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha512-armv8.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha512-armv8.S
|
|
index dbc688df71..0a45eb06ed 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha512-armv8.S
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha512-armv8.S
|
|
@@ -1,4 +1,4 @@
|
|
-// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
|
|
+// Copyright 2014-2025 The OpenSSL Project Authors. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License 2.0 (the "License"). You may not use
|
|
// this file except in compliance with the License. You can obtain a copy
|
|
@@ -90,7 +90,8 @@ sha512_block_data_order:
|
|
ldp x24,x25,[x0,#4*8]
|
|
add x2,x1,x2,lsl#7 // end of input
|
|
ldp x26,x27,[x0,#6*8]
|
|
- adr x30,.LK512
|
|
+ adrp x30,.LK512
|
|
+ add x30,x30,#:lo12:.LK512
|
|
stp x0,x2,[x29,#96]
|
|
|
|
.Loop:
|
|
@@ -1038,6 +1039,8 @@ sha512_block_data_order:
|
|
ret
|
|
.size sha512_block_data_order,.-sha512_block_data_order
|
|
|
|
+.section .rodata
|
|
+
|
|
.align 6
|
|
.type .LK512,%object
|
|
.LK512:
|
|
@@ -1086,6 +1089,8 @@ sha512_block_data_order:
|
|
.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
|
.align 2
|
|
.align 2
|
|
+
|
|
+.text
|
|
#ifndef __KERNEL__
|
|
.type sha512_block_armv8,%function
|
|
.align 6
|
|
@@ -1099,7 +1104,8 @@ sha512_block_armv8:
|
|
ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64
|
|
|
|
ld1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // load context
|
|
- adr x3,.LK512
|
|
+ adrp x3,.LK512
|
|
+ add x3,x3,#:lo12:.LK512
|
|
|
|
rev64 v16.16b,v16.16b
|
|
rev64 v17.16b,v17.16b
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sm3/sm3-armv8.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sm3/sm3-armv8.S
|
|
index f1678ff143..39ffc201d7 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sm3/sm3-armv8.S
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sm3/sm3-armv8.S
|
|
@@ -1,4 +1,4 @@
|
|
-// Copyright 2021-2023 The OpenSSL Project Authors. All Rights Reserved.
|
|
+// Copyright 2021-2025 The OpenSSL Project Authors. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License 2.0 (the "License"). You may not use
|
|
// this file except in compliance with the License. You can obtain a copy
|
|
@@ -22,19 +22,19 @@ ossl_hwsm3_block_data_order:
|
|
rev64 v6.4s, v6.4s
|
|
ext v5.16b, v5.16b, v5.16b, #8
|
|
ext v6.16b, v6.16b, v6.16b, #8
|
|
-
|
|
- adr x8, .Tj
|
|
+ adrp x8, .Tj
|
|
+ add x8, x8, #:lo12:.Tj
|
|
ldp s16, s17, [x8]
|
|
|
|
.Loop:
|
|
// load input
|
|
- ld1 {v0.16b,v1.16b,v2.16b,v3.16b}, [x1], #64
|
|
+ ld1 {v0.4s,v1.4s,v2.4s,v3.4s}, [x1], #64
|
|
sub w2, w2, #1
|
|
|
|
mov v18.16b, v5.16b
|
|
mov v19.16b, v6.16b
|
|
|
|
-#ifndef __ARMEB__
|
|
+#ifndef __AARCH64EB__
|
|
rev32 v0.16b, v0.16b
|
|
rev32 v1.16b, v1.16b
|
|
rev32 v2.16b, v2.16b
|
|
@@ -497,7 +497,12 @@ ossl_hwsm3_block_data_order:
|
|
st1 {v5.4s,v6.4s}, [x0]
|
|
ret
|
|
.size ossl_hwsm3_block_data_order,.-ossl_hwsm3_block_data_order
|
|
+.section .rodata
|
|
|
|
+.type _sm3_consts,%object
|
|
.align 3
|
|
+_sm3_consts:
|
|
.Tj:
|
|
.word 0x79cc4519, 0x9d8a7a87
|
|
+.size _sm3_consts,.-_sm3_consts
|
|
+.previous
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aes-586.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aes-586.S
|
|
index 9792a50a8c..c55c427c35 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aes-586.S
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aes-586.S
|
|
@@ -3300,7 +3300,7 @@ AES_set_decrypt_key:
|
|
.byte 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89
|
|
.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
|
|
.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
|
-.comm OPENSSL_ia32cap_P,16,4
|
|
+.comm OPENSSL_ia32cap_P,40,4
|
|
|
|
.section ".note.gnu.property", "a"
|
|
.p2align 2
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aesni-x86.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aesni-x86.S
|
|
index 28e4f1a50a..2f9563bdca 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aesni-x86.S
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aesni-x86.S
|
|
@@ -3344,7 +3344,7 @@ aesni_set_decrypt_key:
|
|
.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
|
|
.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
|
|
.byte 115,108,46,111,114,103,62,0
|
|
-.comm OPENSSL_ia32cap_P,16,4
|
|
+.comm OPENSSL_ia32cap_P,40,4
|
|
|
|
.section ".note.gnu.property", "a"
|
|
.p2align 2
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/bn-586.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/bn-586.S
|
|
index b26aa4b799..73b87147f0 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/bn-586.S
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/bn-586.S
|
|
@@ -1552,7 +1552,7 @@ bn_sub_part_words:
|
|
popl %ebp
|
|
ret
|
|
.size bn_sub_part_words,.-.L_bn_sub_part_words_begin
|
|
-.comm OPENSSL_ia32cap_P,16,4
|
|
+.comm OPENSSL_ia32cap_P,40,4
|
|
|
|
.section ".note.gnu.property", "a"
|
|
.p2align 2
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-gf2m.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-gf2m.S
|
|
index 9ec7e16258..74f0986953 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-gf2m.S
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-gf2m.S
|
|
@@ -354,7 +354,7 @@ bn_GF2m_mul_2x2:
|
|
.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
|
|
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
|
|
.byte 62,0
|
|
-.comm OPENSSL_ia32cap_P,16,4
|
|
+.comm OPENSSL_ia32cap_P,40,4
|
|
|
|
.section ".note.gnu.property", "a"
|
|
.p2align 2
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-mont.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-mont.S
|
|
index 73fad9f75d..eedb0f4b97 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-mont.S
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-mont.S
|
|
@@ -474,7 +474,7 @@ bn_mul_mont:
|
|
.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
|
|
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
|
|
.byte 111,114,103,62,0
|
|
-.comm OPENSSL_ia32cap_P,16,4
|
|
+.comm OPENSSL_ia32cap_P,40,4
|
|
|
|
.section ".note.gnu.property", "a"
|
|
.p2align 2
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/ec/ecp_nistz256-x86.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/ec/ecp_nistz256-x86.S
|
|
index ccd96e4167..63f333ed91 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/ec/ecp_nistz256-x86.S
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/ec/ecp_nistz256-x86.S
|
|
@@ -5270,7 +5270,7 @@ ecp_nistz256_point_add_affine:
|
|
popl %ebp
|
|
ret
|
|
.size ecp_nistz256_point_add_affine,.-.L_ecp_nistz256_point_add_affine_begin
|
|
-.comm OPENSSL_ia32cap_P,16,4
|
|
+.comm OPENSSL_ia32cap_P,40,4
|
|
|
|
.section ".note.gnu.property", "a"
|
|
.p2align 2
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha1-586.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha1-586.S
|
|
index 9cfe5a4660..a3f81ed903 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha1-586.S
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha1-586.S
|
|
@@ -3986,7 +3986,7 @@ _sha1_block_data_order_avx:
|
|
.byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82
|
|
.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
|
|
.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
|
-.comm OPENSSL_ia32cap_P,16,4
|
|
+.comm OPENSSL_ia32cap_P,40,4
|
|
|
|
.section ".note.gnu.property", "a"
|
|
.p2align 2
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha256-586.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha256-586.S
|
|
index 9253ab18d0..8497be1bb2 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha256-586.S
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha256-586.S
|
|
@@ -6784,7 +6784,7 @@ sha256_block_data_order:
|
|
popl %ebp
|
|
ret
|
|
.size sha256_block_data_order,.-.L_sha256_block_data_order_begin
|
|
-.comm OPENSSL_ia32cap_P,16,4
|
|
+.comm OPENSSL_ia32cap_P,40,4
|
|
|
|
.section ".note.gnu.property", "a"
|
|
.p2align 2
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha512-586.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha512-586.S
|
|
index 5e849365a6..74624d5a50 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha512-586.S
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha512-586.S
|
|
@@ -2830,7 +2830,7 @@ sha512_block_data_order:
|
|
.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
|
|
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
|
|
.byte 62,0
|
|
-.comm OPENSSL_ia32cap_P,16,4
|
|
+.comm OPENSSL_ia32cap_P,40,4
|
|
|
|
.section ".note.gnu.property", "a"
|
|
.p2align 2
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/x86cpuid.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/x86cpuid.S
|
|
index 3a3533b0f1..76d15e57db 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/x86cpuid.S
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/x86cpuid.S
|
|
@@ -122,6 +122,24 @@ OPENSSL_ia32_cpuid:
|
|
xorl %ecx,%ecx
|
|
.byte 0x0f,0xa2
|
|
movl %ebx,8(%edi)
|
|
+ movl %ecx,12(%edi) # store ECX of the preceding CPUID at byte offset 12 from EDI (presumably OPENSSL_ia32cap_P, confirm)
|
|
+ movl %edx,16(%edi) # store EDX of the same CPUID at byte offset 16
|
|
+ cmpl $1,%eax # EAX still holds the preceding CPUID result (presumably the max subleaf of leaf 7, confirm)
|
|
+ jb .L005no_extended_info # unsigned EAX < 1: skip the additional queries below
|
|
+ movl $7,%eax # select CPUID leaf 7 ...
|
|
+ movl $1,%ecx # ... subleaf 1
|
|
+ .byte 0x0f,0xa2 # opcode bytes 0F A2 = CPUID
|
|
+ movl %eax,20(%edi) # leaf 7 subleaf 1 EAX -> offset 20
|
|
+ movl %edx,24(%edi) # leaf 7 subleaf 1 EDX -> offset 24
|
|
+ movl %ebx,28(%edi) # leaf 7 subleaf 1 EBX -> offset 28
|
|
+ movl %ecx,32(%edi) # leaf 7 subleaf 1 ECX -> offset 32
|
|
+ andl $524288,%edx # 524288 = 0x80000: keep only EDX bit 19 (presumably the AVX10 enumeration flag, confirm)
|
|
+ cmpl $0,%edx # bit 19 clear?
|
|
+ je .L005no_extended_info # yes: leave offset 36 untouched
|
|
+ movl $36,%eax # 36 = 0x24: select CPUID leaf 0x24 ...
|
|
+ movl $0,%ecx # ... subleaf 0
|
|
+ .byte 0x0f,0xa2 # opcode bytes 0F A2 = CPUID
|
|
+ movl %ebx,36(%edi) # leaf 0x24 EBX -> offset 36 (last dword of a 40-byte area)
|
|
.L005no_extended_info:
|
|
btl $27,%ebp
|
|
jnc .L006clear_avx
|
|
@@ -137,6 +155,7 @@ OPENSSL_ia32_cpuid:
|
|
andl $4278190079,%esi
|
|
.L006clear_avx:
|
|
andl $4026525695,%ebp
|
|
+ andl $4286578687,20(%edi)
|
|
andl $4294967263,8(%edi)
|
|
.L007done:
|
|
movl %esi,%eax
|
|
@@ -577,7 +596,7 @@ OPENSSL_ia32_rdseed_bytes:
|
|
.size OPENSSL_ia32_rdseed_bytes,.-.L_OPENSSL_ia32_rdseed_bytes_begin
|
|
.hidden OPENSSL_cpuid_setup
|
|
.hidden OPENSSL_ia32cap_P
|
|
-.comm OPENSSL_ia32cap_P,16,4
|
|
+.comm OPENSSL_ia32cap_P,40,4
|
|
.section .init
|
|
call OPENSSL_cpuid_setup
|
|
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aes-586.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aes-586.nasm
|
|
index 6c21227279..71c812ab39 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aes-586.nasm
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aes-586.nasm
|
|
@@ -3208,4 +3208,4 @@ db 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89
|
|
db 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
|
|
db 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
|
segment .bss
|
|
-common _OPENSSL_ia32cap_P 16
|
|
+common _OPENSSL_ia32cap_P 40
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aesni-x86.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aesni-x86.nasm
|
|
index ca5544aebe..37198a2e5f 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aesni-x86.nasm
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aesni-x86.nasm
|
|
@@ -3199,4 +3199,4 @@ db 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
|
|
db 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
|
|
db 115,108,46,111,114,103,62,0
|
|
segment .bss
|
|
-common _OPENSSL_ia32cap_P 16
|
|
+common _OPENSSL_ia32cap_P 40
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/bn-586.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/bn-586.nasm
|
|
index e46d8457cf..d79fd8012a 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/bn-586.nasm
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/bn-586.nasm
|
|
@@ -1512,4 +1512,4 @@ L$029pw_end:
|
|
pop ebp
|
|
ret
|
|
segment .bss
|
|
-common _OPENSSL_ia32cap_P 16
|
|
+common _OPENSSL_ia32cap_P 40
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-gf2m.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-gf2m.nasm
|
|
index 7750777de2..52be719fc0 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-gf2m.nasm
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-gf2m.nasm
|
|
@@ -342,4 +342,4 @@ db 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
|
|
db 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
|
|
db 62,0
|
|
segment .bss
|
|
-common _OPENSSL_ia32cap_P 16
|
|
+common _OPENSSL_ia32cap_P 40
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-mont.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-mont.nasm
|
|
index 6547b0d7fa..545085381b 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-mont.nasm
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-mont.nasm
|
|
@@ -476,4 +476,4 @@ db 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
|
|
db 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
|
|
db 111,114,103,62,0
|
|
segment .bss
|
|
-common _OPENSSL_ia32cap_P 16
|
|
+common _OPENSSL_ia32cap_P 40
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/ec/ecp_nistz256-x86.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/ec/ecp_nistz256-x86.nasm
|
|
index 1e487970e3..cd5381520d 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/ec/ecp_nistz256-x86.nasm
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/ec/ecp_nistz256-x86.nasm
|
|
@@ -5125,4 +5125,4 @@ L$013pic:
|
|
pop ebp
|
|
ret
|
|
segment .bss
|
|
-common _OPENSSL_ia32cap_P 16
|
|
+common _OPENSSL_ia32cap_P 40
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha1-586.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha1-586.nasm
|
|
index 0d644acce0..41cc58af83 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha1-586.nasm
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha1-586.nasm
|
|
@@ -3967,4 +3967,4 @@ db 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82
|
|
db 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
|
|
db 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
|
segment .bss
|
|
-common _OPENSSL_ia32cap_P 16
|
|
+common _OPENSSL_ia32cap_P 40
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha256-586.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha256-586.nasm
|
|
index 7d8398c7d3..ec4ac3e7d4 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha256-586.nasm
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha256-586.nasm
|
|
@@ -6786,4 +6786,4 @@ L$018avx_bmi_00_47:
|
|
pop ebp
|
|
ret
|
|
segment .bss
|
|
-common _OPENSSL_ia32cap_P 16
|
|
+common _OPENSSL_ia32cap_P 40
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha512-586.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha512-586.nasm
|
|
index 9410d5c35d..d04b03ec2a 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha512-586.nasm
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha512-586.nasm
|
|
@@ -2832,4 +2832,4 @@ db 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
|
|
db 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
|
|
db 62,0
|
|
segment .bss
|
|
-common _OPENSSL_ia32cap_P 16
|
|
+common _OPENSSL_ia32cap_P 40
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/x86cpuid.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/x86cpuid.nasm
|
|
index 5f3599ce1b..a0ca9aeadc 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/x86cpuid.nasm
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/x86cpuid.nasm
|
|
@@ -123,6 +123,24 @@ L$002generic:
|
|
xor ecx,ecx
|
|
cpuid
|
|
mov DWORD [8+edi],ebx
|
|
+ mov DWORD [12+edi],ecx ; store ECX of the preceding CPUID at byte offset 12 from EDI (presumably _OPENSSL_ia32cap_P, confirm)
|
|
+ mov DWORD [16+edi],edx ; store EDX of the same CPUID at byte offset 16
|
|
+ cmp eax,1 ; EAX still holds the preceding CPUID result (presumably the max subleaf of leaf 7, confirm)
|
|
+ jb NEAR L$005no_extended_info ; unsigned EAX < 1: skip the additional queries below
|
|
+ mov eax,7 ; select CPUID leaf 7 ...
|
|
+ mov ecx,1 ; ... subleaf 1
|
|
+ cpuid
|
|
+ mov DWORD [20+edi],eax ; leaf 7 subleaf 1 EAX -> offset 20
|
|
+ mov DWORD [24+edi],edx ; leaf 7 subleaf 1 EDX -> offset 24
|
|
+ mov DWORD [28+edi],ebx ; leaf 7 subleaf 1 EBX -> offset 28
|
|
+ mov DWORD [32+edi],ecx ; leaf 7 subleaf 1 ECX -> offset 32
|
|
+ and edx,524288 ; 524288 = 0x80000: keep only EDX bit 19 (presumably the AVX10 enumeration flag, confirm)
|
|
+ cmp edx,0 ; bit 19 clear?
|
|
+ je NEAR L$005no_extended_info ; yes: leave offset 36 untouched
|
|
+ mov eax,36 ; 36 = 0x24: select CPUID leaf 0x24 ...
|
|
+ mov ecx,0 ; ... subleaf 0
|
|
+ cpuid
|
|
+ mov DWORD [36+edi],ebx ; leaf 0x24 EBX -> offset 36 (last dword of a 40-byte area)
|
|
L$005no_extended_info:
|
|
bt ebp,27
|
|
jnc NEAR L$006clear_avx
|
|
@@ -138,6 +156,7 @@ L$008clear_xmm:
|
|
and esi,4278190079
|
|
L$006clear_avx:
|
|
and ebp,4026525695
|
|
+ and DWORD [20+edi],4286578687
|
|
and DWORD [8+edi],4294967263
|
|
L$007done:
|
|
mov eax,esi
|
|
@@ -500,7 +519,7 @@ L$031done:
|
|
pop edi
|
|
ret
|
|
segment .bss
|
|
-common _OPENSSL_ia32cap_P 16
|
|
+common _OPENSSL_ia32cap_P 40
|
|
segment .CRT$XCU data align=4
|
|
extern _OPENSSL_cpuid_setup
|
|
dd _OPENSSL_cpuid_setup
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/aes/aesni-xts-avx512.s b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/aes/aesni-xts-avx512.s
|
|
new file mode 100644
|
|
index 0000000000..a07316de2d
|
|
--- /dev/null
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/aes/aesni-xts-avx512.s
|
|
@@ -0,0 +1,8145 @@
|
|
+.text
|
|
+
|
|
+.globl aesni_xts_avx512_eligible # takes no arguments; EAX = 0 when a required capability bit is missing, 0x640 (non-zero) when all are set
|
|
+.type aesni_xts_avx512_eligible,@function
|
|
+.align 32
|
|
+aesni_xts_avx512_eligible:
|
|
+ movl OPENSSL_ia32cap_P+8(%rip),%ecx # ecx = dword at byte offset 8 of OPENSSL_ia32cap_P
|
|
+ xorl %eax,%eax # eax = 0: default result (not eligible)
|
|
+
|
|
+ andl $0xc0030000,%ecx # keep bits 16, 17, 30, 31 (presumably AVX512F/DQ/BW/VL, confirm against OPENSSL_ia32cap docs)
|
|
+ cmpl $0xc0030000,%ecx # are all four bits set?
|
|
+ jne .L_done # no: return 0
|
|
+ movl OPENSSL_ia32cap_P+12(%rip),%ecx # ecx = dword at byte offset 12 of OPENSSL_ia32cap_P
|
|
+
|
|
+ andl $0x640,%ecx # keep bits 6, 9, 10 (presumably AVX512_VBMI2/VAES/VPCLMULQDQ, confirm)
|
|
+ cmpl $0x640,%ecx # are all three bits set?
|
|
+ cmovel %ecx,%eax # yes: eax = 0x640; otherwise eax stays 0
|
|
+.L_done:
|
|
+ .byte 0xf3,0xc3 # opcode bytes F3 C3 = rep ret
|
|
+.size aesni_xts_avx512_eligible, .-aesni_xts_avx512_eligible
|
|
+.globl aesni_xts_128_encrypt_avx512
|
|
+.hidden aesni_xts_128_encrypt_avx512
|
|
+.type aesni_xts_128_encrypt_avx512,@function
|
|
+.align 32
|
|
+aesni_xts_128_encrypt_avx512:
|
|
+.cfi_startproc
|
|
+.byte 243,15,30,250
|
|
+ pushq %rbp
|
|
+ movq %rsp,%rbp
|
|
+ subq $136,%rsp
|
|
+ andq $0xffffffffffffffc0,%rsp
|
|
+ movq %rbx,128(%rsp)
|
|
+ movq $0x87,%r10
|
|
+ vmovdqu (%r9),%xmm1
|
|
+ vpxor (%r8),%xmm1,%xmm1
|
|
+ vaesenc 16(%r8),%xmm1,%xmm1
|
|
+ vaesenc 32(%r8),%xmm1,%xmm1
|
|
+ vaesenc 48(%r8),%xmm1,%xmm1
|
|
+ vaesenc 64(%r8),%xmm1,%xmm1
|
|
+ vaesenc 80(%r8),%xmm1,%xmm1
|
|
+ vaesenc 96(%r8),%xmm1,%xmm1
|
|
+ vaesenc 112(%r8),%xmm1,%xmm1
|
|
+ vaesenc 128(%r8),%xmm1,%xmm1
|
|
+ vaesenc 144(%r8),%xmm1,%xmm1
|
|
+ vaesenclast 160(%r8),%xmm1,%xmm1
|
|
+ vmovdqa %xmm1,(%rsp)
|
|
+
|
|
+ cmpq $0x80,%rdx
|
|
+ jl .L_less_than_128_bytes_hEgxyDlCngwrfFe
|
|
+ vpbroadcastq %r10,%zmm25
|
|
+ cmpq $0x100,%rdx
|
|
+ jge .L_start_by16_hEgxyDlCngwrfFe
|
|
+ cmpq $0x80,%rdx
|
|
+ jge .L_start_by8_hEgxyDlCngwrfFe
|
|
+
|
|
+.L_do_n_blocks_hEgxyDlCngwrfFe:
|
|
+ cmpq $0x0,%rdx
|
|
+ je .L_ret_hEgxyDlCngwrfFe
|
|
+ cmpq $0x70,%rdx
|
|
+ jge .L_remaining_num_blocks_is_7_hEgxyDlCngwrfFe
|
|
+ cmpq $0x60,%rdx
|
|
+ jge .L_remaining_num_blocks_is_6_hEgxyDlCngwrfFe
|
|
+ cmpq $0x50,%rdx
|
|
+ jge .L_remaining_num_blocks_is_5_hEgxyDlCngwrfFe
|
|
+ cmpq $0x40,%rdx
|
|
+ jge .L_remaining_num_blocks_is_4_hEgxyDlCngwrfFe
|
|
+ cmpq $0x30,%rdx
|
|
+ jge .L_remaining_num_blocks_is_3_hEgxyDlCngwrfFe
|
|
+ cmpq $0x20,%rdx
|
|
+ jge .L_remaining_num_blocks_is_2_hEgxyDlCngwrfFe
|
|
+ cmpq $0x10,%rdx
|
|
+ jge .L_remaining_num_blocks_is_1_hEgxyDlCngwrfFe
|
|
+ vmovdqa %xmm0,%xmm8
|
|
+ vmovdqa %xmm9,%xmm0
|
|
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
|
|
+
|
|
+.L_remaining_num_blocks_is_7_hEgxyDlCngwrfFe:
|
|
+ movq $0x0000ffffffffffff,%r8
|
|
+ kmovq %r8,%k1
|
|
+ vmovdqu8 (%rdi),%zmm1
|
|
+ vmovdqu8 64(%rdi),%zmm2{%k1}
|
|
+ addq $0x70,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
|
|
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesenclast %zmm0,%zmm1,%zmm1
|
|
+ vaesenclast %zmm0,%zmm2,%zmm2
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ vmovdqu8 %zmm2,64(%rsi){%k1}
|
|
+ addq $0x70,%rsi
|
|
+ vextracti32x4 $0x2,%zmm2,%xmm8
|
|
+ vextracti32x4 $0x3,%zmm10,%xmm0
|
|
+ andq $0xf,%rdx
|
|
+ je .L_ret_hEgxyDlCngwrfFe
|
|
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
|
|
+
|
|
+.L_remaining_num_blocks_is_6_hEgxyDlCngwrfFe:
|
|
+ vmovdqu8 (%rdi),%zmm1
|
|
+ vmovdqu8 64(%rdi),%ymm2
|
|
+ addq $0x60,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
|
|
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesenclast %zmm0,%zmm1,%zmm1
|
|
+ vaesenclast %zmm0,%zmm2,%zmm2
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ vmovdqu8 %ymm2,64(%rsi)
|
|
+ addq $0x60,%rsi
|
|
+ vextracti32x4 $0x1,%zmm2,%xmm8
|
|
+ vextracti32x4 $0x2,%zmm10,%xmm0
|
|
+ andq $0xf,%rdx
|
|
+ je .L_ret_hEgxyDlCngwrfFe
|
|
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
|
|
+
|
|
+.L_remaining_num_blocks_is_5_hEgxyDlCngwrfFe:
|
|
+ vmovdqu8 (%rdi),%zmm1
|
|
+ vmovdqu 64(%rdi),%xmm2
|
|
+ addq $0x50,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
|
|
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesenclast %zmm0,%zmm1,%zmm1
|
|
+ vaesenclast %zmm0,%zmm2,%zmm2
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ vmovdqu %xmm2,64(%rsi)
|
|
+ addq $0x50,%rsi
|
|
+ vmovdqa %xmm2,%xmm8
|
|
+ vextracti32x4 $0x1,%zmm10,%xmm0
|
|
+ andq $0xf,%rdx
|
|
+ je .L_ret_hEgxyDlCngwrfFe
|
|
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
|
|
+
|
|
+.L_remaining_num_blocks_is_4_hEgxyDlCngwrfFe:
|
|
+ vmovdqu8 (%rdi),%zmm1
|
|
+ addq $0x40,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesenclast %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ addq $0x40,%rsi
|
|
+ vextracti32x4 $0x3,%zmm1,%xmm8
|
|
+ vmovdqa64 %xmm10,%xmm0
|
|
+ andq $0xf,%rdx
|
|
+ je .L_ret_hEgxyDlCngwrfFe
|
|
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
|
|
+.L_remaining_num_blocks_is_3_hEgxyDlCngwrfFe:
|
|
+ movq $-1,%r8
|
|
+ shrq $0x10,%r8
|
|
+ kmovq %r8,%k1
|
|
+ vmovdqu8 (%rdi),%zmm1{%k1}
|
|
+ addq $0x30,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesenclast %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vmovdqu8 %zmm1,(%rsi){%k1}
|
|
+ addq $0x30,%rsi
|
|
+ vextracti32x4 $0x2,%zmm1,%xmm8
|
|
+ vextracti32x4 $0x3,%zmm9,%xmm0
|
|
+ andq $0xf,%rdx
|
|
+ je .L_ret_hEgxyDlCngwrfFe
|
|
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
|
|
+.L_remaining_num_blocks_is_2_hEgxyDlCngwrfFe:
|
|
+ vmovdqu8 (%rdi),%ymm1
|
|
+ addq $0x20,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%ymm0
|
|
+ vpternlogq $0x96,%ymm0,%ymm9,%ymm1
|
|
+ vbroadcasti32x4 16(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 32(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 48(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 64(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 80(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 96(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 112(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 128(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 144(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 160(%rcx),%ymm0
|
|
+ vaesenclast %ymm0,%ymm1,%ymm1
|
|
+ vpxorq %ymm9,%ymm1,%ymm1
|
|
+ vmovdqu %ymm1,(%rsi)
|
|
+ addq $0x20,%rsi
|
|
+ vextracti32x4 $0x1,%zmm1,%xmm8
|
|
+ vextracti32x4 $0x2,%zmm9,%xmm0
|
|
+ andq $0xf,%rdx
|
|
+ je .L_ret_hEgxyDlCngwrfFe
|
|
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
|
|
+.L_remaining_num_blocks_is_1_hEgxyDlCngwrfFe: # Tail path: process one 16-byte block, then finish or go to the partial-block path
|
|
+ vmovdqu (%rdi),%xmm1 # xmm1 = next 16 input bytes
|
|
+ addq $0x10,%rdi # advance input pointer past the block
|
|
+ vpxor %xmm9,%xmm1,%xmm1 # pre-whiten with xmm9 (presumably the XTS tweak for this block)
|
|
+ vpxor (%rcx),%xmm1,%xmm1 # XOR with the 16 bytes at (%rcx) (round key 0)
|
|
+ vaesenc 16(%rcx),%xmm1,%xmm1 # nine vaesenc rounds, keys at 16..144(%rcx)
|
|
+ vaesenc 32(%rcx),%xmm1,%xmm1
|
|
+ vaesenc 48(%rcx),%xmm1,%xmm1
|
|
+ vaesenc 64(%rcx),%xmm1,%xmm1
|
|
+ vaesenc 80(%rcx),%xmm1,%xmm1
|
|
+ vaesenc 96(%rcx),%xmm1,%xmm1
|
|
+ vaesenc 112(%rcx),%xmm1,%xmm1
|
|
+ vaesenc 128(%rcx),%xmm1,%xmm1
|
|
+ vaesenc 144(%rcx),%xmm1,%xmm1
|
|
+ vaesenclast 160(%rcx),%xmm1,%xmm1 # final round, key at 160(%rcx)
|
|
+ vpxor %xmm9,%xmm1,%xmm1 # post-whiten with the same xmm9 value
|
|
+ vmovdqu %xmm1,(%rsi) # store the result block to the output pointer
|
|
+ addq $0x10,%rsi # advance output pointer past the block
|
|
+ vmovdqa %xmm1,%xmm8 # xmm8 = block just written (input to the steal path)
|
|
+ vextracti32x4 $0x1,%zmm9,%xmm0 # xmm0 = 128-bit lane 1 of zmm9 (presumably the next tweak)
|
|
+ andq $0xf,%rdx # rdx = number of trailing bytes (length mod 16)
|
|
+ je .L_ret_hEgxyDlCngwrfFe # no trailing bytes: done
|
|
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe # otherwise handle the 1..15 trailing bytes
|
|
+
|
|
+
|
|
+.L_start_by16_hEgxyDlCngwrfFe:
|
|
+ vbroadcasti32x4 (%rsp),%zmm0
|
|
+ vbroadcasti32x4 shufb_15_7(%rip),%zmm8
|
|
+ movq $0xaa,%r8
|
|
+ kmovq %r8,%k2
|
|
+ vpshufb %zmm8,%zmm0,%zmm1
|
|
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
|
|
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3
|
|
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
|
|
+ vpxord %zmm4,%zmm3,%zmm9
|
|
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
|
|
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7
|
|
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
|
|
+ vpxord %zmm5,%zmm7,%zmm10
|
|
+ vpsrldq $0xf,%zmm9,%zmm13
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm9,%zmm11
|
|
+ vpxord %zmm14,%zmm11,%zmm11
|
|
+ vpsrldq $0xf,%zmm10,%zmm15
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm15,%zmm16
|
|
+ vpslldq $0x1,%zmm10,%zmm12
|
|
+ vpxord %zmm16,%zmm12,%zmm12
|
|
+
|
|
+.L_main_loop_run_16_hEgxyDlCngwrfFe:
|
|
+ vmovdqu8 (%rdi),%zmm1
|
|
+ vmovdqu8 64(%rdi),%zmm2
|
|
+ vmovdqu8 128(%rdi),%zmm3
|
|
+ vmovdqu8 192(%rdi),%zmm4
|
|
+ addq $0x100,%rdi
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+ vpxorq %zmm11,%zmm3,%zmm3
|
|
+ vpxorq %zmm12,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpxorq %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm0,%zmm2,%zmm2
|
|
+ vpxorq %zmm0,%zmm3,%zmm3
|
|
+ vpxorq %zmm0,%zmm4,%zmm4
|
|
+ vpsrldq $0xf,%zmm11,%zmm13
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm11,%zmm15
|
|
+ vpxord %zmm14,%zmm15,%zmm15
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+ vaesenc %zmm0,%zmm3,%zmm3
|
|
+ vaesenc %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+ vaesenc %zmm0,%zmm3,%zmm3
|
|
+ vaesenc %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+ vaesenc %zmm0,%zmm3,%zmm3
|
|
+ vaesenc %zmm0,%zmm4,%zmm4
|
|
+ vpsrldq $0xf,%zmm12,%zmm13
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm12,%zmm16
|
|
+ vpxord %zmm14,%zmm16,%zmm16
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+ vaesenc %zmm0,%zmm3,%zmm3
|
|
+ vaesenc %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+ vaesenc %zmm0,%zmm3,%zmm3
|
|
+ vaesenc %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+ vaesenc %zmm0,%zmm3,%zmm3
|
|
+ vaesenc %zmm0,%zmm4,%zmm4
|
|
+ vpsrldq $0xf,%zmm15,%zmm13
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm15,%zmm17
|
|
+ vpxord %zmm14,%zmm17,%zmm17
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+ vaesenc %zmm0,%zmm3,%zmm3
|
|
+ vaesenc %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+ vaesenc %zmm0,%zmm3,%zmm3
|
|
+ vaesenc %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+ vaesenc %zmm0,%zmm3,%zmm3
|
|
+ vaesenc %zmm0,%zmm4,%zmm4
|
|
+ vpsrldq $0xf,%zmm16,%zmm13
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm16,%zmm18
|
|
+ vpxord %zmm14,%zmm18,%zmm18
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesenclast %zmm0,%zmm1,%zmm1
|
|
+ vaesenclast %zmm0,%zmm2,%zmm2
|
|
+ vaesenclast %zmm0,%zmm3,%zmm3
|
|
+ vaesenclast %zmm0,%zmm4,%zmm4
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+ vpxorq %zmm11,%zmm3,%zmm3
|
|
+ vpxorq %zmm12,%zmm4,%zmm4
|
|
+
|
|
+ vmovdqa32 %zmm15,%zmm9
|
|
+ vmovdqa32 %zmm16,%zmm10
|
|
+ vmovdqa32 %zmm17,%zmm11
|
|
+ vmovdqa32 %zmm18,%zmm12
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ vmovdqu8 %zmm2,64(%rsi)
|
|
+ vmovdqu8 %zmm3,128(%rsi)
|
|
+ vmovdqu8 %zmm4,192(%rsi)
|
|
+ addq $0x100,%rsi
|
|
+ subq $0x100,%rdx
|
|
+ cmpq $0x100,%rdx
|
|
+ jae .L_main_loop_run_16_hEgxyDlCngwrfFe
|
|
+ cmpq $0x80,%rdx
|
|
+ jae .L_main_loop_run_8_hEgxyDlCngwrfFe
|
|
+ vextracti32x4 $0x3,%zmm4,%xmm0
|
|
+ jmp .L_do_n_blocks_hEgxyDlCngwrfFe
|
|
+
|
|
+.L_start_by8_hEgxyDlCngwrfFe: # Build zmm9 and zmm10 (presumably tweaks for blocks 0-3 and 4-7), then fall into the 8-block loop
|
|
+ vbroadcasti32x4 (%rsp),%zmm0 # replicate the 16 bytes at (%rsp) into all four lanes (presumably the initial tweak stored by the prologue)
|
|
+ vbroadcasti32x4 shufb_15_7(%rip),%zmm8 # shuffle control from table shufb_15_7 (contents not in view)
|
|
+ movq $0xaa,%r8
|
|
+ kmovq %r8,%k2 # k2 = 0xaa: selects the odd (upper) qword of each 128-bit lane
|
|
+ vpshufb %zmm8,%zmm0,%zmm1 # zmm1 = bytes of zmm0 rearranged per zmm8
|
|
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4 # per-qword variable left shift, counts from const_dq3210
|
|
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 # per-qword variable right shift, counts from const_dq5678
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3 # carry-less multiply of the low qwords of zmm2 and zmm25 in each lane
|
|
+ vpxorq %zmm2,%zmm4,%zmm4{%k2} # fold zmm2 into the upper qword of each lane of zmm4 only
|
|
+ vpxord %zmm4,%zmm3,%zmm9 # zmm9 = first vector of four per-block values
|
|
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5 # same construction with the const_dq7654 / const_dq1234 shift counts
|
|
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7
|
|
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
|
|
+ vpxord %zmm5,%zmm7,%zmm10 # zmm10 = second vector of four per-block values
|
|
+
|
|
+.L_main_loop_run_8_hEgxyDlCngwrfFe: # Main loop: encrypt 0x80 bytes (8 blocks) per iteration while rdx >= 0x80
|
|
+ vmovdqu8 (%rdi),%zmm1 # blocks 0-3 of this iteration
|
|
+ vmovdqu8 64(%rdi),%zmm2 # blocks 4-7 of this iteration
|
|
+ addq $0x80,%rdi # advance input pointer by 128 bytes
|
|
+ vbroadcasti32x4 (%rcx),%zmm0 # round key 0 replicated to all four lanes
|
|
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1 # imm 0x96 = three-way XOR: data ^ zmm9 ^ round key 0
|
|
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2 # data ^ zmm10 ^ round key 0
|
|
+ vpsrldq $0xf,%zmm9,%zmm13 # per lane: isolate the top byte of zmm9
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 # carry-less multiply that byte by the low qword of zmm25
|
|
+ vpslldq $0x1,%zmm9,%zmm15 # per lane: shift zmm9 left by one byte
|
|
+ vpxord %zmm14,%zmm15,%zmm15 # zmm15 = replacement for zmm9 (presumably the tweaks 8 blocks ahead)
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0 # rounds 1-9: broadcast each round key, vaesenc both vectors
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+ vpsrldq $0xf,%zmm10,%zmm13 # same update as above, applied to zmm10, result in zmm16
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm10,%zmm16
|
|
+ vpxord %zmm14,%zmm16,%zmm16
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0 # last round key
|
|
+ vaesenclast %zmm0,%zmm1,%zmm1
|
|
+ vaesenclast %zmm0,%zmm2,%zmm2
|
|
+ vpxorq %zmm9,%zmm1,%zmm1 # post-whiten with the values used before round 0
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+ vmovdqa32 %zmm15,%zmm9 # install the updated vectors for the next iteration / tail
|
|
+ vmovdqa32 %zmm16,%zmm10
|
|
+ vmovdqu8 %zmm1,(%rsi) # store 8 output blocks
|
|
+ vmovdqu8 %zmm2,64(%rsi)
|
|
+ addq $0x80,%rsi # advance output pointer by 128 bytes
|
|
+ subq $0x80,%rdx # 128 fewer bytes remaining
|
|
+ cmpq $0x80,%rdx
|
|
+ jae .L_main_loop_run_8_hEgxyDlCngwrfFe # unsigned: loop while at least 8 blocks remain
|
|
+ vextracti32x4 $0x3,%zmm2,%xmm0 # xmm0 = last output block just stored (lane 3 of zmm2)
|
|
+ jmp .L_do_n_blocks_hEgxyDlCngwrfFe # handle the remaining 0..0x7f bytes
|
|
+
|
|
+.L_steal_cipher_hEgxyDlCngwrfFe: # Partial final block (ciphertext-stealing style). In: rdx = 1..15 trailing bytes, xmm8 = last full output block, xmm0 = whitening value set by the caller; falls through to the return path
|
|
+ vmovdqa %xmm8,%xmm2 # keep an unshuffled copy of the last full output block
|
|
+ leaq vpshufb_shf_table(%rip),%rax
|
|
+ vmovdqu (%rax,%rdx,1),%xmm10 # shuffle control read at table + rdx (table contents not in view)
|
|
+ vpshufb %xmm10,%xmm8,%xmm8 # rearrange the last output block per that control
|
|
+ vmovdqu -16(%rdi,%rdx,1),%xmm3 # 16 input bytes ending at rdi + rdx (covers the rdx trailing input bytes)
|
|
+ vmovdqu %xmm8,-16(%rsi,%rdx,1) # 16-byte store ending at rsi + rdx; the part before rsi is rewritten by the final store below
|
|
+ leaq vpshufb_shf_table(%rip),%rax
|
|
+ addq $16,%rax
|
|
+ subq %rdx,%rax # rax = table + 16 - rdx
|
|
+ vmovdqu (%rax),%xmm10 # second shuffle control
|
|
+ vpxor mask1(%rip),%xmm10,%xmm10 # adjust it with constant mask1 (contents not in view)
|
|
+ vpshufb %xmm10,%xmm3,%xmm3 # rearrange the loaded input bytes
|
|
+ vpblendvb %xmm10,%xmm2,%xmm3,%xmm3 # per byte: take xmm2 where the control byte has its top bit set, else keep xmm3
|
|
+ vpxor %xmm0,%xmm3,%xmm8 # pre-whiten the merged block with xmm0
|
|
+ vpxor (%rcx),%xmm8,%xmm8 # round key 0
|
|
+ vaesenc 16(%rcx),%xmm8,%xmm8 # nine vaesenc rounds, keys at 16..144(%rcx)
|
|
+ vaesenc 32(%rcx),%xmm8,%xmm8
|
|
+ vaesenc 48(%rcx),%xmm8,%xmm8
|
|
+ vaesenc 64(%rcx),%xmm8,%xmm8
|
|
+ vaesenc 80(%rcx),%xmm8,%xmm8
|
|
+ vaesenc 96(%rcx),%xmm8,%xmm8
|
|
+ vaesenc 112(%rcx),%xmm8,%xmm8
|
|
+ vaesenc 128(%rcx),%xmm8,%xmm8
|
|
+ vaesenc 144(%rcx),%xmm8,%xmm8
|
|
+ vaesenclast 160(%rcx),%xmm8,%xmm8 # final round
|
|
+ vpxor %xmm0,%xmm8,%xmm8 # post-whiten with xmm0
|
|
+ vmovdqu %xmm8,-16(%rsi) # overwrite the last full output block position with the new block
|
|
+.L_ret_hEgxyDlCngwrfFe: # Common exit: restore rbx, scrub scratch state, tear down the frame and return
|
|
+ movq 128(%rsp),%rbx # reload rbx from its stack slot (presumably saved there by the prologue, which is not in view)
|
|
+ xorq %r8,%r8
|
|
+ movq %r8,128(%rsp) # zero that stack slot
|
|
+
|
|
+ vpxorq %zmm0,%zmm0,%zmm0 # clear zmm0
|
|
+ movq %rbp,%rsp # discard the aligned local frame
|
|
+ popq %rbp
|
|
+ vzeroupper # clear upper vector state before returning to the caller
|
|
+ .byte 0xf3,0xc3 # encoded "rep ret"
|
|
+
|
|
+.L_less_than_128_bytes_hEgxyDlCngwrfFe: # Short-input dispatch (presumably reached when rdx < 0x80): pick a handler by whole-block count
|
|
+ vpbroadcastq %r10,%zmm25 # zmm25 = r10 in every qword (constant used by the vpclmulqdq steps)
|
|
+ cmpq $0x10,%rdx
|
|
+ jb .L_ret_hEgxyDlCngwrfFe # fewer than 16 bytes: nothing is processed
|
|
+ vbroadcasti32x4 (%rsp),%zmm0 # replicate the 16 bytes at (%rsp) into all four lanes (presumably the initial tweak)
|
|
+ vbroadcasti32x4 shufb_15_7(%rip),%zmm8 # shuffle control consumed by each handler
|
|
+ movl $0xaa,%r8d # 32-bit write zero-extends, so r8 = 0xaa
|
|
+ kmovq %r8,%k2 # k2 = 0xaa: upper qword of each 128-bit lane
|
|
+ movq %rdx,%r8
|
|
+ andq $0x70,%r8 # r8 = 16 * number of whole blocks
|
|
+ cmpq $0x60,%r8
|
|
+ je .L_num_blocks_is_6_hEgxyDlCngwrfFe
|
|
+ cmpq $0x50,%r8
|
|
+ je .L_num_blocks_is_5_hEgxyDlCngwrfFe
|
|
+ cmpq $0x40,%r8
|
|
+ je .L_num_blocks_is_4_hEgxyDlCngwrfFe
|
|
+ cmpq $0x30,%r8
|
|
+ je .L_num_blocks_is_3_hEgxyDlCngwrfFe
|
|
+ cmpq $0x20,%r8
|
|
+ je .L_num_blocks_is_2_hEgxyDlCngwrfFe
|
|
+ cmpq $0x10,%r8
|
|
+ je .L_num_blocks_is_1_hEgxyDlCngwrfFe # no match (r8 == 0x70) falls through to the 7-block handler that follows
|
|
+
|
|
+.L_num_blocks_is_7_hEgxyDlCngwrfFe:
|
|
+ vpshufb %zmm8,%zmm0,%zmm1
|
|
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
|
|
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
|
|
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3
|
|
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
|
|
+ vpxord %zmm4,%zmm3,%zmm9
|
|
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
|
|
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
|
|
+ vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7
|
|
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
|
|
+ vpxord %zmm5,%zmm7,%zmm10
|
|
+ movq $0x0000ffffffffffff,%r8
|
|
+ kmovq %r8,%k1
|
|
+ vmovdqu8 0(%rdi),%zmm1
|
|
+ vmovdqu8 64(%rdi),%zmm2{%k1}
|
|
+
|
|
+ addq $0x70,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
|
|
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesenclast %zmm0,%zmm1,%zmm1
|
|
+ vaesenclast %zmm0,%zmm2,%zmm2
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+ vmovdqu8 %zmm1,0(%rsi)
|
|
+ vmovdqu8 %zmm2,64(%rsi){%k1}
|
|
+ addq $0x70,%rsi
|
|
+ vextracti32x4 $0x2,%zmm2,%xmm8
|
|
+ vextracti32x4 $0x3,%zmm10,%xmm0
|
|
+ andq $0xf,%rdx
|
|
+ je .L_ret_hEgxyDlCngwrfFe
|
|
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
|
|
+.L_num_blocks_is_6_hEgxyDlCngwrfFe:
|
|
+ vpshufb %zmm8,%zmm0,%zmm1
|
|
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
|
|
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
|
|
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3
|
|
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
|
|
+ vpxord %zmm4,%zmm3,%zmm9
|
|
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
|
|
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
|
|
+ vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7
|
|
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
|
|
+ vpxord %zmm5,%zmm7,%zmm10
|
|
+ vmovdqu8 0(%rdi),%zmm1
|
|
+ vmovdqu8 64(%rdi),%ymm2
|
|
+ addq $96,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
|
|
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesenclast %zmm0,%zmm1,%zmm1
|
|
+ vaesenclast %zmm0,%zmm2,%zmm2
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+ vmovdqu8 %zmm1,0(%rsi)
|
|
+ vmovdqu8 %ymm2,64(%rsi)
|
|
+ addq $96,%rsi
|
|
+
|
|
+ vextracti32x4 $0x1,%ymm2,%xmm8
|
|
+ vextracti32x4 $0x2,%zmm10,%xmm0
|
|
+ andq $0xf,%rdx
|
|
+ je .L_ret_hEgxyDlCngwrfFe
|
|
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
|
|
+.L_num_blocks_is_5_hEgxyDlCngwrfFe:
|
|
+ vpshufb %zmm8,%zmm0,%zmm1
|
|
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
|
|
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
|
|
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3
|
|
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
|
|
+ vpxord %zmm4,%zmm3,%zmm9
|
|
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
|
|
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
|
|
+ vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7
|
|
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
|
|
+ vpxord %zmm5,%zmm7,%zmm10
|
|
+ vmovdqu8 0(%rdi),%zmm1
|
|
+ vmovdqu8 64(%rdi),%xmm2
|
|
+ addq $80,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
|
|
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesenclast %zmm0,%zmm1,%zmm1
|
|
+ vaesenclast %zmm0,%zmm2,%zmm2
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+ vmovdqu8 %zmm1,0(%rsi)
|
|
+ vmovdqu8 %xmm2,64(%rsi)
|
|
+ addq $80,%rsi
|
|
+
|
|
+ vmovdqa %xmm2,%xmm8
|
|
+ vextracti32x4 $0x1,%zmm10,%xmm0
|
|
+ andq $0xf,%rdx
|
|
+ je .L_ret_hEgxyDlCngwrfFe
|
|
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
|
|
+.L_num_blocks_is_4_hEgxyDlCngwrfFe:
|
|
+ vpshufb %zmm8,%zmm0,%zmm1
|
|
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
|
|
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
|
|
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3
|
|
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
|
|
+ vpxord %zmm4,%zmm3,%zmm9
|
|
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
|
|
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
|
|
+ vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7
|
|
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
|
|
+ vpxord %zmm5,%zmm7,%zmm10
|
|
+ vmovdqu8 0(%rdi),%zmm1
|
|
+ addq $64,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesenclast %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vmovdqu8 %zmm1,0(%rsi)
|
|
+ addq $64,%rsi
|
|
+ vextracti32x4 $0x3,%zmm1,%xmm8
|
|
+ vmovdqa %xmm10,%xmm0
|
|
+ andq $0xf,%rdx
|
|
+ je .L_ret_hEgxyDlCngwrfFe
|
|
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
|
|
+.L_num_blocks_is_3_hEgxyDlCngwrfFe:
|
|
+ vpshufb %zmm8,%zmm0,%zmm1
|
|
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
|
|
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
|
|
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3
|
|
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
|
|
+ vpxord %zmm4,%zmm3,%zmm9
|
|
+ movq $0x0000ffffffffffff,%r8
|
|
+ kmovq %r8,%k1
|
|
+ vmovdqu8 0(%rdi),%zmm1{%k1}
|
|
+ addq $48,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesenclast %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vmovdqu8 %zmm1,0(%rsi){%k1}
|
|
+ addq $48,%rsi
|
|
+ vextracti32x4 $2,%zmm1,%xmm8
|
|
+ vextracti32x4 $3,%zmm9,%xmm0
|
|
+ andq $0xf,%rdx
|
|
+ je .L_ret_hEgxyDlCngwrfFe
|
|
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
|
|
+.L_num_blocks_is_2_hEgxyDlCngwrfFe:
|
|
+ vpshufb %zmm8,%zmm0,%zmm1
|
|
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
|
|
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
|
|
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3
|
|
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
|
|
+ vpxord %zmm4,%zmm3,%zmm9
|
|
+
|
|
+ vmovdqu8 0(%rdi),%ymm1
|
|
+ addq $32,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%ymm0
|
|
+ vpternlogq $0x96,%ymm0,%ymm9,%ymm1
|
|
+ vbroadcasti32x4 16(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 32(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 48(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 64(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 80(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 96(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 112(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 128(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 144(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 160(%rcx),%ymm0
|
|
+ vaesenclast %ymm0,%ymm1,%ymm1
|
|
+ vpxorq %ymm9,%ymm1,%ymm1
|
|
+ vmovdqu8 %ymm1,0(%rsi)
|
|
+ addq $32,%rsi
|
|
+
|
|
+ vextracti32x4 $1,%ymm1,%xmm8
|
|
+ vextracti32x4 $2,%zmm9,%xmm0
|
|
+ andq $0xf,%rdx
|
|
+ je .L_ret_hEgxyDlCngwrfFe
|
|
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe
|
|
+.L_num_blocks_is_1_hEgxyDlCngwrfFe: # Short input with one whole block. In: zmm0, zmm8, k2, zmm25 as set by the short-input dispatch
|
|
+ vpshufb %zmm8,%zmm0,%zmm1 # build zmm9 (presumably tweaks for blocks 0-3) from the broadcast value in zmm0
|
|
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
|
|
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
|
|
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3
|
|
+ vpxorq %zmm2,%zmm4,%zmm4{%k2} # merge into the upper qword of each lane only
|
|
+ vpxord %zmm4,%zmm3,%zmm9
|
|
+
|
|
+ vmovdqu8 0(%rdi),%xmm1 # load the single block
|
|
+ addq $16,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%ymm0 # round key 0
|
|
+ vpternlogq $0x96,%ymm0,%ymm9,%ymm1 # three-way XOR: data ^ ymm9 ^ round key 0 (only the low 16 bytes are stored later)
|
|
+ vbroadcasti32x4 16(%rcx),%ymm0 # rounds 1-9, keys at 16..144(%rcx)
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 32(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 48(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 64(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 80(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 96(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 112(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 128(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 144(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 160(%rcx),%ymm0 # last round key
|
|
+ vaesenclast %ymm0,%ymm1,%ymm1
|
|
+ vpxorq %ymm9,%ymm1,%ymm1 # post-whiten
|
|
+ vmovdqu8 %xmm1,0(%rsi) # store only the low 16 bytes
|
|
+ addq $16,%rsi
|
|
+
|
|
+ vmovdqa %xmm1,%xmm8 # xmm8 = block just written (input to the steal path)
|
|
+ vextracti32x4 $1,%zmm9,%xmm0 # xmm0 = lane 1 of zmm9 (presumably the next tweak)
|
|
+ andq $0xf,%rdx # trailing byte count
|
|
+ je .L_ret_hEgxyDlCngwrfFe # none: done
|
|
+ jmp .L_steal_cipher_hEgxyDlCngwrfFe # 1..15 trailing bytes: partial-block path
|
|
+.cfi_endproc
|
|
+.globl aesni_xts_128_decrypt_avx512
|
|
+.hidden aesni_xts_128_decrypt_avx512
|
|
+.type aesni_xts_128_decrypt_avx512,@function
|
|
+.align 32
|
|
+aesni_xts_128_decrypt_avx512: # Entry section: set up the frame, encrypt the 16 bytes at (%r9) with the key schedule at (%r8), dispatch on rdx. Presumably SysV args rdi=in, rsi=out, rdx=len, rcx=data key, r8=tweak key, r9=iv - confirm against the C prototype
|
|
+.cfi_startproc
|
|
+.byte 243,15,30,250 # bytes F3 0F 1E FA = endbr64
|
|
+ pushq %rbp
|
|
+ movq %rsp,%rbp # rbp anchors the frame so the epilogue can undo the alignment
|
|
+ subq $136,%rsp # room for a 128-byte area plus an 8-byte save slot
|
|
+ andq $0xffffffffffffffc0,%rsp # align rsp down to 64 bytes
|
|
+ movq %rbx,128(%rsp) # save callee-saved rbx
|
|
+ movq $0x87,%r10 # constant later broadcast into zmm25 (presumably the GF(2^128) reduction constant)
|
|
+ vmovdqu (%r9),%xmm1 # load 16 bytes from (%r9)
|
|
+ vpxor (%r8),%xmm1,%xmm1 # round key 0 of the schedule at (%r8)
|
|
+ vaesenc 16(%r8),%xmm1,%xmm1 # encrypt direction is used here even though this is the decrypt routine
|
|
+ vaesenc 32(%r8),%xmm1,%xmm1
|
|
+ vaesenc 48(%r8),%xmm1,%xmm1
|
|
+ vaesenc 64(%r8),%xmm1,%xmm1
|
|
+ vaesenc 80(%r8),%xmm1,%xmm1
|
|
+ vaesenc 96(%r8),%xmm1,%xmm1
|
|
+ vaesenc 112(%r8),%xmm1,%xmm1
|
|
+ vaesenc 128(%r8),%xmm1,%xmm1
|
|
+ vaesenc 144(%r8),%xmm1,%xmm1
|
|
+ vaesenclast 160(%r8),%xmm1,%xmm1
|
|
+ vmovdqa %xmm1,(%rsp) # keep the result at the 64-byte-aligned (%rsp) for later broadcast loads
|
|
+
|
|
+ cmpq $0x80,%rdx
|
|
+ jb .L_less_than_128_bytes_amivrujEyduiFoi # unsigned: fewer than 128 bytes
|
|
+ vpbroadcastq %r10,%zmm25 # zmm25 = 0x87 in every qword
|
|
+ cmpq $0x100,%rdx
|
|
+ jge .L_start_by16_amivrujEyduiFoi # signed compare: a length with bit 63 set would take the by8 path instead
|
|
+ jmp .L_start_by8_amivrujEyduiFoi
|
|
+
|
|
+.L_do_n_blocks_amivrujEyduiFoi: # Decrypt tail dispatch on the remaining byte count in rdx; falls through when 1..15 bytes remain
|
|
+ cmpq $0x0,%rdx
|
|
+ je .L_ret_amivrujEyduiFoi # nothing left
|
|
+ cmpq $0x70,%rdx
|
|
+ jge .L_remaining_num_blocks_is_7_amivrujEyduiFoi
|
|
+ cmpq $0x60,%rdx
|
|
+ jge .L_remaining_num_blocks_is_6_amivrujEyduiFoi
|
|
+ cmpq $0x50,%rdx
|
|
+ jge .L_remaining_num_blocks_is_5_amivrujEyduiFoi
|
|
+ cmpq $0x40,%rdx
|
|
+ jge .L_remaining_num_blocks_is_4_amivrujEyduiFoi
|
|
+ cmpq $0x30,%rdx
|
|
+ jge .L_remaining_num_blocks_is_3_amivrujEyduiFoi
|
|
+ cmpq $0x20,%rdx
|
|
+ jge .L_remaining_num_blocks_is_2_amivrujEyduiFoi
|
|
+ cmpq $0x10,%rdx
|
|
+ jge .L_remaining_num_blocks_is_1_amivrujEyduiFoi
|
|
+
|
|
+
|
|
+ vmovdqu %xmm5,%xmm1 # only a partial block remains: reprocess the block held in xmm5 (presumably the last full input block saved by the main loop, which is not in view)
|
|
+
|
|
+ vpxor %xmm9,%xmm1,%xmm1 # pre-whiten with xmm9
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1 # round key 0
|
|
+ vmovdqu 16(%rcx),%xmm0 # nine vaesdec rounds, keys at 16..144(%rcx)
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1 # final round
|
|
+ vpxor %xmm9,%xmm1,%xmm1 # post-whiten with xmm9
|
|
+ vmovdqu %xmm1,-16(%rsi) # rewrite the previously stored last output block
|
|
+ vmovdqa %xmm1,%xmm8 # xmm8 = that block (input to the steal path)
|
|
+
|
|
+
|
|
+ movq $0x1,%r8
|
|
+ kmovq %r8,%k1 # k1 = 1: low qword only
|
|
+ vpsllq $0x3f,%xmm9,%xmm13 # move bit 0 of each qword of xmm9 to bit 63
|
|
+ vpsraq $0x3f,%xmm13,%xmm14 # expand it to an all-ones / all-zeros qword
|
|
+ vpandq %xmm25,%xmm14,%xmm5 # select the zmm25 constant when that bit was set
|
|
+ vpxorq %xmm5,%xmm9,%xmm9{%k1} # conditionally XOR it into the low qword of xmm9
|
|
+ vpsrldq $0x8,%xmm9,%xmm10 # xmm10 = high qword of xmm9 moved to the low qword
|
|
+.byte 98, 211, 181, 8, 115, 194, 1 # hand-encoded EVEX instruction; appears to be vpshrdq $1,%xmm10,%xmm9,%xmm0 (128-bit right shift by one bit) - verify with a disassembler
|
|
+ vpslldq $0x8,%xmm13,%xmm13 # carry the saved low bit into bit 127
|
|
+ vpxorq %xmm13,%xmm0,%xmm0 # xmm0 = value for the steal path (presumably the tweak stepped back by one block)
|
|
+ jmp .L_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+.L_remaining_num_blocks_is_7_amivrujEyduiFoi:
|
|
+ movq $0xffffffffffffffff,%r8
|
|
+ shrq $0x10,%r8
|
|
+ kmovq %r8,%k1
|
|
+ vmovdqu8 (%rdi),%zmm1
|
|
+ vmovdqu8 64(%rdi),%zmm2{%k1}
|
|
+ addq $0x70,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_7_remain_amivrujEyduiFoi
|
|
+ vextracti32x4 $0x2,%zmm10,%xmm12
|
|
+ vextracti32x4 $0x3,%zmm10,%xmm13
|
|
+ vinserti32x4 $0x2,%xmm13,%zmm10,%zmm10
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpxorq %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm0,%zmm2,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesdeclast %zmm0,%zmm1,%zmm1
|
|
+ vaesdeclast %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vmovdqa32 %zmm15,%zmm9
|
|
+ vmovdqa32 %zmm16,%zmm10
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ vmovdqu8 %zmm2,64(%rsi){%k1}
|
|
+ addq $0x70,%rsi
|
|
+ vextracti32x4 $0x2,%zmm2,%xmm8
|
|
+ vmovdqa %xmm12,%xmm0
|
|
+ jmp .L_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+.L_done_7_remain_amivrujEyduiFoi:
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpxorq %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm0,%zmm2,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesdeclast %zmm0,%zmm1,%zmm1
|
|
+ vaesdeclast %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vmovdqa32 %zmm15,%zmm9
|
|
+ vmovdqa32 %zmm16,%zmm10
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ vmovdqu8 %zmm2,64(%rsi){%k1}
|
|
+ jmp .L_ret_amivrujEyduiFoi
|
|
+
|
|
+.L_remaining_num_blocks_is_6_amivrujEyduiFoi:
|
|
+ vmovdqu8 (%rdi),%zmm1
|
|
+ vmovdqu8 64(%rdi),%ymm2
|
|
+ addq $0x60,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_6_remain_amivrujEyduiFoi
|
|
+ vextracti32x4 $0x1,%zmm10,%xmm12
|
|
+ vextracti32x4 $0x2,%zmm10,%xmm13
|
|
+ vinserti32x4 $0x1,%xmm13,%zmm10,%zmm10
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpxorq %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm0,%zmm2,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesdeclast %zmm0,%zmm1,%zmm1
|
|
+ vaesdeclast %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vmovdqa32 %zmm15,%zmm9
|
|
+ vmovdqa32 %zmm16,%zmm10
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ vmovdqu8 %ymm2,64(%rsi)
|
|
+ addq $0x60,%rsi
|
|
+ vextracti32x4 $0x1,%zmm2,%xmm8
|
|
+ vmovdqa %xmm12,%xmm0
|
|
+ jmp .L_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+.L_done_6_remain_amivrujEyduiFoi:
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpxorq %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm0,%zmm2,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesdeclast %zmm0,%zmm1,%zmm1
|
|
+ vaesdeclast %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vmovdqa32 %zmm15,%zmm9
|
|
+ vmovdqa32 %zmm16,%zmm10
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ vmovdqu8 %ymm2,64(%rsi)
|
|
+ jmp .L_ret_amivrujEyduiFoi
|
|
+
|
|
+.L_remaining_num_blocks_is_5_amivrujEyduiFoi:
|
|
+ vmovdqu8 (%rdi),%zmm1
|
|
+ vmovdqu 64(%rdi),%xmm2
|
|
+ addq $0x50,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_5_remain_amivrujEyduiFoi
|
|
+ vmovdqa %xmm10,%xmm12
|
|
+ vextracti32x4 $0x1,%zmm10,%xmm10
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpxorq %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm0,%zmm2,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesdeclast %zmm0,%zmm1,%zmm1
|
|
+ vaesdeclast %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vmovdqa32 %zmm15,%zmm9
|
|
+ vmovdqa32 %zmm16,%zmm10
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ vmovdqu %xmm2,64(%rsi)
|
|
+ addq $0x50,%rsi
|
|
+ vmovdqa %xmm2,%xmm8
|
|
+ vmovdqa %xmm12,%xmm0
|
|
+ jmp .L_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+.L_done_5_remain_amivrujEyduiFoi:
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpxorq %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm0,%zmm2,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesdeclast %zmm0,%zmm1,%zmm1
|
|
+ vaesdeclast %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vmovdqa32 %zmm15,%zmm9
|
|
+ vmovdqa32 %zmm16,%zmm10
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ vmovdqu8 %xmm2,64(%rsi)
|
|
+ jmp .L_ret_amivrujEyduiFoi
|
|
+
|
|
+.L_remaining_num_blocks_is_4_amivrujEyduiFoi:
|
|
+ vmovdqu8 (%rdi),%zmm1
|
|
+ addq $0x40,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_4_remain_amivrujEyduiFoi
|
|
+ vextracti32x4 $0x3,%zmm9,%xmm12
|
|
+ vinserti32x4 $0x3,%xmm10,%zmm9,%zmm9
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpxorq %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm0,%zmm2,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesdeclast %zmm0,%zmm1,%zmm1
|
|
+ vaesdeclast %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vmovdqa32 %zmm15,%zmm9
|
|
+ vmovdqa32 %zmm16,%zmm10
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ addq $0x40,%rsi
|
|
+ vextracti32x4 $0x3,%zmm1,%xmm8
|
|
+ vmovdqa %xmm12,%xmm0
|
|
+ jmp .L_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+.L_done_4_remain_amivrujEyduiFoi:
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpxorq %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm0,%zmm2,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesdeclast %zmm0,%zmm1,%zmm1
|
|
+ vaesdeclast %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vmovdqa32 %zmm15,%zmm9
|
|
+ vmovdqa32 %zmm16,%zmm10
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ jmp .L_ret_amivrujEyduiFoi
|
|
+
|
|
+.L_remaining_num_blocks_is_3_amivrujEyduiFoi:
|
|
+ vmovdqu (%rdi),%xmm1
|
|
+ vmovdqu 16(%rdi),%xmm2
|
|
+ vmovdqu 32(%rdi),%xmm3
|
|
+ addq $0x30,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_3_remain_amivrujEyduiFoi
|
|
+ vextracti32x4 $0x2,%zmm9,%xmm13
|
|
+ vextracti32x4 $0x1,%zmm9,%xmm10
|
|
+ vextracti32x4 $0x3,%zmm9,%xmm11
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vaesdeclast %xmm0,%xmm3,%xmm3
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ vmovdqu %xmm3,32(%rsi)
|
|
+ addq $0x30,%rsi
|
|
+ vmovdqa %xmm3,%xmm8
|
|
+ vmovdqa %xmm13,%xmm0
|
|
+ jmp .L_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+.L_done_3_remain_amivrujEyduiFoi:
|
|
+ vextracti32x4 $0x1,%zmm9,%xmm10
|
|
+ vextracti32x4 $0x2,%zmm9,%xmm11
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vaesdeclast %xmm0,%xmm3,%xmm3
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ vmovdqu %xmm3,32(%rsi)
|
|
+ jmp .L_ret_amivrujEyduiFoi
|
|
+
|
|
+.L_remaining_num_blocks_is_2_amivrujEyduiFoi:
|
|
+ vmovdqu (%rdi),%xmm1
|
|
+ vmovdqu 16(%rdi),%xmm2
|
|
+ addq $0x20,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_2_remain_amivrujEyduiFoi
|
|
+ vextracti32x4 $0x2,%zmm9,%xmm10
|
|
+ vextracti32x4 $0x1,%zmm9,%xmm12
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ addq $0x20,%rsi
|
|
+ vmovdqa %xmm2,%xmm8
|
|
+ vmovdqa %xmm12,%xmm0
|
|
+ jmp .L_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+.L_done_2_remain_amivrujEyduiFoi:
|
|
+ vextracti32x4 $0x1,%zmm9,%xmm10
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ jmp .L_ret_amivrujEyduiFoi
|
|
+
|
|
+.L_remaining_num_blocks_is_1_amivrujEyduiFoi:
|
|
+ vmovdqu (%rdi),%xmm1
|
|
+ addq $0x10,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_1_remain_amivrujEyduiFoi
|
|
+ vextracti32x4 $0x1,%zmm9,%xmm11
|
|
+ vpxor %xmm11,%xmm1,%xmm1
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm11,%xmm1,%xmm1
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ addq $0x10,%rsi
|
|
+ vmovdqa %xmm1,%xmm8
|
|
+ vmovdqa %xmm9,%xmm0
|
|
+ jmp .L_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+.L_done_1_remain_amivrujEyduiFoi:
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ jmp .L_ret_amivrujEyduiFoi
|
|
+
|
|
+.L_start_by16_amivrujEyduiFoi:
|
|
+ vbroadcasti32x4 (%rsp),%zmm0
|
|
+ vbroadcasti32x4 shufb_15_7(%rip),%zmm8
|
|
+ movq $0xaa,%r8
|
|
+ kmovq %r8,%k2
|
|
+
|
|
+
|
|
+ vpshufb %zmm8,%zmm0,%zmm1
|
|
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
|
|
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3
|
|
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
|
|
+ vpxord %zmm4,%zmm3,%zmm9
|
|
+
|
|
+
|
|
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
|
|
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7
|
|
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
|
|
+ vpxord %zmm5,%zmm7,%zmm10
|
|
+
|
|
+
|
|
+ vpsrldq $0xf,%zmm9,%zmm13
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm9,%zmm11
|
|
+ vpxord %zmm14,%zmm11,%zmm11
|
|
+
|
|
+ vpsrldq $0xf,%zmm10,%zmm15
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm15,%zmm16
|
|
+ vpslldq $0x1,%zmm10,%zmm12
|
|
+ vpxord %zmm16,%zmm12,%zmm12
|
|
+
|
|
+.L_main_loop_run_16_amivrujEyduiFoi:
|
|
+ vmovdqu8 (%rdi),%zmm1
|
|
+ vmovdqu8 64(%rdi),%zmm2
|
|
+ vmovdqu8 128(%rdi),%zmm3
|
|
+ vmovdqu8 192(%rdi),%zmm4
|
|
+ vmovdqu8 240(%rdi),%xmm5
|
|
+ addq $0x100,%rdi
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+ vpxorq %zmm11,%zmm3,%zmm3
|
|
+ vpxorq %zmm12,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpxorq %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm0,%zmm2,%zmm2
|
|
+ vpxorq %zmm0,%zmm3,%zmm3
|
|
+ vpxorq %zmm0,%zmm4,%zmm4
|
|
+ vpsrldq $0xf,%zmm11,%zmm13
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm11,%zmm15
|
|
+ vpxord %zmm14,%zmm15,%zmm15
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+ vaesdec %zmm0,%zmm3,%zmm3
|
|
+ vaesdec %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+ vaesdec %zmm0,%zmm3,%zmm3
|
|
+ vaesdec %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+ vaesdec %zmm0,%zmm3,%zmm3
|
|
+ vaesdec %zmm0,%zmm4,%zmm4
|
|
+ vpsrldq $0xf,%zmm12,%zmm13
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm12,%zmm16
|
|
+ vpxord %zmm14,%zmm16,%zmm16
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+ vaesdec %zmm0,%zmm3,%zmm3
|
|
+ vaesdec %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+ vaesdec %zmm0,%zmm3,%zmm3
|
|
+ vaesdec %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+ vaesdec %zmm0,%zmm3,%zmm3
|
|
+ vaesdec %zmm0,%zmm4,%zmm4
|
|
+ vpsrldq $0xf,%zmm15,%zmm13
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm15,%zmm17
|
|
+ vpxord %zmm14,%zmm17,%zmm17
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+ vaesdec %zmm0,%zmm3,%zmm3
|
|
+ vaesdec %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+ vaesdec %zmm0,%zmm3,%zmm3
|
|
+ vaesdec %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+ vaesdec %zmm0,%zmm3,%zmm3
|
|
+ vaesdec %zmm0,%zmm4,%zmm4
|
|
+ vpsrldq $0xf,%zmm16,%zmm13
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm16,%zmm18
|
|
+ vpxord %zmm14,%zmm18,%zmm18
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesdeclast %zmm0,%zmm1,%zmm1
|
|
+ vaesdeclast %zmm0,%zmm2,%zmm2
|
|
+ vaesdeclast %zmm0,%zmm3,%zmm3
|
|
+ vaesdeclast %zmm0,%zmm4,%zmm4
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+ vpxorq %zmm11,%zmm3,%zmm3
|
|
+ vpxorq %zmm12,%zmm4,%zmm4
|
|
+
|
|
+ vmovdqa32 %zmm15,%zmm9
|
|
+ vmovdqa32 %zmm16,%zmm10
|
|
+ vmovdqa32 %zmm17,%zmm11
|
|
+ vmovdqa32 %zmm18,%zmm12
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ vmovdqu8 %zmm2,64(%rsi)
|
|
+ vmovdqu8 %zmm3,128(%rsi)
|
|
+ vmovdqu8 %zmm4,192(%rsi)
|
|
+ addq $0x100,%rsi
|
|
+ subq $0x100,%rdx
|
|
+ cmpq $0x100,%rdx
|
|
+ jge .L_main_loop_run_16_amivrujEyduiFoi
|
|
+
|
|
+ cmpq $0x80,%rdx
|
|
+ jge .L_main_loop_run_8_amivrujEyduiFoi
|
|
+ jmp .L_do_n_blocks_amivrujEyduiFoi
|
|
+
|
|
+.L_start_by8_amivrujEyduiFoi:
|
|
+
|
|
+ vbroadcasti32x4 (%rsp),%zmm0
|
|
+ vbroadcasti32x4 shufb_15_7(%rip),%zmm8
|
|
+ movq $0xaa,%r8
|
|
+ kmovq %r8,%k2
|
|
+
|
|
+
|
|
+ vpshufb %zmm8,%zmm0,%zmm1
|
|
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
|
|
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3
|
|
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
|
|
+ vpxord %zmm4,%zmm3,%zmm9
|
|
+
|
|
+
|
|
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
|
|
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7
|
|
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
|
|
+ vpxord %zmm5,%zmm7,%zmm10
|
|
+
|
|
+.L_main_loop_run_8_amivrujEyduiFoi:
|
|
+ vmovdqu8 (%rdi),%zmm1
|
|
+ vmovdqu8 64(%rdi),%zmm2
|
|
+ vmovdqu8 112(%rdi),%xmm5
|
|
+ addq $0x80,%rdi
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpxorq %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm0,%zmm2,%zmm2
|
|
+ vpsrldq $0xf,%zmm9,%zmm13
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm9,%zmm15
|
|
+ vpxord %zmm14,%zmm15,%zmm15
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+ vpsrldq $0xf,%zmm10,%zmm13
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm10,%zmm16
|
|
+ vpxord %zmm14,%zmm16,%zmm16
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesdeclast %zmm0,%zmm1,%zmm1
|
|
+ vaesdeclast %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vmovdqa32 %zmm15,%zmm9
|
|
+ vmovdqa32 %zmm16,%zmm10
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ vmovdqu8 %zmm2,64(%rsi)
|
|
+ addq $0x80,%rsi
|
|
+ subq $0x80,%rdx
|
|
+ cmpq $0x80,%rdx
|
|
+ jge .L_main_loop_run_8_amivrujEyduiFoi
|
|
+ jmp .L_do_n_blocks_amivrujEyduiFoi
|
|
+
|
|
+.L_steal_cipher_amivrujEyduiFoi:
|
|
+
|
|
+ vmovdqa %xmm8,%xmm2
|
|
+
|
|
+
|
|
+ leaq vpshufb_shf_table(%rip),%rax
|
|
+ vmovdqu (%rax,%rdx,1),%xmm10
|
|
+ vpshufb %xmm10,%xmm8,%xmm8
|
|
+
|
|
+
|
|
+ vmovdqu -16(%rdi,%rdx,1),%xmm3
|
|
+ vmovdqu %xmm8,-16(%rsi,%rdx,1)
|
|
+
|
|
+
|
|
+ leaq vpshufb_shf_table(%rip),%rax
|
|
+ addq $16,%rax
|
|
+ subq %rdx,%rax
|
|
+ vmovdqu (%rax),%xmm10
|
|
+ vpxor mask1(%rip),%xmm10,%xmm10
|
|
+ vpshufb %xmm10,%xmm3,%xmm3
|
|
+
|
|
+ vpblendvb %xmm10,%xmm2,%xmm3,%xmm3
|
|
+
|
|
+
|
|
+ vpxor %xmm0,%xmm3,%xmm8
|
|
+
|
|
+
|
|
+ vpxor (%rcx),%xmm8,%xmm8
|
|
+ vaesdec 16(%rcx),%xmm8,%xmm8
|
|
+ vaesdec 32(%rcx),%xmm8,%xmm8
|
|
+ vaesdec 48(%rcx),%xmm8,%xmm8
|
|
+ vaesdec 64(%rcx),%xmm8,%xmm8
|
|
+ vaesdec 80(%rcx),%xmm8,%xmm8
|
|
+ vaesdec 96(%rcx),%xmm8,%xmm8
|
|
+ vaesdec 112(%rcx),%xmm8,%xmm8
|
|
+ vaesdec 128(%rcx),%xmm8,%xmm8
|
|
+ vaesdec 144(%rcx),%xmm8,%xmm8
|
|
+ vaesdeclast 160(%rcx),%xmm8,%xmm8
|
|
+
|
|
+ vpxor %xmm0,%xmm8,%xmm8
|
|
+
|
|
+.L_done_amivrujEyduiFoi:
|
|
+
|
|
+ vmovdqu %xmm8,-16(%rsi)
|
|
+.L_ret_amivrujEyduiFoi:
|
|
+ movq 128(%rsp),%rbx
|
|
+ xorq %r8,%r8
|
|
+ movq %r8,128(%rsp)
|
|
+
|
|
+ vpxorq %zmm0,%zmm0,%zmm0
|
|
+ movq %rbp,%rsp
|
|
+ popq %rbp
|
|
+ vzeroupper
|
|
+ .byte 0xf3,0xc3
|
|
+
|
|
+.L_less_than_128_bytes_amivrujEyduiFoi:
|
|
+ cmpq $0x10,%rdx
|
|
+ jb .L_ret_amivrujEyduiFoi
|
|
+
|
|
+ movq %rdx,%r8
|
|
+ andq $0x70,%r8
|
|
+ cmpq $0x60,%r8
|
|
+ je .L_num_blocks_is_6_amivrujEyduiFoi
|
|
+ cmpq $0x50,%r8
|
|
+ je .L_num_blocks_is_5_amivrujEyduiFoi
|
|
+ cmpq $0x40,%r8
|
|
+ je .L_num_blocks_is_4_amivrujEyduiFoi
|
|
+ cmpq $0x30,%r8
|
|
+ je .L_num_blocks_is_3_amivrujEyduiFoi
|
|
+ cmpq $0x20,%r8
|
|
+ je .L_num_blocks_is_2_amivrujEyduiFoi
|
|
+ cmpq $0x10,%r8
|
|
+ je .L_num_blocks_is_1_amivrujEyduiFoi
|
|
+
|
|
+.L_num_blocks_is_7_amivrujEyduiFoi:
|
|
+ vmovdqa 0(%rsp),%xmm9
|
|
+ movq 0(%rsp),%rax
|
|
+ movq 8(%rsp),%rbx
|
|
+ vmovdqu 0(%rdi),%xmm1
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,16 + 8(%rsp)
|
|
+ vmovdqa 16(%rsp),%xmm10
|
|
+ vmovdqu 16(%rdi),%xmm2
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,32(%rsp)
|
|
+ movq %rbx,32 + 8(%rsp)
|
|
+ vmovdqa 32(%rsp),%xmm11
|
|
+ vmovdqu 32(%rdi),%xmm3
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,48(%rsp)
|
|
+ movq %rbx,48 + 8(%rsp)
|
|
+ vmovdqa 48(%rsp),%xmm12
|
|
+ vmovdqu 48(%rdi),%xmm4
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,64(%rsp)
|
|
+ movq %rbx,64 + 8(%rsp)
|
|
+ vmovdqa 64(%rsp),%xmm13
|
|
+ vmovdqu 64(%rdi),%xmm5
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,80(%rsp)
|
|
+ movq %rbx,80 + 8(%rsp)
|
|
+ vmovdqa 80(%rsp),%xmm14
|
|
+ vmovdqu 80(%rdi),%xmm6
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,96(%rsp)
|
|
+ movq %rbx,96 + 8(%rsp)
|
|
+ vmovdqa 96(%rsp),%xmm15
|
|
+ vmovdqu 96(%rdi),%xmm7
|
|
+ addq $0x70,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_7_amivrujEyduiFoi
|
|
+
|
|
+.L_steal_cipher_7_amivrujEyduiFoi:
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,24(%rsp)
|
|
+ vmovdqa64 %xmm15,%xmm16
|
|
+ vmovdqa 16(%rsp),%xmm15
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vpxor %xmm13,%xmm5,%xmm5
|
|
+ vpxor %xmm14,%xmm6,%xmm6
|
|
+ vpxor %xmm15,%xmm7,%xmm7
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm0,%xmm3,%xmm3
|
|
+ vpxor %xmm0,%xmm4,%xmm4
|
|
+ vpxor %xmm0,%xmm5,%xmm5
|
|
+ vpxor %xmm0,%xmm6,%xmm6
|
|
+ vpxor %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vaesdeclast %xmm0,%xmm3,%xmm3
|
|
+ vaesdeclast %xmm0,%xmm4,%xmm4
|
|
+ vaesdeclast %xmm0,%xmm5,%xmm5
|
|
+ vaesdeclast %xmm0,%xmm6,%xmm6
|
|
+ vaesdeclast %xmm0,%xmm7,%xmm7
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vpxor %xmm13,%xmm5,%xmm5
|
|
+ vpxor %xmm14,%xmm6,%xmm6
|
|
+ vpxor %xmm15,%xmm7,%xmm7
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ vmovdqu %xmm3,32(%rsi)
|
|
+ vmovdqu %xmm4,48(%rsi)
|
|
+ vmovdqu %xmm5,64(%rsi)
|
|
+ vmovdqu %xmm6,80(%rsi)
|
|
+ addq $0x70,%rsi
|
|
+ vmovdqa64 %xmm16,%xmm0
|
|
+ vmovdqa %xmm7,%xmm8
|
|
+ jmp .L_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+.L_done_7_amivrujEyduiFoi:
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vpxor %xmm13,%xmm5,%xmm5
|
|
+ vpxor %xmm14,%xmm6,%xmm6
|
|
+ vpxor %xmm15,%xmm7,%xmm7
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm0,%xmm3,%xmm3
|
|
+ vpxor %xmm0,%xmm4,%xmm4
|
|
+ vpxor %xmm0,%xmm5,%xmm5
|
|
+ vpxor %xmm0,%xmm6,%xmm6
|
|
+ vpxor %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vaesdeclast %xmm0,%xmm3,%xmm3
|
|
+ vaesdeclast %xmm0,%xmm4,%xmm4
|
|
+ vaesdeclast %xmm0,%xmm5,%xmm5
|
|
+ vaesdeclast %xmm0,%xmm6,%xmm6
|
|
+ vaesdeclast %xmm0,%xmm7,%xmm7
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vpxor %xmm13,%xmm5,%xmm5
|
|
+ vpxor %xmm14,%xmm6,%xmm6
|
|
+ vpxor %xmm15,%xmm7,%xmm7
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ vmovdqu %xmm3,32(%rsi)
|
|
+ vmovdqu %xmm4,48(%rsi)
|
|
+ vmovdqu %xmm5,64(%rsi)
|
|
+ vmovdqu %xmm6,80(%rsi)
|
|
+ addq $0x70,%rsi
|
|
+ vmovdqa %xmm7,%xmm8
|
|
+ jmp .L_done_amivrujEyduiFoi
|
|
+
|
|
+.L_num_blocks_is_6_amivrujEyduiFoi:
|
|
+ vmovdqa 0(%rsp),%xmm9
|
|
+ movq 0(%rsp),%rax
|
|
+ movq 8(%rsp),%rbx
|
|
+ vmovdqu 0(%rdi),%xmm1
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,16 + 8(%rsp)
|
|
+ vmovdqa 16(%rsp),%xmm10
|
|
+ vmovdqu 16(%rdi),%xmm2
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,32(%rsp)
|
|
+ movq %rbx,32 + 8(%rsp)
|
|
+ vmovdqa 32(%rsp),%xmm11
|
|
+ vmovdqu 32(%rdi),%xmm3
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,48(%rsp)
|
|
+ movq %rbx,48 + 8(%rsp)
|
|
+ vmovdqa 48(%rsp),%xmm12
|
|
+ vmovdqu 48(%rdi),%xmm4
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,64(%rsp)
|
|
+ movq %rbx,64 + 8(%rsp)
|
|
+ vmovdqa 64(%rsp),%xmm13
|
|
+ vmovdqu 64(%rdi),%xmm5
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,80(%rsp)
|
|
+ movq %rbx,80 + 8(%rsp)
|
|
+ vmovdqa 80(%rsp),%xmm14
|
|
+ vmovdqu 80(%rdi),%xmm6
|
|
+ addq $0x60,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_6_amivrujEyduiFoi
|
|
+
|
|
+.L_steal_cipher_6_amivrujEyduiFoi:
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,24(%rsp)
|
|
+ vmovdqa64 %xmm14,%xmm15
|
|
+ vmovdqa 16(%rsp),%xmm14
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vpxor %xmm13,%xmm5,%xmm5
|
|
+ vpxor %xmm14,%xmm6,%xmm6
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm0,%xmm3,%xmm3
|
|
+ vpxor %xmm0,%xmm4,%xmm4
|
|
+ vpxor %xmm0,%xmm5,%xmm5
|
|
+ vpxor %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vaesdeclast %xmm0,%xmm3,%xmm3
|
|
+ vaesdeclast %xmm0,%xmm4,%xmm4
|
|
+ vaesdeclast %xmm0,%xmm5,%xmm5
|
|
+ vaesdeclast %xmm0,%xmm6,%xmm6
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vpxor %xmm13,%xmm5,%xmm5
|
|
+ vpxor %xmm14,%xmm6,%xmm6
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ vmovdqu %xmm3,32(%rsi)
|
|
+ vmovdqu %xmm4,48(%rsi)
|
|
+ vmovdqu %xmm5,64(%rsi)
|
|
+ addq $0x60,%rsi
|
|
+ vmovdqa %xmm15,%xmm0
|
|
+ vmovdqa %xmm6,%xmm8
|
|
+ jmp .L_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+.L_done_6_amivrujEyduiFoi:
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vpxor %xmm13,%xmm5,%xmm5
|
|
+ vpxor %xmm14,%xmm6,%xmm6
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm0,%xmm3,%xmm3
|
|
+ vpxor %xmm0,%xmm4,%xmm4
|
|
+ vpxor %xmm0,%xmm5,%xmm5
|
|
+ vpxor %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vaesdeclast %xmm0,%xmm3,%xmm3
|
|
+ vaesdeclast %xmm0,%xmm4,%xmm4
|
|
+ vaesdeclast %xmm0,%xmm5,%xmm5
|
|
+ vaesdeclast %xmm0,%xmm6,%xmm6
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vpxor %xmm13,%xmm5,%xmm5
|
|
+ vpxor %xmm14,%xmm6,%xmm6
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ vmovdqu %xmm3,32(%rsi)
|
|
+ vmovdqu %xmm4,48(%rsi)
|
|
+ vmovdqu %xmm5,64(%rsi)
|
|
+ addq $0x60,%rsi
|
|
+ vmovdqa %xmm6,%xmm8
|
|
+ jmp .L_done_amivrujEyduiFoi
|
|
+
|
|
+.L_num_blocks_is_5_amivrujEyduiFoi:
|
|
+ vmovdqa 0(%rsp),%xmm9
|
|
+ movq 0(%rsp),%rax
|
|
+ movq 8(%rsp),%rbx
|
|
+ vmovdqu 0(%rdi),%xmm1
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,16 + 8(%rsp)
|
|
+ vmovdqa 16(%rsp),%xmm10
|
|
+ vmovdqu 16(%rdi),%xmm2
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,32(%rsp)
|
|
+ movq %rbx,32 + 8(%rsp)
|
|
+ vmovdqa 32(%rsp),%xmm11
|
|
+ vmovdqu 32(%rdi),%xmm3
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,48(%rsp)
|
|
+ movq %rbx,48 + 8(%rsp)
|
|
+ vmovdqa 48(%rsp),%xmm12
|
|
+ vmovdqu 48(%rdi),%xmm4
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,64(%rsp)
|
|
+ movq %rbx,64 + 8(%rsp)
|
|
+ vmovdqa 64(%rsp),%xmm13
|
|
+ vmovdqu 64(%rdi),%xmm5
|
|
+ addq $0x50,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_5_amivrujEyduiFoi
|
|
+
|
|
+.L_steal_cipher_5_amivrujEyduiFoi:
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,24(%rsp)
|
|
+ vmovdqa64 %xmm13,%xmm14
|
|
+ vmovdqa 16(%rsp),%xmm13
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vpxor %xmm13,%xmm5,%xmm5
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm0,%xmm3,%xmm3
|
|
+ vpxor %xmm0,%xmm4,%xmm4
|
|
+ vpxor %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vaesdeclast %xmm0,%xmm3,%xmm3
|
|
+ vaesdeclast %xmm0,%xmm4,%xmm4
|
|
+ vaesdeclast %xmm0,%xmm5,%xmm5
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vpxor %xmm13,%xmm5,%xmm5
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ vmovdqu %xmm3,32(%rsi)
|
|
+ vmovdqu %xmm4,48(%rsi)
|
|
+ addq $0x50,%rsi
|
|
+ vmovdqa %xmm14,%xmm0
|
|
+ vmovdqa %xmm5,%xmm8
|
|
+ jmp .L_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+.L_done_5_amivrujEyduiFoi:
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vpxor %xmm13,%xmm5,%xmm5
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm0,%xmm3,%xmm3
|
|
+ vpxor %xmm0,%xmm4,%xmm4
|
|
+ vpxor %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vaesdeclast %xmm0,%xmm3,%xmm3
|
|
+ vaesdeclast %xmm0,%xmm4,%xmm4
|
|
+ vaesdeclast %xmm0,%xmm5,%xmm5
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vpxor %xmm13,%xmm5,%xmm5
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ vmovdqu %xmm3,32(%rsi)
|
|
+ vmovdqu %xmm4,48(%rsi)
|
|
+ addq $0x50,%rsi
|
|
+ vmovdqa %xmm5,%xmm8
|
|
+ jmp .L_done_amivrujEyduiFoi
|
|
+
|
|
+.L_num_blocks_is_4_amivrujEyduiFoi:
|
|
+ vmovdqa 0(%rsp),%xmm9
|
|
+ movq 0(%rsp),%rax
|
|
+ movq 8(%rsp),%rbx
|
|
+ vmovdqu 0(%rdi),%xmm1
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,16 + 8(%rsp)
|
|
+ vmovdqa 16(%rsp),%xmm10
|
|
+ vmovdqu 16(%rdi),%xmm2
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,32(%rsp)
|
|
+ movq %rbx,32 + 8(%rsp)
|
|
+ vmovdqa 32(%rsp),%xmm11
|
|
+ vmovdqu 32(%rdi),%xmm3
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,48(%rsp)
|
|
+ movq %rbx,48 + 8(%rsp)
|
|
+ vmovdqa 48(%rsp),%xmm12
|
|
+ vmovdqu 48(%rdi),%xmm4
|
|
+ addq $0x40,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_4_amivrujEyduiFoi
|
|
+
|
|
+.L_steal_cipher_4_amivrujEyduiFoi:
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,24(%rsp)
|
|
+ vmovdqa64 %xmm12,%xmm13
|
|
+ vmovdqa 16(%rsp),%xmm12
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm0,%xmm3,%xmm3
|
|
+ vpxor %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vaesdeclast %xmm0,%xmm3,%xmm3
|
|
+ vaesdeclast %xmm0,%xmm4,%xmm4
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ vmovdqu %xmm3,32(%rsi)
|
|
+ addq $0x40,%rsi
|
|
+ vmovdqa %xmm13,%xmm0
|
|
+ vmovdqa %xmm4,%xmm8
|
|
+ jmp .L_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+.L_done_4_amivrujEyduiFoi:
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm0,%xmm3,%xmm3
|
|
+ vpxor %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vaesdeclast %xmm0,%xmm3,%xmm3
|
|
+ vaesdeclast %xmm0,%xmm4,%xmm4
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ vmovdqu %xmm3,32(%rsi)
|
|
+ addq $0x40,%rsi
|
|
+ vmovdqa %xmm4,%xmm8
|
|
+ jmp .L_done_amivrujEyduiFoi
|
|
+
|
|
+.L_num_blocks_is_3_amivrujEyduiFoi:
|
|
+ vmovdqa 0(%rsp),%xmm9
|
|
+ movq 0(%rsp),%rax
|
|
+ movq 8(%rsp),%rbx
|
|
+ vmovdqu 0(%rdi),%xmm1
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,16 + 8(%rsp)
|
|
+ vmovdqa 16(%rsp),%xmm10
|
|
+ vmovdqu 16(%rdi),%xmm2
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,32(%rsp)
|
|
+ movq %rbx,32 + 8(%rsp)
|
|
+ vmovdqa 32(%rsp),%xmm11
|
|
+ vmovdqu 32(%rdi),%xmm3
|
|
+ addq $0x30,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_3_amivrujEyduiFoi
|
|
+
|
|
+.L_steal_cipher_3_amivrujEyduiFoi:
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,24(%rsp)
|
|
+ vmovdqa64 %xmm11,%xmm12
|
|
+ vmovdqa 16(%rsp),%xmm11
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vaesdeclast %xmm0,%xmm3,%xmm3
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ addq $0x30,%rsi
|
|
+ vmovdqa %xmm12,%xmm0
|
|
+ vmovdqa %xmm3,%xmm8
|
|
+ jmp .L_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+.L_done_3_amivrujEyduiFoi:
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vaesdeclast %xmm0,%xmm3,%xmm3
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ addq $0x30,%rsi
|
|
+ vmovdqa %xmm3,%xmm8
|
|
+ jmp .L_done_amivrujEyduiFoi
|
|
+
|
|
+.L_num_blocks_is_2_amivrujEyduiFoi:
|
|
+ vmovdqa 0(%rsp),%xmm9
|
|
+ movq 0(%rsp),%rax
|
|
+ movq 8(%rsp),%rbx
|
|
+ vmovdqu 0(%rdi),%xmm1
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,16 + 8(%rsp)
|
|
+ vmovdqa 16(%rsp),%xmm10
|
|
+ vmovdqu 16(%rdi),%xmm2
|
|
+ addq $0x20,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_2_amivrujEyduiFoi
|
|
+
|
|
+.L_steal_cipher_2_amivrujEyduiFoi:
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,24(%rsp)
|
|
+ vmovdqa64 %xmm10,%xmm11
|
|
+ vmovdqa 16(%rsp),%xmm10
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ addq $0x20,%rsi
|
|
+ vmovdqa %xmm11,%xmm0
|
|
+ vmovdqa %xmm2,%xmm8
|
|
+ jmp .L_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+.L_done_2_amivrujEyduiFoi:
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ addq $0x20,%rsi
|
|
+ vmovdqa %xmm2,%xmm8
|
|
+ jmp .L_done_amivrujEyduiFoi
|
|
+
|
|
+.L_num_blocks_is_1_amivrujEyduiFoi:
|
|
+ vmovdqa 0(%rsp),%xmm9
|
|
+ movq 0(%rsp),%rax
|
|
+ movq 8(%rsp),%rbx
|
|
+ vmovdqu 0(%rdi),%xmm1
|
|
+ addq $0x10,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_1_amivrujEyduiFoi
|
|
+
|
|
+.L_steal_cipher_1_amivrujEyduiFoi:
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,24(%rsp)
|
|
+ vmovdqa64 %xmm9,%xmm10
|
|
+ vmovdqa 16(%rsp),%xmm9
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ addq $0x10,%rsi
|
|
+ vmovdqa %xmm10,%xmm0
|
|
+ vmovdqa %xmm1,%xmm8
|
|
+ jmp .L_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+.L_done_1_amivrujEyduiFoi:  # exactly one block and no tail: decrypt xmm1 with the tweak in xmm9
|
|
+ vpxor %xmm9,%xmm1,%xmm1  # XOR the tweak into the block before the cipher rounds
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1  # XOR with the round key at offset 0
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1  # nine vaesdec rounds with the keys at offsets 16..144
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1  # final round with the key at offset 160
|
|
+ vpxor %xmm9,%xmm1,%xmm1  # XOR the same tweak into the result
|
|
+ addq $0x10,%rsi  # advance the output pointer; the block itself is not stored here
|
|
+ vmovdqa %xmm1,%xmm8  # xmm8 = result block; presumably written out at .L_done (outside this view) - confirm
|
|
+ jmp .L_done_amivrujEyduiFoi
|
|
+.cfi_endproc
|
|
+.globl aesni_xts_256_encrypt_avx512
|
|
+.hidden aesni_xts_256_encrypt_avx512
|
|
+.type aesni_xts_256_encrypt_avx512,@function
|
|
+.align 32
|
|
+aesni_xts_256_encrypt_avx512:  # register use visible below: rdi=input, rsi=output, rdx=byte count, rcx=round keys for the data, r8=round keys for the tweak, r9=16-byte initial tweak input
|
|
+.cfi_startproc
|
|
+.byte 243,15,30,250  # bytes F3 0F 1E FA = endbr64
|
|
+ pushq %rbp
|
|
+ movq %rsp,%rbp  # rbp keeps the incoming rsp so the epilogue can undo the alignment below
|
|
+ subq $136,%rsp
|
|
+ andq $0xffffffffffffffc0,%rsp  # align the scratch area down to 64 bytes
|
|
+ movq %rbx,128(%rsp)  # save rbx in the scratch area; restored at .L_ret
|
|
+ movq $0x87,%r10  # r10 = 0x87, broadcast into zmm25 below and used by the tweak updates
|
|
+ vmovdqu (%r9),%xmm1  # xmm1 = 16 bytes at r9
|
|
+ vpxor (%r8),%xmm1,%xmm1  # XOR with the r8 round key at offset 0
|
|
+ vaesenc 16(%r8),%xmm1,%xmm1  # thirteen vaesenc rounds with the r8 keys at offsets 16..208
|
|
+ vaesenc 32(%r8),%xmm1,%xmm1
|
|
+ vaesenc 48(%r8),%xmm1,%xmm1
|
|
+ vaesenc 64(%r8),%xmm1,%xmm1
|
|
+ vaesenc 80(%r8),%xmm1,%xmm1
|
|
+ vaesenc 96(%r8),%xmm1,%xmm1
|
|
+ vaesenc 112(%r8),%xmm1,%xmm1
|
|
+ vaesenc 128(%r8),%xmm1,%xmm1
|
|
+ vaesenc 144(%r8),%xmm1,%xmm1
|
|
+ vaesenc 160(%r8),%xmm1,%xmm1
|
|
+ vaesenc 176(%r8),%xmm1,%xmm1
|
|
+ vaesenc 192(%r8),%xmm1,%xmm1
|
|
+ vaesenc 208(%r8),%xmm1,%xmm1
|
|
+ vaesenclast 224(%r8),%xmm1,%xmm1  # final round with the r8 key at offset 224
|
|
+ vmovdqa %xmm1,(%rsp)  # store the encrypted tweak at 0(%rsp); later paths broadcast it from there
|
|
+
|
|
+ cmpq $0x80,%rdx
|
|
+ jl .L_less_than_128_bytes_wcpqaDvsGlbjGoe  # NOTE(review): signed compare on a byte count while the loop-back branches use jae - confirm lengths >= 2^63 cannot reach here
|
|
+ vpbroadcastq %r10,%zmm25  # zmm25 = 0x87 in every 64-bit element
|
|
+ cmpq $0x100,%rdx
|
|
+ jge .L_start_by16_wcpqaDvsGlbjGoe  # at least 256 bytes: 16 blocks per iteration
|
|
+ cmpq $0x80,%rdx
|
|
+ jge .L_start_by8_wcpqaDvsGlbjGoe  # always taken: rdx >= 0x80 was already established by the first compare
|
|
+
|
|
+.L_do_n_blocks_wcpqaDvsGlbjGoe:  # tail dispatch reached from the main loops once fewer than 0x80 bytes remain
|
|
+ cmpq $0x0,%rdx
|
|
+ je .L_ret_wcpqaDvsGlbjGoe  # nothing left to do
|
|
+ cmpq $0x70,%rdx
|
|
+ jge .L_remaining_num_blocks_is_7_wcpqaDvsGlbjGoe  # each test picks the number of full 16-byte blocks left
|
|
+ cmpq $0x60,%rdx
|
|
+ jge .L_remaining_num_blocks_is_6_wcpqaDvsGlbjGoe
|
|
+ cmpq $0x50,%rdx
|
|
+ jge .L_remaining_num_blocks_is_5_wcpqaDvsGlbjGoe
|
|
+ cmpq $0x40,%rdx
|
|
+ jge .L_remaining_num_blocks_is_4_wcpqaDvsGlbjGoe
|
|
+ cmpq $0x30,%rdx
|
|
+ jge .L_remaining_num_blocks_is_3_wcpqaDvsGlbjGoe
|
|
+ cmpq $0x20,%rdx
|
|
+ jge .L_remaining_num_blocks_is_2_wcpqaDvsGlbjGoe
|
|
+ cmpq $0x10,%rdx
|
|
+ jge .L_remaining_num_blocks_is_1_wcpqaDvsGlbjGoe
|
|
+ vmovdqa %xmm0,%xmm8  # 1..15 bytes left: xmm8 = last ciphertext block, extracted into xmm0 when the main loop exited
|
|
+ vmovdqa %xmm9,%xmm0  # xmm0 = low lane of zmm9 = tweak for the next block
|
|
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
|
|
+
|
|
+.L_remaining_num_blocks_is_7_wcpqaDvsGlbjGoe:  # seven full blocks left; tweaks are already in zmm9 and zmm10
|
|
+ movq $0x0000ffffffffffff,%r8  # 48 low bits set: byte mask covering three 16-byte blocks
|
|
+ kmovq %r8,%k1
|
|
+ vmovdqu8 (%rdi),%zmm1  # blocks 1..4
|
|
+ vmovdqu8 64(%rdi),%zmm2{%k1}  # blocks 5..7 only; the masked-off top lane is not read from memory
|
|
+ addq $0x70,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%zmm0  # round key at offset 0 replicated to all four lanes
|
|
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1  # imm 0x96 = three-way XOR: data ^ tweak ^ round key
|
|
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0  # thirteen vaesenc rounds follow, keys at offsets 16..208
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 176(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 192(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 208(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 224(%rcx),%zmm0
|
|
+ vaesenclast %zmm0,%zmm1,%zmm1  # final round with the key at offset 224
|
|
+ vaesenclast %zmm0,%zmm2,%zmm2
|
|
+ vpxorq %zmm9,%zmm1,%zmm1  # XOR the tweaks into the cipher output
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ vmovdqu8 %zmm2,64(%rsi){%k1}  # store only the 48 bytes of blocks 5..7
|
|
+ addq $0x70,%rsi
|
|
+ vextracti32x4 $0x2,%zmm2,%xmm8  # xmm8 = 7th ciphertext block, input to the stealing code
|
|
+ vextracti32x4 $0x3,%zmm10,%xmm0  # xmm0 = unused 8th tweak, for the stolen block
|
|
+ andq $0xf,%rdx  # rdx = size of the partial tail
|
|
+ je .L_ret_wcpqaDvsGlbjGoe
|
|
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
|
|
+
|
|
+.L_remaining_num_blocks_is_6_wcpqaDvsGlbjGoe:  # six full blocks left; tweaks are already in zmm9 and zmm10
|
|
+ vmovdqu8 (%rdi),%zmm1  # blocks 1..4
|
|
+ vmovdqu8 64(%rdi),%ymm2  # blocks 5..6; the rounds below still run at full zmm width on zmm2
|
|
+ addq $0x60,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%zmm0  # round key at offset 0 replicated to all four lanes
|
|
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1  # imm 0x96 = three-way XOR: data ^ tweak ^ round key
|
|
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0  # thirteen vaesenc rounds follow, keys at offsets 16..208
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 176(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 192(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 208(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 224(%rcx),%zmm0
|
|
+ vaesenclast %zmm0,%zmm1,%zmm1  # final round with the key at offset 224
|
|
+ vaesenclast %zmm0,%zmm2,%zmm2
|
|
+ vpxorq %zmm9,%zmm1,%zmm1  # XOR the tweaks into the cipher output
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ vmovdqu8 %ymm2,64(%rsi)  # store only blocks 5..6
|
|
+ addq $0x60,%rsi
|
|
+ vextracti32x4 $0x1,%zmm2,%xmm8  # xmm8 = 6th ciphertext block, input to the stealing code
|
|
+ vextracti32x4 $0x2,%zmm10,%xmm0  # xmm0 = 7th tweak, for the stolen block
|
|
+ andq $0xf,%rdx  # rdx = size of the partial tail
|
|
+ je .L_ret_wcpqaDvsGlbjGoe
|
|
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
|
|
+
|
|
+.L_remaining_num_blocks_is_5_wcpqaDvsGlbjGoe:  # five full blocks left; tweaks are already in zmm9 and zmm10
|
|
+ vmovdqu8 (%rdi),%zmm1  # blocks 1..4
|
|
+ vmovdqu 64(%rdi),%xmm2  # block 5; the rounds below still run at full zmm width on zmm2
|
|
+ addq $0x50,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%zmm0  # round key at offset 0 replicated to all four lanes
|
|
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1  # imm 0x96 = three-way XOR: data ^ tweak ^ round key
|
|
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0  # thirteen vaesenc rounds follow, keys at offsets 16..208
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 176(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 192(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 208(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 224(%rcx),%zmm0
|
|
+ vaesenclast %zmm0,%zmm1,%zmm1  # final round with the key at offset 224
|
|
+ vaesenclast %zmm0,%zmm2,%zmm2
|
|
+ vpxorq %zmm9,%zmm1,%zmm1  # XOR the tweaks into the cipher output
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ vmovdqu %xmm2,64(%rsi)  # store only block 5
|
|
+ addq $0x50,%rsi
|
|
+ vmovdqa %xmm2,%xmm8  # xmm8 = 5th ciphertext block, input to the stealing code
|
|
+ vextracti32x4 $0x1,%zmm10,%xmm0  # xmm0 = 6th tweak, for the stolen block
|
|
+ andq $0xf,%rdx  # rdx = size of the partial tail
|
|
+ je .L_ret_wcpqaDvsGlbjGoe
|
|
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
|
|
+
|
|
+.L_remaining_num_blocks_is_4_wcpqaDvsGlbjGoe:  # four full blocks left; their tweaks are the four lanes of zmm9
|
|
+ vmovdqu8 (%rdi),%zmm1  # blocks 1..4
|
|
+ addq $0x40,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%zmm0  # round key at offset 0 replicated to all four lanes
|
|
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1  # imm 0x96 = three-way XOR: data ^ tweak ^ round key
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0  # thirteen vaesenc rounds follow, keys at offsets 16..208
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 176(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 192(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 208(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 224(%rcx),%zmm0
|
|
+ vaesenclast %zmm0,%zmm1,%zmm1  # final round with the key at offset 224
|
|
+ vpxorq %zmm9,%zmm1,%zmm1  # XOR the tweaks into the cipher output
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ addq $0x40,%rsi
|
|
+ vextracti32x4 $0x3,%zmm1,%xmm8  # xmm8 = 4th ciphertext block, input to the stealing code
|
|
+ vmovdqa64 %xmm10,%xmm0  # xmm0 = low lane of zmm10 = 5th tweak, for the stolen block
|
|
+ andq $0xf,%rdx  # rdx = size of the partial tail
|
|
+ je .L_ret_wcpqaDvsGlbjGoe
|
|
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
|
|
+.L_remaining_num_blocks_is_3_wcpqaDvsGlbjGoe:  # three full blocks left; their tweaks are lanes 0..2 of zmm9
|
|
+ movq $-1,%r8
|
|
+ shrq $0x10,%r8  # r8 = 48 low bits set: byte mask covering three 16-byte blocks
|
|
+ kmovq %r8,%k1
|
|
+ vmovdqu8 (%rdi),%zmm1{%k1}  # blocks 1..3 only; the masked-off top lane is not read from memory
|
|
+ addq $0x30,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%zmm0  # round key at offset 0 replicated to all four lanes
|
|
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1  # imm 0x96 = three-way XOR: data ^ tweak ^ round key
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0  # thirteen vaesenc rounds follow, keys at offsets 16..208
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 176(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 192(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 208(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 224(%rcx),%zmm0
|
|
+ vaesenclast %zmm0,%zmm1,%zmm1  # final round with the key at offset 224
|
|
+ vpxorq %zmm9,%zmm1,%zmm1  # XOR the tweaks into the cipher output
|
|
+ vmovdqu8 %zmm1,(%rsi){%k1}  # store only the 48 bytes of blocks 1..3
|
|
+ addq $0x30,%rsi
|
|
+ vextracti32x4 $0x2,%zmm1,%xmm8  # xmm8 = 3rd ciphertext block, input to the stealing code
|
|
+ vextracti32x4 $0x3,%zmm9,%xmm0  # xmm0 = 4th tweak, for the stolen block
|
|
+ andq $0xf,%rdx  # rdx = size of the partial tail
|
|
+ je .L_ret_wcpqaDvsGlbjGoe
|
|
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
|
|
+.L_remaining_num_blocks_is_2_wcpqaDvsGlbjGoe:  # two full blocks left, processed at ymm width; tweaks are lanes 0..1 of zmm9
|
|
+ vmovdqu8 (%rdi),%ymm1  # blocks 1..2
|
|
+ addq $0x20,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%ymm0  # round key at offset 0 replicated to both lanes
|
|
+ vpternlogq $0x96,%ymm0,%ymm9,%ymm1  # imm 0x96 = three-way XOR: data ^ tweak ^ round key
|
|
+ vbroadcasti32x4 16(%rcx),%ymm0  # thirteen vaesenc rounds follow, keys at offsets 16..208
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 32(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 48(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 64(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 80(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 96(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 112(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 128(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 144(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 160(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 176(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 192(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 208(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 224(%rcx),%ymm0
|
|
+ vaesenclast %ymm0,%ymm1,%ymm1  # final round with the key at offset 224
|
|
+ vpxorq %ymm9,%ymm1,%ymm1  # XOR the tweaks into the cipher output
|
|
+ vmovdqu %ymm1,(%rsi)
|
|
+ addq $0x20,%rsi
|
|
+ vextracti32x4 $0x1,%zmm1,%xmm8  # xmm8 = 2nd ciphertext block, input to the stealing code
|
|
+ vextracti32x4 $0x2,%zmm9,%xmm0  # xmm0 = 3rd tweak, for the stolen block
|
|
+ andq $0xf,%rdx  # rdx = size of the partial tail
|
|
+ je .L_ret_wcpqaDvsGlbjGoe
|
|
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
|
|
+.L_remaining_num_blocks_is_1_wcpqaDvsGlbjGoe:  # one full block left, processed at xmm width; its tweak is lane 0 of zmm9
|
|
+ vmovdqu (%rdi),%xmm1  # the single input block
|
|
+ addq $0x10,%rdi
|
|
+ vpxor %xmm9,%xmm1,%xmm1  # XOR the tweak into the block before the cipher rounds
|
|
+ vpxor (%rcx),%xmm1,%xmm1  # XOR with the round key at offset 0
|
|
+ vaesenc 16(%rcx),%xmm1,%xmm1  # thirteen vaesenc rounds, keys at offsets 16..208
|
|
+ vaesenc 32(%rcx),%xmm1,%xmm1
|
|
+ vaesenc 48(%rcx),%xmm1,%xmm1
|
|
+ vaesenc 64(%rcx),%xmm1,%xmm1
|
|
+ vaesenc 80(%rcx),%xmm1,%xmm1
|
|
+ vaesenc 96(%rcx),%xmm1,%xmm1
|
|
+ vaesenc 112(%rcx),%xmm1,%xmm1
|
|
+ vaesenc 128(%rcx),%xmm1,%xmm1
|
|
+ vaesenc 144(%rcx),%xmm1,%xmm1
|
|
+ vaesenc 160(%rcx),%xmm1,%xmm1
|
|
+ vaesenc 176(%rcx),%xmm1,%xmm1
|
|
+ vaesenc 192(%rcx),%xmm1,%xmm1
|
|
+ vaesenc 208(%rcx),%xmm1,%xmm1
|
|
+ vaesenclast 224(%rcx),%xmm1,%xmm1  # final round with the key at offset 224
|
|
+ vpxor %xmm9,%xmm1,%xmm1  # XOR the tweak into the cipher output
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ addq $0x10,%rsi
|
|
+ vmovdqa %xmm1,%xmm8  # xmm8 = the ciphertext block, input to the stealing code
|
|
+ vextracti32x4 $0x1,%zmm9,%xmm0  # xmm0 = 2nd tweak, for the stolen block
|
|
+ andq $0xf,%rdx  # rdx = size of the partial tail
|
|
+ je .L_ret_wcpqaDvsGlbjGoe
|
|
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
|
|
+
|
|
+
|
|
+.L_start_by16_wcpqaDvsGlbjGoe:  # build the 16 tweaks (zmm9..zmm12, four per register) for the 16-block loop
|
|
+ vbroadcasti32x4 (%rsp),%zmm0  # initial tweak from 0(%rsp) replicated to all four lanes
|
|
+ vbroadcasti32x4 shufb_15_7(%rip),%zmm8  # byte-shuffle control; table is defined outside this view
|
|
+ movq $0xaa,%r8
|
|
+ kmovq %r8,%k2  # k2 = 0b10101010: selects the odd (high) qword of each lane
|
|
+ vpshufb %zmm8,%zmm0,%zmm1
|
|
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4  # per-qword variable left shifts; counts come from a table outside this view
|
|
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2  # per-qword variable right shifts of the shuffled copy
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3  # carry-less multiply of each lane low qword by zmm25 (0x87)
|
|
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}  # merge-masked: only the high qwords of zmm4 are updated
|
|
+ vpxord %zmm4,%zmm3,%zmm9  # zmm9 = tweaks for blocks 1..4 (presumably T*x^0..x^3 - confirm against the tables)
|
|
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5  # same recipe with the second pair of tables
|
|
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7
|
|
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
|
|
+ vpxord %zmm5,%zmm7,%zmm10  # zmm10 = tweaks for blocks 5..8
|
|
+ vpsrldq $0xf,%zmm9,%zmm13  # top byte of each lane of zmm9 moved to the bottom
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14  # reduction term: that byte carry-less multiplied by 0x87
|
|
+ vpslldq $0x1,%zmm9,%zmm11  # each lane shifted left by one byte
|
|
+ vpxord %zmm14,%zmm11,%zmm11  # zmm11 = zmm9 advanced by eight block positions (tweaks for blocks 9..12)
|
|
+ vpsrldq $0xf,%zmm10,%zmm15  # same update applied to zmm10
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm15,%zmm16
|
|
+ vpslldq $0x1,%zmm10,%zmm12
|
|
+ vpxord %zmm16,%zmm12,%zmm12  # zmm12 = tweaks for blocks 13..16; execution falls through into the 16-block loop
|
|
+
|
|
+.L_main_loop_run_16_wcpqaDvsGlbjGoe:  # encrypt 16 blocks (256 bytes) per iteration; next tweaks are computed between AES rounds
|
|
+ vmovdqu8 (%rdi),%zmm1  # load 16 input blocks into zmm1..zmm4
|
|
+ vmovdqu8 64(%rdi),%zmm2
|
|
+ vmovdqu8 128(%rdi),%zmm3
|
|
+ vmovdqu8 192(%rdi),%zmm4
|
|
+ addq $0x100,%rdi
|
|
+ vpxorq %zmm9,%zmm1,%zmm1  # XOR the tweaks zmm9..zmm12 into the data
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+ vpxorq %zmm11,%zmm3,%zmm3
|
|
+ vpxorq %zmm12,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 (%rcx),%zmm0  # round key at offset 0 replicated to all four lanes
|
|
+ vpxorq %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm0,%zmm2,%zmm2
|
|
+ vpxorq %zmm0,%zmm3,%zmm3
|
|
+ vpxorq %zmm0,%zmm4,%zmm4
|
|
+ vpsrldq $0xf,%zmm11,%zmm13  # next-iteration tweak 1 of 4: zmm15 = zmm11 shifted one byte left with 0x87 reduction
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm11,%zmm15
|
|
+ vpxord %zmm14,%zmm15,%zmm15
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0  # thirteen vaesenc rounds follow, keys at offsets 16..208
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+ vaesenc %zmm0,%zmm3,%zmm3
|
|
+ vaesenc %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+ vaesenc %zmm0,%zmm3,%zmm3
|
|
+ vaesenc %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+ vaesenc %zmm0,%zmm3,%zmm3
|
|
+ vaesenc %zmm0,%zmm4,%zmm4
|
|
+ vpsrldq $0xf,%zmm12,%zmm13  # next-iteration tweak 2 of 4: zmm16 derived from zmm12
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm12,%zmm16
|
|
+ vpxord %zmm14,%zmm16,%zmm16
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+ vaesenc %zmm0,%zmm3,%zmm3
|
|
+ vaesenc %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+ vaesenc %zmm0,%zmm3,%zmm3
|
|
+ vaesenc %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+ vaesenc %zmm0,%zmm3,%zmm3
|
|
+ vaesenc %zmm0,%zmm4,%zmm4
|
|
+ vpsrldq $0xf,%zmm15,%zmm13  # next-iteration tweak 3 of 4: zmm17 derived from zmm15
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm15,%zmm17
|
|
+ vpxord %zmm14,%zmm17,%zmm17
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+ vaesenc %zmm0,%zmm3,%zmm3
|
|
+ vaesenc %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+ vaesenc %zmm0,%zmm3,%zmm3
|
|
+ vaesenc %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+ vaesenc %zmm0,%zmm3,%zmm3
|
|
+ vaesenc %zmm0,%zmm4,%zmm4
|
|
+ vpsrldq $0xf,%zmm16,%zmm13  # next-iteration tweak 4 of 4: zmm18 derived from zmm16
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm16,%zmm18
|
|
+ vpxord %zmm14,%zmm18,%zmm18
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+ vaesenc %zmm0,%zmm3,%zmm3
|
|
+ vaesenc %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 176(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+ vaesenc %zmm0,%zmm3,%zmm3
|
|
+ vaesenc %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 192(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+ vaesenc %zmm0,%zmm3,%zmm3
|
|
+ vaesenc %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 208(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+ vaesenc %zmm0,%zmm3,%zmm3
|
|
+ vaesenc %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 224(%rcx),%zmm0
|
|
+ vaesenclast %zmm0,%zmm1,%zmm1  # final round with the key at offset 224
|
|
+ vaesenclast %zmm0,%zmm2,%zmm2
|
|
+ vaesenclast %zmm0,%zmm3,%zmm3
|
|
+ vaesenclast %zmm0,%zmm4,%zmm4
|
|
+ vpxorq %zmm9,%zmm1,%zmm1  # XOR the current tweaks into the cipher output
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+ vpxorq %zmm11,%zmm3,%zmm3
|
|
+ vpxorq %zmm12,%zmm4,%zmm4
|
|
+
|
|
+ vmovdqa32 %zmm15,%zmm9  # rotate in the tweaks for the next 16 blocks
|
|
+ vmovdqa32 %zmm16,%zmm10
|
|
+ vmovdqa32 %zmm17,%zmm11
|
|
+ vmovdqa32 %zmm18,%zmm12
|
|
+ vmovdqu8 %zmm1,(%rsi)  # store 16 ciphertext blocks
|
|
+ vmovdqu8 %zmm2,64(%rsi)
|
|
+ vmovdqu8 %zmm3,128(%rsi)
|
|
+ vmovdqu8 %zmm4,192(%rsi)
|
|
+ addq $0x100,%rsi
|
|
+ subq $0x100,%rdx  # rdx = bytes still to process
|
|
+ cmpq $0x100,%rdx
|
|
+ jae .L_main_loop_run_16_wcpqaDvsGlbjGoe  # unsigned: at least 16 more blocks
|
|
+ cmpq $0x80,%rdx
|
|
+ jae .L_main_loop_run_8_wcpqaDvsGlbjGoe  # 8..15 blocks left: continue in the 8-block loop, which uses zmm9 and zmm10
|
|
+ vextracti32x4 $0x3,%zmm4,%xmm0  # xmm0 = last ciphertext block written, needed if only a partial tail remains
|
|
+ jmp .L_do_n_blocks_wcpqaDvsGlbjGoe
|
|
+
|
|
+.L_start_by8_wcpqaDvsGlbjGoe:  # build the first 8 tweaks (zmm9, zmm10) for the 8-block loop
|
|
+ vbroadcasti32x4 (%rsp),%zmm0  # initial tweak from 0(%rsp) replicated to all four lanes
|
|
+ vbroadcasti32x4 shufb_15_7(%rip),%zmm8  # byte-shuffle control; table is defined outside this view
|
|
+ movq $0xaa,%r8
|
|
+ kmovq %r8,%k2  # k2 = 0b10101010: selects the odd (high) qword of each lane
|
|
+ vpshufb %zmm8,%zmm0,%zmm1
|
|
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4  # per-qword variable left shifts; counts come from a table outside this view
|
|
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2  # per-qword variable right shifts of the shuffled copy
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3  # carry-less multiply of each lane low qword by zmm25 (0x87)
|
|
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}  # merge-masked: only the high qwords of zmm4 are updated
|
|
+ vpxord %zmm4,%zmm3,%zmm9  # zmm9 = tweaks for blocks 1..4 (presumably T*x^0..x^3 - confirm against the tables)
|
|
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5  # same recipe with the second pair of tables
|
|
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7
|
|
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
|
|
+ vpxord %zmm5,%zmm7,%zmm10  # zmm10 = tweaks for blocks 5..8; execution falls through into the 8-block loop
|
|
+
|
|
+.L_main_loop_run_8_wcpqaDvsGlbjGoe:  # encrypt 8 blocks (128 bytes) per iteration using the tweaks in zmm9 and zmm10
|
|
+ vmovdqu8 (%rdi),%zmm1  # blocks 1..4
|
|
+ vmovdqu8 64(%rdi),%zmm2  # blocks 5..8
|
|
+ addq $0x80,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%zmm0  # round key at offset 0 replicated to all four lanes
|
|
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1  # imm 0x96 = three-way XOR: data ^ tweak ^ round key
|
|
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
|
|
+ vpsrldq $0xf,%zmm9,%zmm13  # next-iteration tweaks: zmm15 = zmm9 shifted one byte left with 0x87 reduction
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm9,%zmm15
|
|
+ vpxord %zmm14,%zmm15,%zmm15
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0  # thirteen vaesenc rounds follow, keys at offsets 16..208
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+ vpsrldq $0xf,%zmm10,%zmm13  # next-iteration tweaks: zmm16 derived from zmm10 the same way
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm10,%zmm16
|
|
+ vpxord %zmm14,%zmm16,%zmm16
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 176(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 192(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 208(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 224(%rcx),%zmm0
|
|
+ vaesenclast %zmm0,%zmm1,%zmm1  # final round with the key at offset 224
|
|
+ vaesenclast %zmm0,%zmm2,%zmm2
|
|
+ vpxorq %zmm9,%zmm1,%zmm1  # XOR the current tweaks into the cipher output
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+ vmovdqa32 %zmm15,%zmm9  # rotate in the tweaks for the next 8 blocks
|
|
+ vmovdqa32 %zmm16,%zmm10
|
|
+ vmovdqu8 %zmm1,(%rsi)  # store 8 ciphertext blocks
|
|
+ vmovdqu8 %zmm2,64(%rsi)
|
|
+ addq $0x80,%rsi
|
|
+ subq $0x80,%rdx  # rdx = bytes still to process
|
|
+ cmpq $0x80,%rdx
|
|
+ jae .L_main_loop_run_8_wcpqaDvsGlbjGoe  # unsigned: at least 8 more blocks
|
|
+ vextracti32x4 $0x3,%zmm2,%xmm0  # xmm0 = last ciphertext block written, needed if only a partial tail remains
|
|
+ jmp .L_do_n_blocks_wcpqaDvsGlbjGoe
|
|
+
|
|
+.L_steal_cipher_wcpqaDvsGlbjGoe:  # partial tail: in rdx = tail bytes, xmm8 = last full ciphertext block, xmm0 = tweak for the final block
|
|
+ vmovdqa %xmm8,%xmm2  # keep an unshuffled copy of the last ciphertext block
|
|
+ leaq vpshufb_shf_table(%rip),%rax
|
|
+ vmovdqu (%rax,%rdx,1),%xmm10  # shuffle control read at table+rdx; table contents are outside this view
|
|
+ vpshufb %xmm10,%xmm8,%xmm8
|
|
+ vmovdqu -16(%rdi,%rdx,1),%xmm3  # 16 input bytes ending at rdi+rdx, i.e. ending with the tail
|
|
+ vmovdqu %xmm8,-16(%rsi,%rdx,1)  # 16 output bytes ending at rsi+rdx; overlaps the block finalised below
|
|
+ leaq vpshufb_shf_table(%rip),%rax  # NOTE(review): rax already holds this address from the lea above; the reload looks redundant
|
|
+ addq $16,%rax
|
|
+ subq %rdx,%rax  # rax = table + 16 - rdx
|
|
+ vmovdqu (%rax),%xmm10
|
|
+ vpxor mask1(%rip),%xmm10,%xmm10  # mask1 is defined outside this view
|
|
+ vpshufb %xmm10,%xmm3,%xmm3
|
|
+ vpblendvb %xmm10,%xmm2,%xmm3,%xmm3  # per byte: take xmm2 where the top bit of xmm10 is set, else keep xmm3
|
|
+ vpxor %xmm0,%xmm3,%xmm8  # XOR the tweak into the merged block
|
|
+ vpxor (%rcx),%xmm8,%xmm8  # XOR with the round key at offset 0
|
|
+ vaesenc 16(%rcx),%xmm8,%xmm8  # thirteen vaesenc rounds, keys at offsets 16..208
|
|
+ vaesenc 32(%rcx),%xmm8,%xmm8
|
|
+ vaesenc 48(%rcx),%xmm8,%xmm8
|
|
+ vaesenc 64(%rcx),%xmm8,%xmm8
|
|
+ vaesenc 80(%rcx),%xmm8,%xmm8
|
|
+ vaesenc 96(%rcx),%xmm8,%xmm8
|
|
+ vaesenc 112(%rcx),%xmm8,%xmm8
|
|
+ vaesenc 128(%rcx),%xmm8,%xmm8
|
|
+ vaesenc 144(%rcx),%xmm8,%xmm8
|
|
+ vaesenc 160(%rcx),%xmm8,%xmm8
|
|
+ vaesenc 176(%rcx),%xmm8,%xmm8
|
|
+ vaesenc 192(%rcx),%xmm8,%xmm8
|
|
+ vaesenc 208(%rcx),%xmm8,%xmm8
|
|
+ vaesenclast 224(%rcx),%xmm8,%xmm8  # final round with the key at offset 224
|
|
+ vpxor %xmm0,%xmm8,%xmm8  # XOR the tweak into the cipher output
|
|
+ vmovdqu %xmm8,-16(%rsi)  # overwrite the last full output block; falls through to .L_ret
|
|
+.L_ret_wcpqaDvsGlbjGoe:  # common epilogue
|
|
+ movq 128(%rsp),%rbx  # restore rbx saved by the prologue
|
|
+ xorq %r8,%r8
|
|
+ movq %r8,128(%rsp)  # zero the rbx save slot
|
|
+
|
|
+ vpxorq %zmm0,%zmm0,%zmm0  # clear zmm0; NOTE(review): the tweak stored at 0(%rsp) by the prologue is not wiped here - confirm that is intended
|
|
+ movq %rbp,%rsp  # drop the aligned scratch area
|
|
+ popq %rbp
|
|
+ vzeroupper
|
|
+ .byte 0xf3,0xc3  # bytes F3 C3 = rep ret
|
|
+
|
|
+.L_less_than_128_bytes_wcpqaDvsGlbjGoe:  # short-input path, reached when rdx < 0x80 at entry
|
|
+ vpbroadcastq %r10,%zmm25  # zmm25 = 0x87 in every 64-bit element
|
|
+ cmpq $0x10,%rdx
|
|
+ jb .L_ret_wcpqaDvsGlbjGoe  # fewer than 16 bytes: return without writing any output
|
|
+ vbroadcasti32x4 (%rsp),%zmm0  # initial tweak from 0(%rsp) replicated to all four lanes
|
|
+ vbroadcasti32x4 shufb_15_7(%rip),%zmm8  # byte-shuffle control for the tweak setup in each branch below
|
|
+ movl $0xaa,%r8d
|
|
+ kmovq %r8,%k2  # k2 = 0b10101010: selects the odd (high) qword of each lane
|
|
+ movq %rdx,%r8
|
|
+ andq $0x70,%r8  # r8 = 16 * number of full blocks (0x10..0x70 when 16 <= rdx < 128)
|
|
+ cmpq $0x60,%r8
|
|
+ je .L_num_blocks_is_6_wcpqaDvsGlbjGoe
|
|
+ cmpq $0x50,%r8
|
|
+ je .L_num_blocks_is_5_wcpqaDvsGlbjGoe
|
|
+ cmpq $0x40,%r8
|
|
+ je .L_num_blocks_is_4_wcpqaDvsGlbjGoe
|
|
+ cmpq $0x30,%r8
|
|
+ je .L_num_blocks_is_3_wcpqaDvsGlbjGoe
|
|
+ cmpq $0x20,%r8
|
|
+ je .L_num_blocks_is_2_wcpqaDvsGlbjGoe
|
|
+ cmpq $0x10,%r8
|
|
+ je .L_num_blocks_is_1_wcpqaDvsGlbjGoe  # any other value (0x70) falls through to the 7-block path
|
|
+
|
|
+.L_num_blocks_is_7_wcpqaDvsGlbjGoe:  # short input with seven full blocks: build tweaks, then encrypt; in zmm0 = broadcast tweak, zmm8 = shuffle control
|
|
+ vpshufb %zmm8,%zmm0,%zmm1
|
|
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4  # per-qword variable shifts; counts come from tables outside this view
|
|
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
|
|
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3  # carry-less multiply of each lane low qword by zmm25 (0x87)
|
|
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}  # merge-masked: only the high qwords of zmm4 are updated
|
|
+ vpxord %zmm4,%zmm3,%zmm9  # zmm9 = tweaks for blocks 1..4
|
|
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
|
|
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
|
|
+ vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7
|
|
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
|
|
+ vpxord %zmm5,%zmm7,%zmm10  # zmm10 = tweaks for blocks 5..8
|
|
+ movq $0x0000ffffffffffff,%r8  # 48 low bits set: byte mask covering three 16-byte blocks
|
|
+ kmovq %r8,%k1
|
|
+ vmovdqu8 0(%rdi),%zmm1  # blocks 1..4
|
|
+ vmovdqu8 64(%rdi),%zmm2{%k1}  # blocks 5..7 only; the masked-off top lane is not read from memory
|
|
+
|
|
+ addq $0x70,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%zmm0  # round key at offset 0 replicated to all four lanes
|
|
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1  # imm 0x96 = three-way XOR: data ^ tweak ^ round key
|
|
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0  # thirteen vaesenc rounds follow, keys at offsets 16..208
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 176(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 192(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 208(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 224(%rcx),%zmm0
|
|
+ vaesenclast %zmm0,%zmm1,%zmm1  # final round with the key at offset 224
|
|
+ vaesenclast %zmm0,%zmm2,%zmm2
|
|
+ vpxorq %zmm9,%zmm1,%zmm1  # XOR the tweaks into the cipher output
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+ vmovdqu8 %zmm1,0(%rsi)
|
|
+ vmovdqu8 %zmm2,64(%rsi){%k1}  # store only the 48 bytes of blocks 5..7
|
|
+ addq $0x70,%rsi
|
|
+ vextracti32x4 $0x2,%zmm2,%xmm8  # xmm8 = 7th ciphertext block, input to the stealing code
|
|
+ vextracti32x4 $0x3,%zmm10,%xmm0  # xmm0 = unused 8th tweak, for the stolen block
|
|
+ andq $0xf,%rdx  # rdx = size of the partial tail
|
|
+ je .L_ret_wcpqaDvsGlbjGoe
|
|
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
|
|
+.L_num_blocks_is_6_wcpqaDvsGlbjGoe:  # short input with six full blocks: build tweaks, then encrypt; in zmm0 = broadcast tweak, zmm8 = shuffle control
|
|
+ vpshufb %zmm8,%zmm0,%zmm1
|
|
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4  # per-qword variable shifts; counts come from tables outside this view
|
|
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
|
|
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3  # carry-less multiply of each lane low qword by zmm25 (0x87)
|
|
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}  # merge-masked: only the high qwords of zmm4 are updated
|
|
+ vpxord %zmm4,%zmm3,%zmm9  # zmm9 = tweaks for blocks 1..4
|
|
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
|
|
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
|
|
+ vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7
|
|
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
|
|
+ vpxord %zmm5,%zmm7,%zmm10  # zmm10 = tweaks for blocks 5..8
|
|
+ vmovdqu8 0(%rdi),%zmm1  # blocks 1..4
|
|
+ vmovdqu8 64(%rdi),%ymm2  # blocks 5..6; the rounds below still run at full zmm width on zmm2
|
|
+ addq $96,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%zmm0  # round key at offset 0 replicated to all four lanes
|
|
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1  # imm 0x96 = three-way XOR: data ^ tweak ^ round key
|
|
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0  # thirteen vaesenc rounds follow, keys at offsets 16..208
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 176(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 192(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 208(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 224(%rcx),%zmm0
|
|
+ vaesenclast %zmm0,%zmm1,%zmm1  # final round with the key at offset 224
|
|
+ vaesenclast %zmm0,%zmm2,%zmm2
|
|
+ vpxorq %zmm9,%zmm1,%zmm1  # XOR the tweaks into the cipher output
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+ vmovdqu8 %zmm1,0(%rsi)
|
|
+ vmovdqu8 %ymm2,64(%rsi)  # store only blocks 5..6
|
|
+ addq $96,%rsi
|
|
+
|
|
+ vextracti32x4 $0x1,%ymm2,%xmm8  # xmm8 = 6th ciphertext block, input to the stealing code
|
|
+ vextracti32x4 $0x2,%zmm10,%xmm0  # xmm0 = 7th tweak, for the stolen block
|
|
+ andq $0xf,%rdx  # rdx = size of the partial tail
|
|
+ je .L_ret_wcpqaDvsGlbjGoe
|
|
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
|
|
+.L_num_blocks_is_5_wcpqaDvsGlbjGoe:  # short input with five full blocks: build tweaks, then encrypt; in zmm0 = broadcast tweak, zmm8 = shuffle control
|
|
+ vpshufb %zmm8,%zmm0,%zmm1
|
|
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4  # per-qword variable shifts; counts come from tables outside this view
|
|
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
|
|
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3  # carry-less multiply of each lane low qword by zmm25 (0x87)
|
|
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}  # merge-masked: only the high qwords of zmm4 are updated
|
|
+ vpxord %zmm4,%zmm3,%zmm9  # zmm9 = tweaks for blocks 1..4
|
|
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
|
|
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
|
|
+ vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7
|
|
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
|
|
+ vpxord %zmm5,%zmm7,%zmm10  # zmm10 = tweaks for blocks 5..8
|
|
+ vmovdqu8 0(%rdi),%zmm1  # blocks 1..4
|
|
+ vmovdqu8 64(%rdi),%xmm2  # block 5; the rounds below still run at full zmm width on zmm2
|
|
+ addq $80,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%zmm0  # round key at offset 0 replicated to all four lanes
|
|
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1  # imm 0x96 = three-way XOR: data ^ tweak ^ round key
|
|
+ vpternlogq $0x96,%zmm0,%zmm10,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0  # thirteen vaesenc rounds follow, keys at offsets 16..208
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 176(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 192(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 208(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vaesenc %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 224(%rcx),%zmm0
|
|
+ vaesenclast %zmm0,%zmm1,%zmm1  # final round with the key at offset 224
|
|
+ vaesenclast %zmm0,%zmm2,%zmm2
|
|
+ vpxorq %zmm9,%zmm1,%zmm1  # XOR the tweaks into the cipher output
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+ vmovdqu8 %zmm1,0(%rsi)
|
|
+ vmovdqu8 %xmm2,64(%rsi)  # store only block 5
|
|
+ addq $80,%rsi
|
|
+
|
|
+ vmovdqa %xmm2,%xmm8  # xmm8 = 5th ciphertext block, input to the stealing code
|
|
+ vextracti32x4 $0x1,%zmm10,%xmm0  # xmm0 = 6th tweak, for the stolen block
|
|
+ andq $0xf,%rdx  # rdx = size of the partial tail
|
|
+ je .L_ret_wcpqaDvsGlbjGoe
|
|
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
|
|
+.L_num_blocks_is_4_wcpqaDvsGlbjGoe:
|
|
+ vpshufb %zmm8,%zmm0,%zmm1
|
|
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
|
|
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
|
|
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3
|
|
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
|
|
+ vpxord %zmm4,%zmm3,%zmm9
|
|
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
|
|
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
|
|
+ vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7
|
|
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
|
|
+ vpxord %zmm5,%zmm7,%zmm10
|
|
+ vmovdqu8 0(%rdi),%zmm1
|
|
+ addq $64,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 176(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 192(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 208(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 224(%rcx),%zmm0
|
|
+ vaesenclast %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vmovdqu8 %zmm1,0(%rsi)
|
|
+ addq $64,%rsi
|
|
+ vextracti32x4 $0x3,%zmm1,%xmm8
|
|
+ vmovdqa %xmm10,%xmm0
|
|
+ andq $0xf,%rdx
|
|
+ je .L_ret_wcpqaDvsGlbjGoe
|
|
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
|
|
+.L_num_blocks_is_3_wcpqaDvsGlbjGoe:
|
|
+ vpshufb %zmm8,%zmm0,%zmm1
|
|
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
|
|
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
|
|
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3
|
|
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
|
|
+ vpxord %zmm4,%zmm3,%zmm9
|
|
+ movq $0x0000ffffffffffff,%r8
|
|
+ kmovq %r8,%k1
|
|
+ vmovdqu8 0(%rdi),%zmm1{%k1}
|
|
+ addq $48,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpternlogq $0x96,%zmm0,%zmm9,%zmm1
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 176(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 192(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 208(%rcx),%zmm0
|
|
+ vaesenc %zmm0,%zmm1,%zmm1
|
|
+ vbroadcasti32x4 224(%rcx),%zmm0
|
|
+ vaesenclast %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vmovdqu8 %zmm1,0(%rsi){%k1}
|
|
+ addq $48,%rsi
|
|
+ vextracti32x4 $2,%zmm1,%xmm8
|
|
+ vextracti32x4 $3,%zmm9,%xmm0
|
|
+ andq $0xf,%rdx
|
|
+ je .L_ret_wcpqaDvsGlbjGoe
|
|
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
|
|
+.L_num_blocks_is_2_wcpqaDvsGlbjGoe:
|
|
+ vpshufb %zmm8,%zmm0,%zmm1
|
|
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
|
|
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
|
|
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3
|
|
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
|
|
+ vpxord %zmm4,%zmm3,%zmm9
|
|
+
|
|
+ vmovdqu8 0(%rdi),%ymm1
|
|
+ addq $32,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%ymm0
|
|
+ vpternlogq $0x96,%ymm0,%ymm9,%ymm1
|
|
+ vbroadcasti32x4 16(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 32(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 48(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 64(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 80(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 96(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 112(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 128(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 144(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 160(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 176(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 192(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 208(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 224(%rcx),%ymm0
|
|
+ vaesenclast %ymm0,%ymm1,%ymm1
|
|
+ vpxorq %ymm9,%ymm1,%ymm1
|
|
+ vmovdqu8 %ymm1,0(%rsi)
|
|
+ addq $32,%rsi
|
|
+
|
|
+ vextracti32x4 $1,%ymm1,%xmm8
|
|
+ vextracti32x4 $2,%zmm9,%xmm0
|
|
+ andq $0xf,%rdx
|
|
+ je .L_ret_wcpqaDvsGlbjGoe
|
|
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
|
|
+.L_num_blocks_is_1_wcpqaDvsGlbjGoe:
|
|
+ vpshufb %zmm8,%zmm0,%zmm1
|
|
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
|
|
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
|
|
+ vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3
|
|
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
|
|
+ vpxord %zmm4,%zmm3,%zmm9
|
|
+
|
|
+ vmovdqu8 0(%rdi),%xmm1
|
|
+ addq $16,%rdi
|
|
+ vbroadcasti32x4 (%rcx),%ymm0
|
|
+ vpternlogq $0x96,%ymm0,%ymm9,%ymm1
|
|
+ vbroadcasti32x4 16(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 32(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 48(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 64(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 80(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 96(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 112(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 128(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 144(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 160(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 176(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 192(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 208(%rcx),%ymm0
|
|
+ vaesenc %ymm0,%ymm1,%ymm1
|
|
+ vbroadcasti32x4 224(%rcx),%ymm0
|
|
+ vaesenclast %ymm0,%ymm1,%ymm1
|
|
+ vpxorq %ymm9,%ymm1,%ymm1
|
|
+ vmovdqu8 %xmm1,0(%rsi)
|
|
+ addq $16,%rsi
|
|
+
|
|
+ vmovdqa %xmm1,%xmm8
|
|
+ vextracti32x4 $1,%zmm9,%xmm0
|
|
+ andq $0xf,%rdx
|
|
+ je .L_ret_wcpqaDvsGlbjGoe
|
|
+ jmp .L_steal_cipher_wcpqaDvsGlbjGoe
|
|
+.cfi_endproc
|
|
+.globl aesni_xts_256_decrypt_avx512
|
|
+.hidden aesni_xts_256_decrypt_avx512
|
|
+.type aesni_xts_256_decrypt_avx512,@function
|
|
+.align 32
|
|
+aesni_xts_256_decrypt_avx512:
|
|
+.cfi_startproc
|
|
+.byte 243,15,30,250
|
|
+ pushq %rbp
|
|
+ movq %rsp,%rbp
|
|
+ subq $136,%rsp
|
|
+ andq $0xffffffffffffffc0,%rsp
|
|
+ movq %rbx,128(%rsp)
|
|
+ movq $0x87,%r10
|
|
+ vmovdqu (%r9),%xmm1
|
|
+ vpxor (%r8),%xmm1,%xmm1
|
|
+ vaesenc 16(%r8),%xmm1,%xmm1
|
|
+ vaesenc 32(%r8),%xmm1,%xmm1
|
|
+ vaesenc 48(%r8),%xmm1,%xmm1
|
|
+ vaesenc 64(%r8),%xmm1,%xmm1
|
|
+ vaesenc 80(%r8),%xmm1,%xmm1
|
|
+ vaesenc 96(%r8),%xmm1,%xmm1
|
|
+ vaesenc 112(%r8),%xmm1,%xmm1
|
|
+ vaesenc 128(%r8),%xmm1,%xmm1
|
|
+ vaesenc 144(%r8),%xmm1,%xmm1
|
|
+ vaesenc 160(%r8),%xmm1,%xmm1
|
|
+ vaesenc 176(%r8),%xmm1,%xmm1
|
|
+ vaesenc 192(%r8),%xmm1,%xmm1
|
|
+ vaesenc 208(%r8),%xmm1,%xmm1
|
|
+ vaesenclast 224(%r8),%xmm1,%xmm1
|
|
+ vmovdqa %xmm1,(%rsp)
|
|
+
|
|
+ cmpq $0x80,%rdx
|
|
+ jb .L_less_than_128_bytes_EmbgEptodyewbFa
|
|
+ vpbroadcastq %r10,%zmm25
|
|
+ cmpq $0x100,%rdx
|
|
+ jge .L_start_by16_EmbgEptodyewbFa
|
|
+ jmp .L_start_by8_EmbgEptodyewbFa
|
|
+
|
|
+.L_do_n_blocks_EmbgEptodyewbFa:
|
|
+ cmpq $0x0,%rdx
|
|
+ je .L_ret_EmbgEptodyewbFa
|
|
+ cmpq $0x70,%rdx
|
|
+ jge .L_remaining_num_blocks_is_7_EmbgEptodyewbFa
|
|
+ cmpq $0x60,%rdx
|
|
+ jge .L_remaining_num_blocks_is_6_EmbgEptodyewbFa
|
|
+ cmpq $0x50,%rdx
|
|
+ jge .L_remaining_num_blocks_is_5_EmbgEptodyewbFa
|
|
+ cmpq $0x40,%rdx
|
|
+ jge .L_remaining_num_blocks_is_4_EmbgEptodyewbFa
|
|
+ cmpq $0x30,%rdx
|
|
+ jge .L_remaining_num_blocks_is_3_EmbgEptodyewbFa
|
|
+ cmpq $0x20,%rdx
|
|
+ jge .L_remaining_num_blocks_is_2_EmbgEptodyewbFa
|
|
+ cmpq $0x10,%rdx
|
|
+ jge .L_remaining_num_blocks_is_1_EmbgEptodyewbFa
|
|
+
|
|
+
|
|
+ vmovdqu %xmm5,%xmm1
|
|
+
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 176(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 192(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 208(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 224(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vmovdqu %xmm1,-16(%rsi)
|
|
+ vmovdqa %xmm1,%xmm8
|
|
+
|
|
+
|
|
+ movq $0x1,%r8
|
|
+ kmovq %r8,%k1
|
|
+ vpsllq $0x3f,%xmm9,%xmm13
|
|
+ vpsraq $0x3f,%xmm13,%xmm14
|
|
+ vpandq %xmm25,%xmm14,%xmm5
|
|
+ vpxorq %xmm5,%xmm9,%xmm9{%k1}
|
|
+ vpsrldq $0x8,%xmm9,%xmm10
|
|
+.byte 98, 211, 181, 8, 115, 194, 1
|
|
+ vpslldq $0x8,%xmm13,%xmm13
|
|
+ vpxorq %xmm13,%xmm0,%xmm0
|
|
+ jmp .L_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+.L_remaining_num_blocks_is_7_EmbgEptodyewbFa:
|
|
+ movq $0xffffffffffffffff,%r8
|
|
+ shrq $0x10,%r8
|
|
+ kmovq %r8,%k1
|
|
+ vmovdqu8 (%rdi),%zmm1
|
|
+ vmovdqu8 64(%rdi),%zmm2{%k1}
|
|
+ addq $0x70,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_7_remain_EmbgEptodyewbFa
|
|
+ vextracti32x4 $0x2,%zmm10,%xmm12
|
|
+ vextracti32x4 $0x3,%zmm10,%xmm13
|
|
+ vinserti32x4 $0x2,%xmm13,%zmm10,%zmm10
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpxorq %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm0,%zmm2,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 176(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 192(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 208(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 224(%rcx),%zmm0
|
|
+ vaesdeclast %zmm0,%zmm1,%zmm1
|
|
+ vaesdeclast %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vmovdqa32 %zmm15,%zmm9
|
|
+ vmovdqa32 %zmm16,%zmm10
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ vmovdqu8 %zmm2,64(%rsi){%k1}
|
|
+ addq $0x70,%rsi
|
|
+ vextracti32x4 $0x2,%zmm2,%xmm8
|
|
+ vmovdqa %xmm12,%xmm0
|
|
+ jmp .L_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+.L_done_7_remain_EmbgEptodyewbFa:
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpxorq %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm0,%zmm2,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 176(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 192(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 208(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 224(%rcx),%zmm0
|
|
+ vaesdeclast %zmm0,%zmm1,%zmm1
|
|
+ vaesdeclast %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vmovdqa32 %zmm15,%zmm9
|
|
+ vmovdqa32 %zmm16,%zmm10
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ vmovdqu8 %zmm2,64(%rsi){%k1}
|
|
+ jmp .L_ret_EmbgEptodyewbFa
|
|
+
|
|
+.L_remaining_num_blocks_is_6_EmbgEptodyewbFa:
|
|
+ vmovdqu8 (%rdi),%zmm1
|
|
+ vmovdqu8 64(%rdi),%ymm2
|
|
+ addq $0x60,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_6_remain_EmbgEptodyewbFa
|
|
+ vextracti32x4 $0x1,%zmm10,%xmm12
|
|
+ vextracti32x4 $0x2,%zmm10,%xmm13
|
|
+ vinserti32x4 $0x1,%xmm13,%zmm10,%zmm10
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpxorq %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm0,%zmm2,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 176(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 192(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 208(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 224(%rcx),%zmm0
|
|
+ vaesdeclast %zmm0,%zmm1,%zmm1
|
|
+ vaesdeclast %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vmovdqa32 %zmm15,%zmm9
|
|
+ vmovdqa32 %zmm16,%zmm10
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ vmovdqu8 %ymm2,64(%rsi)
|
|
+ addq $0x60,%rsi
|
|
+ vextracti32x4 $0x1,%zmm2,%xmm8
|
|
+ vmovdqa %xmm12,%xmm0
|
|
+ jmp .L_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+.L_done_6_remain_EmbgEptodyewbFa:
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpxorq %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm0,%zmm2,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 176(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 192(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 208(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 224(%rcx),%zmm0
|
|
+ vaesdeclast %zmm0,%zmm1,%zmm1
|
|
+ vaesdeclast %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vmovdqa32 %zmm15,%zmm9
|
|
+ vmovdqa32 %zmm16,%zmm10
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ vmovdqu8 %ymm2,64(%rsi)
|
|
+ jmp .L_ret_EmbgEptodyewbFa
|
|
+
|
|
+.L_remaining_num_blocks_is_5_EmbgEptodyewbFa:
|
|
+ vmovdqu8 (%rdi),%zmm1
|
|
+ vmovdqu 64(%rdi),%xmm2
|
|
+ addq $0x50,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_5_remain_EmbgEptodyewbFa
|
|
+ vmovdqa %xmm10,%xmm12
|
|
+ vextracti32x4 $0x1,%zmm10,%xmm10
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpxorq %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm0,%zmm2,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 176(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 192(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 208(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 224(%rcx),%zmm0
|
|
+ vaesdeclast %zmm0,%zmm1,%zmm1
|
|
+ vaesdeclast %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vmovdqa32 %zmm15,%zmm9
|
|
+ vmovdqa32 %zmm16,%zmm10
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ vmovdqu %xmm2,64(%rsi)
|
|
+ addq $0x50,%rsi
|
|
+ vmovdqa %xmm2,%xmm8
|
|
+ vmovdqa %xmm12,%xmm0
|
|
+ jmp .L_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+.L_done_5_remain_EmbgEptodyewbFa:
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpxorq %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm0,%zmm2,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 176(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 192(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 208(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 224(%rcx),%zmm0
|
|
+ vaesdeclast %zmm0,%zmm1,%zmm1
|
|
+ vaesdeclast %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vmovdqa32 %zmm15,%zmm9
|
|
+ vmovdqa32 %zmm16,%zmm10
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ vmovdqu8 %xmm2,64(%rsi)
|
|
+ jmp .L_ret_EmbgEptodyewbFa
|
|
+
|
|
+.L_remaining_num_blocks_is_4_EmbgEptodyewbFa:
|
|
+ vmovdqu8 (%rdi),%zmm1
|
|
+ addq $0x40,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_4_remain_EmbgEptodyewbFa
|
|
+ vextracti32x4 $0x3,%zmm9,%xmm12
|
|
+ vinserti32x4 $0x3,%xmm10,%zmm9,%zmm9
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpxorq %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm0,%zmm2,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 176(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 192(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 208(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 224(%rcx),%zmm0
|
|
+ vaesdeclast %zmm0,%zmm1,%zmm1
|
|
+ vaesdeclast %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vmovdqa32 %zmm15,%zmm9
|
|
+ vmovdqa32 %zmm16,%zmm10
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ addq $0x40,%rsi
|
|
+ vextracti32x4 $0x3,%zmm1,%xmm8
|
|
+ vmovdqa %xmm12,%xmm0
|
|
+ jmp .L_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+.L_done_4_remain_EmbgEptodyewbFa:
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpxorq %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm0,%zmm2,%zmm2
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 176(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 192(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 208(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 224(%rcx),%zmm0
|
|
+ vaesdeclast %zmm0,%zmm1,%zmm1
|
|
+ vaesdeclast %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vmovdqa32 %zmm15,%zmm9
|
|
+ vmovdqa32 %zmm16,%zmm10
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ jmp .L_ret_EmbgEptodyewbFa
|
|
+
|
|
+.L_remaining_num_blocks_is_3_EmbgEptodyewbFa:
|
|
+ vmovdqu (%rdi),%xmm1
|
|
+ vmovdqu 16(%rdi),%xmm2
|
|
+ vmovdqu 32(%rdi),%xmm3
|
|
+ addq $0x30,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_3_remain_EmbgEptodyewbFa
|
|
+ vextracti32x4 $0x2,%zmm9,%xmm13
|
|
+ vextracti32x4 $0x1,%zmm9,%xmm10
|
|
+ vextracti32x4 $0x3,%zmm9,%xmm11
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 176(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 192(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 208(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 224(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vaesdeclast %xmm0,%xmm3,%xmm3
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ vmovdqu %xmm3,32(%rsi)
|
|
+ addq $0x30,%rsi
|
|
+ vmovdqa %xmm3,%xmm8
|
|
+ vmovdqa %xmm13,%xmm0
|
|
+ jmp .L_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+.L_done_3_remain_EmbgEptodyewbFa:
|
|
+ vextracti32x4 $0x1,%zmm9,%xmm10
|
|
+ vextracti32x4 $0x2,%zmm9,%xmm11
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 176(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 192(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 208(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 224(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vaesdeclast %xmm0,%xmm3,%xmm3
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ vmovdqu %xmm3,32(%rsi)
|
|
+ jmp .L_ret_EmbgEptodyewbFa
|
|
+
|
|
+.L_remaining_num_blocks_is_2_EmbgEptodyewbFa:
|
|
+ vmovdqu (%rdi),%xmm1
|
|
+ vmovdqu 16(%rdi),%xmm2
|
|
+ addq $0x20,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_2_remain_EmbgEptodyewbFa
|
|
+ vextracti32x4 $0x2,%zmm9,%xmm10
|
|
+ vextracti32x4 $0x1,%zmm9,%xmm12
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 176(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 192(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 208(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 224(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ addq $0x20,%rsi
|
|
+ vmovdqa %xmm2,%xmm8
|
|
+ vmovdqa %xmm12,%xmm0
|
|
+ jmp .L_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+.L_done_2_remain_EmbgEptodyewbFa:
|
|
+ vextracti32x4 $0x1,%zmm9,%xmm10
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 176(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 192(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 208(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 224(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ jmp .L_ret_EmbgEptodyewbFa
|
|
+
|
|
+.L_remaining_num_blocks_is_1_EmbgEptodyewbFa:
|
|
+ vmovdqu (%rdi),%xmm1
|
|
+ addq $0x10,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_1_remain_EmbgEptodyewbFa
|
|
+ vextracti32x4 $0x1,%zmm9,%xmm11
|
|
+ vpxor %xmm11,%xmm1,%xmm1
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 176(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 192(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 208(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 224(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm11,%xmm1,%xmm1
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ addq $0x10,%rsi
|
|
+ vmovdqa %xmm1,%xmm8
|
|
+ vmovdqa %xmm9,%xmm0
|
|
+ jmp .L_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+.L_done_1_remain_EmbgEptodyewbFa:
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 176(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 192(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 208(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 224(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ jmp .L_ret_EmbgEptodyewbFa
|
|
+
|
|
+.L_start_by16_EmbgEptodyewbFa:
|
|
+ vbroadcasti32x4 (%rsp),%zmm0
|
|
+ vbroadcasti32x4 shufb_15_7(%rip),%zmm8
|
|
+ movq $0xaa,%r8
|
|
+ kmovq %r8,%k2
|
|
+
|
|
+
|
|
+ vpshufb %zmm8,%zmm0,%zmm1
|
|
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
|
|
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3
|
|
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
|
|
+ vpxord %zmm4,%zmm3,%zmm9
|
|
+
|
|
+
|
|
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
|
|
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7
|
|
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
|
|
+ vpxord %zmm5,%zmm7,%zmm10
|
|
+
|
|
+
|
|
+ vpsrldq $0xf,%zmm9,%zmm13
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm9,%zmm11
|
|
+ vpxord %zmm14,%zmm11,%zmm11
|
|
+
|
|
+ vpsrldq $0xf,%zmm10,%zmm15
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm15,%zmm16
|
|
+ vpslldq $0x1,%zmm10,%zmm12
|
|
+ vpxord %zmm16,%zmm12,%zmm12
|
|
+
|
|
+.L_main_loop_run_16_EmbgEptodyewbFa:
|
|
+ vmovdqu8 (%rdi),%zmm1
|
|
+ vmovdqu8 64(%rdi),%zmm2
|
|
+ vmovdqu8 128(%rdi),%zmm3
|
|
+ vmovdqu8 192(%rdi),%zmm4
|
|
+ vmovdqu8 240(%rdi),%xmm5
|
|
+ addq $0x100,%rdi
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+ vpxorq %zmm11,%zmm3,%zmm3
|
|
+ vpxorq %zmm12,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpxorq %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm0,%zmm2,%zmm2
|
|
+ vpxorq %zmm0,%zmm3,%zmm3
|
|
+ vpxorq %zmm0,%zmm4,%zmm4
|
|
+ vpsrldq $0xf,%zmm11,%zmm13
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm11,%zmm15
|
|
+ vpxord %zmm14,%zmm15,%zmm15
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+ vaesdec %zmm0,%zmm3,%zmm3
|
|
+ vaesdec %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+ vaesdec %zmm0,%zmm3,%zmm3
|
|
+ vaesdec %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+ vaesdec %zmm0,%zmm3,%zmm3
|
|
+ vaesdec %zmm0,%zmm4,%zmm4
|
|
+ vpsrldq $0xf,%zmm12,%zmm13
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm12,%zmm16
|
|
+ vpxord %zmm14,%zmm16,%zmm16
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+ vaesdec %zmm0,%zmm3,%zmm3
|
|
+ vaesdec %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+ vaesdec %zmm0,%zmm3,%zmm3
|
|
+ vaesdec %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+ vaesdec %zmm0,%zmm3,%zmm3
|
|
+ vaesdec %zmm0,%zmm4,%zmm4
|
|
+ vpsrldq $0xf,%zmm15,%zmm13
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm15,%zmm17
|
|
+ vpxord %zmm14,%zmm17,%zmm17
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+ vaesdec %zmm0,%zmm3,%zmm3
|
|
+ vaesdec %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+ vaesdec %zmm0,%zmm3,%zmm3
|
|
+ vaesdec %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+ vaesdec %zmm0,%zmm3,%zmm3
|
|
+ vaesdec %zmm0,%zmm4,%zmm4
|
|
+ vpsrldq $0xf,%zmm16,%zmm13
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm16,%zmm18
|
|
+ vpxord %zmm14,%zmm18,%zmm18
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+ vaesdec %zmm0,%zmm3,%zmm3
|
|
+ vaesdec %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 176(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+ vaesdec %zmm0,%zmm3,%zmm3
|
|
+ vaesdec %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 192(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+ vaesdec %zmm0,%zmm3,%zmm3
|
|
+ vaesdec %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 208(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+ vaesdec %zmm0,%zmm3,%zmm3
|
|
+ vaesdec %zmm0,%zmm4,%zmm4
|
|
+ vbroadcasti32x4 224(%rcx),%zmm0
|
|
+ vaesdeclast %zmm0,%zmm1,%zmm1
|
|
+ vaesdeclast %zmm0,%zmm2,%zmm2
|
|
+ vaesdeclast %zmm0,%zmm3,%zmm3
|
|
+ vaesdeclast %zmm0,%zmm4,%zmm4
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+ vpxorq %zmm11,%zmm3,%zmm3
|
|
+ vpxorq %zmm12,%zmm4,%zmm4
|
|
+
|
|
+ vmovdqa32 %zmm15,%zmm9
|
|
+ vmovdqa32 %zmm16,%zmm10
|
|
+ vmovdqa32 %zmm17,%zmm11
|
|
+ vmovdqa32 %zmm18,%zmm12
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ vmovdqu8 %zmm2,64(%rsi)
|
|
+ vmovdqu8 %zmm3,128(%rsi)
|
|
+ vmovdqu8 %zmm4,192(%rsi)
|
|
+ addq $0x100,%rsi
|
|
+ subq $0x100,%rdx
|
|
+ cmpq $0x100,%rdx
|
|
+ jge .L_main_loop_run_16_EmbgEptodyewbFa
|
|
+
|
|
+ cmpq $0x80,%rdx
|
|
+ jge .L_main_loop_run_8_EmbgEptodyewbFa
|
|
+ jmp .L_do_n_blocks_EmbgEptodyewbFa
|
|
+
|
|
+.L_start_by8_EmbgEptodyewbFa:
|
|
+
|
|
+ vbroadcasti32x4 (%rsp),%zmm0
|
|
+ vbroadcasti32x4 shufb_15_7(%rip),%zmm8
|
|
+ movq $0xaa,%r8
|
|
+ kmovq %r8,%k2
|
|
+
|
|
+
|
|
+ vpshufb %zmm8,%zmm0,%zmm1
|
|
+ vpsllvq const_dq3210(%rip),%zmm0,%zmm4
|
|
+ vpsrlvq const_dq5678(%rip),%zmm1,%zmm2
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3
|
|
+ vpxorq %zmm2,%zmm4,%zmm4{%k2}
|
|
+ vpxord %zmm4,%zmm3,%zmm9
|
|
+
|
|
+
|
|
+ vpsllvq const_dq7654(%rip),%zmm0,%zmm5
|
|
+ vpsrlvq const_dq1234(%rip),%zmm1,%zmm6
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7
|
|
+ vpxorq %zmm6,%zmm5,%zmm5{%k2}
|
|
+ vpxord %zmm5,%zmm7,%zmm10
|
|
+
|
|
+.L_main_loop_run_8_EmbgEptodyewbFa:
|
|
+ vmovdqu8 (%rdi),%zmm1
|
|
+ vmovdqu8 64(%rdi),%zmm2
|
|
+ vmovdqu8 112(%rdi),%xmm5
|
|
+ addq $0x80,%rdi
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 (%rcx),%zmm0
|
|
+ vpxorq %zmm0,%zmm1,%zmm1
|
|
+ vpxorq %zmm0,%zmm2,%zmm2
|
|
+ vpsrldq $0xf,%zmm9,%zmm13
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm9,%zmm15
|
|
+ vpxord %zmm14,%zmm15,%zmm15
|
|
+ vbroadcasti32x4 16(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 32(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 48(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+ vpsrldq $0xf,%zmm10,%zmm13
|
|
+ vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14
|
|
+ vpslldq $0x1,%zmm10,%zmm16
|
|
+ vpxord %zmm14,%zmm16,%zmm16
|
|
+
|
|
+ vbroadcasti32x4 64(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 80(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 96(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 112(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 128(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 144(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 160(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 176(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 192(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 208(%rcx),%zmm0
|
|
+ vaesdec %zmm0,%zmm1,%zmm1
|
|
+ vaesdec %zmm0,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 224(%rcx),%zmm0
|
|
+ vaesdeclast %zmm0,%zmm1,%zmm1
|
|
+ vaesdeclast %zmm0,%zmm2,%zmm2
|
|
+
|
|
+ vpxorq %zmm9,%zmm1,%zmm1
|
|
+ vpxorq %zmm10,%zmm2,%zmm2
|
|
+
|
|
+
|
|
+ vmovdqa32 %zmm15,%zmm9
|
|
+ vmovdqa32 %zmm16,%zmm10
|
|
+ vmovdqu8 %zmm1,(%rsi)
|
|
+ vmovdqu8 %zmm2,64(%rsi)
|
|
+ addq $0x80,%rsi
|
|
+ subq $0x80,%rdx
|
|
+ cmpq $0x80,%rdx
|
|
+ jge .L_main_loop_run_8_EmbgEptodyewbFa
|
|
+ jmp .L_do_n_blocks_EmbgEptodyewbFa
|
|
+
|
|
+.L_steal_cipher_EmbgEptodyewbFa:
|
|
+
|
|
+ vmovdqa %xmm8,%xmm2
|
|
+
|
|
+
|
|
+ leaq vpshufb_shf_table(%rip),%rax
|
|
+ vmovdqu (%rax,%rdx,1),%xmm10
|
|
+ vpshufb %xmm10,%xmm8,%xmm8
|
|
+
|
|
+
|
|
+ vmovdqu -16(%rdi,%rdx,1),%xmm3
|
|
+ vmovdqu %xmm8,-16(%rsi,%rdx,1)
|
|
+
|
|
+
|
|
+ leaq vpshufb_shf_table(%rip),%rax
|
|
+ addq $16,%rax
|
|
+ subq %rdx,%rax
|
|
+ vmovdqu (%rax),%xmm10
|
|
+ vpxor mask1(%rip),%xmm10,%xmm10
|
|
+ vpshufb %xmm10,%xmm3,%xmm3
|
|
+
|
|
+ vpblendvb %xmm10,%xmm2,%xmm3,%xmm3
|
|
+
|
|
+
|
|
+ vpxor %xmm0,%xmm3,%xmm8
|
|
+
|
|
+
|
|
+ vpxor (%rcx),%xmm8,%xmm8
|
|
+ vaesdec 16(%rcx),%xmm8,%xmm8
|
|
+ vaesdec 32(%rcx),%xmm8,%xmm8
|
|
+ vaesdec 48(%rcx),%xmm8,%xmm8
|
|
+ vaesdec 64(%rcx),%xmm8,%xmm8
|
|
+ vaesdec 80(%rcx),%xmm8,%xmm8
|
|
+ vaesdec 96(%rcx),%xmm8,%xmm8
|
|
+ vaesdec 112(%rcx),%xmm8,%xmm8
|
|
+ vaesdec 128(%rcx),%xmm8,%xmm8
|
|
+ vaesdec 144(%rcx),%xmm8,%xmm8
|
|
+ vaesdec 160(%rcx),%xmm8,%xmm8
|
|
+ vaesdec 176(%rcx),%xmm8,%xmm8
|
|
+ vaesdec 192(%rcx),%xmm8,%xmm8
|
|
+ vaesdec 208(%rcx),%xmm8,%xmm8
|
|
+ vaesdeclast 224(%rcx),%xmm8,%xmm8
|
|
+
|
|
+ vpxor %xmm0,%xmm8,%xmm8
|
|
+
|
|
+.L_done_EmbgEptodyewbFa:
|
|
+
|
|
+ vmovdqu %xmm8,-16(%rsi)
|
|
+.L_ret_EmbgEptodyewbFa:
|
|
+ movq 128(%rsp),%rbx
|
|
+ xorq %r8,%r8
|
|
+ movq %r8,128(%rsp)
|
|
+
|
|
+ vpxorq %zmm0,%zmm0,%zmm0
|
|
+ movq %rbp,%rsp
|
|
+ popq %rbp
|
|
+ vzeroupper
|
|
+ .byte 0xf3,0xc3
|
|
+
|
|
+.L_less_than_128_bytes_EmbgEptodyewbFa:
|
|
+ cmpq $0x10,%rdx
|
|
+ jb .L_ret_EmbgEptodyewbFa
|
|
+
|
|
+ movq %rdx,%r8
|
|
+ andq $0x70,%r8
|
|
+ cmpq $0x60,%r8
|
|
+ je .L_num_blocks_is_6_EmbgEptodyewbFa
|
|
+ cmpq $0x50,%r8
|
|
+ je .L_num_blocks_is_5_EmbgEptodyewbFa
|
|
+ cmpq $0x40,%r8
|
|
+ je .L_num_blocks_is_4_EmbgEptodyewbFa
|
|
+ cmpq $0x30,%r8
|
|
+ je .L_num_blocks_is_3_EmbgEptodyewbFa
|
|
+ cmpq $0x20,%r8
|
|
+ je .L_num_blocks_is_2_EmbgEptodyewbFa
|
|
+ cmpq $0x10,%r8
|
|
+ je .L_num_blocks_is_1_EmbgEptodyewbFa
|
|
+
|
|
+.L_num_blocks_is_7_EmbgEptodyewbFa:
|
|
+ vmovdqa 0(%rsp),%xmm9
|
|
+ movq 0(%rsp),%rax
|
|
+ movq 8(%rsp),%rbx
|
|
+ vmovdqu 0(%rdi),%xmm1
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,16 + 8(%rsp)
|
|
+ vmovdqa 16(%rsp),%xmm10
|
|
+ vmovdqu 16(%rdi),%xmm2
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,32(%rsp)
|
|
+ movq %rbx,32 + 8(%rsp)
|
|
+ vmovdqa 32(%rsp),%xmm11
|
|
+ vmovdqu 32(%rdi),%xmm3
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,48(%rsp)
|
|
+ movq %rbx,48 + 8(%rsp)
|
|
+ vmovdqa 48(%rsp),%xmm12
|
|
+ vmovdqu 48(%rdi),%xmm4
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,64(%rsp)
|
|
+ movq %rbx,64 + 8(%rsp)
|
|
+ vmovdqa 64(%rsp),%xmm13
|
|
+ vmovdqu 64(%rdi),%xmm5
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,80(%rsp)
|
|
+ movq %rbx,80 + 8(%rsp)
|
|
+ vmovdqa 80(%rsp),%xmm14
|
|
+ vmovdqu 80(%rdi),%xmm6
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,96(%rsp)
|
|
+ movq %rbx,96 + 8(%rsp)
|
|
+ vmovdqa 96(%rsp),%xmm15
|
|
+ vmovdqu 96(%rdi),%xmm7
|
|
+ addq $0x70,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_7_EmbgEptodyewbFa
|
|
+
|
|
+.L_steal_cipher_7_EmbgEptodyewbFa:
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,24(%rsp)
|
|
+ vmovdqa64 %xmm15,%xmm16
|
|
+ vmovdqa 16(%rsp),%xmm15
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vpxor %xmm13,%xmm5,%xmm5
|
|
+ vpxor %xmm14,%xmm6,%xmm6
|
|
+ vpxor %xmm15,%xmm7,%xmm7
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm0,%xmm3,%xmm3
|
|
+ vpxor %xmm0,%xmm4,%xmm4
|
|
+ vpxor %xmm0,%xmm5,%xmm5
|
|
+ vpxor %xmm0,%xmm6,%xmm6
|
|
+ vpxor %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 176(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 192(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 208(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 224(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vaesdeclast %xmm0,%xmm3,%xmm3
|
|
+ vaesdeclast %xmm0,%xmm4,%xmm4
|
|
+ vaesdeclast %xmm0,%xmm5,%xmm5
|
|
+ vaesdeclast %xmm0,%xmm6,%xmm6
|
|
+ vaesdeclast %xmm0,%xmm7,%xmm7
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vpxor %xmm13,%xmm5,%xmm5
|
|
+ vpxor %xmm14,%xmm6,%xmm6
|
|
+ vpxor %xmm15,%xmm7,%xmm7
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ vmovdqu %xmm3,32(%rsi)
|
|
+ vmovdqu %xmm4,48(%rsi)
|
|
+ vmovdqu %xmm5,64(%rsi)
|
|
+ vmovdqu %xmm6,80(%rsi)
|
|
+ addq $0x70,%rsi
|
|
+ vmovdqa64 %xmm16,%xmm0
|
|
+ vmovdqa %xmm7,%xmm8
|
|
+ jmp .L_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+.L_done_7_EmbgEptodyewbFa:
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vpxor %xmm13,%xmm5,%xmm5
|
|
+ vpxor %xmm14,%xmm6,%xmm6
|
|
+ vpxor %xmm15,%xmm7,%xmm7
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm0,%xmm3,%xmm3
|
|
+ vpxor %xmm0,%xmm4,%xmm4
|
|
+ vpxor %xmm0,%xmm5,%xmm5
|
|
+ vpxor %xmm0,%xmm6,%xmm6
|
|
+ vpxor %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 176(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 192(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 208(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vaesdec %xmm0,%xmm7,%xmm7
|
|
+ vmovdqu 224(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vaesdeclast %xmm0,%xmm3,%xmm3
|
|
+ vaesdeclast %xmm0,%xmm4,%xmm4
|
|
+ vaesdeclast %xmm0,%xmm5,%xmm5
|
|
+ vaesdeclast %xmm0,%xmm6,%xmm6
|
|
+ vaesdeclast %xmm0,%xmm7,%xmm7
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vpxor %xmm13,%xmm5,%xmm5
|
|
+ vpxor %xmm14,%xmm6,%xmm6
|
|
+ vpxor %xmm15,%xmm7,%xmm7
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ vmovdqu %xmm3,32(%rsi)
|
|
+ vmovdqu %xmm4,48(%rsi)
|
|
+ vmovdqu %xmm5,64(%rsi)
|
|
+ vmovdqu %xmm6,80(%rsi)
|
|
+ addq $0x70,%rsi
|
|
+ vmovdqa %xmm7,%xmm8
|
|
+ jmp .L_done_EmbgEptodyewbFa
|
|
+
|
|
+.L_num_blocks_is_6_EmbgEptodyewbFa:
|
|
+ vmovdqa 0(%rsp),%xmm9
|
|
+ movq 0(%rsp),%rax
|
|
+ movq 8(%rsp),%rbx
|
|
+ vmovdqu 0(%rdi),%xmm1
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,16 + 8(%rsp)
|
|
+ vmovdqa 16(%rsp),%xmm10
|
|
+ vmovdqu 16(%rdi),%xmm2
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,32(%rsp)
|
|
+ movq %rbx,32 + 8(%rsp)
|
|
+ vmovdqa 32(%rsp),%xmm11
|
|
+ vmovdqu 32(%rdi),%xmm3
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,48(%rsp)
|
|
+ movq %rbx,48 + 8(%rsp)
|
|
+ vmovdqa 48(%rsp),%xmm12
|
|
+ vmovdqu 48(%rdi),%xmm4
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,64(%rsp)
|
|
+ movq %rbx,64 + 8(%rsp)
|
|
+ vmovdqa 64(%rsp),%xmm13
|
|
+ vmovdqu 64(%rdi),%xmm5
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,80(%rsp)
|
|
+ movq %rbx,80 + 8(%rsp)
|
|
+ vmovdqa 80(%rsp),%xmm14
|
|
+ vmovdqu 80(%rdi),%xmm6
|
|
+ addq $0x60,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_6_EmbgEptodyewbFa
|
|
+
|
|
+.L_steal_cipher_6_EmbgEptodyewbFa:
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,24(%rsp)
|
|
+ vmovdqa64 %xmm14,%xmm15
|
|
+ vmovdqa 16(%rsp),%xmm14
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vpxor %xmm13,%xmm5,%xmm5
|
|
+ vpxor %xmm14,%xmm6,%xmm6
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm0,%xmm3,%xmm3
|
|
+ vpxor %xmm0,%xmm4,%xmm4
|
|
+ vpxor %xmm0,%xmm5,%xmm5
|
|
+ vpxor %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 176(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 192(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 208(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 224(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vaesdeclast %xmm0,%xmm3,%xmm3
|
|
+ vaesdeclast %xmm0,%xmm4,%xmm4
|
|
+ vaesdeclast %xmm0,%xmm5,%xmm5
|
|
+ vaesdeclast %xmm0,%xmm6,%xmm6
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vpxor %xmm13,%xmm5,%xmm5
|
|
+ vpxor %xmm14,%xmm6,%xmm6
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ vmovdqu %xmm3,32(%rsi)
|
|
+ vmovdqu %xmm4,48(%rsi)
|
|
+ vmovdqu %xmm5,64(%rsi)
|
|
+ addq $0x60,%rsi
|
|
+ vmovdqa %xmm15,%xmm0
|
|
+ vmovdqa %xmm6,%xmm8
|
|
+ jmp .L_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+.L_done_6_EmbgEptodyewbFa:
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vpxor %xmm13,%xmm5,%xmm5
|
|
+ vpxor %xmm14,%xmm6,%xmm6
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm0,%xmm3,%xmm3
|
|
+ vpxor %xmm0,%xmm4,%xmm4
|
|
+ vpxor %xmm0,%xmm5,%xmm5
|
|
+ vpxor %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 176(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 192(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 208(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vaesdec %xmm0,%xmm6,%xmm6
|
|
+ vmovdqu 224(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vaesdeclast %xmm0,%xmm3,%xmm3
|
|
+ vaesdeclast %xmm0,%xmm4,%xmm4
|
|
+ vaesdeclast %xmm0,%xmm5,%xmm5
|
|
+ vaesdeclast %xmm0,%xmm6,%xmm6
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vpxor %xmm13,%xmm5,%xmm5
|
|
+ vpxor %xmm14,%xmm6,%xmm6
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ vmovdqu %xmm3,32(%rsi)
|
|
+ vmovdqu %xmm4,48(%rsi)
|
|
+ vmovdqu %xmm5,64(%rsi)
|
|
+ addq $0x60,%rsi
|
|
+ vmovdqa %xmm6,%xmm8
|
|
+ jmp .L_done_EmbgEptodyewbFa
|
|
+
|
|
+.L_num_blocks_is_5_EmbgEptodyewbFa:
|
|
+ vmovdqa 0(%rsp),%xmm9
|
|
+ movq 0(%rsp),%rax
|
|
+ movq 8(%rsp),%rbx
|
|
+ vmovdqu 0(%rdi),%xmm1
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,16 + 8(%rsp)
|
|
+ vmovdqa 16(%rsp),%xmm10
|
|
+ vmovdqu 16(%rdi),%xmm2
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,32(%rsp)
|
|
+ movq %rbx,32 + 8(%rsp)
|
|
+ vmovdqa 32(%rsp),%xmm11
|
|
+ vmovdqu 32(%rdi),%xmm3
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,48(%rsp)
|
|
+ movq %rbx,48 + 8(%rsp)
|
|
+ vmovdqa 48(%rsp),%xmm12
|
|
+ vmovdqu 48(%rdi),%xmm4
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,64(%rsp)
|
|
+ movq %rbx,64 + 8(%rsp)
|
|
+ vmovdqa 64(%rsp),%xmm13
|
|
+ vmovdqu 64(%rdi),%xmm5
|
|
+ addq $0x50,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_5_EmbgEptodyewbFa
|
|
+
|
|
+.L_steal_cipher_5_EmbgEptodyewbFa:
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,24(%rsp)
|
|
+ vmovdqa64 %xmm13,%xmm14
|
|
+ vmovdqa 16(%rsp),%xmm13
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vpxor %xmm13,%xmm5,%xmm5
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm0,%xmm3,%xmm3
|
|
+ vpxor %xmm0,%xmm4,%xmm4
|
|
+ vpxor %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 176(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 192(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 208(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 224(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vaesdeclast %xmm0,%xmm3,%xmm3
|
|
+ vaesdeclast %xmm0,%xmm4,%xmm4
|
|
+ vaesdeclast %xmm0,%xmm5,%xmm5
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vpxor %xmm13,%xmm5,%xmm5
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ vmovdqu %xmm3,32(%rsi)
|
|
+ vmovdqu %xmm4,48(%rsi)
|
|
+ addq $0x50,%rsi
|
|
+ vmovdqa %xmm14,%xmm0
|
|
+ vmovdqa %xmm5,%xmm8
|
|
+ jmp .L_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+.L_done_5_EmbgEptodyewbFa:
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vpxor %xmm13,%xmm5,%xmm5
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm0,%xmm3,%xmm3
|
|
+ vpxor %xmm0,%xmm4,%xmm4
|
|
+ vpxor %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 176(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 192(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 208(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vaesdec %xmm0,%xmm5,%xmm5
|
|
+ vmovdqu 224(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vaesdeclast %xmm0,%xmm3,%xmm3
|
|
+ vaesdeclast %xmm0,%xmm4,%xmm4
|
|
+ vaesdeclast %xmm0,%xmm5,%xmm5
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vpxor %xmm13,%xmm5,%xmm5
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ vmovdqu %xmm3,32(%rsi)
|
|
+ vmovdqu %xmm4,48(%rsi)
|
|
+ addq $0x50,%rsi
|
|
+ vmovdqa %xmm5,%xmm8
|
|
+ jmp .L_done_EmbgEptodyewbFa
|
|
+
|
|
+.L_num_blocks_is_4_EmbgEptodyewbFa:
|
|
+ vmovdqa 0(%rsp),%xmm9
|
|
+ movq 0(%rsp),%rax
|
|
+ movq 8(%rsp),%rbx
|
|
+ vmovdqu 0(%rdi),%xmm1
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,16 + 8(%rsp)
|
|
+ vmovdqa 16(%rsp),%xmm10
|
|
+ vmovdqu 16(%rdi),%xmm2
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,32(%rsp)
|
|
+ movq %rbx,32 + 8(%rsp)
|
|
+ vmovdqa 32(%rsp),%xmm11
|
|
+ vmovdqu 32(%rdi),%xmm3
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,48(%rsp)
|
|
+ movq %rbx,48 + 8(%rsp)
|
|
+ vmovdqa 48(%rsp),%xmm12
|
|
+ vmovdqu 48(%rdi),%xmm4
|
|
+ addq $0x40,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_4_EmbgEptodyewbFa
|
|
+
|
|
+.L_steal_cipher_4_EmbgEptodyewbFa:
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,24(%rsp)
|
|
+ vmovdqa64 %xmm12,%xmm13
|
|
+ vmovdqa 16(%rsp),%xmm12
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm0,%xmm3,%xmm3
|
|
+ vpxor %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 176(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 192(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 208(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 224(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vaesdeclast %xmm0,%xmm3,%xmm3
|
|
+ vaesdeclast %xmm0,%xmm4,%xmm4
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ vmovdqu %xmm3,32(%rsi)
|
|
+ addq $0x40,%rsi
|
|
+ vmovdqa %xmm13,%xmm0
|
|
+ vmovdqa %xmm4,%xmm8
|
|
+ jmp .L_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+.L_done_4_EmbgEptodyewbFa:
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm0,%xmm3,%xmm3
|
|
+ vpxor %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 176(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 192(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 208(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vaesdec %xmm0,%xmm4,%xmm4
|
|
+ vmovdqu 224(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vaesdeclast %xmm0,%xmm3,%xmm3
|
|
+ vaesdeclast %xmm0,%xmm4,%xmm4
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vpxor %xmm12,%xmm4,%xmm4
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ vmovdqu %xmm3,32(%rsi)
|
|
+ addq $0x40,%rsi
|
|
+ vmovdqa %xmm4,%xmm8
|
|
+ jmp .L_done_EmbgEptodyewbFa
|
|
+
|
|
+.L_num_blocks_is_3_EmbgEptodyewbFa:
|
|
+ vmovdqa 0(%rsp),%xmm9
|
|
+ movq 0(%rsp),%rax
|
|
+ movq 8(%rsp),%rbx
|
|
+ vmovdqu 0(%rdi),%xmm1
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,16 + 8(%rsp)
|
|
+ vmovdqa 16(%rsp),%xmm10
|
|
+ vmovdqu 16(%rdi),%xmm2
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,32(%rsp)
|
|
+ movq %rbx,32 + 8(%rsp)
|
|
+ vmovdqa 32(%rsp),%xmm11
|
|
+ vmovdqu 32(%rdi),%xmm3
|
|
+ addq $0x30,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_3_EmbgEptodyewbFa
|
|
+
|
|
+.L_steal_cipher_3_EmbgEptodyewbFa:
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,24(%rsp)
|
|
+ vmovdqa64 %xmm11,%xmm12
|
|
+ vmovdqa 16(%rsp),%xmm11
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 176(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 192(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 208(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 224(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vaesdeclast %xmm0,%xmm3,%xmm3
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ addq $0x30,%rsi
|
|
+ vmovdqa %xmm12,%xmm0
|
|
+ vmovdqa %xmm3,%xmm8
|
|
+ jmp .L_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+.L_done_3_EmbgEptodyewbFa:
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 176(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 192(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 208(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vaesdec %xmm0,%xmm3,%xmm3
|
|
+ vmovdqu 224(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vaesdeclast %xmm0,%xmm3,%xmm3
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vpxor %xmm11,%xmm3,%xmm3
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ vmovdqu %xmm2,16(%rsi)
|
|
+ addq $0x30,%rsi
|
|
+ vmovdqa %xmm3,%xmm8
|
|
+ jmp .L_done_EmbgEptodyewbFa
|
|
+
|
|
+.L_num_blocks_is_2_EmbgEptodyewbFa:
|
|
+ vmovdqa 0(%rsp),%xmm9
|
|
+ movq 0(%rsp),%rax
|
|
+ movq 8(%rsp),%rbx
|
|
+ vmovdqu 0(%rdi),%xmm1
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,16 + 8(%rsp)
|
|
+ vmovdqa 16(%rsp),%xmm10
|
|
+ vmovdqu 16(%rdi),%xmm2
|
|
+ addq $0x20,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_2_EmbgEptodyewbFa
|
|
+
|
|
+.L_steal_cipher_2_EmbgEptodyewbFa:
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,24(%rsp)
|
|
+ vmovdqa64 %xmm10,%xmm11
|
|
+ vmovdqa 16(%rsp),%xmm10
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 176(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 192(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 208(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 224(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ addq $0x20,%rsi
|
|
+ vmovdqa %xmm11,%xmm0
|
|
+ vmovdqa %xmm2,%xmm8
|
|
+ jmp .L_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+.L_done_2_EmbgEptodyewbFa:
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 176(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 192(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 208(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vaesdec %xmm0,%xmm2,%xmm2
|
|
+ vmovdqu 224(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vaesdeclast %xmm0,%xmm2,%xmm2
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vpxor %xmm10,%xmm2,%xmm2
|
|
+ vmovdqu %xmm1,(%rsi)
|
|
+ addq $0x20,%rsi
|
|
+ vmovdqa %xmm2,%xmm8
|
|
+ jmp .L_done_EmbgEptodyewbFa
|
|
+
|
|
+.L_num_blocks_is_1_EmbgEptodyewbFa:
|
|
+ vmovdqa 0(%rsp),%xmm9
|
|
+ movq 0(%rsp),%rax
|
|
+ movq 8(%rsp),%rbx
|
|
+ vmovdqu 0(%rdi),%xmm1
|
|
+ addq $0x10,%rdi
|
|
+ andq $0xf,%rdx
|
|
+ je .L_done_1_EmbgEptodyewbFa
|
|
+
|
|
+.L_steal_cipher_1_EmbgEptodyewbFa:
|
|
+ xorq %r11,%r11
|
|
+ shlq $1,%rax
|
|
+ adcq %rbx,%rbx
|
|
+ cmovcq %r10,%r11
|
|
+ xorq %r11,%rax
|
|
+ movq %rax,16(%rsp)
|
|
+ movq %rbx,24(%rsp)
|
|
+ vmovdqa64 %xmm9,%xmm10
|
|
+ vmovdqa 16(%rsp),%xmm9
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 176(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 192(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 208(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 224(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ addq $0x10,%rsi
|
|
+ vmovdqa %xmm10,%xmm0
|
|
+ vmovdqa %xmm1,%xmm8
|
|
+ jmp .L_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+.L_done_1_EmbgEptodyewbFa:
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ vmovdqu (%rcx),%xmm0
|
|
+ vpxor %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 16(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 32(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 48(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 64(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 80(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 96(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 112(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 128(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 144(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 160(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 176(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 192(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 208(%rcx),%xmm0
|
|
+ vaesdec %xmm0,%xmm1,%xmm1
|
|
+ vmovdqu 224(%rcx),%xmm0
|
|
+ vaesdeclast %xmm0,%xmm1,%xmm1
|
|
+ vpxor %xmm9,%xmm1,%xmm1
|
|
+ addq $0x10,%rsi
|
|
+ vmovdqa %xmm1,%xmm8
|
|
+ jmp .L_done_EmbgEptodyewbFa
|
|
+.cfi_endproc
|
|
+.section .rodata  # read-only constant tables for the routines above
|
|
+.align 16  # 16-byte alignment for the first table; every table below is a multiple of 16 bytes, so each label stays 16-byte aligned
|
|
+
|
|
+vpshufb_shf_table:  # 32 bytes of byte-shuffle control values; NOTE(review): presumably read at a variable byte offset to build a vpshufb shift mask - confirm at the use site
|
|
+.quad 0x8786858483828100, 0x8f8e8d8c8b8a8988  # bytes in memory order: 0x00, 0x81..0x8f (all but the first have bit 7 set)
|
|
+.quad 0x0706050403020100, 0x000e0d0c0b0a0908  # bytes in memory order: 0x00..0x0e, then 0x00
|
|
+
|
|
+mask1:  # 16 bytes, each 0x80 (only bit 7 of every byte set)
|
|
+.quad 0x8080808080808080, 0x8080808080808080
|
|
+
|
|
+const_dq3210:  # 64 bytes: eight quadwords, each value repeated twice; NOTE(review): use site not shown here - confirm purpose
|
|
+.quad 0, 0, 1, 1, 2, 2, 3, 3
|
|
+const_dq5678:  # 64 bytes: quadword pairs counting down from 8 to 5
|
|
+.quad 8, 8, 7, 7, 6, 6, 5, 5
|
|
+const_dq7654:  # 64 bytes: quadword pairs counting up from 4 to 7
|
|
+.quad 4, 4, 5, 5, 6, 6, 7, 7
|
|
+const_dq1234:  # 64 bytes: quadword pairs counting down from 4 to 1
|
|
+.quad 4, 4, 3, 3, 2, 2, 1, 1
|
|
+
|
|
+shufb_15_7:  # 16 control bytes: byte 0 = 15, byte 8 = 7, all others 0xff; as a (v)pshufb control this would move source byte 15 to lane 0 and byte 7 to lane 8 and zero the rest
|
|
+.byte 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 7, 0xff, 0xff  # first 11 of the 16 bytes
|
|
+.byte 0xff, 0xff, 0xff, 0xff, 0xff  # remaining 5 bytes
|
|
+
|
|
+.text
|
|
+ .section ".note.gnu.property", "a"  # allocatable ELF note carrying GNU program properties for this object
|
|
+ .p2align 3  # note header starts on an 8-byte boundary
|
|
+ .long 1f - 0f  # namesz: size of the name field between labels 0 and 1 (4 bytes)
|
|
+ .long 4f - 1f  # descsz: size of the descriptor between labels 1 and 4
|
|
+ .long 5  # note type 5 (NT_GNU_PROPERTY_TYPE_0 in the GNU property ABI)
|
|
+0:  # start of the note name
|
|
+ # "GNU" encoded with .byte, since .asciz isn't supported
|
|
+ # on Solaris.
|
|
+ .byte 0x47  # 'G'
|
|
+ .byte 0x4e  # 'N'
|
|
+ .byte 0x55  # 'U'
|
|
+ .byte 0  # NUL terminator of the name
|
|
+1:  # end of name / start of descriptor
|
|
+ .p2align 3  # descriptor is 8-byte aligned
|
|
+ .long 0xc0000002  # property type (GNU_PROPERTY_X86_FEATURE_1_AND in the x86 psABI)
|
|
+ .long 3f - 2f  # property data size: bytes between labels 2 and 3 (4)
|
|
+2:  # start of property data
|
|
+ .long 3  # feature bits 0 and 1 set (IBT and SHSTK per the x86 psABI)
|
|
+3:  # end of property data
|
|
+ .p2align 3  # pad the descriptor to an 8-byte multiple
|
|
+4:  # end of descriptor (used by the descsz expression above)
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-2k-avxifma.s b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-2k-avxifma.s
|
|
new file mode 100644
|
|
index 0000000000..7e08b44dde
|
|
--- /dev/null
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-2k-avxifma.s
|
|
@@ -0,0 +1,1167 @@
|
|
+.text
|
|
+
|
|
+.globl ossl_rsaz_avxifma_eligible
|
|
+.type ossl_rsaz_avxifma_eligible,@function
|
|
+.align 32
|
|
+ossl_rsaz_avxifma_eligible:
|
|
+ movl OPENSSL_ia32cap_P+20(%rip),%ecx
|
|
+ xorl %eax,%eax
|
|
+ andl $8388608,%ecx
|
|
+ cmpl $8388608,%ecx
|
|
+ cmovel %ecx,%eax
|
|
+ .byte 0xf3,0xc3
|
|
+.size ossl_rsaz_avxifma_eligible, .-ossl_rsaz_avxifma_eligible
|
|
+.text
|
|
+
|
|
+.globl ossl_rsaz_amm52x20_x1_avxifma256
|
|
+.type ossl_rsaz_amm52x20_x1_avxifma256,@function
|
|
+.align 32
|
|
+ossl_rsaz_amm52x20_x1_avxifma256:
|
|
+.cfi_startproc
|
|
+.byte 243,15,30,250
|
|
+ pushq %rbx
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %rbx,-16
|
|
+ pushq %rbp
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %rbp,-24
|
|
+ pushq %r12
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %r12,-32
|
|
+ pushq %r13
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %r13,-40
|
|
+ pushq %r14
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %r14,-48
|
|
+ pushq %r15
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %r15,-56
|
|
+.Lossl_rsaz_amm52x20_x1_avxifma256_body:
|
|
+
|
|
+
|
|
+ vpxor %ymm0,%ymm0,%ymm0
|
|
+ vmovapd %ymm0,%ymm3
|
|
+ vmovapd %ymm0,%ymm5
|
|
+ vmovapd %ymm0,%ymm6
|
|
+ vmovapd %ymm0,%ymm7
|
|
+ vmovapd %ymm0,%ymm8
|
|
+
|
|
+ xorl %r9d,%r9d
|
|
+
|
|
+ movq %rdx,%r11
|
|
+ movq $0xfffffffffffff,%rax
|
|
+
|
|
+
|
|
+ movl $5,%ebx
|
|
+
|
|
+.align 32
|
|
+.Lloop5:
|
|
+ movq 0(%r11),%r13
|
|
+
|
|
+ vpbroadcastq 0(%r11),%ymm1
|
|
+ movq 0(%rsi),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ movq %r12,%r10
|
|
+ adcq $0,%r10
|
|
+
|
|
+ movq %r8,%r13
|
|
+ imulq %r9,%r13
|
|
+ andq %rax,%r13
|
|
+
|
|
+ vmovq %r13,%xmm2
|
|
+ vpbroadcastq %xmm2,%ymm2
|
|
+ movq 0(%rcx),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ adcq %r12,%r10
|
|
+
|
|
+ shrq $52,%r9
|
|
+ salq $12,%r10
|
|
+ orq %r10,%r9
|
|
+
|
|
+ leaq -168(%rsp),%rsp
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm8
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm8
|
|
+
|
|
+
|
|
+ vmovdqu %ymm3,0(%rsp)
|
|
+ vmovdqu %ymm5,32(%rsp)
|
|
+ vmovdqu %ymm6,64(%rsp)
|
|
+ vmovdqu %ymm7,96(%rsp)
|
|
+ vmovdqu %ymm8,128(%rsp)
|
|
+ movq $0,160(%rsp)
|
|
+
|
|
+ vmovdqu 8(%rsp),%ymm3
|
|
+ vmovdqu 40(%rsp),%ymm5
|
|
+ vmovdqu 72(%rsp),%ymm6
|
|
+ vmovdqu 104(%rsp),%ymm7
|
|
+ vmovdqu 136(%rsp),%ymm8
|
|
+
|
|
+ addq 8(%rsp),%r9
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm8
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm8
|
|
+ leaq 168(%rsp),%rsp
|
|
+ movq 8(%r11),%r13
|
|
+
|
|
+ vpbroadcastq 8(%r11),%ymm1
|
|
+ movq 0(%rsi),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ movq %r12,%r10
|
|
+ adcq $0,%r10
|
|
+
|
|
+ movq %r8,%r13
|
|
+ imulq %r9,%r13
|
|
+ andq %rax,%r13
|
|
+
|
|
+ vmovq %r13,%xmm2
|
|
+ vpbroadcastq %xmm2,%ymm2
|
|
+ movq 0(%rcx),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ adcq %r12,%r10
|
|
+
|
|
+ shrq $52,%r9
|
|
+ salq $12,%r10
|
|
+ orq %r10,%r9
|
|
+
|
|
+ leaq -168(%rsp),%rsp
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm8
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm8
|
|
+
|
|
+
|
|
+ vmovdqu %ymm3,0(%rsp)
|
|
+ vmovdqu %ymm5,32(%rsp)
|
|
+ vmovdqu %ymm6,64(%rsp)
|
|
+ vmovdqu %ymm7,96(%rsp)
|
|
+ vmovdqu %ymm8,128(%rsp)
|
|
+ movq $0,160(%rsp)
|
|
+
|
|
+ vmovdqu 8(%rsp),%ymm3
|
|
+ vmovdqu 40(%rsp),%ymm5
|
|
+ vmovdqu 72(%rsp),%ymm6
|
|
+ vmovdqu 104(%rsp),%ymm7
|
|
+ vmovdqu 136(%rsp),%ymm8
|
|
+
|
|
+ addq 8(%rsp),%r9
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm8
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm8
|
|
+ leaq 168(%rsp),%rsp
|
|
+ movq 16(%r11),%r13
|
|
+
|
|
+ vpbroadcastq 16(%r11),%ymm1
|
|
+ movq 0(%rsi),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ movq %r12,%r10
|
|
+ adcq $0,%r10
|
|
+
|
|
+ movq %r8,%r13
|
|
+ imulq %r9,%r13
|
|
+ andq %rax,%r13
|
|
+
|
|
+ vmovq %r13,%xmm2
|
|
+ vpbroadcastq %xmm2,%ymm2
|
|
+ movq 0(%rcx),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ adcq %r12,%r10
|
|
+
|
|
+ shrq $52,%r9
|
|
+ salq $12,%r10
|
|
+ orq %r10,%r9
|
|
+
|
|
+ leaq -168(%rsp),%rsp
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm8
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm8
|
|
+
|
|
+
|
|
+ vmovdqu %ymm3,0(%rsp)
|
|
+ vmovdqu %ymm5,32(%rsp)
|
|
+ vmovdqu %ymm6,64(%rsp)
|
|
+ vmovdqu %ymm7,96(%rsp)
|
|
+ vmovdqu %ymm8,128(%rsp)
|
|
+ movq $0,160(%rsp)
|
|
+
|
|
+ vmovdqu 8(%rsp),%ymm3
|
|
+ vmovdqu 40(%rsp),%ymm5
|
|
+ vmovdqu 72(%rsp),%ymm6
|
|
+ vmovdqu 104(%rsp),%ymm7
|
|
+ vmovdqu 136(%rsp),%ymm8
|
|
+
|
|
+ addq 8(%rsp),%r9
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm8
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm8
|
|
+ leaq 168(%rsp),%rsp
|
|
+ movq 24(%r11),%r13
|
|
+
|
|
+ vpbroadcastq 24(%r11),%ymm1
|
|
+ movq 0(%rsi),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ movq %r12,%r10
|
|
+ adcq $0,%r10
|
|
+
|
|
+ movq %r8,%r13
|
|
+ imulq %r9,%r13
|
|
+ andq %rax,%r13
|
|
+
|
|
+ vmovq %r13,%xmm2
|
|
+ vpbroadcastq %xmm2,%ymm2
|
|
+ movq 0(%rcx),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ adcq %r12,%r10
|
|
+
|
|
+ shrq $52,%r9
|
|
+ salq $12,%r10
|
|
+ orq %r10,%r9
|
|
+
|
|
+ leaq -168(%rsp),%rsp
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm8
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm8
|
|
+
|
|
+
|
|
+ vmovdqu %ymm3,0(%rsp)
|
|
+ vmovdqu %ymm5,32(%rsp)
|
|
+ vmovdqu %ymm6,64(%rsp)
|
|
+ vmovdqu %ymm7,96(%rsp)
|
|
+ vmovdqu %ymm8,128(%rsp)
|
|
+ movq $0,160(%rsp)
|
|
+
|
|
+ vmovdqu 8(%rsp),%ymm3
|
|
+ vmovdqu 40(%rsp),%ymm5
|
|
+ vmovdqu 72(%rsp),%ymm6
|
|
+ vmovdqu 104(%rsp),%ymm7
|
|
+ vmovdqu 136(%rsp),%ymm8
|
|
+
|
|
+ addq 8(%rsp),%r9
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm8
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm8
|
|
+ leaq 168(%rsp),%rsp
|
|
+ leaq 32(%r11),%r11
|
|
+ decl %ebx
|
|
+ jne .Lloop5
|
|
+
|
|
+ vmovq %r9,%xmm0
|
|
+ vpbroadcastq %xmm0,%ymm0
|
|
+ vpblendd $3,%ymm0,%ymm3,%ymm3
|
|
+
|
|
+
|
|
+
|
|
+ vpsrlq $52,%ymm3,%ymm0
|
|
+ vpsrlq $52,%ymm5,%ymm1
|
|
+ vpsrlq $52,%ymm6,%ymm2
|
|
+ vpsrlq $52,%ymm7,%ymm13
|
|
+ vpsrlq $52,%ymm8,%ymm14
|
|
+
|
|
+
|
|
+ vpermq $144,%ymm14,%ymm14
|
|
+ vpermq $3,%ymm13,%ymm15
|
|
+ vblendpd $1,%ymm15,%ymm14,%ymm14
|
|
+
|
|
+ vpermq $144,%ymm13,%ymm13
|
|
+ vpermq $3,%ymm2,%ymm15
|
|
+ vblendpd $1,%ymm15,%ymm13,%ymm13
|
|
+
|
|
+ vpermq $144,%ymm2,%ymm2
|
|
+ vpermq $3,%ymm1,%ymm15
|
|
+ vblendpd $1,%ymm15,%ymm2,%ymm2
|
|
+
|
|
+ vpermq $144,%ymm1,%ymm1
|
|
+ vpermq $3,%ymm0,%ymm15
|
|
+ vblendpd $1,%ymm15,%ymm1,%ymm1
|
|
+
|
|
+ vpermq $144,%ymm0,%ymm0
|
|
+ vpand .Lhigh64x3(%rip),%ymm0,%ymm0
|
|
+
|
|
+
|
|
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
|
|
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
|
|
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
|
|
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
|
|
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
|
|
+
|
|
+
|
|
+ vpaddq %ymm0,%ymm3,%ymm3
|
|
+ vpaddq %ymm1,%ymm5,%ymm5
|
|
+ vpaddq %ymm2,%ymm6,%ymm6
|
|
+ vpaddq %ymm13,%ymm7,%ymm7
|
|
+ vpaddq %ymm14,%ymm8,%ymm8
|
|
+
|
|
+
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm0
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm1
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm2
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm13
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm14
|
|
+ vmovmskpd %ymm0,%r14d
|
|
+ vmovmskpd %ymm1,%r13d
|
|
+ vmovmskpd %ymm2,%r12d
|
|
+ vmovmskpd %ymm13,%r11d
|
|
+ vmovmskpd %ymm14,%r10d
|
|
+
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm0
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm1
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm2
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm13
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm14
|
|
+ vmovmskpd %ymm0,%r9d
|
|
+ vmovmskpd %ymm1,%r8d
|
|
+ vmovmskpd %ymm2,%ebx
|
|
+ vmovmskpd %ymm13,%ecx
|
|
+ vmovmskpd %ymm14,%edx
|
|
+
|
|
+
|
|
+
|
|
+ shlb $4,%r13b
|
|
+ orb %r13b,%r14b
|
|
+ shlb $4,%r11b
|
|
+ orb %r11b,%r12b
|
|
+
|
|
+ addb %r14b,%r14b
|
|
+ adcb %r12b,%r12b
|
|
+ adcb %r10b,%r10b
|
|
+
|
|
+ shlb $4,%r8b
|
|
+ orb %r8b,%r9b
|
|
+ shlb $4,%cl
|
|
+ orb %cl,%bl
|
|
+
|
|
+ addb %r9b,%r14b
|
|
+ adcb %bl,%r12b
|
|
+ adcb %dl,%r10b
|
|
+
|
|
+ xorb %r9b,%r14b
|
|
+ xorb %bl,%r12b
|
|
+ xorb %dl,%r10b
|
|
+
|
|
+ leaq .Lkmasklut(%rip),%rdx
|
|
+
|
|
+ movb %r14b,%r13b
|
|
+ andq $0xf,%r14
|
|
+ vpsubq .Lmask52x4(%rip),%ymm3,%ymm0
|
|
+ shlq $5,%r14
|
|
+ vmovapd (%rdx,%r14), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm3,%ymm3
|
|
+
|
|
+ shrb $4,%r13b
|
|
+ andq $0xf,%r13
|
|
+ vpsubq .Lmask52x4(%rip),%ymm5,%ymm0
|
|
+ shlq $5,%r13
|
|
+ vmovapd (%rdx,%r13), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm5,%ymm5
|
|
+
|
|
+ movb %r12b,%r11b
|
|
+ andq $0xf,%r12
|
|
+ vpsubq .Lmask52x4(%rip),%ymm6,%ymm0
|
|
+ shlq $5,%r12
|
|
+ vmovapd (%rdx,%r12), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm6,%ymm6
|
|
+
|
|
+ shrb $4,%r11b
|
|
+ andq $0xf,%r11
|
|
+ vpsubq .Lmask52x4(%rip),%ymm7,%ymm0
|
|
+ shlq $5,%r11
|
|
+ vmovapd (%rdx,%r11), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm7,%ymm7
|
|
+
|
|
+ andq $0xf,%r10
|
|
+ vpsubq .Lmask52x4(%rip),%ymm8,%ymm0
|
|
+ shlq $5,%r10
|
|
+ vmovapd (%rdx,%r10), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm8,%ymm8
|
|
+
|
|
+
|
|
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
|
|
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
|
|
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
|
|
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
|
|
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
|
|
+
|
|
+ vmovdqu %ymm3,0(%rdi)
|
|
+ vmovdqu %ymm5,32(%rdi)
|
|
+ vmovdqu %ymm6,64(%rdi)
|
|
+ vmovdqu %ymm7,96(%rdi)
|
|
+ vmovdqu %ymm8,128(%rdi)
|
|
+
|
|
+ vzeroupper
|
|
+ movq 0(%rsp),%r15
|
|
+.cfi_restore %r15
|
|
+ movq 8(%rsp),%r14
|
|
+.cfi_restore %r14
|
|
+ movq 16(%rsp),%r13
|
|
+.cfi_restore %r13
|
|
+ movq 24(%rsp),%r12
|
|
+.cfi_restore %r12
|
|
+ movq 32(%rsp),%rbp
|
|
+.cfi_restore %rbp
|
|
+ movq 40(%rsp),%rbx
|
|
+.cfi_restore %rbx
|
|
+ leaq 48(%rsp),%rsp
|
|
+.cfi_adjust_cfa_offset -48
|
|
+.Lossl_rsaz_amm52x20_x1_avxifma256_epilogue:
|
|
+ .byte 0xf3,0xc3
|
|
+.cfi_endproc
|
|
+.size ossl_rsaz_amm52x20_x1_avxifma256, .-ossl_rsaz_amm52x20_x1_avxifma256
|
|
+.section .rodata
|
|
+.align 32
|
|
+.Lmask52x4:
|
|
+.quad 0xfffffffffffff
|
|
+.quad 0xfffffffffffff
|
|
+.quad 0xfffffffffffff
|
|
+.quad 0xfffffffffffff
|
|
+.Lhigh64x3:
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.Lkmasklut:
|
|
+
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.text
|
|
+
|
|
+.globl ossl_rsaz_amm52x20_x2_avxifma256
|
|
+.type ossl_rsaz_amm52x20_x2_avxifma256,@function
|
|
+.align 32
|
|
+ossl_rsaz_amm52x20_x2_avxifma256:
|
|
+.cfi_startproc
|
|
+.byte 243,15,30,250
|
|
+ pushq %rbx
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %rbx,-16
|
|
+ pushq %rbp
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %rbp,-24
|
|
+ pushq %r12
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %r12,-32
|
|
+ pushq %r13
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %r13,-40
|
|
+ pushq %r14
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %r14,-48
|
|
+ pushq %r15
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %r15,-56
|
|
+.Lossl_rsaz_amm52x20_x2_avxifma256_body:
|
|
+
|
|
+
|
|
+ vpxor %ymm0,%ymm0,%ymm0
|
|
+ vmovapd %ymm0,%ymm3
|
|
+ vmovapd %ymm0,%ymm5
|
|
+ vmovapd %ymm0,%ymm6
|
|
+ vmovapd %ymm0,%ymm7
|
|
+ vmovapd %ymm0,%ymm8
|
|
+ vmovapd %ymm0,%ymm4
|
|
+ vmovapd %ymm0,%ymm9
|
|
+ vmovapd %ymm0,%ymm10
|
|
+ vmovapd %ymm0,%ymm11
|
|
+ vmovapd %ymm0,%ymm12
|
|
+
|
|
+ xorl %r9d,%r9d
|
|
+ xorl %r15d,%r15d
|
|
+
|
|
+ movq %rdx,%r11
|
|
+ movq $0xfffffffffffff,%rax
|
|
+
|
|
+ movl $20,%ebx
|
|
+
|
|
+.align 32
|
|
+.Lloop20:
|
|
+ movq 0(%r11),%r13
|
|
+
|
|
+ vpbroadcastq 0(%r11),%ymm1
|
|
+ movq 0(%rsi),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ movq %r12,%r10
|
|
+ adcq $0,%r10
|
|
+
|
|
+ movq (%r8),%r13
|
|
+ imulq %r9,%r13
|
|
+ andq %rax,%r13
|
|
+
|
|
+ vmovq %r13,%xmm2
|
|
+ vpbroadcastq %xmm2,%ymm2
|
|
+ movq 0(%rcx),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ adcq %r12,%r10
|
|
+
|
|
+ shrq $52,%r9
|
|
+ salq $12,%r10
|
|
+ orq %r10,%r9
|
|
+
|
|
+ leaq -168(%rsp),%rsp
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm8
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm8
|
|
+
|
|
+
|
|
+ vmovdqu %ymm3,0(%rsp)
|
|
+ vmovdqu %ymm5,32(%rsp)
|
|
+ vmovdqu %ymm6,64(%rsp)
|
|
+ vmovdqu %ymm7,96(%rsp)
|
|
+ vmovdqu %ymm8,128(%rsp)
|
|
+ movq $0,160(%rsp)
|
|
+
|
|
+ vmovdqu 8(%rsp),%ymm3
|
|
+ vmovdqu 40(%rsp),%ymm5
|
|
+ vmovdqu 72(%rsp),%ymm6
|
|
+ vmovdqu 104(%rsp),%ymm7
|
|
+ vmovdqu 136(%rsp),%ymm8
|
|
+
|
|
+ addq 8(%rsp),%r9
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm8
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm8
|
|
+ leaq 168(%rsp),%rsp
|
|
+ movq 160(%r11),%r13
|
|
+
|
|
+ vpbroadcastq 160(%r11),%ymm1
|
|
+ movq 160(%rsi),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r15
|
|
+ movq %r12,%r10
|
|
+ adcq $0,%r10
|
|
+
|
|
+ movq 8(%r8),%r13
|
|
+ imulq %r15,%r13
|
|
+ andq %rax,%r13
|
|
+
|
|
+ vmovq %r13,%xmm2
|
|
+ vpbroadcastq %xmm2,%ymm2
|
|
+ movq 160(%rcx),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r15
|
|
+ adcq %r12,%r10
|
|
+
|
|
+ shrq $52,%r15
|
|
+ salq $12,%r10
|
|
+ orq %r10,%r15
|
|
+
|
|
+ leaq -168(%rsp),%rsp
|
|
+{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10
|
|
+{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm11
|
|
+{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm12
|
|
+
|
|
+{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10
|
|
+{vex} vpmadd52luq 256(%rcx), %ymm2, %ymm11
|
|
+{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm12
|
|
+
|
|
+
|
|
+ vmovdqu %ymm4,0(%rsp)
|
|
+ vmovdqu %ymm9,32(%rsp)
|
|
+ vmovdqu %ymm10,64(%rsp)
|
|
+ vmovdqu %ymm11,96(%rsp)
|
|
+ vmovdqu %ymm12,128(%rsp)
|
|
+ movq $0,160(%rsp)
|
|
+
|
|
+ vmovdqu 8(%rsp),%ymm4
|
|
+ vmovdqu 40(%rsp),%ymm9
|
|
+ vmovdqu 72(%rsp),%ymm10
|
|
+ vmovdqu 104(%rsp),%ymm11
|
|
+ vmovdqu 136(%rsp),%ymm12
|
|
+
|
|
+ addq 8(%rsp),%r15
|
|
+
|
|
+{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10
|
|
+{vex} vpmadd52huq 256(%rsi), %ymm1, %ymm11
|
|
+{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm12
|
|
+
|
|
+{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10
|
|
+{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm11
|
|
+{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm12
|
|
+ leaq 168(%rsp),%rsp
|
|
+ leaq 8(%r11),%r11
|
|
+ decl %ebx
|
|
+ jne .Lloop20
|
|
+
|
|
+ vmovq %r9,%xmm0
|
|
+ vpbroadcastq %xmm0,%ymm0
|
|
+ vpblendd $3,%ymm0,%ymm3,%ymm3
|
|
+
|
|
+
|
|
+
|
|
+ vpsrlq $52,%ymm3,%ymm0
|
|
+ vpsrlq $52,%ymm5,%ymm1
|
|
+ vpsrlq $52,%ymm6,%ymm2
|
|
+ vpsrlq $52,%ymm7,%ymm13
|
|
+ vpsrlq $52,%ymm8,%ymm14
|
|
+
|
|
+
|
|
+ vpermq $144,%ymm14,%ymm14
|
|
+ vpermq $3,%ymm13,%ymm15
|
|
+ vblendpd $1,%ymm15,%ymm14,%ymm14
|
|
+
|
|
+ vpermq $144,%ymm13,%ymm13
|
|
+ vpermq $3,%ymm2,%ymm15
|
|
+ vblendpd $1,%ymm15,%ymm13,%ymm13
|
|
+
|
|
+ vpermq $144,%ymm2,%ymm2
|
|
+ vpermq $3,%ymm1,%ymm15
|
|
+ vblendpd $1,%ymm15,%ymm2,%ymm2
|
|
+
|
|
+ vpermq $144,%ymm1,%ymm1
|
|
+ vpermq $3,%ymm0,%ymm15
|
|
+ vblendpd $1,%ymm15,%ymm1,%ymm1
|
|
+
|
|
+ vpermq $144,%ymm0,%ymm0
|
|
+ vpand .Lhigh64x3(%rip),%ymm0,%ymm0
|
|
+
|
|
+
|
|
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
|
|
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
|
|
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
|
|
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
|
|
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
|
|
+
|
|
+
|
|
+ vpaddq %ymm0,%ymm3,%ymm3
|
|
+ vpaddq %ymm1,%ymm5,%ymm5
|
|
+ vpaddq %ymm2,%ymm6,%ymm6
|
|
+ vpaddq %ymm13,%ymm7,%ymm7
|
|
+ vpaddq %ymm14,%ymm8,%ymm8
|
|
+
|
|
+
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm0
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm1
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm2
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm13
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm14
|
|
+ vmovmskpd %ymm0,%r14d
|
|
+ vmovmskpd %ymm1,%r13d
|
|
+ vmovmskpd %ymm2,%r12d
|
|
+ vmovmskpd %ymm13,%r11d
|
|
+ vmovmskpd %ymm14,%r10d
|
|
+
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm0
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm1
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm2
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm13
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm14
|
|
+ vmovmskpd %ymm0,%r9d
|
|
+ vmovmskpd %ymm1,%r8d
|
|
+ vmovmskpd %ymm2,%ebx
|
|
+ vmovmskpd %ymm13,%ecx
|
|
+ vmovmskpd %ymm14,%edx
|
|
+
|
|
+
|
|
+
|
|
+ shlb $4,%r13b
|
|
+ orb %r13b,%r14b
|
|
+ shlb $4,%r11b
|
|
+ orb %r11b,%r12b
|
|
+
|
|
+ addb %r14b,%r14b
|
|
+ adcb %r12b,%r12b
|
|
+ adcb %r10b,%r10b
|
|
+
|
|
+ shlb $4,%r8b
|
|
+ orb %r8b,%r9b
|
|
+ shlb $4,%cl
|
|
+ orb %cl,%bl
|
|
+
|
|
+ addb %r9b,%r14b
|
|
+ adcb %bl,%r12b
|
|
+ adcb %dl,%r10b
|
|
+
|
|
+ xorb %r9b,%r14b
|
|
+ xorb %bl,%r12b
|
|
+ xorb %dl,%r10b
|
|
+
|
|
+ leaq .Lkmasklut(%rip),%rdx
|
|
+
|
|
+ movb %r14b,%r13b
|
|
+ andq $0xf,%r14
|
|
+ vpsubq .Lmask52x4(%rip),%ymm3,%ymm0
|
|
+ shlq $5,%r14
|
|
+ vmovapd (%rdx,%r14), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm3,%ymm3
|
|
+
|
|
+ shrb $4,%r13b
|
|
+ andq $0xf,%r13
|
|
+ vpsubq .Lmask52x4(%rip),%ymm5,%ymm0
|
|
+ shlq $5,%r13
|
|
+ vmovapd (%rdx,%r13), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm5,%ymm5
|
|
+
|
|
+ movb %r12b,%r11b
|
|
+ andq $0xf,%r12
|
|
+ vpsubq .Lmask52x4(%rip),%ymm6,%ymm0
|
|
+ shlq $5,%r12
|
|
+ vmovapd (%rdx,%r12), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm6,%ymm6
|
|
+
|
|
+ shrb $4,%r11b
|
|
+ andq $0xf,%r11
|
|
+ vpsubq .Lmask52x4(%rip),%ymm7,%ymm0
|
|
+ shlq $5,%r11
|
|
+ vmovapd (%rdx,%r11), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm7,%ymm7
|
|
+
|
|
+ andq $0xf,%r10
|
|
+ vpsubq .Lmask52x4(%rip),%ymm8,%ymm0
|
|
+ shlq $5,%r10
|
|
+ vmovapd (%rdx,%r10), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm8,%ymm8
|
|
+
|
|
+
|
|
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
|
|
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
|
|
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
|
|
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
|
|
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
|
|
+
|
|
+ vmovq %r15,%xmm0
|
|
+ vpbroadcastq %xmm0,%ymm0
|
|
+ vpblendd $3,%ymm0,%ymm4,%ymm4
|
|
+
|
|
+
|
|
+
|
|
+ vpsrlq $52,%ymm4,%ymm0
|
|
+ vpsrlq $52,%ymm9,%ymm1
|
|
+ vpsrlq $52,%ymm10,%ymm2
|
|
+ vpsrlq $52,%ymm11,%ymm13
|
|
+ vpsrlq $52,%ymm12,%ymm14
|
|
+
|
|
+
|
|
+ vpermq $144,%ymm14,%ymm14
|
|
+ vpermq $3,%ymm13,%ymm15
|
|
+ vblendpd $1,%ymm15,%ymm14,%ymm14
|
|
+
|
|
+ vpermq $144,%ymm13,%ymm13
|
|
+ vpermq $3,%ymm2,%ymm15
|
|
+ vblendpd $1,%ymm15,%ymm13,%ymm13
|
|
+
|
|
+ vpermq $144,%ymm2,%ymm2
|
|
+ vpermq $3,%ymm1,%ymm15
|
|
+ vblendpd $1,%ymm15,%ymm2,%ymm2
|
|
+
|
|
+ vpermq $144,%ymm1,%ymm1
|
|
+ vpermq $3,%ymm0,%ymm15
|
|
+ vblendpd $1,%ymm15,%ymm1,%ymm1
|
|
+
|
|
+ vpermq $144,%ymm0,%ymm0
|
|
+ vpand .Lhigh64x3(%rip),%ymm0,%ymm0
|
|
+
|
|
+
|
|
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
|
|
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
|
|
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
|
|
+ vpand .Lmask52x4(%rip),%ymm11,%ymm11
|
|
+ vpand .Lmask52x4(%rip),%ymm12,%ymm12
|
|
+
|
|
+
|
|
+ vpaddq %ymm0,%ymm4,%ymm4
|
|
+ vpaddq %ymm1,%ymm9,%ymm9
|
|
+ vpaddq %ymm2,%ymm10,%ymm10
|
|
+ vpaddq %ymm13,%ymm11,%ymm11
|
|
+ vpaddq %ymm14,%ymm12,%ymm12
|
|
+
|
|
+
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm0
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm1
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm2
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm11,%ymm13
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm12,%ymm14
|
|
+ vmovmskpd %ymm0,%r14d
|
|
+ vmovmskpd %ymm1,%r13d
|
|
+ vmovmskpd %ymm2,%r12d
|
|
+ vmovmskpd %ymm13,%r11d
|
|
+ vmovmskpd %ymm14,%r10d
|
|
+
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm0
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm1
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm2
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm11,%ymm13
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm12,%ymm14
|
|
+ vmovmskpd %ymm0,%r9d
|
|
+ vmovmskpd %ymm1,%r8d
|
|
+ vmovmskpd %ymm2,%ebx
|
|
+ vmovmskpd %ymm13,%ecx
|
|
+ vmovmskpd %ymm14,%edx
|
|
+
|
|
+
|
|
+
|
|
+ shlb $4,%r13b
|
|
+ orb %r13b,%r14b
|
|
+ shlb $4,%r11b
|
|
+ orb %r11b,%r12b
|
|
+
|
|
+ addb %r14b,%r14b
|
|
+ adcb %r12b,%r12b
|
|
+ adcb %r10b,%r10b
|
|
+
|
|
+ shlb $4,%r8b
|
|
+ orb %r8b,%r9b
|
|
+ shlb $4,%cl
|
|
+ orb %cl,%bl
|
|
+
|
|
+ addb %r9b,%r14b
|
|
+ adcb %bl,%r12b
|
|
+ adcb %dl,%r10b
|
|
+
|
|
+ xorb %r9b,%r14b
|
|
+ xorb %bl,%r12b
|
|
+ xorb %dl,%r10b
|
|
+
|
|
+ leaq .Lkmasklut(%rip),%rdx
|
|
+
|
|
+ movb %r14b,%r13b
|
|
+ andq $0xf,%r14
|
|
+ vpsubq .Lmask52x4(%rip),%ymm4,%ymm0
|
|
+ shlq $5,%r14
|
|
+ vmovapd (%rdx,%r14), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm4,%ymm4
|
|
+
|
|
+ shrb $4,%r13b
|
|
+ andq $0xf,%r13
|
|
+ vpsubq .Lmask52x4(%rip),%ymm9,%ymm0
|
|
+ shlq $5,%r13
|
|
+ vmovapd (%rdx,%r13), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm9,%ymm9
|
|
+
|
|
+ movb %r12b,%r11b
|
|
+ andq $0xf,%r12
|
|
+ vpsubq .Lmask52x4(%rip),%ymm10,%ymm0
|
|
+ shlq $5,%r12
|
|
+ vmovapd (%rdx,%r12), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm10,%ymm10
|
|
+
|
|
+ shrb $4,%r11b
|
|
+ andq $0xf,%r11
|
|
+ vpsubq .Lmask52x4(%rip),%ymm11,%ymm0
|
|
+ shlq $5,%r11
|
|
+ vmovapd (%rdx,%r11), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm11,%ymm11
|
|
+
|
|
+ andq $0xf,%r10
|
|
+ vpsubq .Lmask52x4(%rip),%ymm12,%ymm0
|
|
+ shlq $5,%r10
|
|
+ vmovapd (%rdx,%r10), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm12,%ymm12
|
|
+
|
|
+
|
|
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
|
|
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
|
|
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
|
|
+ vpand .Lmask52x4(%rip),%ymm11,%ymm11
|
|
+ vpand .Lmask52x4(%rip),%ymm12,%ymm12
|
|
+
|
|
+ vmovdqu %ymm3,0(%rdi)
|
|
+ vmovdqu %ymm5,32(%rdi)
|
|
+ vmovdqu %ymm6,64(%rdi)
|
|
+ vmovdqu %ymm7,96(%rdi)
|
|
+ vmovdqu %ymm8,128(%rdi)
|
|
+
|
|
+ vmovdqu %ymm4,160(%rdi)
|
|
+ vmovdqu %ymm9,192(%rdi)
|
|
+ vmovdqu %ymm10,224(%rdi)
|
|
+ vmovdqu %ymm11,256(%rdi)
|
|
+ vmovdqu %ymm12,288(%rdi)
|
|
+
|
|
+ vzeroupper
|
|
+ movq 0(%rsp),%r15
|
|
+.cfi_restore %r15
|
|
+ movq 8(%rsp),%r14
|
|
+.cfi_restore %r14
|
|
+ movq 16(%rsp),%r13
|
|
+.cfi_restore %r13
|
|
+ movq 24(%rsp),%r12
|
|
+.cfi_restore %r12
|
|
+ movq 32(%rsp),%rbp
|
|
+.cfi_restore %rbp
|
|
+ movq 40(%rsp),%rbx
|
|
+.cfi_restore %rbx
|
|
+ leaq 48(%rsp),%rsp
|
|
+.cfi_adjust_cfa_offset -48
|
|
+.Lossl_rsaz_amm52x20_x2_avxifma256_epilogue:
|
|
+ .byte 0xf3,0xc3
|
|
+.cfi_endproc
|
|
+.size ossl_rsaz_amm52x20_x2_avxifma256, .-ossl_rsaz_amm52x20_x2_avxifma256
|
|
+.text
|
|
+
|
|
+.align 32
|
|
+.globl ossl_extract_multiplier_2x20_win5_avx
|
|
+.type ossl_extract_multiplier_2x20_win5_avx,@function
|
|
+ossl_extract_multiplier_2x20_win5_avx:
|
|
+.cfi_startproc
|
|
+.byte 243,15,30,250
|
|
+ vmovapd .Lones(%rip),%ymm14
|
|
+ vmovq %rdx,%xmm10
|
|
+ vpbroadcastq %xmm10,%ymm12
|
|
+ vmovq %rcx,%xmm10
|
|
+ vpbroadcastq %xmm10,%ymm13
|
|
+ leaq 10240(%rsi),%rax
|
|
+
|
|
+
|
|
+ vpxor %xmm0,%xmm0,%xmm0
|
|
+ vmovapd %ymm0,%ymm11
|
|
+ vmovapd %ymm0,%ymm1
|
|
+ vmovapd %ymm0,%ymm2
|
|
+ vmovapd %ymm0,%ymm3
|
|
+ vmovapd %ymm0,%ymm4
|
|
+ vmovapd %ymm0,%ymm5
|
|
+ vmovapd %ymm0,%ymm6
|
|
+ vmovapd %ymm0,%ymm7
|
|
+ vmovapd %ymm0,%ymm8
|
|
+ vmovapd %ymm0,%ymm9
|
|
+
|
|
+.align 32
|
|
+.Lloop:
|
|
+ vpcmpeqq %ymm11,%ymm12,%ymm15
|
|
+ vmovdqu 0(%rsi),%ymm10
|
|
+ vblendvpd %ymm15,%ymm10,%ymm0,%ymm0
|
|
+ vmovdqu 32(%rsi),%ymm10
|
|
+ vblendvpd %ymm15,%ymm10,%ymm1,%ymm1
|
|
+ vmovdqu 64(%rsi),%ymm10
|
|
+ vblendvpd %ymm15,%ymm10,%ymm2,%ymm2
|
|
+ vmovdqu 96(%rsi),%ymm10
|
|
+ vblendvpd %ymm15,%ymm10,%ymm3,%ymm3
|
|
+ vmovdqu 128(%rsi),%ymm10
|
|
+ vblendvpd %ymm15,%ymm10,%ymm4,%ymm4
|
|
+ vpcmpeqq %ymm11,%ymm13,%ymm15
|
|
+ vmovdqu 160(%rsi),%ymm10
|
|
+ vblendvpd %ymm15,%ymm10,%ymm5,%ymm5
|
|
+ vmovdqu 192(%rsi),%ymm10
|
|
+ vblendvpd %ymm15,%ymm10,%ymm6,%ymm6
|
|
+ vmovdqu 224(%rsi),%ymm10
|
|
+ vblendvpd %ymm15,%ymm10,%ymm7,%ymm7
|
|
+ vmovdqu 256(%rsi),%ymm10
|
|
+ vblendvpd %ymm15,%ymm10,%ymm8,%ymm8
|
|
+ vmovdqu 288(%rsi),%ymm10
|
|
+ vblendvpd %ymm15,%ymm10,%ymm9,%ymm9
|
|
+ vpaddq %ymm14,%ymm11,%ymm11
|
|
+ addq $320,%rsi
|
|
+ cmpq %rsi,%rax
|
|
+ jne .Lloop
|
|
+ vmovdqu %ymm0,0(%rdi)
|
|
+ vmovdqu %ymm1,32(%rdi)
|
|
+ vmovdqu %ymm2,64(%rdi)
|
|
+ vmovdqu %ymm3,96(%rdi)
|
|
+ vmovdqu %ymm4,128(%rdi)
|
|
+ vmovdqu %ymm5,160(%rdi)
|
|
+ vmovdqu %ymm6,192(%rdi)
|
|
+ vmovdqu %ymm7,224(%rdi)
|
|
+ vmovdqu %ymm8,256(%rdi)
|
|
+ vmovdqu %ymm9,288(%rdi)
|
|
+ .byte 0xf3,0xc3
|
|
+.cfi_endproc
|
|
+.size ossl_extract_multiplier_2x20_win5_avx, .-ossl_extract_multiplier_2x20_win5_avx
|
|
+.section .rodata
|
|
+.align 32
|
|
+.Lones:
|
|
+.quad 1,1,1,1
|
|
+.Lzeros:
|
|
+.quad 0,0,0,0
|
|
+ .section ".note.gnu.property", "a"
|
|
+ .p2align 3
|
|
+ .long 1f - 0f
|
|
+ .long 4f - 1f
|
|
+ .long 5
|
|
+0:
|
|
+ # "GNU" encoded with .byte, since .asciz isn't supported
|
|
+ # on Solaris.
|
|
+ .byte 0x47
|
|
+ .byte 0x4e
|
|
+ .byte 0x55
|
|
+ .byte 0
|
|
+1:
|
|
+ .p2align 3
|
|
+ .long 0xc0000002
|
|
+ .long 3f - 2f
|
|
+2:
|
|
+ .long 3
|
|
+3:
|
|
+ .p2align 3
|
|
+4:
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-3k-avxifma.s b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-3k-avxifma.s
|
|
new file mode 100644
|
|
index 0000000000..1e07eb24d6
|
|
--- /dev/null
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-3k-avxifma.s
|
|
@@ -0,0 +1,1768 @@
|
|
+.text
|
|
+
|
|
+.globl ossl_rsaz_amm52x30_x1_avxifma256 # Exported routine; going by its name, an "almost Montgomery" multiply over 30 digits of 52 bits using AVX-IFMA - TODO confirm against the generator script
|
|
+.type ossl_rsaz_amm52x30_x1_avxifma256,@function # Register use visible below: %rdi = 256-byte result, %rsi and %rcx = 256-byte vector operands,
|
|
+.align 32 # %rdx = array of 30 qwords (one consumed per round), %r8 = 64-bit constant multiplied into the running sum
|
|
+ossl_rsaz_amm52x30_x1_avxifma256:
|
|
+.cfi_startproc
|
|
+.byte 243,15,30,250 # endbr64 (F3 0F 1E FA) emitted as raw bytes
|
|
+ pushq %rbx # save six callee-saved registers; they are reloaded in the epilogue
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %rbx,-16
|
|
+ pushq %rbp
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %rbp,-24
|
|
+ pushq %r12
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %r12,-32
|
|
+ pushq %r13
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %r13,-40
|
|
+ pushq %r14
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %r14,-48
|
|
+ pushq %r15
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %r15,-56
|
|
+
|
|
+ vpxor %ymm0,%ymm0,%ymm0 # ymm0 = 0
|
|
+ vmovapd %ymm0,%ymm3 # ymm3..ymm10 = 0: eight vector accumulators (32 qword lanes in total)
|
|
+ vmovapd %ymm0,%ymm4
|
|
+ vmovapd %ymm0,%ymm5
|
|
+ vmovapd %ymm0,%ymm6
|
|
+ vmovapd %ymm0,%ymm7
|
|
+ vmovapd %ymm0,%ymm8
|
|
+ vmovapd %ymm0,%ymm9
|
|
+ vmovapd %ymm0,%ymm10
|
|
+
|
|
+ xorl %r9d,%r9d # r9 = 0: scalar running sum for the lowest digit
|
|
+
|
|
+ movq %rdx,%r11 # r11 walks the qword array; %rdx is needed below as the implicit mulx source
|
|
+ movq $0xfffffffffffff,%rax # rax = 2^52 - 1 (52-bit digit mask)
|
|
+
|
|
+
|
|
+ movl $7,%ebx # 7 iterations x 4 unrolled rounds = 28 rounds; 2 more rounds follow the loop
|
|
+
|
|
+.align 32
|
|
+.Lloop7:
|
|
+ movq 0(%r11),%r13 # ---- round on qword 0(%r11) ----
|
|
+
|
|
+ vpbroadcastq 0(%r11),%ymm1 # ymm1 = that qword in all four lanes
|
|
+ movq 0(%rsi),%rdx
|
|
+ mulxq %r13,%r13,%r12 # r12:r13 = 0(%rsi) * qword (r13 = low half, r12 = high half)
|
|
+ addq %r13,%r9 # r9 += low half
|
|
+ movq %r12,%r10
|
|
+ adcq $0,%r10 # r10 = high half + carry
|
|
+
|
|
+ movq %r8,%r13
|
|
+ imulq %r9,%r13
|
|
+ andq %rax,%r13 # r13 = (r8 * r9) mod 2^52
|
|
+
|
|
+ vmovq %r13,%xmm2
|
|
+ vpbroadcastq %xmm2,%ymm2 # ymm2 = r13 in all four lanes
|
|
+ movq 0(%rcx),%rdx
|
|
+ mulxq %r13,%r13,%r12 # r12:r13 = 0(%rcx) * r13
|
|
+ addq %r13,%r9
|
|
+ adcq %r12,%r10 # r10:r9 += that product
|
|
+
|
|
+ shrq $52,%r9
|
|
+ salq $12,%r10
|
|
+ orq %r10,%r9 # r9 = (r9 >> 52) | (r10 << 12): the scalar sum shifted right by 52 bits
|
|
+
|
|
+ leaq -264(%rsp),%rsp # reserve 264 bytes of stack scratch (lea leaves the flags untouched)
|
|
+
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 # accumulators += low 52 bits of (%rsi lanes * ymm1)
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 # accumulators += low 52 bits of (%rcx lanes * ymm2)
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+
|
|
+ vmovdqu %ymm3,0(%rsp) # spill all 32 accumulator lanes to the scratch area...
|
|
+ vmovdqu %ymm4,32(%rsp)
|
|
+ vmovdqu %ymm5,64(%rsp)
|
|
+ vmovdqu %ymm6,96(%rsp)
|
|
+ vmovdqu %ymm7,128(%rsp)
|
|
+ vmovdqu %ymm8,160(%rsp)
|
|
+ vmovdqu %ymm9,192(%rsp)
|
|
+ vmovdqu %ymm10,224(%rsp)
|
|
+ movq $0,256(%rsp) # ...append one zero qword...
|
|
+
|
|
+ vmovdqu 8(%rsp),%ymm3 # ...and reload 8 bytes higher: every lane moves down one position, zero enters at the top
|
|
+ vmovdqu 40(%rsp),%ymm4
|
|
+ vmovdqu 72(%rsp),%ymm5
|
|
+ vmovdqu 104(%rsp),%ymm6
|
|
+ vmovdqu 136(%rsp),%ymm7
|
|
+ vmovdqu 168(%rsp),%ymm8
|
|
+ vmovdqu 200(%rsp),%ymm9
|
|
+ vmovdqu 232(%rsp),%ymm10
|
|
+
|
|
+ addq 8(%rsp),%r9 # fold the spilled lane 1 (the new lane 0) into the scalar sum
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 # accumulators += high 52 bits of (%rsi lanes * ymm1)
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 # accumulators += high 52 bits of (%rcx lanes * ymm2)
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+ leaq 264(%rsp),%rsp # release the scratch area
|
|
+ movq 8(%r11),%r13 # ---- round on qword 8(%r11): same sequence as the first round ----
|
|
+
|
|
+ vpbroadcastq 8(%r11),%ymm1
|
|
+ movq 0(%rsi),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ movq %r12,%r10
|
|
+ adcq $0,%r10
|
|
+
|
|
+ movq %r8,%r13
|
|
+ imulq %r9,%r13
|
|
+ andq %rax,%r13
|
|
+
|
|
+ vmovq %r13,%xmm2
|
|
+ vpbroadcastq %xmm2,%ymm2
|
|
+ movq 0(%rcx),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ adcq %r12,%r10
|
|
+
|
|
+ shrq $52,%r9
|
|
+ salq $12,%r10
|
|
+ orq %r10,%r9
|
|
+
|
|
+ leaq -264(%rsp),%rsp
|
|
+
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+
|
|
+ vmovdqu %ymm3,0(%rsp)
|
|
+ vmovdqu %ymm4,32(%rsp)
|
|
+ vmovdqu %ymm5,64(%rsp)
|
|
+ vmovdqu %ymm6,96(%rsp)
|
|
+ vmovdqu %ymm7,128(%rsp)
|
|
+ vmovdqu %ymm8,160(%rsp)
|
|
+ vmovdqu %ymm9,192(%rsp)
|
|
+ vmovdqu %ymm10,224(%rsp)
|
|
+ movq $0,256(%rsp)
|
|
+
|
|
+ vmovdqu 8(%rsp),%ymm3
|
|
+ vmovdqu 40(%rsp),%ymm4
|
|
+ vmovdqu 72(%rsp),%ymm5
|
|
+ vmovdqu 104(%rsp),%ymm6
|
|
+ vmovdqu 136(%rsp),%ymm7
|
|
+ vmovdqu 168(%rsp),%ymm8
|
|
+ vmovdqu 200(%rsp),%ymm9
|
|
+ vmovdqu 232(%rsp),%ymm10
|
|
+
|
|
+ addq 8(%rsp),%r9
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+ leaq 264(%rsp),%rsp
|
|
+ movq 16(%r11),%r13 # ---- round on qword 16(%r11): same sequence as the first round ----
|
|
+
|
|
+ vpbroadcastq 16(%r11),%ymm1
|
|
+ movq 0(%rsi),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ movq %r12,%r10
|
|
+ adcq $0,%r10
|
|
+
|
|
+ movq %r8,%r13
|
|
+ imulq %r9,%r13
|
|
+ andq %rax,%r13
|
|
+
|
|
+ vmovq %r13,%xmm2
|
|
+ vpbroadcastq %xmm2,%ymm2
|
|
+ movq 0(%rcx),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ adcq %r12,%r10
|
|
+
|
|
+ shrq $52,%r9
|
|
+ salq $12,%r10
|
|
+ orq %r10,%r9
|
|
+
|
|
+ leaq -264(%rsp),%rsp
|
|
+
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+
|
|
+ vmovdqu %ymm3,0(%rsp)
|
|
+ vmovdqu %ymm4,32(%rsp)
|
|
+ vmovdqu %ymm5,64(%rsp)
|
|
+ vmovdqu %ymm6,96(%rsp)
|
|
+ vmovdqu %ymm7,128(%rsp)
|
|
+ vmovdqu %ymm8,160(%rsp)
|
|
+ vmovdqu %ymm9,192(%rsp)
|
|
+ vmovdqu %ymm10,224(%rsp)
|
|
+ movq $0,256(%rsp)
|
|
+
|
|
+ vmovdqu 8(%rsp),%ymm3
|
|
+ vmovdqu 40(%rsp),%ymm4
|
|
+ vmovdqu 72(%rsp),%ymm5
|
|
+ vmovdqu 104(%rsp),%ymm6
|
|
+ vmovdqu 136(%rsp),%ymm7
|
|
+ vmovdqu 168(%rsp),%ymm8
|
|
+ vmovdqu 200(%rsp),%ymm9
|
|
+ vmovdqu 232(%rsp),%ymm10
|
|
+
|
|
+ addq 8(%rsp),%r9
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+ leaq 264(%rsp),%rsp
|
|
+ movq 24(%r11),%r13 # ---- round on qword 24(%r11): same sequence as the first round ----
|
|
+
|
|
+ vpbroadcastq 24(%r11),%ymm1
|
|
+ movq 0(%rsi),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ movq %r12,%r10
|
|
+ adcq $0,%r10
|
|
+
|
|
+ movq %r8,%r13
|
|
+ imulq %r9,%r13
|
|
+ andq %rax,%r13
|
|
+
|
|
+ vmovq %r13,%xmm2
|
|
+ vpbroadcastq %xmm2,%ymm2
|
|
+ movq 0(%rcx),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ adcq %r12,%r10
|
|
+
|
|
+ shrq $52,%r9
|
|
+ salq $12,%r10
|
|
+ orq %r10,%r9
|
|
+
|
|
+ leaq -264(%rsp),%rsp
|
|
+
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+
|
|
+ vmovdqu %ymm3,0(%rsp)
|
|
+ vmovdqu %ymm4,32(%rsp)
|
|
+ vmovdqu %ymm5,64(%rsp)
|
|
+ vmovdqu %ymm6,96(%rsp)
|
|
+ vmovdqu %ymm7,128(%rsp)
|
|
+ vmovdqu %ymm8,160(%rsp)
|
|
+ vmovdqu %ymm9,192(%rsp)
|
|
+ vmovdqu %ymm10,224(%rsp)
|
|
+ movq $0,256(%rsp)
|
|
+
|
|
+ vmovdqu 8(%rsp),%ymm3
|
|
+ vmovdqu 40(%rsp),%ymm4
|
|
+ vmovdqu 72(%rsp),%ymm5
|
|
+ vmovdqu 104(%rsp),%ymm6
|
|
+ vmovdqu 136(%rsp),%ymm7
|
|
+ vmovdqu 168(%rsp),%ymm8
|
|
+ vmovdqu 200(%rsp),%ymm9
|
|
+ vmovdqu 232(%rsp),%ymm10
|
|
+
|
|
+ addq 8(%rsp),%r9
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+ leaq 264(%rsp),%rsp
|
|
+ leaq 32(%r11),%r11 # step past the four qwords just consumed
|
|
+ decl %ebx
|
|
+ jne .Lloop7 # repeat until all 7 groups of four rounds are done
|
|
+ movq 0(%r11),%r13 # ---- round 29 (qword 0(%r11) after the loop): same sequence as the first round ----
|
|
+
|
|
+ vpbroadcastq 0(%r11),%ymm1
|
|
+ movq 0(%rsi),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ movq %r12,%r10
|
|
+ adcq $0,%r10
|
|
+
|
|
+ movq %r8,%r13
|
|
+ imulq %r9,%r13
|
|
+ andq %rax,%r13
|
|
+
|
|
+ vmovq %r13,%xmm2
|
|
+ vpbroadcastq %xmm2,%ymm2
|
|
+ movq 0(%rcx),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ adcq %r12,%r10
|
|
+
|
|
+ shrq $52,%r9
|
|
+ salq $12,%r10
|
|
+ orq %r10,%r9
|
|
+
|
|
+ leaq -264(%rsp),%rsp
|
|
+
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+
|
|
+ vmovdqu %ymm3,0(%rsp)
|
|
+ vmovdqu %ymm4,32(%rsp)
|
|
+ vmovdqu %ymm5,64(%rsp)
|
|
+ vmovdqu %ymm6,96(%rsp)
|
|
+ vmovdqu %ymm7,128(%rsp)
|
|
+ vmovdqu %ymm8,160(%rsp)
|
|
+ vmovdqu %ymm9,192(%rsp)
|
|
+ vmovdqu %ymm10,224(%rsp)
|
|
+ movq $0,256(%rsp)
|
|
+
|
|
+ vmovdqu 8(%rsp),%ymm3
|
|
+ vmovdqu 40(%rsp),%ymm4
|
|
+ vmovdqu 72(%rsp),%ymm5
|
|
+ vmovdqu 104(%rsp),%ymm6
|
|
+ vmovdqu 136(%rsp),%ymm7
|
|
+ vmovdqu 168(%rsp),%ymm8
|
|
+ vmovdqu 200(%rsp),%ymm9
|
|
+ vmovdqu 232(%rsp),%ymm10
|
|
+
|
|
+ addq 8(%rsp),%r9
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+ leaq 264(%rsp),%rsp
|
|
+ movq 8(%r11),%r13 # ---- round 30 (qword 8(%r11) after the loop): same sequence as the first round ----
|
|
+
|
|
+ vpbroadcastq 8(%r11),%ymm1
|
|
+ movq 0(%rsi),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ movq %r12,%r10
|
|
+ adcq $0,%r10
|
|
+
|
|
+ movq %r8,%r13
|
|
+ imulq %r9,%r13
|
|
+ andq %rax,%r13
|
|
+
|
|
+ vmovq %r13,%xmm2
|
|
+ vpbroadcastq %xmm2,%ymm2
|
|
+ movq 0(%rcx),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ adcq %r12,%r10
|
|
+
|
|
+ shrq $52,%r9
|
|
+ salq $12,%r10
|
|
+ orq %r10,%r9
|
|
+
|
|
+ leaq -264(%rsp),%rsp
|
|
+
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+
|
|
+ vmovdqu %ymm3,0(%rsp)
|
|
+ vmovdqu %ymm4,32(%rsp)
|
|
+ vmovdqu %ymm5,64(%rsp)
|
|
+ vmovdqu %ymm6,96(%rsp)
|
|
+ vmovdqu %ymm7,128(%rsp)
|
|
+ vmovdqu %ymm8,160(%rsp)
|
|
+ vmovdqu %ymm9,192(%rsp)
|
|
+ vmovdqu %ymm10,224(%rsp)
|
|
+ movq $0,256(%rsp)
|
|
+
|
|
+ vmovdqu 8(%rsp),%ymm3
|
|
+ vmovdqu 40(%rsp),%ymm4
|
|
+ vmovdqu 72(%rsp),%ymm5
|
|
+ vmovdqu 104(%rsp),%ymm6
|
|
+ vmovdqu 136(%rsp),%ymm7
|
|
+ vmovdqu 168(%rsp),%ymm8
|
|
+ vmovdqu 200(%rsp),%ymm9
|
|
+ vmovdqu 232(%rsp),%ymm10
|
|
+
|
|
+ addq 8(%rsp),%r9
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+ leaq 264(%rsp),%rsp
|
|
+
|
|
+ vmovq %r9,%xmm0 # ---- normalisation: bring every lane back to 52 bits ----
|
|
+ vpbroadcastq %xmm0,%ymm0
|
|
+ vpblendd $3,%ymm0,%ymm3,%ymm3 # lane 0 of ymm3 = scalar sum r9 (imm 3 takes the low two dwords from ymm0)
|
|
+
|
|
+
|
|
+
|
|
+ vpsrlq $52,%ymm3,%ymm0 # ymm0,ymm1,ymm2,ymm11..ymm15 = per-lane carries (bits 52 and above)
|
|
+ vpsrlq $52,%ymm4,%ymm1
|
|
+ vpsrlq $52,%ymm5,%ymm2
|
|
+ vpsrlq $52,%ymm6,%ymm11
|
|
+ vpsrlq $52,%ymm7,%ymm12
|
|
+ vpsrlq $52,%ymm8,%ymm13
|
|
+ vpsrlq $52,%ymm9,%ymm14
|
|
+ vpsrlq $52,%ymm10,%ymm15
|
|
+
|
|
+ leaq -32(%rsp),%rsp
|
|
+ vmovupd %ymm3,(%rsp) # park ymm3 on the stack so the register can serve as a temporary
|
|
+
|
|
+
|
|
+ vpermq $144,%ymm15,%ymm15 # move the carries up one lane (source lanes 0,0,1,2)...
|
|
+ vpermq $3,%ymm14,%ymm3 # ...fetch the top lane of the vector below into lane 0...
|
|
+ vblendpd $1,%ymm3,%ymm15,%ymm15 # ...and drop it into lane 0; the same three steps repeat down the chain
|
|
+
|
|
+ vpermq $144,%ymm14,%ymm14
|
|
+ vpermq $3,%ymm13,%ymm3
|
|
+ vblendpd $1,%ymm3,%ymm14,%ymm14
|
|
+
|
|
+ vpermq $144,%ymm13,%ymm13
|
|
+ vpermq $3,%ymm12,%ymm3
|
|
+ vblendpd $1,%ymm3,%ymm13,%ymm13
|
|
+
|
|
+ vpermq $144,%ymm12,%ymm12
|
|
+ vpermq $3,%ymm11,%ymm3
|
|
+ vblendpd $1,%ymm3,%ymm12,%ymm12
|
|
+
|
|
+ vpermq $144,%ymm11,%ymm11
|
|
+ vpermq $3,%ymm2,%ymm3
|
|
+ vblendpd $1,%ymm3,%ymm11,%ymm11
|
|
+
|
|
+ vpermq $144,%ymm2,%ymm2
|
|
+ vpermq $3,%ymm1,%ymm3
|
|
+ vblendpd $1,%ymm3,%ymm2,%ymm2
|
|
+
|
|
+ vpermq $144,%ymm1,%ymm1
|
|
+ vpermq $3,%ymm0,%ymm3
|
|
+ vblendpd $1,%ymm3,%ymm1,%ymm1
|
|
+
|
|
+ vpermq $144,%ymm0,%ymm0
|
|
+ vpand .Lhigh64x3(%rip),%ymm0,%ymm0 # lowest vector: lane 0 receives no carry, so clear it
|
|
+
|
|
+ vmovupd (%rsp),%ymm3 # restore ymm3
|
|
+ leaq 32(%rsp),%rsp
|
|
+
|
|
+
|
|
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3 # keep only the low 52 bits of each lane
|
|
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
|
|
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
|
|
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
|
|
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
|
|
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
|
|
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
|
|
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
|
|
+
|
|
+
|
|
+ vpaddq %ymm0,%ymm3,%ymm3 # add the carries that were shifted up one lane
|
|
+ vpaddq %ymm1,%ymm4,%ymm4
|
|
+ vpaddq %ymm2,%ymm5,%ymm5
|
|
+ vpaddq %ymm11,%ymm6,%ymm6
|
|
+ vpaddq %ymm12,%ymm7,%ymm7
|
|
+ vpaddq %ymm13,%ymm8,%ymm8
|
|
+ vpaddq %ymm14,%ymm9,%ymm9
|
|
+ vpaddq %ymm15,%ymm10,%ymm10
|
|
+
|
|
+
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm0 # lanes greater than 2^52-1 generate a carry
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm1
|
|
+ vmovmskpd %ymm0,%r14d # one bit per lane
|
|
+ vmovmskpd %ymm1,%r13d
|
|
+ shlb $4,%r13b
|
|
+ orb %r13b,%r14b # r14b = lane bits of two vectors packed into one byte
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm2
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm11
|
|
+ vmovmskpd %ymm2,%r13d
|
|
+ vmovmskpd %ymm11,%r12d
|
|
+ shlb $4,%r12b
|
|
+ orb %r12b,%r13b
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm12
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm13
|
|
+ vmovmskpd %ymm12,%r12d
|
|
+ vmovmskpd %ymm13,%r11d
|
|
+ shlb $4,%r11b
|
|
+ orb %r11b,%r12b
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm14
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm15
|
|
+ vmovmskpd %ymm14,%r11d
|
|
+ vmovmskpd %ymm15,%r10d
|
|
+ shlb $4,%r10b
|
|
+ orb %r10b,%r11b
|
|
+
|
|
+ addb %r14b,%r14b # shift the 32-bit "generate" mask left by one bit across r14b,r13b,r12b,r11b
|
|
+ adcb %r13b,%r13b
|
|
+ adcb %r12b,%r12b
|
|
+ adcb %r11b,%r11b
|
|
+
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm0 # lanes equal to 2^52-1 would pass an incoming carry on ("propagate")
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm1
|
|
+ vmovmskpd %ymm0,%r9d
|
|
+ vmovmskpd %ymm1,%r8d
|
|
+ shlb $4,%r8b
|
|
+ orb %r8b,%r9b
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm2
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm11
|
|
+ vmovmskpd %ymm2,%r8d
|
|
+ vmovmskpd %ymm11,%edx
|
|
+ shlb $4,%dl
|
|
+ orb %dl,%r8b
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm12
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm13
|
|
+ vmovmskpd %ymm12,%edx
|
|
+ vmovmskpd %ymm13,%ecx
|
|
+ shlb $4,%cl
|
|
+ orb %cl,%dl
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm14
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm15
|
|
+ vmovmskpd %ymm14,%ecx
|
|
+ vmovmskpd %ymm15,%ebx
|
|
+ shlb $4,%bl
|
|
+ orb %bl,%cl
|
|
+
|
|
+ addb %r9b,%r14b # shifted generate + propagate: the add chain ripples a carry through runs of propagate bits
|
|
+ adcb %r8b,%r13b
|
|
+ adcb %dl,%r12b
|
|
+ adcb %cl,%r11b
|
|
+
|
|
+ xorb %r9b,%r14b # xor with propagate leaves a 1 for every lane that receives a carry
|
|
+ xorb %r8b,%r13b
|
|
+ xorb %dl,%r12b
|
|
+ xorb %cl,%r11b
|
|
+
|
|
+ leaq .Lkmasklut(%rip),%rdx # rdx = table of 32-byte blend masks, indexed by a 4-bit lane mask
|
|
+
|
|
+ movb %r14b,%r10b # keep a copy: the high nibble belongs to the next vector
|
|
+ andq $0xf,%r14 # low nibble = lane mask for ymm3
|
|
+ vpsubq .Lmask52x4(%rip),%ymm3,%ymm0 # ymm0 = lane - (2^52-1); once masked to 52 bits this equals lane + 1
|
|
+ shlq $5,%r14 # index * 32 bytes per table entry
|
|
+ vmovapd (%rdx,%r14), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm3,%ymm3 # take the adjusted value only in the flagged lanes
|
|
+
|
|
+ shrb $4,%r10b # high nibble = lane mask for ymm4; the same steps repeat for each vector
|
|
+ andq $0xf,%r10
|
|
+ vpsubq .Lmask52x4(%rip),%ymm4,%ymm0
|
|
+ shlq $5,%r10
|
|
+ vmovapd (%rdx,%r10), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm4,%ymm4
|
|
+
|
|
+ movb %r13b,%r10b
|
|
+ andq $0xf,%r13
|
|
+ vpsubq .Lmask52x4(%rip),%ymm5,%ymm0
|
|
+ shlq $5,%r13
|
|
+ vmovapd (%rdx,%r13), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm5,%ymm5
|
|
+
|
|
+ shrb $4,%r10b
|
|
+ andq $0xf,%r10
|
|
+ vpsubq .Lmask52x4(%rip),%ymm6,%ymm0
|
|
+ shlq $5,%r10
|
|
+ vmovapd (%rdx,%r10), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm6,%ymm6
|
|
+
|
|
+ movb %r12b,%r10b
|
|
+ andq $0xf,%r12
|
|
+ vpsubq .Lmask52x4(%rip),%ymm7,%ymm0
|
|
+ shlq $5,%r12
|
|
+ vmovapd (%rdx,%r12), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm7,%ymm7
|
|
+
|
|
+ shrb $4,%r10b
|
|
+ andq $0xf,%r10
|
|
+ vpsubq .Lmask52x4(%rip),%ymm8,%ymm0
|
|
+ shlq $5,%r10
|
|
+ vmovapd (%rdx,%r10), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm8,%ymm8
|
|
+
|
|
+ movb %r11b,%r10b
|
|
+ andq $0xf,%r11
|
|
+ vpsubq .Lmask52x4(%rip),%ymm9,%ymm0
|
|
+ shlq $5,%r11
|
|
+ vmovapd (%rdx,%r11), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm9,%ymm9
|
|
+
|
|
+ shrb $4,%r10b
|
|
+ andq $0xf,%r10
|
|
+ vpsubq .Lmask52x4(%rip),%ymm10,%ymm0
|
|
+ shlq $5,%r10
|
|
+ vmovapd (%rdx,%r10), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm10,%ymm10
|
|
+
|
|
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3 # final reduction of every lane to 52 bits
|
|
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
|
|
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
|
|
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
|
|
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
|
|
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
|
|
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
|
|
+
|
|
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
|
|
+
|
|
+ vmovdqu %ymm3,0(%rdi) # write the 256-byte result to the buffer at %rdi
|
|
+ vmovdqu %ymm4,32(%rdi)
|
|
+ vmovdqu %ymm5,64(%rdi)
|
|
+ vmovdqu %ymm6,96(%rdi)
|
|
+ vmovdqu %ymm7,128(%rdi)
|
|
+ vmovdqu %ymm8,160(%rdi)
|
|
+ vmovdqu %ymm9,192(%rdi)
|
|
+ vmovdqu %ymm10,224(%rdi)
|
|
+
|
|
+ vzeroupper # clear the upper ymm halves before returning
|
|
+ leaq (%rsp),%rax # rax = base of the six saved-register slots
|
|
+.cfi_def_cfa_register %rax
|
|
+ movq 0(%rax),%r15 # reload the saved registers with moves instead of pops
|
|
+.cfi_restore %r15
|
|
+ movq 8(%rax),%r14
|
|
+.cfi_restore %r14
|
|
+ movq 16(%rax),%r13
|
|
+.cfi_restore %r13
|
|
+ movq 24(%rax),%r12
|
|
+.cfi_restore %r12
|
|
+ movq 32(%rax),%rbp
|
|
+.cfi_restore %rbp
|
|
+ movq 40(%rax),%rbx
|
|
+.cfi_restore %rbx
|
|
+ leaq 48(%rax),%rsp # discard the six 8-byte slots
|
|
+.cfi_def_cfa %rsp,8
|
|
+.Lossl_rsaz_amm52x30_x1_avxifma256_epilogue:
|
|
+ .byte 0xf3,0xc3 # "rep ret" emitted as raw bytes
|
|
+.cfi_endproc
|
|
+.size ossl_rsaz_amm52x30_x1_avxifma256, .-ossl_rsaz_amm52x30_x1_avxifma256
|
|
+.section .rodata # read-only vector constants referenced RIP-relative by the code in this file
|
|
+.align 32 # every table below starts on a 32-byte boundary (.Lkmasklut is read with vmovapd)
|
|
+.Lmask52x4: # four lanes of 2^52-1: the 52-bit digit mask
|
|
+.quad 0xfffffffffffff
|
|
+.quad 0xfffffffffffff
|
|
+.quad 0xfffffffffffff
|
|
+.quad 0xfffffffffffff
|
|
+.Lhigh64x3: # lane 0 clear, lanes 1..3 all ones: AND with this zeroes lane 0 only
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.Lkmasklut: # 16 entries of 32 bytes; in entry i, lane j is all ones exactly when bit j of i is set
|
|
+
|
|
+.quad 0x0 # entry 0 (0000b)
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+
|
|
+.quad 0xffffffffffffffff # entry 1 (0001b)
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+
|
|
+.quad 0x0 # entry 2 (0010b)
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+
|
|
+.quad 0xffffffffffffffff # entry 3 (0011b)
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+
|
|
+.quad 0x0 # entry 4 (0100b)
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+
|
|
+.quad 0xffffffffffffffff # entry 5 (0101b)
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+
|
|
+.quad 0x0 # entry 6 (0110b)
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+
|
|
+.quad 0xffffffffffffffff # entry 7 (0111b)
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+
|
|
+.quad 0x0 # entry 8 (1000b)
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+
|
|
+.quad 0xffffffffffffffff # entry 9 (1001b)
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+
|
|
+.quad 0x0 # entry 10 (1010b)
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+
|
|
+.quad 0xffffffffffffffff # entry 11 (1011b)
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+
|
|
+.quad 0x0 # entry 12 (1100b)
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+
|
|
+.quad 0xffffffffffffffff # entry 13 (1101b)
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+
|
|
+.quad 0x0 # entry 14 (1110b)
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+
|
|
+.quad 0xffffffffffffffff # entry 15 (1111b)
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.text
|
|
+
|
|
+.globl ossl_rsaz_amm52x30_x2_avxifma256
|
|
+.type ossl_rsaz_amm52x30_x2_avxifma256,@function
|
|
+.align 32
|
|
+ossl_rsaz_amm52x30_x2_avxifma256:
|
|
+.cfi_startproc
|
|
+.byte 243,15,30,250
|
|
+ pushq %rbx
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %rbx,-16
|
|
+ pushq %rbp
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %rbp,-24
|
|
+ pushq %r12
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %r12,-32
|
|
+ pushq %r13
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %r13,-40
|
|
+ pushq %r14
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %r14,-48
|
|
+ pushq %r15
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %r15,-56
|
|
+
|
|
+ vpxor %ymm0,%ymm0,%ymm0
|
|
+ vmovapd %ymm0,%ymm3
|
|
+ vmovapd %ymm0,%ymm4
|
|
+ vmovapd %ymm0,%ymm5
|
|
+ vmovapd %ymm0,%ymm6
|
|
+ vmovapd %ymm0,%ymm7
|
|
+ vmovapd %ymm0,%ymm8
|
|
+ vmovapd %ymm0,%ymm9
|
|
+ vmovapd %ymm0,%ymm10
|
|
+
|
|
+ xorl %r9d,%r9d
|
|
+
|
|
+ movq %rdx,%r11
|
|
+ movq $0xfffffffffffff,%rax
|
|
+
|
|
+ movl $30,%ebx
|
|
+
|
|
+.align 32
|
|
+.Lloop30:
|
|
+ movq 0(%r11),%r13
|
|
+
|
|
+ vpbroadcastq 0(%r11),%ymm1
|
|
+ movq 0(%rsi),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ movq %r12,%r10
|
|
+ adcq $0,%r10
|
|
+
|
|
+ movq (%r8),%r13
|
|
+ imulq %r9,%r13
|
|
+ andq %rax,%r13
|
|
+
|
|
+ vmovq %r13,%xmm2
|
|
+ vpbroadcastq %xmm2,%ymm2
|
|
+ movq 0(%rcx),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ adcq %r12,%r10
|
|
+
|
|
+ shrq $52,%r9
|
|
+ salq $12,%r10
|
|
+ orq %r10,%r9
|
|
+
|
|
+ leaq -264(%rsp),%rsp
|
|
+
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+
|
|
+ vmovdqu %ymm3,0(%rsp)
|
|
+ vmovdqu %ymm4,32(%rsp)
|
|
+ vmovdqu %ymm5,64(%rsp)
|
|
+ vmovdqu %ymm6,96(%rsp)
|
|
+ vmovdqu %ymm7,128(%rsp)
|
|
+ vmovdqu %ymm8,160(%rsp)
|
|
+ vmovdqu %ymm9,192(%rsp)
|
|
+ vmovdqu %ymm10,224(%rsp)
|
|
+ movq $0,256(%rsp)
|
|
+
|
|
+ vmovdqu 8(%rsp),%ymm3
|
|
+ vmovdqu 40(%rsp),%ymm4
|
|
+ vmovdqu 72(%rsp),%ymm5
|
|
+ vmovdqu 104(%rsp),%ymm6
|
|
+ vmovdqu 136(%rsp),%ymm7
|
|
+ vmovdqu 168(%rsp),%ymm8
|
|
+ vmovdqu 200(%rsp),%ymm9
|
|
+ vmovdqu 232(%rsp),%ymm10
|
|
+
|
|
+ addq 8(%rsp),%r9
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+ leaq 264(%rsp),%rsp
|
|
+ leaq 8(%r11),%r11
|
|
+ decl %ebx
|
|
+ jne .Lloop30
|
|
+
|
|
+ pushq %r11
|
|
+ pushq %rsi
|
|
+ pushq %rcx
|
|
+ pushq %r8
|
|
+
|
|
+ vmovq %r9,%xmm0
|
|
+ vpbroadcastq %xmm0,%ymm0
|
|
+ vpblendd $3,%ymm0,%ymm3,%ymm3
|
|
+
|
|
+
|
|
+
|
|
+ vpsrlq $52,%ymm3,%ymm0
|
|
+ vpsrlq $52,%ymm4,%ymm1
|
|
+ vpsrlq $52,%ymm5,%ymm2
|
|
+ vpsrlq $52,%ymm6,%ymm11
|
|
+ vpsrlq $52,%ymm7,%ymm12
|
|
+ vpsrlq $52,%ymm8,%ymm13
|
|
+ vpsrlq $52,%ymm9,%ymm14
|
|
+ vpsrlq $52,%ymm10,%ymm15
|
|
+
|
|
+ leaq -32(%rsp),%rsp
|
|
+ vmovupd %ymm3,(%rsp)
|
|
+
|
|
+
|
|
+ vpermq $144,%ymm15,%ymm15
|
|
+ vpermq $3,%ymm14,%ymm3
|
|
+ vblendpd $1,%ymm3,%ymm15,%ymm15
|
|
+
|
|
+ vpermq $144,%ymm14,%ymm14
|
|
+ vpermq $3,%ymm13,%ymm3
|
|
+ vblendpd $1,%ymm3,%ymm14,%ymm14
|
|
+
|
|
+ vpermq $144,%ymm13,%ymm13
|
|
+ vpermq $3,%ymm12,%ymm3
|
|
+ vblendpd $1,%ymm3,%ymm13,%ymm13
|
|
+
|
|
+ vpermq $144,%ymm12,%ymm12
|
|
+ vpermq $3,%ymm11,%ymm3
|
|
+ vblendpd $1,%ymm3,%ymm12,%ymm12
|
|
+
|
|
+ vpermq $144,%ymm11,%ymm11
|
|
+ vpermq $3,%ymm2,%ymm3
|
|
+ vblendpd $1,%ymm3,%ymm11,%ymm11
|
|
+
|
|
+ vpermq $144,%ymm2,%ymm2
|
|
+ vpermq $3,%ymm1,%ymm3
|
|
+ vblendpd $1,%ymm3,%ymm2,%ymm2
|
|
+
|
|
+ vpermq $144,%ymm1,%ymm1
|
|
+ vpermq $3,%ymm0,%ymm3
|
|
+ vblendpd $1,%ymm3,%ymm1,%ymm1
|
|
+
|
|
+ vpermq $144,%ymm0,%ymm0
|
|
+ vpand .Lhigh64x3(%rip),%ymm0,%ymm0
|
|
+
|
|
+ vmovupd (%rsp),%ymm3
|
|
+ leaq 32(%rsp),%rsp
|
|
+
|
|
+
|
|
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
|
|
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
|
|
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
|
|
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
|
|
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
|
|
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
|
|
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
|
|
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
|
|
+
|
|
+
|
|
+ vpaddq %ymm0,%ymm3,%ymm3
|
|
+ vpaddq %ymm1,%ymm4,%ymm4
|
|
+ vpaddq %ymm2,%ymm5,%ymm5
|
|
+ vpaddq %ymm11,%ymm6,%ymm6
|
|
+ vpaddq %ymm12,%ymm7,%ymm7
|
|
+ vpaddq %ymm13,%ymm8,%ymm8
|
|
+ vpaddq %ymm14,%ymm9,%ymm9
|
|
+ vpaddq %ymm15,%ymm10,%ymm10
|
|
+
|
|
+
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm0
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm1
|
|
+ vmovmskpd %ymm0,%r14d
|
|
+ vmovmskpd %ymm1,%r13d
|
|
+ shlb $4,%r13b
|
|
+ orb %r13b,%r14b
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm2
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm11
|
|
+ vmovmskpd %ymm2,%r13d
|
|
+ vmovmskpd %ymm11,%r12d
|
|
+ shlb $4,%r12b
|
|
+ orb %r12b,%r13b
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm12
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm13
|
|
+ vmovmskpd %ymm12,%r12d
|
|
+ vmovmskpd %ymm13,%r11d
|
|
+ shlb $4,%r11b
|
|
+ orb %r11b,%r12b
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm14
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm15
|
|
+ vmovmskpd %ymm14,%r11d
|
|
+ vmovmskpd %ymm15,%r10d
|
|
+ shlb $4,%r10b
|
|
+ orb %r10b,%r11b
|
|
+
|
|
+ addb %r14b,%r14b
|
|
+ adcb %r13b,%r13b
|
|
+ adcb %r12b,%r12b
|
|
+ adcb %r11b,%r11b
|
|
+
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm0
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm1
|
|
+ vmovmskpd %ymm0,%r9d
|
|
+ vmovmskpd %ymm1,%r8d
|
|
+ shlb $4,%r8b
|
|
+ orb %r8b,%r9b
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm2
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm11
|
|
+ vmovmskpd %ymm2,%r8d
|
|
+ vmovmskpd %ymm11,%edx
|
|
+ shlb $4,%dl
|
|
+ orb %dl,%r8b
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm12
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm13
|
|
+ vmovmskpd %ymm12,%edx
|
|
+ vmovmskpd %ymm13,%ecx
|
|
+ shlb $4,%cl
|
|
+ orb %cl,%dl
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm14
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm15
|
|
+ vmovmskpd %ymm14,%ecx
|
|
+ vmovmskpd %ymm15,%ebx
|
|
+ shlb $4,%bl
|
|
+ orb %bl,%cl
|
|
+
|
|
+ addb %r9b,%r14b
|
|
+ adcb %r8b,%r13b
|
|
+ adcb %dl,%r12b
|
|
+ adcb %cl,%r11b
|
|
+
|
|
+ xorb %r9b,%r14b
|
|
+ xorb %r8b,%r13b
|
|
+ xorb %dl,%r12b
|
|
+ xorb %cl,%r11b
|
|
+
|
|
+ leaq .Lkmasklut(%rip),%rdx
|
|
+
|
|
+ movb %r14b,%r10b
|
|
+ andq $0xf,%r14
|
|
+ vpsubq .Lmask52x4(%rip),%ymm3,%ymm0
|
|
+ shlq $5,%r14
|
|
+ vmovapd (%rdx,%r14), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm3,%ymm3
|
|
+
|
|
+ shrb $4,%r10b
|
|
+ andq $0xf,%r10
|
|
+ vpsubq .Lmask52x4(%rip),%ymm4,%ymm0
|
|
+ shlq $5,%r10
|
|
+ vmovapd (%rdx,%r10), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm4,%ymm4
|
|
+
|
|
+ movb %r13b,%r10b
|
|
+ andq $0xf,%r13
|
|
+ vpsubq .Lmask52x4(%rip),%ymm5,%ymm0
|
|
+ shlq $5,%r13
|
|
+ vmovapd (%rdx,%r13), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm5,%ymm5
|
|
+
|
|
+ shrb $4,%r10b
|
|
+ andq $0xf,%r10
|
|
+ vpsubq .Lmask52x4(%rip),%ymm6,%ymm0
|
|
+ shlq $5,%r10
|
|
+ vmovapd (%rdx,%r10), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm6,%ymm6
|
|
+
|
|
+ movb %r12b,%r10b
|
|
+ andq $0xf,%r12
|
|
+ vpsubq .Lmask52x4(%rip),%ymm7,%ymm0
|
|
+ shlq $5,%r12
|
|
+ vmovapd (%rdx,%r12), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm7,%ymm7
|
|
+
|
|
+ shrb $4,%r10b
|
|
+ andq $0xf,%r10
|
|
+ vpsubq .Lmask52x4(%rip),%ymm8,%ymm0
|
|
+ shlq $5,%r10
|
|
+ vmovapd (%rdx,%r10), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm8,%ymm8
|
|
+
|
|
+ movb %r11b,%r10b
|
|
+ andq $0xf,%r11
|
|
+ vpsubq .Lmask52x4(%rip),%ymm9,%ymm0
|
|
+ shlq $5,%r11
|
|
+ vmovapd (%rdx,%r11), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm9,%ymm9
|
|
+
|
|
+ shrb $4,%r10b
|
|
+ andq $0xf,%r10
|
|
+ vpsubq .Lmask52x4(%rip),%ymm10,%ymm0
|
|
+ shlq $5,%r10
|
|
+ vmovapd (%rdx,%r10), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm10,%ymm10
|
|
+
|
|
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
|
|
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
|
|
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
|
|
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
|
|
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
|
|
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
|
|
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
|
|
+
|
|
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
|
|
+ popq %r8
|
|
+ popq %rcx
|
|
+ popq %rsi
|
|
+ popq %r11
|
|
+
|
|
+ vmovdqu %ymm3,0(%rdi)
|
|
+ vmovdqu %ymm4,32(%rdi)
|
|
+ vmovdqu %ymm5,64(%rdi)
|
|
+ vmovdqu %ymm6,96(%rdi)
|
|
+ vmovdqu %ymm7,128(%rdi)
|
|
+ vmovdqu %ymm8,160(%rdi)
|
|
+ vmovdqu %ymm9,192(%rdi)
|
|
+ vmovdqu %ymm10,224(%rdi)
|
|
+
|
|
+ xorl %r15d,%r15d
|
|
+
|
|
+ leaq 16(%r11),%r11
|
|
+ movq $0xfffffffffffff,%rax
|
|
+
|
|
+ movl $30,%ebx
|
|
+
|
|
+ vpxor %ymm0,%ymm0,%ymm0
|
|
+ vmovapd %ymm0,%ymm3
|
|
+ vmovapd %ymm0,%ymm4
|
|
+ vmovapd %ymm0,%ymm5
|
|
+ vmovapd %ymm0,%ymm6
|
|
+ vmovapd %ymm0,%ymm7
|
|
+ vmovapd %ymm0,%ymm8
|
|
+ vmovapd %ymm0,%ymm9
|
|
+ vmovapd %ymm0,%ymm10
|
|
+.align 32
|
|
+.Lloop40:
|
|
+ movq 0(%r11),%r13
|
|
+
|
|
+ vpbroadcastq 0(%r11),%ymm1
|
|
+ movq 256(%rsi),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ movq %r12,%r10
|
|
+ adcq $0,%r10
|
|
+
|
|
+ movq 8(%r8),%r13
|
|
+ imulq %r9,%r13
|
|
+ andq %rax,%r13
|
|
+
|
|
+ vmovq %r13,%xmm2
|
|
+ vpbroadcastq %xmm2,%ymm2
|
|
+ movq 256(%rcx),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ adcq %r12,%r10
|
|
+
|
|
+ shrq $52,%r9
|
|
+ salq $12,%r10
|
|
+ orq %r10,%r9
|
|
+
|
|
+ leaq -264(%rsp),%rsp
|
|
+
|
|
+{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 320(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 352(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 384(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 416(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 448(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 480(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52luq 256(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 320(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 352(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 384(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 416(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 448(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 480(%rcx), %ymm2, %ymm10
|
|
+
|
|
+
|
|
+ vmovdqu %ymm3,0(%rsp)
|
|
+ vmovdqu %ymm4,32(%rsp)
|
|
+ vmovdqu %ymm5,64(%rsp)
|
|
+ vmovdqu %ymm6,96(%rsp)
|
|
+ vmovdqu %ymm7,128(%rsp)
|
|
+ vmovdqu %ymm8,160(%rsp)
|
|
+ vmovdqu %ymm9,192(%rsp)
|
|
+ vmovdqu %ymm10,224(%rsp)
|
|
+ movq $0,256(%rsp)
|
|
+
|
|
+ vmovdqu 8(%rsp),%ymm3
|
|
+ vmovdqu 40(%rsp),%ymm4
|
|
+ vmovdqu 72(%rsp),%ymm5
|
|
+ vmovdqu 104(%rsp),%ymm6
|
|
+ vmovdqu 136(%rsp),%ymm7
|
|
+ vmovdqu 168(%rsp),%ymm8
|
|
+ vmovdqu 200(%rsp),%ymm9
|
|
+ vmovdqu 232(%rsp),%ymm10
|
|
+
|
|
+ addq 8(%rsp),%r9
|
|
+
|
|
+{vex} vpmadd52huq 256(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 320(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 352(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 384(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 416(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 448(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 480(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 320(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 352(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 384(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 416(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 448(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 480(%rcx), %ymm2, %ymm10
|
|
+
|
|
+ leaq 264(%rsp),%rsp
|
|
+ leaq 8(%r11),%r11
|
|
+ decl %ebx
|
|
+ jne .Lloop40
|
|
+
|
|
+ vmovq %r9,%xmm0
|
|
+ vpbroadcastq %xmm0,%ymm0
|
|
+ vpblendd $3,%ymm0,%ymm3,%ymm3
|
|
+
|
|
+
|
|
+
|
|
+ vpsrlq $52,%ymm3,%ymm0
|
|
+ vpsrlq $52,%ymm4,%ymm1
|
|
+ vpsrlq $52,%ymm5,%ymm2
|
|
+ vpsrlq $52,%ymm6,%ymm11
|
|
+ vpsrlq $52,%ymm7,%ymm12
|
|
+ vpsrlq $52,%ymm8,%ymm13
|
|
+ vpsrlq $52,%ymm9,%ymm14
|
|
+ vpsrlq $52,%ymm10,%ymm15
|
|
+
|
|
+ leaq -32(%rsp),%rsp
|
|
+ vmovupd %ymm3,(%rsp)
|
|
+
|
|
+
|
|
+ vpermq $144,%ymm15,%ymm15
|
|
+ vpermq $3,%ymm14,%ymm3
|
|
+ vblendpd $1,%ymm3,%ymm15,%ymm15
|
|
+
|
|
+ vpermq $144,%ymm14,%ymm14
|
|
+ vpermq $3,%ymm13,%ymm3
|
|
+ vblendpd $1,%ymm3,%ymm14,%ymm14
|
|
+
|
|
+ vpermq $144,%ymm13,%ymm13
|
|
+ vpermq $3,%ymm12,%ymm3
|
|
+ vblendpd $1,%ymm3,%ymm13,%ymm13
|
|
+
|
|
+ vpermq $144,%ymm12,%ymm12
|
|
+ vpermq $3,%ymm11,%ymm3
|
|
+ vblendpd $1,%ymm3,%ymm12,%ymm12
|
|
+
|
|
+ vpermq $144,%ymm11,%ymm11
|
|
+ vpermq $3,%ymm2,%ymm3
|
|
+ vblendpd $1,%ymm3,%ymm11,%ymm11
|
|
+
|
|
+ vpermq $144,%ymm2,%ymm2
|
|
+ vpermq $3,%ymm1,%ymm3
|
|
+ vblendpd $1,%ymm3,%ymm2,%ymm2
|
|
+
|
|
+ vpermq $144,%ymm1,%ymm1
|
|
+ vpermq $3,%ymm0,%ymm3
|
|
+ vblendpd $1,%ymm3,%ymm1,%ymm1
|
|
+
|
|
+ vpermq $144,%ymm0,%ymm0
|
|
+ vpand .Lhigh64x3(%rip),%ymm0,%ymm0
|
|
+
|
|
+ vmovupd (%rsp),%ymm3
|
|
+ leaq 32(%rsp),%rsp
|
|
+
|
|
+
|
|
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
|
|
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
|
|
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
|
|
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
|
|
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
|
|
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
|
|
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
|
|
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
|
|
+
|
|
+
|
|
+ vpaddq %ymm0,%ymm3,%ymm3
|
|
+ vpaddq %ymm1,%ymm4,%ymm4
|
|
+ vpaddq %ymm2,%ymm5,%ymm5
|
|
+ vpaddq %ymm11,%ymm6,%ymm6
|
|
+ vpaddq %ymm12,%ymm7,%ymm7
|
|
+ vpaddq %ymm13,%ymm8,%ymm8
|
|
+ vpaddq %ymm14,%ymm9,%ymm9
|
|
+ vpaddq %ymm15,%ymm10,%ymm10
|
|
+
|
|
+
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm0
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm1
|
|
+ vmovmskpd %ymm0,%r14d
|
|
+ vmovmskpd %ymm1,%r13d
|
|
+ shlb $4,%r13b
|
|
+ orb %r13b,%r14b
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm2
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm11
|
|
+ vmovmskpd %ymm2,%r13d
|
|
+ vmovmskpd %ymm11,%r12d
|
|
+ shlb $4,%r12b
|
|
+ orb %r12b,%r13b
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm12
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm13
|
|
+ vmovmskpd %ymm12,%r12d
|
|
+ vmovmskpd %ymm13,%r11d
|
|
+ shlb $4,%r11b
|
|
+ orb %r11b,%r12b
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm14
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm15
|
|
+ vmovmskpd %ymm14,%r11d
|
|
+ vmovmskpd %ymm15,%r10d
|
|
+ shlb $4,%r10b
|
|
+ orb %r10b,%r11b
|
|
+
|
|
+ addb %r14b,%r14b
|
|
+ adcb %r13b,%r13b
|
|
+ adcb %r12b,%r12b
|
|
+ adcb %r11b,%r11b
|
|
+
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm0
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm1
|
|
+ vmovmskpd %ymm0,%r9d
|
|
+ vmovmskpd %ymm1,%r8d
|
|
+ shlb $4,%r8b
|
|
+ orb %r8b,%r9b
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm2
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm11
|
|
+ vmovmskpd %ymm2,%r8d
|
|
+ vmovmskpd %ymm11,%edx
|
|
+ shlb $4,%dl
|
|
+ orb %dl,%r8b
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm12
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm13
|
|
+ vmovmskpd %ymm12,%edx
|
|
+ vmovmskpd %ymm13,%ecx
|
|
+ shlb $4,%cl
|
|
+ orb %cl,%dl
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm14
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm15
|
|
+ vmovmskpd %ymm14,%ecx
|
|
+ vmovmskpd %ymm15,%ebx
|
|
+ shlb $4,%bl
|
|
+ orb %bl,%cl
|
|
+
|
|
+ addb %r9b,%r14b
|
|
+ adcb %r8b,%r13b
|
|
+ adcb %dl,%r12b
|
|
+ adcb %cl,%r11b
|
|
+
|
|
+ xorb %r9b,%r14b
|
|
+ xorb %r8b,%r13b
|
|
+ xorb %dl,%r12b
|
|
+ xorb %cl,%r11b
|
|
+
|
|
+ leaq .Lkmasklut(%rip),%rdx
|
|
+
|
|
+ movb %r14b,%r10b
|
|
+ andq $0xf,%r14
|
|
+ vpsubq .Lmask52x4(%rip),%ymm3,%ymm0
|
|
+ shlq $5,%r14
|
|
+ vmovapd (%rdx,%r14), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm3,%ymm3
|
|
+
|
|
+ shrb $4,%r10b
|
|
+ andq $0xf,%r10
|
|
+ vpsubq .Lmask52x4(%rip),%ymm4,%ymm0
|
|
+ shlq $5,%r10
|
|
+ vmovapd (%rdx,%r10), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm4,%ymm4
|
|
+
|
|
+ movb %r13b,%r10b
|
|
+ andq $0xf,%r13
|
|
+ vpsubq .Lmask52x4(%rip),%ymm5,%ymm0
|
|
+ shlq $5,%r13
|
|
+ vmovapd (%rdx,%r13), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm5,%ymm5
|
|
+
|
|
+ shrb $4,%r10b
|
|
+ andq $0xf,%r10
|
|
+ vpsubq .Lmask52x4(%rip),%ymm6,%ymm0
|
|
+ shlq $5,%r10
|
|
+ vmovapd (%rdx,%r10), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm6,%ymm6
|
|
+
|
|
+ movb %r12b,%r10b
|
|
+ andq $0xf,%r12
|
|
+ vpsubq .Lmask52x4(%rip),%ymm7,%ymm0
|
|
+ shlq $5,%r12
|
|
+ vmovapd (%rdx,%r12), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm7,%ymm7
|
|
+
|
|
+ shrb $4,%r10b
|
|
+ andq $0xf,%r10
|
|
+ vpsubq .Lmask52x4(%rip),%ymm8,%ymm0
|
|
+ shlq $5,%r10
|
|
+ vmovapd (%rdx,%r10), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm8,%ymm8
|
|
+
|
|
+ movb %r11b,%r10b
|
|
+ andq $0xf,%r11
|
|
+ vpsubq .Lmask52x4(%rip),%ymm9,%ymm0
|
|
+ shlq $5,%r11
|
|
+ vmovapd (%rdx,%r11), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm9,%ymm9
|
|
+
|
|
+ shrb $4,%r10b
|
|
+ andq $0xf,%r10
|
|
+ vpsubq .Lmask52x4(%rip),%ymm10,%ymm0
|
|
+ shlq $5,%r10
|
|
+ vmovapd (%rdx,%r10), %ymm2
|
|
+ vblendvpd %ymm2,%ymm0,%ymm10,%ymm10
|
|
+
|
|
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
|
|
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
|
|
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
|
|
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
|
|
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
|
|
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
|
|
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
|
|
+
|
|
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
|
|
+
|
|
+ vmovdqu %ymm3,256(%rdi)
|
|
+ vmovdqu %ymm4,288(%rdi)
|
|
+ vmovdqu %ymm5,320(%rdi)
|
|
+ vmovdqu %ymm6,352(%rdi)
|
|
+ vmovdqu %ymm7,384(%rdi)
|
|
+ vmovdqu %ymm8,416(%rdi)
|
|
+ vmovdqu %ymm9,448(%rdi)
|
|
+ vmovdqu %ymm10,480(%rdi)
|
|
+
|
|
+ vzeroupper
|
|
+ leaq (%rsp),%rax
|
|
+.cfi_def_cfa_register %rax
|
|
+ movq 0(%rax),%r15
|
|
+.cfi_restore %r15
|
|
+ movq 8(%rax),%r14
|
|
+.cfi_restore %r14
|
|
+ movq 16(%rax),%r13
|
|
+.cfi_restore %r13
|
|
+ movq 24(%rax),%r12
|
|
+.cfi_restore %r12
|
|
+ movq 32(%rax),%rbp
|
|
+.cfi_restore %rbp
|
|
+ movq 40(%rax),%rbx
|
|
+.cfi_restore %rbx
|
|
+ leaq 48(%rax),%rsp
|
|
+.cfi_def_cfa %rsp,8
|
|
+.Lossl_rsaz_amm52x30_x2_avxifma256_epilogue:
|
|
+ .byte 0xf3,0xc3
|
|
+.cfi_endproc
|
|
+.size ossl_rsaz_amm52x30_x2_avxifma256, .-ossl_rsaz_amm52x30_x2_avxifma256
|
|
+.text  # ossl_extract_multiplier_2x30_win5_avx: masked table lookup written with AVX2 blends
|
|
+ # Scans a 16384-byte table (32 entries of 512 bytes) and copies bytes 0..255 of entry rdx and bytes 256..511 of entry rcx to the output.
|
|
+.align 32  # In (as read by the code): rdi = output (512 bytes written), rsi = table, rdx = first index, rcx = second index
|
|
+.globl ossl_extract_multiplier_2x30_win5_avx
|
|
+.type ossl_extract_multiplier_2x30_win5_avx,@function
|
|
+ossl_extract_multiplier_2x30_win5_avx:
|
|
+.cfi_startproc
|
|
+.byte 243,15,30,250  # raw encoding F3 0F 1E FA = endbr64
|
|
+ vmovapd .Lones(%rip),%ymm12  # ymm12 = {1,1,1,1}: amount added to the entry counter each iteration
|
|
+ vmovq %rdx,%xmm8
|
|
+ vpbroadcastq %xmm8,%ymm10  # ymm10 = rdx replicated in all four 64-bit lanes (index for the first half)
|
|
+ vmovq %rcx,%xmm8
|
|
+ vpbroadcastq %xmm8,%ymm11  # ymm11 = rcx replicated in all four lanes (index for the second half)
|
|
+ leaq 16384(%rsi),%rax  # rax = one past the end of the table; used as the loop bound for both passes
|
|
+
|
|
+
|
|
+ vpxor %xmm0,%xmm0,%xmm0  # ymm0 = 0 (a VEX-encoded xmm write also clears the upper half)
|
|
+ vmovapd %ymm0,%ymm9  # ymm9 = entry counter, starts at 0
|
|
+ vmovapd %ymm0,%ymm1  # ymm0..ymm7 = zeroed accumulators for the selected 256 bytes
|
|
+ vmovapd %ymm0,%ymm2
|
|
+ vmovapd %ymm0,%ymm3
|
|
+ vmovapd %ymm0,%ymm4
|
|
+ vmovapd %ymm0,%ymm5
|
|
+ vmovapd %ymm0,%ymm6
|
|
+ vmovapd %ymm0,%ymm7
|
|
+
|
|
+.align 32
|
|
+.Lloop:  # pass 1: select bytes 0..255 of the entry whose position equals rdx
|
|
+ vpcmpeqq %ymm9,%ymm10,%ymm13  # ymm13 = all-ones in every lane when counter == index, otherwise zero
|
|
+ vmovdqu 0(%rsi),%ymm8
|
|
+
|
|
+ vblendvpd %ymm13,%ymm8,%ymm0,%ymm0  # take the loaded chunk where the mask is set, else keep the accumulator
|
|
+ vmovdqu 32(%rsi),%ymm8
|
|
+
|
|
+ vblendvpd %ymm13,%ymm8,%ymm1,%ymm1
|
|
+ vmovdqu 64(%rsi),%ymm8
|
|
+
|
|
+ vblendvpd %ymm13,%ymm8,%ymm2,%ymm2
|
|
+ vmovdqu 96(%rsi),%ymm8
|
|
+
|
|
+ vblendvpd %ymm13,%ymm8,%ymm3,%ymm3
|
|
+ vmovdqu 128(%rsi),%ymm8
|
|
+
|
|
+ vblendvpd %ymm13,%ymm8,%ymm4,%ymm4
|
|
+ vmovdqu 160(%rsi),%ymm8
|
|
+
|
|
+ vblendvpd %ymm13,%ymm8,%ymm5,%ymm5
|
|
+ vmovdqu 192(%rsi),%ymm8
|
|
+
|
|
+ vblendvpd %ymm13,%ymm8,%ymm6,%ymm6
|
|
+ vmovdqu 224(%rsi),%ymm8
|
|
+
|
|
+ vblendvpd %ymm13,%ymm8,%ymm7,%ymm7
|
|
+ vpaddq %ymm12,%ymm9,%ymm9  # counter += 1 in every lane
|
|
+ addq $512,%rsi  # advance to the next 512-byte entry
|
|
+ cmpq %rsi,%rax
|
|
+ jne .Lloop  # every entry is loaded whatever the index is; presumably to keep memory accesses index-independent - confirm
|
|
+ vmovdqu %ymm0,0(%rdi)  # write the selected first half to output bytes 0..255
|
|
+ vmovdqu %ymm1,32(%rdi)
|
|
+ vmovdqu %ymm2,64(%rdi)
|
|
+ vmovdqu %ymm3,96(%rdi)
|
|
+ vmovdqu %ymm4,128(%rdi)
|
|
+ vmovdqu %ymm5,160(%rdi)
|
|
+ vmovdqu %ymm6,192(%rdi)
|
|
+ vmovdqu %ymm7,224(%rdi)
|
|
+ leaq -16384(%rax),%rsi  # rewind rsi to the start of the table for the second pass
|
|
+
|
|
+
|
|
+ vpxor %xmm0,%xmm0,%xmm0  # re-zero ymm0 before rebuilding the accumulators
|
|
+ vmovapd %ymm0,%ymm9  # reset the entry counter to 0
|
|
+ vmovapd %ymm0,%ymm0  # NOTE(review): source and destination are the same register, so this changes nothing
|
|
+ vmovapd %ymm0,%ymm1
|
|
+ vmovapd %ymm0,%ymm2
|
|
+ vmovapd %ymm0,%ymm3
|
|
+ vmovapd %ymm0,%ymm4
|
|
+ vmovapd %ymm0,%ymm5
|
|
+ vmovapd %ymm0,%ymm6
|
|
+ vmovapd %ymm0,%ymm7
|
|
+
|
|
+.align 32
|
|
+.Lloop_8_15:  # pass 2: select bytes 256..511 of the entry whose position equals rcx
|
|
+ vpcmpeqq %ymm9,%ymm11,%ymm13  # same mask construction as pass 1, compared against the second index
|
|
+ vmovdqu 256(%rsi),%ymm8
|
|
+
|
|
+ vblendvpd %ymm13,%ymm8,%ymm0,%ymm0
|
|
+ vmovdqu 288(%rsi),%ymm8
|
|
+
|
|
+ vblendvpd %ymm13,%ymm8,%ymm1,%ymm1
|
|
+ vmovdqu 320(%rsi),%ymm8
|
|
+
|
|
+ vblendvpd %ymm13,%ymm8,%ymm2,%ymm2
|
|
+ vmovdqu 352(%rsi),%ymm8
|
|
+
|
|
+ vblendvpd %ymm13,%ymm8,%ymm3,%ymm3
|
|
+ vmovdqu 384(%rsi),%ymm8
|
|
+
|
|
+ vblendvpd %ymm13,%ymm8,%ymm4,%ymm4
|
|
+ vmovdqu 416(%rsi),%ymm8
|
|
+
|
|
+ vblendvpd %ymm13,%ymm8,%ymm5,%ymm5
|
|
+ vmovdqu 448(%rsi),%ymm8
|
|
+
|
|
+ vblendvpd %ymm13,%ymm8,%ymm6,%ymm6
|
|
+ vmovdqu 480(%rsi),%ymm8
|
|
+
|
|
+ vblendvpd %ymm13,%ymm8,%ymm7,%ymm7
|
|
+ vpaddq %ymm12,%ymm9,%ymm9  # counter += 1 in every lane
|
|
+ addq $512,%rsi  # advance to the next 512-byte entry
|
|
+ cmpq %rsi,%rax
|
|
+ jne .Lloop_8_15
|
|
+ vmovdqu %ymm0,256(%rdi)  # write the selected second half to output bytes 256..511
|
|
+ vmovdqu %ymm1,288(%rdi)
|
|
+ vmovdqu %ymm2,320(%rdi)
|
|
+ vmovdqu %ymm3,352(%rdi)
|
|
+ vmovdqu %ymm4,384(%rdi)
|
|
+ vmovdqu %ymm5,416(%rdi)
|
|
+ vmovdqu %ymm6,448(%rdi)
|
|
+ vmovdqu %ymm7,480(%rdi)
|
|
+
|
|
+ .byte 0xf3,0xc3  # raw encoding F3 C3 = rep ret; NOTE(review): no vzeroupper is issued before returning - confirm this is intended
|
|
+.cfi_endproc
|
|
+.size ossl_extract_multiplier_2x30_win5_avx, .-ossl_extract_multiplier_2x30_win5_avx
|
|
+.section .rodata  # read-only constants for this file
|
|
+.align 32  # .Lones is loaded with vmovapd, whose 256-bit memory operand must be 32-byte aligned
|
|
+.Lones:  # four 64-bit ones; used as the per-lane counter increment in ossl_extract_multiplier_2x30_win5_avx
|
|
+.quad 1,1,1,1
|
|
+.Lzeros:  # four 64-bit zeros; NOTE(review): no reference to this label is visible in the surrounding code - confirm it is used
|
|
+.quad 0,0,0,0
|
|
+ .section ".note.gnu.property", "a"  # ELF note section; the "a" flag makes it allocatable
|
|
+ .p2align 3
|
|
+ .long 1f - 0f  # note header word 1: size of the name field (labels 0..1, i.e. 4 bytes)
|
|
+ .long 4f - 1f  # note header word 2: size of the descriptor (labels 1..4)
|
|
+ .long 5  # note header word 3: note type; presumably NT_GNU_PROPERTY_TYPE_0 - confirm against the psABI
|
|
+0:
|
|
+ # "GNU" encoded with .byte, since .asciz isn't supported
|
|
+ # on Solaris.
|
|
+ .byte 0x47  # 'G'
|
|
+ .byte 0x4e  # 'N'
|
|
+ .byte 0x55  # 'U'
|
|
+ .byte 0  # terminating NUL of the name string
|
|
+1:
|
|
+ .p2align 3
|
|
+ .long 0xc0000002  # property type; presumably GNU_PROPERTY_X86_FEATURE_1_AND - confirm against the psABI
|
|
+ .long 3f - 2f  # size of the property data (labels 2..3, i.e. 4 bytes)
|
|
+2:
|
|
+ .long 3  # property value with bits 0 and 1 set; presumably IBT | SHSTK (CET) - confirm
|
|
+3:
|
|
+ .p2align 3  # pad the property to an 8-byte boundary
|
|
+4:
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-4k-avxifma.s b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-4k-avxifma.s
|
|
new file mode 100644
|
|
index 0000000000..08a6243d77
|
|
--- /dev/null
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-4k-avxifma.s
|
|
@@ -0,0 +1,1922 @@
|
|
+.text
|
|
+
|
|
+.globl ossl_rsaz_amm52x40_x1_avxifma256
|
|
+.type ossl_rsaz_amm52x40_x1_avxifma256,@function
|
|
+.align 32
|
|
+ossl_rsaz_amm52x40_x1_avxifma256:
|
|
+.cfi_startproc
|
|
+.byte 243,15,30,250
|
|
+ pushq %rbx
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %rbx,-16
|
|
+ pushq %rbp
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %rbp,-24
|
|
+ pushq %r12
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %r12,-32
|
|
+ pushq %r13
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %r13,-40
|
|
+ pushq %r14
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %r14,-48
|
|
+ pushq %r15
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %r15,-56
|
|
+
|
|
+ vpxor %ymm0,%ymm0,%ymm0
|
|
+ vmovapd %ymm0,%ymm3
|
|
+ vmovapd %ymm0,%ymm4
|
|
+ vmovapd %ymm0,%ymm5
|
|
+ vmovapd %ymm0,%ymm6
|
|
+ vmovapd %ymm0,%ymm7
|
|
+ vmovapd %ymm0,%ymm8
|
|
+ vmovapd %ymm0,%ymm9
|
|
+ vmovapd %ymm0,%ymm10
|
|
+ vmovapd %ymm0,%ymm11
|
|
+ vmovapd %ymm0,%ymm12
|
|
+
|
|
+ xorl %r9d,%r9d
|
|
+
|
|
+ movq %rdx,%r11
|
|
+ movq $0xfffffffffffff,%rax
|
|
+
|
|
+
|
|
+ movl $10,%ebx
|
|
+
|
|
+.align 32
|
|
+.Lloop10:
|
|
+ movq 0(%r11),%r13
|
|
+
|
|
+ vpbroadcastq 0(%r11),%ymm1
|
|
+ movq 0(%rsi),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ movq %r12,%r10
|
|
+ adcq $0,%r10
|
|
+
|
|
+ movq %r8,%r13
|
|
+ imulq %r9,%r13
|
|
+ andq %rax,%r13
|
|
+
|
|
+ vmovq %r13,%xmm2
|
|
+ vpbroadcastq %xmm2,%ymm2
|
|
+ movq 0(%rcx),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ adcq %r12,%r10
|
|
+
|
|
+ shrq $52,%r9
|
|
+ salq $12,%r10
|
|
+ orq %r10,%r9
|
|
+
|
|
+ leaq -328(%rsp),%rsp
|
|
+
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10
|
|
+{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm11
|
|
+{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm12
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10
|
|
+{vex} vpmadd52luq 256(%rcx), %ymm2, %ymm11
|
|
+{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm12
|
|
+ vmovdqu %ymm3,0(%rsp)
|
|
+ vmovdqu %ymm4,32(%rsp)
|
|
+ vmovdqu %ymm5,64(%rsp)
|
|
+ vmovdqu %ymm6,96(%rsp)
|
|
+ vmovdqu %ymm7,128(%rsp)
|
|
+ vmovdqu %ymm8,160(%rsp)
|
|
+ vmovdqu %ymm9,192(%rsp)
|
|
+ vmovdqu %ymm10,224(%rsp)
|
|
+ vmovdqu %ymm11,256(%rsp)
|
|
+ vmovdqu %ymm12,288(%rsp)
|
|
+ movq $0,320(%rsp)
|
|
+
|
|
+ vmovdqu 8(%rsp),%ymm3
|
|
+ vmovdqu 40(%rsp),%ymm4
|
|
+ vmovdqu 72(%rsp),%ymm5
|
|
+ vmovdqu 104(%rsp),%ymm6
|
|
+ vmovdqu 136(%rsp),%ymm7
|
|
+ vmovdqu 168(%rsp),%ymm8
|
|
+ vmovdqu 200(%rsp),%ymm9
|
|
+ vmovdqu 232(%rsp),%ymm10
|
|
+ vmovdqu 264(%rsp),%ymm11
|
|
+ vmovdqu 296(%rsp),%ymm12
|
|
+
|
|
+ addq 8(%rsp),%r9
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10
|
|
+{vex} vpmadd52huq 256(%rsi), %ymm1, %ymm11
|
|
+{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm12
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10
|
|
+{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm11
|
|
+{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm12
|
|
+ leaq 328(%rsp),%rsp
|
|
+ movq 8(%r11),%r13
|
|
+
|
|
+ vpbroadcastq 8(%r11),%ymm1
|
|
+ movq 0(%rsi),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ movq %r12,%r10
|
|
+ adcq $0,%r10
|
|
+
|
|
+ movq %r8,%r13
|
|
+ imulq %r9,%r13
|
|
+ andq %rax,%r13
|
|
+
|
|
+ vmovq %r13,%xmm2
|
|
+ vpbroadcastq %xmm2,%ymm2
|
|
+ movq 0(%rcx),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ adcq %r12,%r10
|
|
+
|
|
+ shrq $52,%r9
|
|
+ salq $12,%r10
|
|
+ orq %r10,%r9
|
|
+
|
|
+ leaq -328(%rsp),%rsp
|
|
+
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10
|
|
+{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm11
|
|
+{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm12
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10
|
|
+{vex} vpmadd52luq 256(%rcx), %ymm2, %ymm11
|
|
+{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm12
|
|
+ vmovdqu %ymm3,0(%rsp)
|
|
+ vmovdqu %ymm4,32(%rsp)
|
|
+ vmovdqu %ymm5,64(%rsp)
|
|
+ vmovdqu %ymm6,96(%rsp)
|
|
+ vmovdqu %ymm7,128(%rsp)
|
|
+ vmovdqu %ymm8,160(%rsp)
|
|
+ vmovdqu %ymm9,192(%rsp)
|
|
+ vmovdqu %ymm10,224(%rsp)
|
|
+ vmovdqu %ymm11,256(%rsp)
|
|
+ vmovdqu %ymm12,288(%rsp)
|
|
+ movq $0,320(%rsp)
|
|
+
|
|
+ vmovdqu 8(%rsp),%ymm3
|
|
+ vmovdqu 40(%rsp),%ymm4
|
|
+ vmovdqu 72(%rsp),%ymm5
|
|
+ vmovdqu 104(%rsp),%ymm6
|
|
+ vmovdqu 136(%rsp),%ymm7
|
|
+ vmovdqu 168(%rsp),%ymm8
|
|
+ vmovdqu 200(%rsp),%ymm9
|
|
+ vmovdqu 232(%rsp),%ymm10
|
|
+ vmovdqu 264(%rsp),%ymm11
|
|
+ vmovdqu 296(%rsp),%ymm12
|
|
+
|
|
+ addq 8(%rsp),%r9
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10
|
|
+{vex} vpmadd52huq 256(%rsi), %ymm1, %ymm11
|
|
+{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm12
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10
|
|
+{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm11
|
|
+{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm12
|
|
+ leaq 328(%rsp),%rsp
|
|
+ movq 16(%r11),%r13
|
|
+
|
|
+ vpbroadcastq 16(%r11),%ymm1
|
|
+ movq 0(%rsi),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ movq %r12,%r10
|
|
+ adcq $0,%r10
|
|
+
|
|
+ movq %r8,%r13
|
|
+ imulq %r9,%r13
|
|
+ andq %rax,%r13
|
|
+
|
|
+ vmovq %r13,%xmm2
|
|
+ vpbroadcastq %xmm2,%ymm2
|
|
+ movq 0(%rcx),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ adcq %r12,%r10
|
|
+
|
|
+ shrq $52,%r9
|
|
+ salq $12,%r10
|
|
+ orq %r10,%r9
|
|
+
|
|
+ leaq -328(%rsp),%rsp
|
|
+
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10
|
|
+{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm11
|
|
+{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm12
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10
|
|
+{vex} vpmadd52luq 256(%rcx), %ymm2, %ymm11
|
|
+{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm12
|
|
+ vmovdqu %ymm3,0(%rsp)
|
|
+ vmovdqu %ymm4,32(%rsp)
|
|
+ vmovdqu %ymm5,64(%rsp)
|
|
+ vmovdqu %ymm6,96(%rsp)
|
|
+ vmovdqu %ymm7,128(%rsp)
|
|
+ vmovdqu %ymm8,160(%rsp)
|
|
+ vmovdqu %ymm9,192(%rsp)
|
|
+ vmovdqu %ymm10,224(%rsp)
|
|
+ vmovdqu %ymm11,256(%rsp)
|
|
+ vmovdqu %ymm12,288(%rsp)
|
|
+ movq $0,320(%rsp)
|
|
+
|
|
+ vmovdqu 8(%rsp),%ymm3
|
|
+ vmovdqu 40(%rsp),%ymm4
|
|
+ vmovdqu 72(%rsp),%ymm5
|
|
+ vmovdqu 104(%rsp),%ymm6
|
|
+ vmovdqu 136(%rsp),%ymm7
|
|
+ vmovdqu 168(%rsp),%ymm8
|
|
+ vmovdqu 200(%rsp),%ymm9
|
|
+ vmovdqu 232(%rsp),%ymm10
|
|
+ vmovdqu 264(%rsp),%ymm11
|
|
+ vmovdqu 296(%rsp),%ymm12
|
|
+
|
|
+ addq 8(%rsp),%r9
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10
|
|
+{vex} vpmadd52huq 256(%rsi), %ymm1, %ymm11
|
|
+{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm12
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10
|
|
+{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm11
|
|
+{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm12
|
|
+ leaq 328(%rsp),%rsp
|
|
+ movq 24(%r11),%r13
|
|
+
|
|
+ vpbroadcastq 24(%r11),%ymm1
|
|
+ movq 0(%rsi),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ movq %r12,%r10
|
|
+ adcq $0,%r10
|
|
+
|
|
+ movq %r8,%r13
|
|
+ imulq %r9,%r13
|
|
+ andq %rax,%r13
|
|
+
|
|
+ vmovq %r13,%xmm2
|
|
+ vpbroadcastq %xmm2,%ymm2
|
|
+ movq 0(%rcx),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ adcq %r12,%r10
|
|
+
|
|
+ shrq $52,%r9
|
|
+ salq $12,%r10
|
|
+ orq %r10,%r9
|
|
+
|
|
+ leaq -328(%rsp),%rsp
|
|
+
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10
|
|
+{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm11
|
|
+{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm12
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10
|
|
+{vex} vpmadd52luq 256(%rcx), %ymm2, %ymm11
|
|
+{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm12
|
|
+ vmovdqu %ymm3,0(%rsp)
|
|
+ vmovdqu %ymm4,32(%rsp)
|
|
+ vmovdqu %ymm5,64(%rsp)
|
|
+ vmovdqu %ymm6,96(%rsp)
|
|
+ vmovdqu %ymm7,128(%rsp)
|
|
+ vmovdqu %ymm8,160(%rsp)
|
|
+ vmovdqu %ymm9,192(%rsp)
|
|
+ vmovdqu %ymm10,224(%rsp)
|
|
+ vmovdqu %ymm11,256(%rsp)
|
|
+ vmovdqu %ymm12,288(%rsp)
|
|
+ movq $0,320(%rsp)
|
|
+
|
|
+ vmovdqu 8(%rsp),%ymm3
|
|
+ vmovdqu 40(%rsp),%ymm4
|
|
+ vmovdqu 72(%rsp),%ymm5
|
|
+ vmovdqu 104(%rsp),%ymm6
|
|
+ vmovdqu 136(%rsp),%ymm7
|
|
+ vmovdqu 168(%rsp),%ymm8
|
|
+ vmovdqu 200(%rsp),%ymm9
|
|
+ vmovdqu 232(%rsp),%ymm10
|
|
+ vmovdqu 264(%rsp),%ymm11
|
|
+ vmovdqu 296(%rsp),%ymm12
|
|
+
|
|
+ addq 8(%rsp),%r9
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10
|
|
+{vex} vpmadd52huq 256(%rsi), %ymm1, %ymm11
|
|
+{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm12
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10
|
|
+{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm11
|
|
+{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm12
|
|
+ leaq 328(%rsp),%rsp
|
|
+ leaq 32(%r11),%r11
|
|
+ decl %ebx
|
|
+ jne .Lloop10
|
|
+
|
|
+ vmovq %r9,%xmm0
|
|
+ vpbroadcastq %xmm0,%ymm0
|
|
+ vpblendd $3,%ymm0,%ymm3,%ymm3
|
|
+
|
|
+ leaq -640(%rsp),%rsp
|
|
+ vmovupd %ymm3,0(%rsp)
|
|
+ vmovupd %ymm4,32(%rsp)
|
|
+ vmovupd %ymm5,64(%rsp)
|
|
+ vmovupd %ymm6,96(%rsp)
|
|
+ vmovupd %ymm7,128(%rsp)
|
|
+ vmovupd %ymm8,160(%rsp)
|
|
+ vmovupd %ymm9,192(%rsp)
|
|
+ vmovupd %ymm10,224(%rsp)
|
|
+ vmovupd %ymm11,256(%rsp)
|
|
+ vmovupd %ymm12,288(%rsp)
|
|
+
|
|
+
|
|
+
|
|
+ vpsrlq $52,%ymm3,%ymm3
|
|
+ vpsrlq $52,%ymm4,%ymm4
|
|
+ vpsrlq $52,%ymm5,%ymm5
|
|
+ vpsrlq $52,%ymm6,%ymm6
|
|
+ vpsrlq $52,%ymm7,%ymm7
|
|
+ vpsrlq $52,%ymm8,%ymm8
|
|
+ vpsrlq $52,%ymm9,%ymm9
|
|
+ vpsrlq $52,%ymm10,%ymm10
|
|
+ vpsrlq $52,%ymm11,%ymm11
|
|
+ vpsrlq $52,%ymm12,%ymm12
|
|
+
|
|
+
|
|
+ vpermq $144,%ymm12,%ymm12
|
|
+ vpermq $3,%ymm11,%ymm13
|
|
+ vblendpd $1,%ymm13,%ymm12,%ymm12
|
|
+
|
|
+ vpermq $144,%ymm11,%ymm11
|
|
+ vpermq $3,%ymm10,%ymm13
|
|
+ vblendpd $1,%ymm13,%ymm11,%ymm11
|
|
+
|
|
+ vpermq $144,%ymm10,%ymm10
|
|
+ vpermq $3,%ymm9,%ymm13
|
|
+ vblendpd $1,%ymm13,%ymm10,%ymm10
|
|
+
|
|
+ vpermq $144,%ymm9,%ymm9
|
|
+ vpermq $3,%ymm8,%ymm13
|
|
+ vblendpd $1,%ymm13,%ymm9,%ymm9
|
|
+
|
|
+ vpermq $144,%ymm8,%ymm8
|
|
+ vpermq $3,%ymm7,%ymm13
|
|
+ vblendpd $1,%ymm13,%ymm8,%ymm8
|
|
+
|
|
+ vpermq $144,%ymm7,%ymm7
|
|
+ vpermq $3,%ymm6,%ymm13
|
|
+ vblendpd $1,%ymm13,%ymm7,%ymm7
|
|
+
|
|
+ vpermq $144,%ymm6,%ymm6
|
|
+ vpermq $3,%ymm5,%ymm13
|
|
+ vblendpd $1,%ymm13,%ymm6,%ymm6
|
|
+
|
|
+ vpermq $144,%ymm5,%ymm5
|
|
+ vpermq $3,%ymm4,%ymm13
|
|
+ vblendpd $1,%ymm13,%ymm5,%ymm5
|
|
+
|
|
+ vpermq $144,%ymm4,%ymm4
|
|
+ vpermq $3,%ymm3,%ymm13
|
|
+ vblendpd $1,%ymm13,%ymm4,%ymm4
|
|
+
|
|
+ vpermq $144,%ymm3,%ymm3
|
|
+ vpand .Lhigh64x3(%rip),%ymm3,%ymm3
|
|
+
|
|
+ vmovupd %ymm3,320(%rsp)
|
|
+ vmovupd %ymm4,352(%rsp)
|
|
+ vmovupd %ymm5,384(%rsp)
|
|
+ vmovupd %ymm6,416(%rsp)
|
|
+ vmovupd %ymm7,448(%rsp)
|
|
+ vmovupd %ymm8,480(%rsp)
|
|
+ vmovupd %ymm9,512(%rsp)
|
|
+ vmovupd %ymm10,544(%rsp)
|
|
+ vmovupd %ymm11,576(%rsp)
|
|
+ vmovupd %ymm12,608(%rsp)
|
|
+
|
|
+ vmovupd 0(%rsp),%ymm3
|
|
+ vmovupd 32(%rsp),%ymm4
|
|
+ vmovupd 64(%rsp),%ymm5
|
|
+ vmovupd 96(%rsp),%ymm6
|
|
+ vmovupd 128(%rsp),%ymm7
|
|
+ vmovupd 160(%rsp),%ymm8
|
|
+ vmovupd 192(%rsp),%ymm9
|
|
+ vmovupd 224(%rsp),%ymm10
|
|
+ vmovupd 256(%rsp),%ymm11
|
|
+ vmovupd 288(%rsp),%ymm12
|
|
+
|
|
+
|
|
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
|
|
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
|
|
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
|
|
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
|
|
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
|
|
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
|
|
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
|
|
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
|
|
+ vpand .Lmask52x4(%rip),%ymm11,%ymm11
|
|
+ vpand .Lmask52x4(%rip),%ymm12,%ymm12
|
|
+
|
|
+
|
|
+ vpaddq 320(%rsp),%ymm3,%ymm3
|
|
+ vpaddq 352(%rsp),%ymm4,%ymm4
|
|
+ vpaddq 384(%rsp),%ymm5,%ymm5
|
|
+ vpaddq 416(%rsp),%ymm6,%ymm6
|
|
+ vpaddq 448(%rsp),%ymm7,%ymm7
|
|
+ vpaddq 480(%rsp),%ymm8,%ymm8
|
|
+ vpaddq 512(%rsp),%ymm9,%ymm9
|
|
+ vpaddq 544(%rsp),%ymm10,%ymm10
|
|
+ vpaddq 576(%rsp),%ymm11,%ymm11
|
|
+ vpaddq 608(%rsp),%ymm12,%ymm12
|
|
+
|
|
+ leaq 640(%rsp),%rsp
|
|
+
|
|
+
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm13
|
|
+ vmovmskpd %ymm13,%r14d
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm13
|
|
+ vmovmskpd %ymm13,%r13d
|
|
+ shlb $4,%r13b
|
|
+ orb %r13b,%r14b
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm13
|
|
+ vmovmskpd %ymm13,%r13d
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm13
|
|
+ vmovmskpd %ymm13,%r12d
|
|
+ shlb $4,%r12b
|
|
+ orb %r12b,%r13b
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm13
|
|
+ vmovmskpd %ymm13,%r12d
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm13
|
|
+ vmovmskpd %ymm13,%r11d
|
|
+ shlb $4,%r11b
|
|
+ orb %r11b,%r12b
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm13
|
|
+ vmovmskpd %ymm13,%r11d
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm13
|
|
+ vmovmskpd %ymm13,%r10d
|
|
+ shlb $4,%r10b
|
|
+ orb %r10b,%r11b
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm11,%ymm13
|
|
+ vmovmskpd %ymm13,%r10d
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm12,%ymm13
|
|
+ vmovmskpd %ymm13,%r9d
|
|
+ shlb $4,%r9b
|
|
+ orb %r9b,%r10b
|
|
+
|
|
+ addb %r14b,%r14b
|
|
+ adcb %r13b,%r13b
|
|
+ adcb %r12b,%r12b
|
|
+ adcb %r11b,%r11b
|
|
+ adcb %r10b,%r10b
|
|
+
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm13
|
|
+ vmovmskpd %ymm13,%r9d
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm13
|
|
+ vmovmskpd %ymm13,%r8d
|
|
+ shlb $4,%r8b
|
|
+ orb %r8b,%r9b
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm13
|
|
+ vmovmskpd %ymm13,%r8d
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm13
|
|
+ vmovmskpd %ymm13,%edx
|
|
+ shlb $4,%dl
|
|
+ orb %dl,%r8b
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm13
|
|
+ vmovmskpd %ymm13,%edx
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm13
|
|
+ vmovmskpd %ymm13,%ecx
|
|
+ shlb $4,%cl
|
|
+ orb %cl,%dl
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm13
|
|
+ vmovmskpd %ymm13,%ecx
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm13
|
|
+ vmovmskpd %ymm13,%ebx
|
|
+ shlb $4,%bl
|
|
+ orb %bl,%cl
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm11,%ymm13
|
|
+ vmovmskpd %ymm13,%ebx
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm12,%ymm13
|
|
+ vmovmskpd %ymm13,%eax
|
|
+ shlb $4,%al
|
|
+ orb %al,%bl
|
|
+
|
|
+ addb %r9b,%r14b
|
|
+ adcb %r8b,%r13b
|
|
+ adcb %dl,%r12b
|
|
+ adcb %cl,%r11b
|
|
+ adcb %bl,%r10b
|
|
+
|
|
+ xorb %r9b,%r14b
|
|
+ xorb %r8b,%r13b
|
|
+ xorb %dl,%r12b
|
|
+ xorb %cl,%r11b
|
|
+ xorb %bl,%r10b
|
|
+
|
|
+ pushq %r9
|
|
+ pushq %r8
|
|
+
|
|
+ leaq .Lkmasklut(%rip),%r8
|
|
+
|
|
+ movb %r14b,%r9b
|
|
+ andq $0xf,%r14
|
|
+ vpsubq .Lmask52x4(%rip),%ymm3,%ymm13
|
|
+ shlq $5,%r14
|
|
+ vmovapd (%r8,%r14), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm3,%ymm3
|
|
+
|
|
+ shrb $4,%r9b
|
|
+ andq $0xf,%r9
|
|
+ vpsubq .Lmask52x4(%rip),%ymm4,%ymm13
|
|
+ shlq $5,%r9
|
|
+ vmovapd (%r8,%r9), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm4,%ymm4
|
|
+
|
|
+ movb %r13b,%r9b
|
|
+ andq $0xf,%r13
|
|
+ vpsubq .Lmask52x4(%rip),%ymm5,%ymm13
|
|
+ shlq $5,%r13
|
|
+ vmovapd (%r8,%r13), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm5,%ymm5
|
|
+
|
|
+ shrb $4,%r9b
|
|
+ andq $0xf,%r9
|
|
+ vpsubq .Lmask52x4(%rip),%ymm6,%ymm13
|
|
+ shlq $5,%r9
|
|
+ vmovapd (%r8,%r9), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm6,%ymm6
|
|
+
|
|
+ movb %r12b,%r9b
|
|
+ andq $0xf,%r12
|
|
+ vpsubq .Lmask52x4(%rip),%ymm7,%ymm13
|
|
+ shlq $5,%r12
|
|
+ vmovapd (%r8,%r12), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm7,%ymm7
|
|
+
|
|
+ shrb $4,%r9b
|
|
+ andq $0xf,%r9
|
|
+ vpsubq .Lmask52x4(%rip),%ymm8,%ymm13
|
|
+ shlq $5,%r9
|
|
+ vmovapd (%r8,%r9), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm8,%ymm8
|
|
+
|
|
+ movb %r11b,%r9b
|
|
+ andq $0xf,%r11
|
|
+ vpsubq .Lmask52x4(%rip),%ymm9,%ymm13
|
|
+ shlq $5,%r11
|
|
+ vmovapd (%r8,%r11), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm9,%ymm9
|
|
+
|
|
+ shrb $4,%r9b
|
|
+ andq $0xf,%r9
|
|
+ vpsubq .Lmask52x4(%rip),%ymm10,%ymm13
|
|
+ shlq $5,%r9
|
|
+ vmovapd (%r8,%r9), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm10,%ymm10
|
|
+
|
|
+ movb %r10b,%r9b
|
|
+ andq $0xf,%r10
|
|
+ vpsubq .Lmask52x4(%rip),%ymm11,%ymm13
|
|
+ shlq $5,%r10
|
|
+ vmovapd (%r8,%r10), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm11,%ymm11
|
|
+
|
|
+ shrb $4,%r9b
|
|
+ andq $0xf,%r9
|
|
+ vpsubq .Lmask52x4(%rip),%ymm12,%ymm13
|
|
+ shlq $5,%r9
|
|
+ vmovapd (%r8,%r9), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm12,%ymm12
|
|
+
|
|
+ popq %r8
|
|
+ popq %r9
|
|
+
|
|
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3
|
|
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
|
|
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
|
|
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
|
|
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
|
|
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
|
|
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
|
|
+
|
|
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
|
|
+ vpand .Lmask52x4(%rip),%ymm11,%ymm11
|
|
+ vpand .Lmask52x4(%rip),%ymm12,%ymm12
|
|
+
|
|
+ vmovdqu %ymm3,0(%rdi)
|
|
+ vmovdqu %ymm4,32(%rdi)
|
|
+ vmovdqu %ymm5,64(%rdi)
|
|
+ vmovdqu %ymm6,96(%rdi)
|
|
+ vmovdqu %ymm7,128(%rdi)
|
|
+ vmovdqu %ymm8,160(%rdi)
|
|
+ vmovdqu %ymm9,192(%rdi)
|
|
+ vmovdqu %ymm10,224(%rdi)
|
|
+ vmovdqu %ymm11,256(%rdi)
|
|
+ vmovdqu %ymm12,288(%rdi)
|
|
+
|
|
+ vzeroupper
|
|
+ leaq (%rsp),%rax
|
|
+.cfi_def_cfa_register %rax
|
|
+ movq 0(%rax),%r15
|
|
+.cfi_restore %r15
|
|
+ movq 8(%rax),%r14
|
|
+.cfi_restore %r14
|
|
+ movq 16(%rax),%r13
|
|
+.cfi_restore %r13
|
|
+ movq 24(%rax),%r12
|
|
+.cfi_restore %r12
|
|
+ movq 32(%rax),%rbp
|
|
+.cfi_restore %rbp
|
|
+ movq 40(%rax),%rbx
|
|
+.cfi_restore %rbx
|
|
+ leaq 48(%rax),%rsp
|
|
+.cfi_def_cfa %rsp,8
|
|
+.Lossl_rsaz_amm52x40_x1_avxifma256_epilogue:
|
|
+
|
|
+ .byte 0xf3,0xc3
|
|
+.cfi_endproc
|
|
+.size ossl_rsaz_amm52x40_x1_avxifma256, .-ossl_rsaz_amm52x40_x1_avxifma256
|
|
+.section .rodata
|
|
+.align 32
|
|
+.Lmask52x4: # four 64-bit lanes, each holding 2^52-1 (low 52 bits set)
|
|
+.quad 0xfffffffffffff
|
|
+.quad 0xfffffffffffff
|
|
+.quad 0xfffffffffffff
|
|
+.quad 0xfffffffffffff
|
|
+.Lhigh64x3: # lane 0 clear, lanes 1..3 all ones
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.Lkmasklut: # 16 entries of 32 bytes; in entry i, lane j is all ones when bit j of i is set
|
|
+
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+
|
|
+.quad 0x0
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+
|
|
+.quad 0x0
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.quad 0xffffffffffffffff
|
|
+.text
|
|
+
|
|
+.globl ossl_rsaz_amm52x40_x2_avxifma256
|
|
+.type ossl_rsaz_amm52x40_x2_avxifma256,@function
|
|
+.align 32
|
|
+ossl_rsaz_amm52x40_x2_avxifma256: # two passes: inputs at 0.. and 320.. of rsi/rcx, results to 0.. and 320.. of rdi; presumably two 40-limb radix-2^52 Montgomery-style multiplications (confirm against generator)
|
|
+.cfi_startproc
|
|
+.byte 243,15,30,250 # f3 0f 1e fa = endbr64
|
|
+ pushq %rbx # save callee-saved registers; the epilogue reloads them in reverse order
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %rbx,-16
|
|
+ pushq %rbp
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %rbp,-24
|
|
+ pushq %r12
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %r12,-32
|
|
+ pushq %r13
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %r13,-40
|
|
+ pushq %r14
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %r14,-48
|
|
+ pushq %r15
|
|
+.cfi_adjust_cfa_offset 8
|
|
+.cfi_offset %r15,-56
|
|
+
|
|
+ vpxor %ymm0,%ymm0,%ymm0 # ymm0 = 0, then copied into the ten accumulators ymm3..ymm12
|
|
+ vmovapd %ymm0,%ymm3
|
|
+ vmovapd %ymm0,%ymm4
|
|
+ vmovapd %ymm0,%ymm5
|
|
+ vmovapd %ymm0,%ymm6
|
|
+ vmovapd %ymm0,%ymm7
|
|
+ vmovapd %ymm0,%ymm8
|
|
+ vmovapd %ymm0,%ymm9
|
|
+ vmovapd %ymm0,%ymm10
|
|
+ vmovapd %ymm0,%ymm11
|
|
+ vmovapd %ymm0,%ymm12
|
|
+
|
|
+ xorl %r9d,%r9d # r9 = 0: scalar accumulator used by .Lloop40
|
|
+
|
|
+ movq %rdx,%r11 # r11 = copy of the third argument; advanced 8 bytes per loop iteration
|
|
+ movq $0xfffffffffffff,%rax # rax = 2^52-1
|
|
+
|
|
+ movl $40,%ebx # ebx = iteration count
|
|
+
|
|
+.align 32
|
|
+.Lloop40: # one iteration per qword read through r11 (40 iterations)
|
|
+ movq 0(%r11),%r13 # r13 = current qword at r11
|
|
+
|
|
+ vpbroadcastq 0(%r11),%ymm1 # ymm1 = the same qword in all four lanes
|
|
+ movq 0(%rsi),%rdx
|
|
+ mulxq %r13,%r13,%r12 # r12:r13 = rdx * r13 (high:low)
|
|
+ addq %r13,%r9
|
|
+ movq %r12,%r10
|
|
+ adcq $0,%r10 # r10:r9 = running 128-bit sum
|
|
+
|
|
+ movq (%r8),%r13 # r13 = first qword at r8
|
|
+ imulq %r9,%r13
|
|
+ andq %rax,%r13 # keep the low 52 bits
|
|
+
|
|
+ vmovq %r13,%xmm2
|
|
+ vpbroadcastq %xmm2,%ymm2 # ymm2 = that 52-bit factor in all four lanes
|
|
+ movq 0(%rcx),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ adcq %r12,%r10
|
|
+
|
|
+ shrq $52,%r9
|
|
+ salq $12,%r10
|
|
+ orq %r10,%r9 # r9 = (r10:r9) >> 52
|
|
+
|
|
+ leaq -328(%rsp),%rsp # reserve 328 bytes of scratch
|
|
+
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10
|
|
+{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm11
|
|
+{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm12
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10
|
|
+{vex} vpmadd52luq 256(%rcx), %ymm2, %ymm11
|
|
+{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm12
|
|
+ vmovdqu %ymm3,0(%rsp) # spill the 40 lanes to scratch
|
|
+ vmovdqu %ymm4,32(%rsp)
|
|
+ vmovdqu %ymm5,64(%rsp)
|
|
+ vmovdqu %ymm6,96(%rsp)
|
|
+ vmovdqu %ymm7,128(%rsp)
|
|
+ vmovdqu %ymm8,160(%rsp)
|
|
+ vmovdqu %ymm9,192(%rsp)
|
|
+ vmovdqu %ymm10,224(%rsp)
|
|
+ vmovdqu %ymm11,256(%rsp)
|
|
+ vmovdqu %ymm12,288(%rsp)
|
|
+ movq $0,320(%rsp) # zero the qword after the spill so the shifted reload brings in 0
|
|
+
|
|
+ vmovdqu 8(%rsp),%ymm3 # reload one qword higher: every lane moves down one position
|
|
+ vmovdqu 40(%rsp),%ymm4
|
|
+ vmovdqu 72(%rsp),%ymm5
|
|
+ vmovdqu 104(%rsp),%ymm6
|
|
+ vmovdqu 136(%rsp),%ymm7
|
|
+ vmovdqu 168(%rsp),%ymm8
|
|
+ vmovdqu 200(%rsp),%ymm9
|
|
+ vmovdqu 232(%rsp),%ymm10
|
|
+ vmovdqu 264(%rsp),%ymm11
|
|
+ vmovdqu 296(%rsp),%ymm12
|
|
+
|
|
+ addq 8(%rsp),%r9 # fold the second spilled qword into the scalar accumulator
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10
|
|
+{vex} vpmadd52huq 256(%rsi), %ymm1, %ymm11
|
|
+{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm12
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10
|
|
+{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm11
|
|
+{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm12
|
|
+ leaq 328(%rsp),%rsp # release the scratch area
|
|
+ leaq 8(%r11),%r11 # advance to the next qword
|
|
+ decl %ebx
|
|
+ jne .Lloop40
|
|
+
|
|
+ pushq %r11 # keep r11/rsi/rcx/r8 for the second pass; r8, rcx and r11 are overwritten below
|
|
+ pushq %rsi
|
|
+ pushq %rcx
|
|
+ pushq %r8
|
|
+
|
|
+ vmovq %r9,%xmm0
|
|
+ vpbroadcastq %xmm0,%ymm0
|
|
+ vpblendd $3,%ymm0,%ymm3,%ymm3 # lane 0 of ymm3 = r9
|
|
+
|
|
+ leaq -640(%rsp),%rsp # scratch: raw lanes at 0..319, shifted carries at 320..639
|
|
+ vmovupd %ymm3,0(%rsp)
|
|
+ vmovupd %ymm4,32(%rsp)
|
|
+ vmovupd %ymm5,64(%rsp)
|
|
+ vmovupd %ymm6,96(%rsp)
|
|
+ vmovupd %ymm7,128(%rsp)
|
|
+ vmovupd %ymm8,160(%rsp)
|
|
+ vmovupd %ymm9,192(%rsp)
|
|
+ vmovupd %ymm10,224(%rsp)
|
|
+ vmovupd %ymm11,256(%rsp)
|
|
+ vmovupd %ymm12,288(%rsp)
|
|
+
|
|
+
|
|
+
|
|
+ vpsrlq $52,%ymm3,%ymm3 # carries = bits 52..63 of each lane
|
|
+ vpsrlq $52,%ymm4,%ymm4
|
|
+ vpsrlq $52,%ymm5,%ymm5
|
|
+ vpsrlq $52,%ymm6,%ymm6
|
|
+ vpsrlq $52,%ymm7,%ymm7
|
|
+ vpsrlq $52,%ymm8,%ymm8
|
|
+ vpsrlq $52,%ymm9,%ymm9
|
|
+ vpsrlq $52,%ymm10,%ymm10
|
|
+ vpsrlq $52,%ymm11,%ymm11
|
|
+ vpsrlq $52,%ymm12,%ymm12
|
|
+
|
|
+
|
|
+ vpermq $144,%ymm12,%ymm12 # move carries up one lane within the register
|
|
+ vpermq $3,%ymm11,%ymm13 # lane 0 of ymm13 = top lane of the next lower register
|
|
+ vblendpd $1,%ymm13,%ymm12,%ymm12 # and insert it as lane 0
|
|
+
|
|
+ vpermq $144,%ymm11,%ymm11
|
|
+ vpermq $3,%ymm10,%ymm13
|
|
+ vblendpd $1,%ymm13,%ymm11,%ymm11
|
|
+
|
|
+ vpermq $144,%ymm10,%ymm10
|
|
+ vpermq $3,%ymm9,%ymm13
|
|
+ vblendpd $1,%ymm13,%ymm10,%ymm10
|
|
+
|
|
+ vpermq $144,%ymm9,%ymm9
|
|
+ vpermq $3,%ymm8,%ymm13
|
|
+ vblendpd $1,%ymm13,%ymm9,%ymm9
|
|
+
|
|
+ vpermq $144,%ymm8,%ymm8
|
|
+ vpermq $3,%ymm7,%ymm13
|
|
+ vblendpd $1,%ymm13,%ymm8,%ymm8
|
|
+
|
|
+ vpermq $144,%ymm7,%ymm7
|
|
+ vpermq $3,%ymm6,%ymm13
|
|
+ vblendpd $1,%ymm13,%ymm7,%ymm7
|
|
+
|
|
+ vpermq $144,%ymm6,%ymm6
|
|
+ vpermq $3,%ymm5,%ymm13
|
|
+ vblendpd $1,%ymm13,%ymm6,%ymm6
|
|
+
|
|
+ vpermq $144,%ymm5,%ymm5
|
|
+ vpermq $3,%ymm4,%ymm13
|
|
+ vblendpd $1,%ymm13,%ymm5,%ymm5
|
|
+
|
|
+ vpermq $144,%ymm4,%ymm4
|
|
+ vpermq $3,%ymm3,%ymm13
|
|
+ vblendpd $1,%ymm13,%ymm4,%ymm4
|
|
+
|
|
+ vpermq $144,%ymm3,%ymm3
|
|
+ vpand .Lhigh64x3(%rip),%ymm3,%ymm3 # nothing carries into the lowest lane
|
|
+
|
|
+ vmovupd %ymm3,320(%rsp)
|
|
+ vmovupd %ymm4,352(%rsp)
|
|
+ vmovupd %ymm5,384(%rsp)
|
|
+ vmovupd %ymm6,416(%rsp)
|
|
+ vmovupd %ymm7,448(%rsp)
|
|
+ vmovupd %ymm8,480(%rsp)
|
|
+ vmovupd %ymm9,512(%rsp)
|
|
+ vmovupd %ymm10,544(%rsp)
|
|
+ vmovupd %ymm11,576(%rsp)
|
|
+ vmovupd %ymm12,608(%rsp)
|
|
+
|
|
+ vmovupd 0(%rsp),%ymm3 # reload the raw lanes
|
|
+ vmovupd 32(%rsp),%ymm4
|
|
+ vmovupd 64(%rsp),%ymm5
|
|
+ vmovupd 96(%rsp),%ymm6
|
|
+ vmovupd 128(%rsp),%ymm7
|
|
+ vmovupd 160(%rsp),%ymm8
|
|
+ vmovupd 192(%rsp),%ymm9
|
|
+ vmovupd 224(%rsp),%ymm10
|
|
+ vmovupd 256(%rsp),%ymm11
|
|
+ vmovupd 288(%rsp),%ymm12
|
|
+
|
|
+
|
|
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3 # keep the low 52 bits of each lane
|
|
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
|
|
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
|
|
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
|
|
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
|
|
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
|
|
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
|
|
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
|
|
+ vpand .Lmask52x4(%rip),%ymm11,%ymm11
|
|
+ vpand .Lmask52x4(%rip),%ymm12,%ymm12
|
|
+
|
|
+
|
|
+ vpaddq 320(%rsp),%ymm3,%ymm3 # add the carry coming from the lane below
|
|
+ vpaddq 352(%rsp),%ymm4,%ymm4
|
|
+ vpaddq 384(%rsp),%ymm5,%ymm5
|
|
+ vpaddq 416(%rsp),%ymm6,%ymm6
|
|
+ vpaddq 448(%rsp),%ymm7,%ymm7
|
|
+ vpaddq 480(%rsp),%ymm8,%ymm8
|
|
+ vpaddq 512(%rsp),%ymm9,%ymm9
|
|
+ vpaddq 544(%rsp),%ymm10,%ymm10
|
|
+ vpaddq 576(%rsp),%ymm11,%ymm11
|
|
+ vpaddq 608(%rsp),%ymm12,%ymm12
|
|
+
|
|
+ leaq 640(%rsp),%rsp
|
|
+
|
|
+
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm13 # lanes greater than 2^52-1
|
|
+ vmovmskpd %ymm13,%r14d
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm13
|
|
+ vmovmskpd %ymm13,%r13d
|
|
+ shlb $4,%r13b
|
|
+ orb %r13b,%r14b # r14b = mask bits for ymm3 (low nibble) and ymm4 (high nibble)
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm13
|
|
+ vmovmskpd %ymm13,%r13d
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm13
|
|
+ vmovmskpd %ymm13,%r12d
|
|
+ shlb $4,%r12b
|
|
+ orb %r12b,%r13b
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm13
|
|
+ vmovmskpd %ymm13,%r12d
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm13
|
|
+ vmovmskpd %ymm13,%r11d
|
|
+ shlb $4,%r11b
|
|
+ orb %r11b,%r12b
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm13
|
|
+ vmovmskpd %ymm13,%r11d
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm13
|
|
+ vmovmskpd %ymm13,%r10d
|
|
+ shlb $4,%r10b
|
|
+ orb %r10b,%r11b
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm11,%ymm13
|
|
+ vmovmskpd %ymm13,%r10d
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm12,%ymm13
|
|
+ vmovmskpd %ymm13,%r9d
|
|
+ shlb $4,%r9b
|
|
+ orb %r9b,%r10b
|
|
+
|
|
+ addb %r14b,%r14b # shift the 40-bit greater-than mask left by one bit across the five bytes
|
|
+ adcb %r13b,%r13b
|
|
+ adcb %r12b,%r12b
|
|
+ adcb %r11b,%r11b
|
|
+ adcb %r10b,%r10b
|
|
+
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm13 # lanes exactly equal to 2^52-1
|
|
+ vmovmskpd %ymm13,%r9d
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm13
|
|
+ vmovmskpd %ymm13,%r8d
|
|
+ shlb $4,%r8b
|
|
+ orb %r8b,%r9b
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm13
|
|
+ vmovmskpd %ymm13,%r8d
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm13
|
|
+ vmovmskpd %ymm13,%edx
|
|
+ shlb $4,%dl
|
|
+ orb %dl,%r8b
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm13
|
|
+ vmovmskpd %ymm13,%edx
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm13
|
|
+ vmovmskpd %ymm13,%ecx
|
|
+ shlb $4,%cl
|
|
+ orb %cl,%dl
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm13
|
|
+ vmovmskpd %ymm13,%ecx
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm13
|
|
+ vmovmskpd %ymm13,%ebx
|
|
+ shlb $4,%bl
|
|
+ orb %bl,%cl
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm11,%ymm13
|
|
+ vmovmskpd %ymm13,%ebx
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm12,%ymm13
|
|
+ vmovmskpd %ymm13,%eax
|
|
+ shlb $4,%al
|
|
+ orb %al,%bl
|
|
+
|
|
+ addb %r9b,%r14b # add the equality mask so a carry ripples through runs of all-ones lanes
|
|
+ adcb %r8b,%r13b
|
|
+ adcb %dl,%r12b
|
|
+ adcb %cl,%r11b
|
|
+ adcb %bl,%r10b
|
|
+
|
|
+ xorb %r9b,%r14b # set bits now mark the lanes that get the subtraction below
|
|
+ xorb %r8b,%r13b
|
|
+ xorb %dl,%r12b
|
|
+ xorb %cl,%r11b
|
|
+ xorb %bl,%r10b
|
|
+
|
|
+ pushq %r9
|
|
+ pushq %r8
|
|
+
|
|
+ leaq .Lkmasklut(%rip),%r8 # r8 = base of the 16-entry lane-mask table
|
|
+
|
|
+ movb %r14b,%r9b
|
|
+ andq $0xf,%r14 # low nibble selects the table entry for ymm3
|
|
+ vpsubq .Lmask52x4(%rip),%ymm3,%ymm13 # ymm13 = ymm3 - (2^52-1)
|
|
+ shlq $5,%r14 # table entries are 32 bytes
|
|
+ vmovapd (%r8,%r14), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm3,%ymm3 # take the subtracted value in the selected lanes
|
|
+
|
|
+ shrb $4,%r9b
|
|
+ andq $0xf,%r9
|
|
+ vpsubq .Lmask52x4(%rip),%ymm4,%ymm13
|
|
+ shlq $5,%r9
|
|
+ vmovapd (%r8,%r9), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm4,%ymm4
|
|
+
|
|
+ movb %r13b,%r9b
|
|
+ andq $0xf,%r13
|
|
+ vpsubq .Lmask52x4(%rip),%ymm5,%ymm13
|
|
+ shlq $5,%r13
|
|
+ vmovapd (%r8,%r13), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm5,%ymm5
|
|
+
|
|
+ shrb $4,%r9b
|
|
+ andq $0xf,%r9
|
|
+ vpsubq .Lmask52x4(%rip),%ymm6,%ymm13
|
|
+ shlq $5,%r9
|
|
+ vmovapd (%r8,%r9), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm6,%ymm6
|
|
+
|
|
+ movb %r12b,%r9b
|
|
+ andq $0xf,%r12
|
|
+ vpsubq .Lmask52x4(%rip),%ymm7,%ymm13
|
|
+ shlq $5,%r12
|
|
+ vmovapd (%r8,%r12), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm7,%ymm7
|
|
+
|
|
+ shrb $4,%r9b
|
|
+ andq $0xf,%r9
|
|
+ vpsubq .Lmask52x4(%rip),%ymm8,%ymm13
|
|
+ shlq $5,%r9
|
|
+ vmovapd (%r8,%r9), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm8,%ymm8
|
|
+
|
|
+ movb %r11b,%r9b
|
|
+ andq $0xf,%r11
|
|
+ vpsubq .Lmask52x4(%rip),%ymm9,%ymm13
|
|
+ shlq $5,%r11
|
|
+ vmovapd (%r8,%r11), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm9,%ymm9
|
|
+
|
|
+ shrb $4,%r9b
|
|
+ andq $0xf,%r9
|
|
+ vpsubq .Lmask52x4(%rip),%ymm10,%ymm13
|
|
+ shlq $5,%r9
|
|
+ vmovapd (%r8,%r9), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm10,%ymm10
|
|
+
|
|
+ movb %r10b,%r9b
|
|
+ andq $0xf,%r10
|
|
+ vpsubq .Lmask52x4(%rip),%ymm11,%ymm13
|
|
+ shlq $5,%r10
|
|
+ vmovapd (%r8,%r10), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm11,%ymm11
|
|
+
|
|
+ shrb $4,%r9b
|
|
+ andq $0xf,%r9
|
|
+ vpsubq .Lmask52x4(%rip),%ymm12,%ymm13
|
|
+ shlq $5,%r9
|
|
+ vmovapd (%r8,%r9), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm12,%ymm12
|
|
+
|
|
+ popq %r8
|
|
+ popq %r9 # NOTE(review): r9 is the equality-mask byte pushed above again, not zero
|
|
+
|
|
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3 # final reduction of every lane to 52 bits
|
|
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
|
|
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
|
|
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
|
|
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
|
|
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
|
|
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
|
|
+
|
|
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
|
|
+ vpand .Lmask52x4(%rip),%ymm11,%ymm11
|
|
+ vpand .Lmask52x4(%rip),%ymm12,%ymm12
|
|
+
|
|
+ popq %r8 # restore the registers saved before the normalization
|
|
+ popq %rcx
|
|
+ popq %rsi
|
|
+ popq %r11
|
|
+
|
|
+ vmovdqu %ymm3,0(%rdi) # store the first 320-byte result
|
|
+ vmovdqu %ymm4,32(%rdi)
|
|
+ vmovdqu %ymm5,64(%rdi)
|
|
+ vmovdqu %ymm6,96(%rdi)
|
|
+ vmovdqu %ymm7,128(%rdi)
|
|
+ vmovdqu %ymm8,160(%rdi)
|
|
+ vmovdqu %ymm9,192(%rdi)
|
|
+ vmovdqu %ymm10,224(%rdi)
|
|
+ vmovdqu %ymm11,256(%rdi)
|
|
+ vmovdqu %ymm12,288(%rdi)
|
|
+
|
|
+ xorl %r9d,%r9d # r9 is the accumulator .Lloop40_1 adds into and still holds a compare-mask byte from the preceding popq; clear it (was: xorl on the otherwise unused r15)
|
|
+
|
|
+ movq $0xfffffffffffff,%rax # rax = 2^52-1 (al was overwritten by the mask computation above)
|
|
+
|
|
+ movl $40,%ebx # ebx = iteration count for the second pass
|
|
+
|
|
+ vpxor %ymm0,%ymm0,%ymm0 # clear the ten vector accumulators ymm3..ymm12 again
|
|
+ vmovapd %ymm0,%ymm3
|
|
+ vmovapd %ymm0,%ymm4
|
|
+ vmovapd %ymm0,%ymm5
|
|
+ vmovapd %ymm0,%ymm6
|
|
+ vmovapd %ymm0,%ymm7
|
|
+ vmovapd %ymm0,%ymm8
|
|
+ vmovapd %ymm0,%ymm9
|
|
+ vmovapd %ymm0,%ymm10
|
|
+ vmovapd %ymm0,%ymm11
|
|
+ vmovapd %ymm0,%ymm12
|
|
+.align 32
|
|
+.Lloop40_1: # second pass: same loop body as .Lloop40 with rsi/rcx offsets +320 and the qword at 8(%r8)
|
|
+ movq 0(%r11),%r13 # r13 = current qword at r11 (r11 continues from where the first pass ended)
|
|
+
|
|
+ vpbroadcastq 0(%r11),%ymm1
|
|
+ movq 320(%rsi),%rdx
|
|
+ mulxq %r13,%r13,%r12 # r12:r13 = rdx * r13 (high:low)
|
|
+ addq %r13,%r9 # accumulates into r9
|
|
+ movq %r12,%r10
|
|
+ adcq $0,%r10
|
|
+
|
|
+ movq 8(%r8),%r13 # r13 = second qword at r8
|
|
+ imulq %r9,%r13
|
|
+ andq %rax,%r13 # keep the low 52 bits
|
|
+
|
|
+ vmovq %r13,%xmm2
|
|
+ vpbroadcastq %xmm2,%ymm2
|
|
+ movq 320(%rcx),%rdx
|
|
+ mulxq %r13,%r13,%r12
|
|
+ addq %r13,%r9
|
|
+ adcq %r12,%r10
|
|
+
|
|
+ shrq $52,%r9
|
|
+ salq $12,%r10
|
|
+ orq %r10,%r9 # r9 = (r10:r9) >> 52
|
|
+
|
|
+ leaq -328(%rsp),%rsp # reserve 328 bytes of scratch
|
|
+
|
|
+{vex} vpmadd52luq 320(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 352(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 384(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 416(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 448(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 480(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 512(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 544(%rsi), %ymm1, %ymm10
|
|
+{vex} vpmadd52luq 576(%rsi), %ymm1, %ymm11
|
|
+{vex} vpmadd52luq 608(%rsi), %ymm1, %ymm12
|
|
+
|
|
+{vex} vpmadd52luq 320(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 352(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 384(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 416(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 448(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 480(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 512(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 544(%rcx), %ymm2, %ymm10
|
|
+{vex} vpmadd52luq 576(%rcx), %ymm2, %ymm11
|
|
+{vex} vpmadd52luq 608(%rcx), %ymm2, %ymm12
|
|
+ vmovdqu %ymm3,0(%rsp) # spill the 40 lanes to scratch
|
|
+ vmovdqu %ymm4,32(%rsp)
|
|
+ vmovdqu %ymm5,64(%rsp)
|
|
+ vmovdqu %ymm6,96(%rsp)
|
|
+ vmovdqu %ymm7,128(%rsp)
|
|
+ vmovdqu %ymm8,160(%rsp)
|
|
+ vmovdqu %ymm9,192(%rsp)
|
|
+ vmovdqu %ymm10,224(%rsp)
|
|
+ vmovdqu %ymm11,256(%rsp)
|
|
+ vmovdqu %ymm12,288(%rsp)
|
|
+ movq $0,320(%rsp) # zero the qword after the spill so the shifted reload brings in 0
|
|
+
|
|
+ vmovdqu 8(%rsp),%ymm3 # reload one qword higher: every lane moves down one position
|
|
+ vmovdqu 40(%rsp),%ymm4
|
|
+ vmovdqu 72(%rsp),%ymm5
|
|
+ vmovdqu 104(%rsp),%ymm6
|
|
+ vmovdqu 136(%rsp),%ymm7
|
|
+ vmovdqu 168(%rsp),%ymm8
|
|
+ vmovdqu 200(%rsp),%ymm9
|
|
+ vmovdqu 232(%rsp),%ymm10
|
|
+ vmovdqu 264(%rsp),%ymm11
|
|
+ vmovdqu 296(%rsp),%ymm12
|
|
+
|
|
+ addq 8(%rsp),%r9 # fold the second spilled qword into the scalar accumulator
|
|
+
|
|
+{vex} vpmadd52huq 320(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 352(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 384(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 416(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 448(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 480(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 512(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 544(%rsi), %ymm1, %ymm10
|
|
+{vex} vpmadd52huq 576(%rsi), %ymm1, %ymm11
|
|
+{vex} vpmadd52huq 608(%rsi), %ymm1, %ymm12
|
|
+
|
|
+{vex} vpmadd52huq 320(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 352(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 384(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 416(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 448(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 480(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 512(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 544(%rcx), %ymm2, %ymm10
|
|
+{vex} vpmadd52huq 576(%rcx), %ymm2, %ymm11
|
|
+{vex} vpmadd52huq 608(%rcx), %ymm2, %ymm12
|
|
+ leaq 328(%rsp),%rsp # release the scratch area
|
|
+ leaq 8(%r11),%r11 # advance to the next qword
|
|
+ decl %ebx
|
|
+ jne .Lloop40_1
|
|
+
|
|
+ vmovq %r9,%xmm0
|
|
+ vpbroadcastq %xmm0,%ymm0
|
|
+ vpblendd $3,%ymm0,%ymm3,%ymm3 # lane 0 of ymm3 = r9
|
|
+
|
|
+ leaq -640(%rsp),%rsp # scratch: raw lanes at 0..319, shifted carries at 320..639
|
|
+ vmovupd %ymm3,0(%rsp)
|
|
+ vmovupd %ymm4,32(%rsp)
|
|
+ vmovupd %ymm5,64(%rsp)
|
|
+ vmovupd %ymm6,96(%rsp)
|
|
+ vmovupd %ymm7,128(%rsp)
|
|
+ vmovupd %ymm8,160(%rsp)
|
|
+ vmovupd %ymm9,192(%rsp)
|
|
+ vmovupd %ymm10,224(%rsp)
|
|
+ vmovupd %ymm11,256(%rsp)
|
|
+ vmovupd %ymm12,288(%rsp)
|
|
+
|
|
+
|
|
+
|
|
+ vpsrlq $52,%ymm3,%ymm3 # carries = bits 52..63 of each lane
|
|
+ vpsrlq $52,%ymm4,%ymm4
|
|
+ vpsrlq $52,%ymm5,%ymm5
|
|
+ vpsrlq $52,%ymm6,%ymm6
|
|
+ vpsrlq $52,%ymm7,%ymm7
|
|
+ vpsrlq $52,%ymm8,%ymm8
|
|
+ vpsrlq $52,%ymm9,%ymm9
|
|
+ vpsrlq $52,%ymm10,%ymm10
|
|
+ vpsrlq $52,%ymm11,%ymm11
|
|
+ vpsrlq $52,%ymm12,%ymm12
|
|
+
|
|
+
|
|
+ vpermq $144,%ymm12,%ymm12 # move carries up one lane within the register
|
|
+ vpermq $3,%ymm11,%ymm13 # lane 0 of ymm13 = top lane of the next lower register
|
|
+ vblendpd $1,%ymm13,%ymm12,%ymm12 # and insert it as lane 0
|
|
+
|
|
+ vpermq $144,%ymm11,%ymm11
|
|
+ vpermq $3,%ymm10,%ymm13
|
|
+ vblendpd $1,%ymm13,%ymm11,%ymm11
|
|
+
|
|
+ vpermq $144,%ymm10,%ymm10
|
|
+ vpermq $3,%ymm9,%ymm13
|
|
+ vblendpd $1,%ymm13,%ymm10,%ymm10
|
|
+
|
|
+ vpermq $144,%ymm9,%ymm9
|
|
+ vpermq $3,%ymm8,%ymm13
|
|
+ vblendpd $1,%ymm13,%ymm9,%ymm9
|
|
+
|
|
+ vpermq $144,%ymm8,%ymm8
|
|
+ vpermq $3,%ymm7,%ymm13
|
|
+ vblendpd $1,%ymm13,%ymm8,%ymm8
|
|
+
|
|
+ vpermq $144,%ymm7,%ymm7
|
|
+ vpermq $3,%ymm6,%ymm13
|
|
+ vblendpd $1,%ymm13,%ymm7,%ymm7
|
|
+
|
|
+ vpermq $144,%ymm6,%ymm6
|
|
+ vpermq $3,%ymm5,%ymm13
|
|
+ vblendpd $1,%ymm13,%ymm6,%ymm6
|
|
+
|
|
+ vpermq $144,%ymm5,%ymm5
|
|
+ vpermq $3,%ymm4,%ymm13
|
|
+ vblendpd $1,%ymm13,%ymm5,%ymm5
|
|
+
|
|
+ vpermq $144,%ymm4,%ymm4
|
|
+ vpermq $3,%ymm3,%ymm13
|
|
+ vblendpd $1,%ymm13,%ymm4,%ymm4
|
|
+
|
|
+ vpermq $144,%ymm3,%ymm3
|
|
+ vpand .Lhigh64x3(%rip),%ymm3,%ymm3 # nothing carries into the lowest lane
|
|
+
|
|
+ vmovupd %ymm3,320(%rsp)
|
|
+ vmovupd %ymm4,352(%rsp)
|
|
+ vmovupd %ymm5,384(%rsp)
|
|
+ vmovupd %ymm6,416(%rsp)
|
|
+ vmovupd %ymm7,448(%rsp)
|
|
+ vmovupd %ymm8,480(%rsp)
|
|
+ vmovupd %ymm9,512(%rsp)
|
|
+ vmovupd %ymm10,544(%rsp)
|
|
+ vmovupd %ymm11,576(%rsp)
|
|
+ vmovupd %ymm12,608(%rsp)
|
|
+
|
|
+ vmovupd 0(%rsp),%ymm3 # reload the raw lanes
|
|
+ vmovupd 32(%rsp),%ymm4
|
|
+ vmovupd 64(%rsp),%ymm5
|
|
+ vmovupd 96(%rsp),%ymm6
|
|
+ vmovupd 128(%rsp),%ymm7
|
|
+ vmovupd 160(%rsp),%ymm8
|
|
+ vmovupd 192(%rsp),%ymm9
|
|
+ vmovupd 224(%rsp),%ymm10
|
|
+ vmovupd 256(%rsp),%ymm11
|
|
+ vmovupd 288(%rsp),%ymm12
|
|
+
|
|
+
|
|
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3 # keep the low 52 bits of each lane
|
|
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
|
|
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
|
|
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
|
|
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
|
|
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
|
|
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
|
|
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
|
|
+ vpand .Lmask52x4(%rip),%ymm11,%ymm11
|
|
+ vpand .Lmask52x4(%rip),%ymm12,%ymm12
|
|
+
|
|
+
|
|
+ vpaddq 320(%rsp),%ymm3,%ymm3 # add the carry coming from the lane below
|
|
+ vpaddq 352(%rsp),%ymm4,%ymm4
|
|
+ vpaddq 384(%rsp),%ymm5,%ymm5
|
|
+ vpaddq 416(%rsp),%ymm6,%ymm6
|
|
+ vpaddq 448(%rsp),%ymm7,%ymm7
|
|
+ vpaddq 480(%rsp),%ymm8,%ymm8
|
|
+ vpaddq 512(%rsp),%ymm9,%ymm9
|
|
+ vpaddq 544(%rsp),%ymm10,%ymm10
|
|
+ vpaddq 576(%rsp),%ymm11,%ymm11
|
|
+ vpaddq 608(%rsp),%ymm12,%ymm12
|
|
+
|
|
+ leaq 640(%rsp),%rsp
|
|
+
|
|
+
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm13 # lanes greater than 2^52-1
|
|
+ vmovmskpd %ymm13,%r14d
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm13
|
|
+ vmovmskpd %ymm13,%r13d
|
|
+ shlb $4,%r13b
|
|
+ orb %r13b,%r14b # r14b = mask bits for ymm3 (low nibble) and ymm4 (high nibble)
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm13
|
|
+ vmovmskpd %ymm13,%r13d
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm13
|
|
+ vmovmskpd %ymm13,%r12d
|
|
+ shlb $4,%r12b
|
|
+ orb %r12b,%r13b
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm13
|
|
+ vmovmskpd %ymm13,%r12d
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm13
|
|
+ vmovmskpd %ymm13,%r11d
|
|
+ shlb $4,%r11b
|
|
+ orb %r11b,%r12b
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm13
|
|
+ vmovmskpd %ymm13,%r11d
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm13
|
|
+ vmovmskpd %ymm13,%r10d
|
|
+ shlb $4,%r10b
|
|
+ orb %r10b,%r11b
|
|
+
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm11,%ymm13
|
|
+ vmovmskpd %ymm13,%r10d
|
|
+ vpcmpgtq .Lmask52x4(%rip),%ymm12,%ymm13
|
|
+ vmovmskpd %ymm13,%r9d
|
|
+ shlb $4,%r9b
|
|
+ orb %r9b,%r10b
|
|
+
|
|
+ addb %r14b,%r14b # shift the 40-bit greater-than mask left by one bit across the five bytes
|
|
+ adcb %r13b,%r13b
|
|
+ adcb %r12b,%r12b
|
|
+ adcb %r11b,%r11b
|
|
+ adcb %r10b,%r10b
|
|
+
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm13 # lanes exactly equal to 2^52-1
|
|
+ vmovmskpd %ymm13,%r9d
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm13
|
|
+ vmovmskpd %ymm13,%r8d
|
|
+ shlb $4,%r8b
|
|
+ orb %r8b,%r9b
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm13
|
|
+ vmovmskpd %ymm13,%r8d
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm13
|
|
+ vmovmskpd %ymm13,%edx
|
|
+ shlb $4,%dl
|
|
+ orb %dl,%r8b
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm13
|
|
+ vmovmskpd %ymm13,%edx
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm13
|
|
+ vmovmskpd %ymm13,%ecx
|
|
+ shlb $4,%cl
|
|
+ orb %cl,%dl
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm13
|
|
+ vmovmskpd %ymm13,%ecx
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm13
|
|
+ vmovmskpd %ymm13,%ebx
|
|
+ shlb $4,%bl
|
|
+ orb %bl,%cl
|
|
+
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm11,%ymm13
|
|
+ vmovmskpd %ymm13,%ebx
|
|
+ vpcmpeqq .Lmask52x4(%rip),%ymm12,%ymm13
|
|
+ vmovmskpd %ymm13,%eax
|
|
+ shlb $4,%al
|
|
+ orb %al,%bl
|
|
+
|
|
+ addb %r9b,%r14b # add the equality mask so a carry ripples through runs of all-ones lanes
|
|
+ adcb %r8b,%r13b
|
|
+ adcb %dl,%r12b
|
|
+ adcb %cl,%r11b
|
|
+ adcb %bl,%r10b
|
|
+
|
|
+ xorb %r9b,%r14b # set bits now mark the lanes that get the subtraction below
|
|
+ xorb %r8b,%r13b
|
|
+ xorb %dl,%r12b
|
|
+ xorb %cl,%r11b
|
|
+ xorb %bl,%r10b
|
|
+
|
|
+ pushq %r9
|
|
+ pushq %r8
|
|
+
|
|
+ leaq .Lkmasklut(%rip),%r8 # r8 = base of the 16-entry lane-mask table
|
|
+
|
|
+ movb %r14b,%r9b
|
|
+ andq $0xf,%r14 # low nibble selects the table entry for ymm3
|
|
+ vpsubq .Lmask52x4(%rip),%ymm3,%ymm13 # ymm13 = ymm3 - (2^52-1)
|
|
+ shlq $5,%r14 # table entries are 32 bytes
|
|
+ vmovapd (%r8,%r14), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm3,%ymm3 # take the subtracted value in the selected lanes
|
|
+
|
|
+ shrb $4,%r9b
|
|
+ andq $0xf,%r9
|
|
+ vpsubq .Lmask52x4(%rip),%ymm4,%ymm13
|
|
+ shlq $5,%r9
|
|
+ vmovapd (%r8,%r9), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm4,%ymm4
|
|
+
|
|
+ movb %r13b,%r9b
|
|
+ andq $0xf,%r13
|
|
+ vpsubq .Lmask52x4(%rip),%ymm5,%ymm13
|
|
+ shlq $5,%r13
|
|
+ vmovapd (%r8,%r13), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm5,%ymm5
|
|
+
|
|
+ shrb $4,%r9b
|
|
+ andq $0xf,%r9
|
|
+ vpsubq .Lmask52x4(%rip),%ymm6,%ymm13
|
|
+ shlq $5,%r9
|
|
+ vmovapd (%r8,%r9), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm6,%ymm6
|
|
+
|
|
+ movb %r12b,%r9b
|
|
+ andq $0xf,%r12
|
|
+ vpsubq .Lmask52x4(%rip),%ymm7,%ymm13
|
|
+ shlq $5,%r12
|
|
+ vmovapd (%r8,%r12), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm7,%ymm7
|
|
+
|
|
+ shrb $4,%r9b
|
|
+ andq $0xf,%r9
|
|
+ vpsubq .Lmask52x4(%rip),%ymm8,%ymm13
|
|
+ shlq $5,%r9
|
|
+ vmovapd (%r8,%r9), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm8,%ymm8
|
|
+
|
|
+ movb %r11b,%r9b
|
|
+ andq $0xf,%r11
|
|
+ vpsubq .Lmask52x4(%rip),%ymm9,%ymm13
|
|
+ shlq $5,%r11
|
|
+ vmovapd (%r8,%r11), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm9,%ymm9
|
|
+
|
|
+ shrb $4,%r9b
|
|
+ andq $0xf,%r9
|
|
+ vpsubq .Lmask52x4(%rip),%ymm10,%ymm13
|
|
+ shlq $5,%r9
|
|
+ vmovapd (%r8,%r9), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm10,%ymm10
|
|
+
|
|
+ movb %r10b,%r9b
|
|
+ andq $0xf,%r10
|
|
+ vpsubq .Lmask52x4(%rip),%ymm11,%ymm13
|
|
+ shlq $5,%r10
|
|
+ vmovapd (%r8,%r10), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm11,%ymm11
|
|
+
|
|
+ shrb $4,%r9b
|
|
+ andq $0xf,%r9
|
|
+ vpsubq .Lmask52x4(%rip),%ymm12,%ymm13
|
|
+ shlq $5,%r9
|
|
+ vmovapd (%r8,%r9), %ymm14
|
|
+ vblendvpd %ymm14,%ymm13,%ymm12,%ymm12
|
|
+
|
|
+ popq %r8
|
|
+ popq %r9
|
|
+
|
|
+ vpand .Lmask52x4(%rip),%ymm3,%ymm3 # final reduction of every lane to 52 bits
|
|
+ vpand .Lmask52x4(%rip),%ymm4,%ymm4
|
|
+ vpand .Lmask52x4(%rip),%ymm5,%ymm5
|
|
+ vpand .Lmask52x4(%rip),%ymm6,%ymm6
|
|
+ vpand .Lmask52x4(%rip),%ymm7,%ymm7
|
|
+ vpand .Lmask52x4(%rip),%ymm8,%ymm8
|
|
+ vpand .Lmask52x4(%rip),%ymm9,%ymm9
|
|
+
|
|
+ vpand .Lmask52x4(%rip),%ymm10,%ymm10
|
|
+ vpand .Lmask52x4(%rip),%ymm11,%ymm11
|
|
+ vpand .Lmask52x4(%rip),%ymm12,%ymm12
|
|
+
|
|
+ vmovdqu %ymm3,320(%rdi) # store the second 320-byte result
|
|
+ vmovdqu %ymm4,352(%rdi)
|
|
+ vmovdqu %ymm5,384(%rdi)
|
|
+ vmovdqu %ymm6,416(%rdi)
|
|
+ vmovdqu %ymm7,448(%rdi)
|
|
+ vmovdqu %ymm8,480(%rdi)
|
|
+ vmovdqu %ymm9,512(%rdi)
|
|
+ vmovdqu %ymm10,544(%rdi)
|
|
+ vmovdqu %ymm11,576(%rdi)
|
|
+ vmovdqu %ymm12,608(%rdi)
|
|
+
|
|
+ vzeroupper
|
|
+ leaq (%rsp),%rax # rax = base of the six saved registers
|
|
+.cfi_def_cfa_register %rax
|
|
+ movq 0(%rax),%r15 # reload in reverse push order
|
|
+.cfi_restore %r15
|
|
+ movq 8(%rax),%r14
|
|
+.cfi_restore %r14
|
|
+ movq 16(%rax),%r13
|
|
+.cfi_restore %r13
|
|
+ movq 24(%rax),%r12
|
|
+.cfi_restore %r12
|
|
+ movq 32(%rax),%rbp
|
|
+.cfi_restore %rbp
|
|
+ movq 40(%rax),%rbx
|
|
+.cfi_restore %rbx
|
|
+ leaq 48(%rax),%rsp # drop the save area
|
|
+.cfi_def_cfa %rsp,8
|
|
+.Lossl_rsaz_amm52x40_x2_avxifma256_epilogue:
|
|
+ .byte 0xf3,0xc3 # rep ret
|
|
+.cfi_endproc
|
|
+.size ossl_rsaz_amm52x40_x2_avxifma256, .-ossl_rsaz_amm52x40_x2_avxifma256
|
|
+.text
|
|
+# ossl_extract_multiplier_2x40_win5_avx: copy one 640-byte table entry to (%rdi) by loading all 32 entries and mask-selecting, once per 320-byte half.
|
|
+.align 32
|
|
+.globl ossl_extract_multiplier_2x40_win5_avx  # In: %rdi = destination (640 bytes stored), %rsi = table base (32 x 640 bytes loaded)
|
|
+.type ossl_extract_multiplier_2x40_win5_avx,@function  # In: %rdx = entry index for bytes 0..319, %rcx = entry index for bytes 320..639
|
|
+ossl_extract_multiplier_2x40_win5_avx:  # Clobbers: %rax, %rsi, %r10, %ymm0-%ymm15, flags
|
|
+.cfi_startproc
|
|
+.byte 243,15,30,250  # endbr64 (F3 0F 1E FA)
|
|
+ vmovapd .Lones(%rip),%ymm14  # ymm14 = {1,1,1,1}: per-lane increment for the entry counter
|
|
+ vmovq %rdx,%xmm10
|
|
+ vpbroadcastq %xmm10,%ymm12  # ymm12 = first-half index replicated into every 64-bit lane
|
|
+ vmovq %rcx,%xmm10
|
|
+ vpbroadcastq %xmm10,%ymm13  # ymm13 = second-half index replicated into every 64-bit lane
|
|
+ leaq 20480(%rsi),%rax  # rax = end of table: 20480 = 32 entries * 640 bytes
|
|
+
|
|
+
|
|
+ movq %rsi,%r10  # keep the table base so the second pass can rescan from the start
|
|
+
|
|
+
|
|
+ vpxor %xmm0,%xmm0,%xmm0  # VEX-encoded xmm write also clears the upper lanes: ymm0 = 0
|
|
+ vmovapd %ymm0,%ymm1  # ymm1..ymm9 = 0: together with ymm0 they accumulate the selected 320 bytes
|
|
+ vmovapd %ymm0,%ymm2
|
|
+ vmovapd %ymm0,%ymm3
|
|
+ vmovapd %ymm0,%ymm4
|
|
+ vmovapd %ymm0,%ymm5
|
|
+ vmovapd %ymm0,%ymm6
|
|
+ vmovapd %ymm0,%ymm7
|
|
+ vmovapd %ymm0,%ymm8
|
|
+ vmovapd %ymm0,%ymm9
|
|
+ vpxor %ymm11,%ymm11,%ymm11  # ymm11 = running entry index, starts at 0
|
|
+.align 32
|
|
+.Lloop_0:  # pass 1: bytes 0..319 of every entry
|
|
+ vpcmpeqq %ymm11,%ymm12,%ymm15  # ymm15 = all-ones lanes when the current entry index equals rdx, else zero
|
|
+ vmovdqu 0(%rsi),%ymm10
|
|
+
|
|
+ vblendvpd %ymm15,%ymm10,%ymm0,%ymm0  # take the loaded chunk where the mask is set, otherwise keep the accumulator
|
|
+ vmovdqu 32(%rsi),%ymm10
|
|
+
|
|
+ vblendvpd %ymm15,%ymm10,%ymm1,%ymm1
|
|
+ vmovdqu 64(%rsi),%ymm10
|
|
+
|
|
+ vblendvpd %ymm15,%ymm10,%ymm2,%ymm2
|
|
+ vmovdqu 96(%rsi),%ymm10
|
|
+
|
|
+ vblendvpd %ymm15,%ymm10,%ymm3,%ymm3
|
|
+ vmovdqu 128(%rsi),%ymm10
|
|
+
|
|
+ vblendvpd %ymm15,%ymm10,%ymm4,%ymm4
|
|
+ vmovdqu 160(%rsi),%ymm10
|
|
+
|
|
+ vblendvpd %ymm15,%ymm10,%ymm5,%ymm5
|
|
+ vmovdqu 192(%rsi),%ymm10
|
|
+
|
|
+ vblendvpd %ymm15,%ymm10,%ymm6,%ymm6
|
|
+ vmovdqu 224(%rsi),%ymm10
|
|
+
|
|
+ vblendvpd %ymm15,%ymm10,%ymm7,%ymm7
|
|
+ vmovdqu 256(%rsi),%ymm10
|
|
+
|
|
+ vblendvpd %ymm15,%ymm10,%ymm8,%ymm8
|
|
+ vmovdqu 288(%rsi),%ymm10
|
|
+
|
|
+ vblendvpd %ymm15,%ymm10,%ymm9,%ymm9
|
|
+ vpaddq %ymm14,%ymm11,%ymm11  # next entry index
|
|
+ addq $640,%rsi  # advance to the next 640-byte entry
|
|
+ cmpq %rsi,%rax
|
|
+ jne .Lloop_0  # loop until rsi reaches the end of the table (all 32 entries loaded)
|
|
+ vmovdqu %ymm0,0(%rdi)  # store the selected first half to bytes 0..319 of the destination
|
|
+ vmovdqu %ymm1,32(%rdi)
|
|
+ vmovdqu %ymm2,64(%rdi)
|
|
+ vmovdqu %ymm3,96(%rdi)
|
|
+ vmovdqu %ymm4,128(%rdi)
|
|
+ vmovdqu %ymm5,160(%rdi)
|
|
+ vmovdqu %ymm6,192(%rdi)
|
|
+ vmovdqu %ymm7,224(%rdi)
|
|
+ vmovdqu %ymm8,256(%rdi)
|
|
+ vmovdqu %ymm9,288(%rdi)
|
|
+ movq %r10,%rsi  # rewind to the table base
|
|
+ vpxor %ymm11,%ymm11,%ymm11  # restart the entry counter at 0
|
|
+.align 32
|
|
+.Lloop_320:  # pass 2: bytes 320..639 of every entry; ymm0-ymm9 are not re-zeroed, so they keep pass-1 data unless some entry index equals rcx
|
|
+ vpcmpeqq %ymm11,%ymm13,%ymm15  # ymm15 = all-ones lanes when the current entry index equals rcx, else zero
|
|
+ vmovdqu 320(%rsi),%ymm10
|
|
+
|
|
+ vblendvpd %ymm15,%ymm10,%ymm0,%ymm0
|
|
+ vmovdqu 352(%rsi),%ymm10
|
|
+
|
|
+ vblendvpd %ymm15,%ymm10,%ymm1,%ymm1
|
|
+ vmovdqu 384(%rsi),%ymm10
|
|
+
|
|
+ vblendvpd %ymm15,%ymm10,%ymm2,%ymm2
|
|
+ vmovdqu 416(%rsi),%ymm10
|
|
+
|
|
+ vblendvpd %ymm15,%ymm10,%ymm3,%ymm3
|
|
+ vmovdqu 448(%rsi),%ymm10
|
|
+
|
|
+ vblendvpd %ymm15,%ymm10,%ymm4,%ymm4
|
|
+ vmovdqu 480(%rsi),%ymm10
|
|
+
|
|
+ vblendvpd %ymm15,%ymm10,%ymm5,%ymm5
|
|
+ vmovdqu 512(%rsi),%ymm10
|
|
+
|
|
+ vblendvpd %ymm15,%ymm10,%ymm6,%ymm6
|
|
+ vmovdqu 544(%rsi),%ymm10
|
|
+
|
|
+ vblendvpd %ymm15,%ymm10,%ymm7,%ymm7
|
|
+ vmovdqu 576(%rsi),%ymm10
|
|
+
|
|
+ vblendvpd %ymm15,%ymm10,%ymm8,%ymm8
|
|
+ vmovdqu 608(%rsi),%ymm10
|
|
+
|
|
+ vblendvpd %ymm15,%ymm10,%ymm9,%ymm9
|
|
+ vpaddq %ymm14,%ymm11,%ymm11  # next entry index
|
|
+ addq $640,%rsi  # advance to the next 640-byte entry
|
|
+ cmpq %rsi,%rax
|
|
+ jne .Lloop_320  # loop until rsi reaches the end of the table
|
|
+ vmovdqu %ymm0,320(%rdi)  # store the selected second half to bytes 320..639 of the destination
|
|
+ vmovdqu %ymm1,352(%rdi)
|
|
+ vmovdqu %ymm2,384(%rdi)
|
|
+ vmovdqu %ymm3,416(%rdi)
|
|
+ vmovdqu %ymm4,448(%rdi)
|
|
+ vmovdqu %ymm5,480(%rdi)
|
|
+ vmovdqu %ymm6,512(%rdi)
|
|
+ vmovdqu %ymm7,544(%rdi)
|
|
+ vmovdqu %ymm8,576(%rdi)
|
|
+ vmovdqu %ymm9,608(%rdi)
|
|
+# NOTE(review): no vzeroupper is issued before returning, so the upper ymm halves stay dirty - confirm this is intended.
|
|
+ .byte 0xf3,0xc3  # rep ret
|
|
+.cfi_endproc
|
|
+.size ossl_extract_multiplier_2x40_win5_avx, .-ossl_extract_multiplier_2x40_win5_avx
|
|
+.section .rodata  # read-only constants followed by the GNU property note for this object
|
|
+.align 32  # .Lones is loaded with vmovapd (aligned 32-byte load) by ossl_extract_multiplier_2x40_win5_avx
|
|
+.Lones:
|
|
+.quad 1,1,1,1  # four 64-bit lanes of 1: the per-iteration increment of the table-index counter
|
|
+.Lzeros:
|
|
+.quad 0,0,0,0  # four 64-bit lanes of 0; NOTE(review): no user is visible in the reviewed part of the file
|
|
+ .section ".note.gnu.property", "a"  # ELF note section, allocatable
|
|
+ .p2align 3
|
|
+ .long 1f - 0f  # note header: name size (bytes between labels 0 and 1)
|
|
+ .long 4f - 1f  # note header: descriptor size (bytes between labels 1 and 4)
|
|
+ .long 5  # note header: type 5, presumably NT_GNU_PROPERTY_TYPE_0 - see the x86-64 psABI
|
|
+0:
|
|
+ # "GNU" encoded with .byte, since .asciz isn't supported
|
|
+ # on Solaris.
|
|
+ .byte 0x47
|
|
+ .byte 0x4e
|
|
+ .byte 0x55
|
|
+ .byte 0
|
|
+1:
|
|
+ .p2align 3
|
|
+ .long 0xc0000002  # property type, presumably GNU_PROPERTY_X86_FEATURE_1_AND - see the x86-64 psABI
|
|
+ .long 3f - 2f  # property data size (bytes between labels 2 and 3)
|
|
+2:
|
|
+ .long 3  # property value: bits 0 and 1 set, presumably IBT | SHSTK - confirm against the psABI
|
|
+3:
|
|
+ .p2align 3  # pad the descriptor to an 8-byte boundary
|
|
+4:
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/md5/md5-x86_64.s b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/md5/md5-x86_64.s
|
|
index 40bfc69f38..27a5a80375 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/md5/md5-x86_64.s
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/md5/md5-x86_64.s
|
|
@@ -201,7 +201,7 @@ ossl_md5_block_asm_data_order:
|
|
leal -165796510(%rax,%r10,1),%eax
|
|
andl %ecx,%r11d
|
|
movl 24(%rsi),%r10d
|
|
- orl %r11d,%r12d
|
|
+ addl %r11d,%eax
|
|
movl %ecx,%r11d
|
|
addl %r12d,%eax
|
|
movl %ecx,%r12d
|
|
@@ -212,7 +212,7 @@ ossl_md5_block_asm_data_order:
|
|
leal -1069501632(%rdx,%r10,1),%edx
|
|
andl %ebx,%r11d
|
|
movl 44(%rsi),%r10d
|
|
- orl %r11d,%r12d
|
|
+ addl %r11d,%edx
|
|
movl %ebx,%r11d
|
|
addl %r12d,%edx
|
|
movl %ebx,%r12d
|
|
@@ -223,7 +223,7 @@ ossl_md5_block_asm_data_order:
|
|
leal 643717713(%rcx,%r10,1),%ecx
|
|
andl %eax,%r11d
|
|
movl 0(%rsi),%r10d
|
|
- orl %r11d,%r12d
|
|
+ addl %r11d,%ecx
|
|
movl %eax,%r11d
|
|
addl %r12d,%ecx
|
|
movl %eax,%r12d
|
|
@@ -234,7 +234,7 @@ ossl_md5_block_asm_data_order:
|
|
leal -373897302(%rbx,%r10,1),%ebx
|
|
andl %edx,%r11d
|
|
movl 20(%rsi),%r10d
|
|
- orl %r11d,%r12d
|
|
+ addl %r11d,%ebx
|
|
movl %edx,%r11d
|
|
addl %r12d,%ebx
|
|
movl %edx,%r12d
|
|
@@ -245,7 +245,7 @@ ossl_md5_block_asm_data_order:
|
|
leal -701558691(%rax,%r10,1),%eax
|
|
andl %ecx,%r11d
|
|
movl 40(%rsi),%r10d
|
|
- orl %r11d,%r12d
|
|
+ addl %r11d,%eax
|
|
movl %ecx,%r11d
|
|
addl %r12d,%eax
|
|
movl %ecx,%r12d
|
|
@@ -256,7 +256,7 @@ ossl_md5_block_asm_data_order:
|
|
leal 38016083(%rdx,%r10,1),%edx
|
|
andl %ebx,%r11d
|
|
movl 60(%rsi),%r10d
|
|
- orl %r11d,%r12d
|
|
+ addl %r11d,%edx
|
|
movl %ebx,%r11d
|
|
addl %r12d,%edx
|
|
movl %ebx,%r12d
|
|
@@ -267,7 +267,7 @@ ossl_md5_block_asm_data_order:
|
|
leal -660478335(%rcx,%r10,1),%ecx
|
|
andl %eax,%r11d
|
|
movl 16(%rsi),%r10d
|
|
- orl %r11d,%r12d
|
|
+ addl %r11d,%ecx
|
|
movl %eax,%r11d
|
|
addl %r12d,%ecx
|
|
movl %eax,%r12d
|
|
@@ -278,7 +278,7 @@ ossl_md5_block_asm_data_order:
|
|
leal -405537848(%rbx,%r10,1),%ebx
|
|
andl %edx,%r11d
|
|
movl 36(%rsi),%r10d
|
|
- orl %r11d,%r12d
|
|
+ addl %r11d,%ebx
|
|
movl %edx,%r11d
|
|
addl %r12d,%ebx
|
|
movl %edx,%r12d
|
|
@@ -289,7 +289,7 @@ ossl_md5_block_asm_data_order:
|
|
leal 568446438(%rax,%r10,1),%eax
|
|
andl %ecx,%r11d
|
|
movl 56(%rsi),%r10d
|
|
- orl %r11d,%r12d
|
|
+ addl %r11d,%eax
|
|
movl %ecx,%r11d
|
|
addl %r12d,%eax
|
|
movl %ecx,%r12d
|
|
@@ -300,7 +300,7 @@ ossl_md5_block_asm_data_order:
|
|
leal -1019803690(%rdx,%r10,1),%edx
|
|
andl %ebx,%r11d
|
|
movl 12(%rsi),%r10d
|
|
- orl %r11d,%r12d
|
|
+ addl %r11d,%edx
|
|
movl %ebx,%r11d
|
|
addl %r12d,%edx
|
|
movl %ebx,%r12d
|
|
@@ -311,7 +311,7 @@ ossl_md5_block_asm_data_order:
|
|
leal -187363961(%rcx,%r10,1),%ecx
|
|
andl %eax,%r11d
|
|
movl 32(%rsi),%r10d
|
|
- orl %r11d,%r12d
|
|
+ addl %r11d,%ecx
|
|
movl %eax,%r11d
|
|
addl %r12d,%ecx
|
|
movl %eax,%r12d
|
|
@@ -322,7 +322,7 @@ ossl_md5_block_asm_data_order:
|
|
leal 1163531501(%rbx,%r10,1),%ebx
|
|
andl %edx,%r11d
|
|
movl 52(%rsi),%r10d
|
|
- orl %r11d,%r12d
|
|
+ addl %r11d,%ebx
|
|
movl %edx,%r11d
|
|
addl %r12d,%ebx
|
|
movl %edx,%r12d
|
|
@@ -333,7 +333,7 @@ ossl_md5_block_asm_data_order:
|
|
leal -1444681467(%rax,%r10,1),%eax
|
|
andl %ecx,%r11d
|
|
movl 8(%rsi),%r10d
|
|
- orl %r11d,%r12d
|
|
+ addl %r11d,%eax
|
|
movl %ecx,%r11d
|
|
addl %r12d,%eax
|
|
movl %ecx,%r12d
|
|
@@ -344,7 +344,7 @@ ossl_md5_block_asm_data_order:
|
|
leal -51403784(%rdx,%r10,1),%edx
|
|
andl %ebx,%r11d
|
|
movl 28(%rsi),%r10d
|
|
- orl %r11d,%r12d
|
|
+ addl %r11d,%edx
|
|
movl %ebx,%r11d
|
|
addl %r12d,%edx
|
|
movl %ebx,%r12d
|
|
@@ -355,7 +355,7 @@ ossl_md5_block_asm_data_order:
|
|
leal 1735328473(%rcx,%r10,1),%ecx
|
|
andl %eax,%r11d
|
|
movl 48(%rsi),%r10d
|
|
- orl %r11d,%r12d
|
|
+ addl %r11d,%ecx
|
|
movl %eax,%r11d
|
|
addl %r12d,%ecx
|
|
movl %eax,%r12d
|
|
@@ -366,7 +366,7 @@ ossl_md5_block_asm_data_order:
|
|
leal -1926607734(%rbx,%r10,1),%ebx
|
|
andl %edx,%r11d
|
|
movl 20(%rsi),%r10d
|
|
- orl %r11d,%r12d
|
|
+ addl %r11d,%ebx
|
|
movl %edx,%r11d
|
|
addl %r12d,%ebx
|
|
movl %edx,%r12d
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/x86_64cpuid.s b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/x86_64cpuid.s
|
|
index 5fda386d1d..4fb26cc6e2 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/x86_64cpuid.s
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/x86_64cpuid.s
|
|
@@ -1,11 +1,11 @@
|
|
|
|
+
|
|
.hidden OPENSSL_cpuid_setup
|
|
.section .init
|
|
call OPENSSL_cpuid_setup
|
|
|
|
.hidden OPENSSL_ia32cap_P
|
|
-.comm OPENSSL_ia32cap_P,16,4
|
|
-
|
|
+.comm OPENSSL_ia32cap_P,40,4
|
|
.text
|
|
|
|
.globl OPENSSL_atomic_add
|
|
@@ -163,6 +163,7 @@ OPENSSL_ia32_cpuid:
|
|
movl $7,%eax
|
|
xorl %ecx,%ecx
|
|
cpuid
|
|
+ movd %eax,%xmm1
|
|
btl $26,%r9d
|
|
jc .Lnotknights
|
|
andl $0xfff7ffff,%ebx
|
|
@@ -173,9 +174,31 @@ OPENSSL_ia32_cpuid:
|
|
jne .Lnotskylakex
|
|
andl $0xfffeffff,%ebx
|
|
|
|
+
|
|
.Lnotskylakex:
|
|
movl %ebx,8(%rdi)
|
|
movl %ecx,12(%rdi)
|
|
+ movl %edx,16(%rdi)
|
|
+
|
|
+ movd %xmm1,%eax
|
|
+ cmpl $0x1,%eax
|
|
+ jb .Lno_extended_info
|
|
+ movl $0x7,%eax
|
|
+ movl $0x1,%ecx
|
|
+ cpuid
|
|
+ movl %eax,20(%rdi)
|
|
+ movl %edx,24(%rdi)
|
|
+ movl %ebx,28(%rdi)
|
|
+ movl %ecx,32(%rdi)
|
|
+
|
|
+ andl $0x80000,%edx
|
|
+ cmpl $0x0,%edx
|
|
+ je .Lno_extended_info
|
|
+ movl $0x24,%eax
|
|
+ movl $0x0,%ecx
|
|
+ cpuid
|
|
+ movl %ebx,36(%rdi)
|
|
+
|
|
.Lno_extended_info:
|
|
|
|
btl $27,%r9d
|
|
@@ -194,6 +217,9 @@ OPENSSL_ia32_cpuid:
|
|
cmpl $6,%eax
|
|
je .Ldone
|
|
.Lclear_avx:
|
|
+ andl $0xff7fffff,20(%rdi)
|
|
+
|
|
+
|
|
movl $0xefffe7ff,%eax
|
|
andl %eax,%r9d
|
|
movl $0x3fdeffdf,%eax
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/aes/aesni-xts-avx512.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/aes/aesni-xts-avx512.nasm
|
|
new file mode 100644
|
|
index 0000000000..cf6644f9e4
|
|
--- /dev/null
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/aes/aesni-xts-avx512.nasm
|
|
@@ -0,0 +1,8350 @@
|
|
+default rel
|
|
+%define XMMWORD
|
|
+%define YMMWORD
|
|
+%define ZMMWORD
|
|
+section .text code align=64
|
|
+
|
|
+EXTERN OPENSSL_ia32cap_P
|
|
+global aesni_xts_avx512_eligible
|
|
+; aesni_xts_avx512_eligible: takes no arguments; returns eax = 0x640 (non-zero) when every required bit in OPENSSL_ia32cap_P is set, else eax = 0. Clobbers ecx and flags.
|
|
+ALIGN 32
|
|
+aesni_xts_avx512_eligible:
|
|
+ mov ecx,DWORD[((OPENSSL_ia32cap_P+8))]   ; ecx = capability dword at byte offset 8
|
|
+ xor eax,eax                              ; default result: not eligible
|
|
+; NOTE(review): if dword +8 is CPUID.(EAX=7,ECX=0):EBX, 0xc0030000 = AVX512F|AVX512DQ|AVX512BW|AVX512VL (bits 16,17,30,31) - confirm.
|
|
+ and ecx,0xc0030000
|
|
+ cmp ecx,0xc0030000                       ; all four masked bits must be set
|
|
+ jne NEAR $L$_done                        ; any bit missing: return 0
|
|
+ mov ecx,DWORD[((OPENSSL_ia32cap_P+12))]  ; ecx = capability dword at byte offset 12
|
|
+; NOTE(review): if dword +12 is CPUID.(EAX=7,ECX=0):ECX, 0x640 = AVX512_VBMI2|VAES|VPCLMULQDQ (bits 6,9,10) - confirm.
|
|
+ and ecx,0x640
|
|
+ cmp ecx,0x640                            ; all three masked bits must be set
|
|
+ cmove eax,ecx                            ; eligible: return the non-zero mask value 0x640
|
|
+$L$_done:
|
|
+ DB 0F3h,0C3h ;repret
|
|
+
|
|
+global aesni_xts_128_encrypt_avx512
|
|
+
|
|
+
|
|
+ALIGN 32
|
|
+aesni_xts_128_encrypt_avx512:
|
|
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
|
|
+ mov QWORD[16+rsp],rsi
|
|
+ mov rax,rsp
|
|
+$L$SEH_begin_aesni_xts_128_encrypt_avx512:
|
|
+ mov rdi,rcx
|
|
+ mov rsi,rdx
|
|
+ mov rdx,r8
|
|
+ mov rcx,r9
|
|
+ mov r8,QWORD[40+rsp]
|
|
+ mov r9,QWORD[48+rsp]
|
|
+
|
|
+
|
|
+
|
|
+DB 243,15,30,250
|
|
+ push rbp
|
|
+ mov rbp,rsp
|
|
+ sub rsp,312
|
|
+ and rsp,0xffffffffffffffc0
|
|
+ mov QWORD[288+rsp],rbx
|
|
+ mov QWORD[((288 + 8))+rsp],rdi
|
|
+ mov QWORD[((288 + 16))+rsp],rsi
|
|
+ vmovdqa XMMWORD[(128 + 0)+rsp],xmm6
|
|
+ vmovdqa XMMWORD[(128 + 16)+rsp],xmm7
|
|
+ vmovdqa XMMWORD[(128 + 32)+rsp],xmm8
|
|
+ vmovdqa XMMWORD[(128 + 48)+rsp],xmm9
|
|
+ vmovdqa XMMWORD[(128 + 64)+rsp],xmm10
|
|
+ vmovdqa XMMWORD[(128 + 80)+rsp],xmm11
|
|
+ vmovdqa XMMWORD[(128 + 96)+rsp],xmm12
|
|
+ vmovdqa XMMWORD[(128 + 112)+rsp],xmm13
|
|
+ vmovdqa XMMWORD[(128 + 128)+rsp],xmm14
|
|
+ vmovdqa XMMWORD[(128 + 144)+rsp],xmm15
|
|
+ mov r10,0x87
|
|
+ vmovdqu xmm1,XMMWORD[r9]
|
|
+ vpxor xmm1,xmm1,XMMWORD[r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[16+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[32+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[48+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[64+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[80+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[96+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[112+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[128+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[144+r8]
|
|
+ vaesenclast xmm1,xmm1,XMMWORD[160+r8]
|
|
+ vmovdqa XMMWORD[rsp],xmm1
|
|
+ mov QWORD[((8 + 40))+rbp],rdi
|
|
+ mov QWORD[((8 + 48))+rbp],rsi
|
|
+
|
|
+ cmp rdx,0x80
|
|
+ jl NEAR $L$_less_than_128_bytes_hEgxyDlCngwrfFe
|
|
+ vpbroadcastq zmm25,r10
|
|
+ cmp rdx,0x100
|
|
+ jge NEAR $L$_start_by16_hEgxyDlCngwrfFe
|
|
+ cmp rdx,0x80
|
|
+ jge NEAR $L$_start_by8_hEgxyDlCngwrfFe
|
|
+
|
|
+$L$_do_n_blocks_hEgxyDlCngwrfFe:
|
|
+ cmp rdx,0x0
|
|
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
|
|
+ cmp rdx,0x70
|
|
+ jge NEAR $L$_remaining_num_blocks_is_7_hEgxyDlCngwrfFe
|
|
+ cmp rdx,0x60
|
|
+ jge NEAR $L$_remaining_num_blocks_is_6_hEgxyDlCngwrfFe
|
|
+ cmp rdx,0x50
|
|
+ jge NEAR $L$_remaining_num_blocks_is_5_hEgxyDlCngwrfFe
|
|
+ cmp rdx,0x40
|
|
+ jge NEAR $L$_remaining_num_blocks_is_4_hEgxyDlCngwrfFe
|
|
+ cmp rdx,0x30
|
|
+ jge NEAR $L$_remaining_num_blocks_is_3_hEgxyDlCngwrfFe
|
|
+ cmp rdx,0x20
|
|
+ jge NEAR $L$_remaining_num_blocks_is_2_hEgxyDlCngwrfFe
|
|
+ cmp rdx,0x10
|
|
+ jge NEAR $L$_remaining_num_blocks_is_1_hEgxyDlCngwrfFe
|
|
+ vmovdqa xmm8,xmm0
|
|
+ vmovdqa xmm0,xmm9
|
|
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
|
|
+
|
|
+$L$_remaining_num_blocks_is_7_hEgxyDlCngwrfFe:
|
|
+ mov r8,0x0000ffffffffffff
|
|
+ kmovq k1,r8
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ vmovdqu8 zmm2{k1},[64+rdi]
|
|
+ add rdi,0x70
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpternlogq zmm1,zmm9,zmm0,0x96
|
|
+ vpternlogq zmm2,zmm10,zmm0,0x96
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesenclast zmm1,zmm1,zmm0
|
|
+ vaesenclast zmm2,zmm2,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 ZMMWORD[64+rsi]{k1},zmm2
|
|
+ add rsi,0x70
|
|
+ vextracti32x4 xmm8,zmm2,0x2
|
|
+ vextracti32x4 xmm0,zmm10,0x3
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
|
|
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
|
|
+
|
|
+$L$_remaining_num_blocks_is_6_hEgxyDlCngwrfFe:
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ vmovdqu8 ymm2,YMMWORD[64+rdi]
|
|
+ add rdi,0x60
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpternlogq zmm1,zmm9,zmm0,0x96
|
|
+ vpternlogq zmm2,zmm10,zmm0,0x96
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesenclast zmm1,zmm1,zmm0
|
|
+ vaesenclast zmm2,zmm2,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 YMMWORD[64+rsi],ymm2
|
|
+ add rsi,0x60
|
|
+ vextracti32x4 xmm8,zmm2,0x1
|
|
+ vextracti32x4 xmm0,zmm10,0x2
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
|
|
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
|
|
+
|
|
+$L$_remaining_num_blocks_is_5_hEgxyDlCngwrfFe:
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ vmovdqu xmm2,XMMWORD[64+rdi]
|
|
+ add rdi,0x50
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpternlogq zmm1,zmm9,zmm0,0x96
|
|
+ vpternlogq zmm2,zmm10,zmm0,0x96
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesenclast zmm1,zmm1,zmm0
|
|
+ vaesenclast zmm2,zmm2,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu XMMWORD[64+rsi],xmm2
|
|
+ add rsi,0x50
|
|
+ vmovdqa xmm8,xmm2
|
|
+ vextracti32x4 xmm0,zmm10,0x1
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
|
|
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
|
|
+
|
|
+$L$_remaining_num_blocks_is_4_hEgxyDlCngwrfFe:
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ add rdi,0x40
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpternlogq zmm1,zmm9,zmm0,0x96
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesenclast zmm1,zmm1,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ add rsi,0x40
|
|
+ vextracti32x4 xmm8,zmm1,0x3
|
|
+ vmovdqa64 xmm0,xmm10
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
|
|
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
|
|
+$L$_remaining_num_blocks_is_3_hEgxyDlCngwrfFe:
|
|
+ mov r8,-1
|
|
+ shr r8,0x10
|
|
+ kmovq k1,r8
|
|
+ vmovdqu8 zmm1{k1},[rdi]
|
|
+ add rdi,0x30
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpternlogq zmm1,zmm9,zmm0,0x96
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesenclast zmm1,zmm1,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vmovdqu8 ZMMWORD[rsi]{k1},zmm1
|
|
+ add rsi,0x30
|
|
+ vextracti32x4 xmm8,zmm1,0x2
|
|
+ vextracti32x4 xmm0,zmm9,0x3
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
|
|
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
|
|
+$L$_remaining_num_blocks_is_2_hEgxyDlCngwrfFe:
|
|
+ vmovdqu8 ymm1,YMMWORD[rdi]
|
|
+ add rdi,0x20
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[rcx]
|
|
+ vpternlogq ymm1,ymm9,ymm0,0x96
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[16+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[32+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[48+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[64+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[80+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[96+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[112+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[128+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[144+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[160+rcx]
|
|
+ vaesenclast ymm1,ymm1,ymm0
|
|
+ vpxorq ymm1,ymm1,ymm9
|
|
+ vmovdqu YMMWORD[rsi],ymm1
|
|
+ add rsi,0x20
|
|
+ vextracti32x4 xmm8,zmm1,0x1
|
|
+ vextracti32x4 xmm0,zmm9,0x2
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
|
|
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
|
|
+$L$_remaining_num_blocks_is_1_hEgxyDlCngwrfFe:
|
|
+ vmovdqu xmm1,XMMWORD[rdi]
|
|
+ add rdi,0x10
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm1,xmm1,XMMWORD[rcx]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[16+rcx]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[32+rcx]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[48+rcx]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[64+rcx]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[80+rcx]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[96+rcx]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[112+rcx]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[128+rcx]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[144+rcx]
|
|
+ vaesenclast xmm1,xmm1,XMMWORD[160+rcx]
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ add rsi,0x10
|
|
+ vmovdqa xmm8,xmm1
|
|
+ vextracti32x4 xmm0,zmm9,0x1
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
|
|
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
|
|
+
|
|
+
|
|
+$L$_start_by16_hEgxyDlCngwrfFe:
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rsp]
|
|
+ vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7]
|
|
+ mov r8,0xaa
|
|
+ kmovq k2,r8
|
|
+ vpshufb zmm1,zmm0,zmm8
|
|
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
|
|
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
|
|
+ vpclmulqdq zmm3,zmm2,zmm25,0x0
|
|
+ vpxorq zmm4{k2},zmm4,zmm2
|
|
+ vpxord zmm9,zmm3,zmm4
|
|
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
|
|
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
|
|
+ vpclmulqdq zmm7,zmm6,zmm25,0x0
|
|
+ vpxorq zmm5{k2},zmm5,zmm6
|
|
+ vpxord zmm10,zmm7,zmm5
|
|
+ vpsrldq zmm13,zmm9,0xf
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
|
|
+ vpslldq zmm11,zmm9,0x1
|
|
+ vpxord zmm11,zmm11,zmm14
|
|
+ vpsrldq zmm15,zmm10,0xf
|
|
+ vpclmulqdq zmm16,zmm15,zmm25,0x0
|
|
+ vpslldq zmm12,zmm10,0x1
|
|
+ vpxord zmm12,zmm12,zmm16
|
|
+
|
|
+$L$_main_loop_run_16_hEgxyDlCngwrfFe:
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ vmovdqu8 zmm2,ZMMWORD[64+rdi]
|
|
+ vmovdqu8 zmm3,ZMMWORD[128+rdi]
|
|
+ vmovdqu8 zmm4,ZMMWORD[192+rdi]
|
|
+ add rdi,0x100
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+ vpxorq zmm3,zmm3,zmm11
|
|
+ vpxorq zmm4,zmm4,zmm12
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpxorq zmm1,zmm1,zmm0
|
|
+ vpxorq zmm2,zmm2,zmm0
|
|
+ vpxorq zmm3,zmm3,zmm0
|
|
+ vpxorq zmm4,zmm4,zmm0
|
|
+ vpsrldq zmm13,zmm11,0xf
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
|
|
+ vpslldq zmm15,zmm11,0x1
|
|
+ vpxord zmm15,zmm15,zmm14
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+ vaesenc zmm3,zmm3,zmm0
|
|
+ vaesenc zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+ vaesenc zmm3,zmm3,zmm0
|
|
+ vaesenc zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+ vaesenc zmm3,zmm3,zmm0
|
|
+ vaesenc zmm4,zmm4,zmm0
|
|
+ vpsrldq zmm13,zmm12,0xf
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
|
|
+ vpslldq zmm16,zmm12,0x1
|
|
+ vpxord zmm16,zmm16,zmm14
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+ vaesenc zmm3,zmm3,zmm0
|
|
+ vaesenc zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+ vaesenc zmm3,zmm3,zmm0
|
|
+ vaesenc zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+ vaesenc zmm3,zmm3,zmm0
|
|
+ vaesenc zmm4,zmm4,zmm0
|
|
+ vpsrldq zmm13,zmm15,0xf
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
|
|
+ vpslldq zmm17,zmm15,0x1
|
|
+ vpxord zmm17,zmm17,zmm14
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+ vaesenc zmm3,zmm3,zmm0
|
|
+ vaesenc zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+ vaesenc zmm3,zmm3,zmm0
|
|
+ vaesenc zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+ vaesenc zmm3,zmm3,zmm0
|
|
+ vaesenc zmm4,zmm4,zmm0
|
|
+ vpsrldq zmm13,zmm16,0xf
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
|
|
+ vpslldq zmm18,zmm16,0x1
|
|
+ vpxord zmm18,zmm18,zmm14
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesenclast zmm1,zmm1,zmm0
|
|
+ vaesenclast zmm2,zmm2,zmm0
|
|
+ vaesenclast zmm3,zmm3,zmm0
|
|
+ vaesenclast zmm4,zmm4,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+ vpxorq zmm3,zmm3,zmm11
|
|
+ vpxorq zmm4,zmm4,zmm12
|
|
+
|
|
+ vmovdqa32 zmm9,zmm15
|
|
+ vmovdqa32 zmm10,zmm16
|
|
+ vmovdqa32 zmm11,zmm17
|
|
+ vmovdqa32 zmm12,zmm18
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 ZMMWORD[64+rsi],zmm2
|
|
+ vmovdqu8 ZMMWORD[128+rsi],zmm3
|
|
+ vmovdqu8 ZMMWORD[192+rsi],zmm4
|
|
+ add rsi,0x100
|
|
+ sub rdx,0x100
|
|
+ cmp rdx,0x100
|
|
+ jae NEAR $L$_main_loop_run_16_hEgxyDlCngwrfFe
|
|
+ cmp rdx,0x80
|
|
+ jae NEAR $L$_main_loop_run_8_hEgxyDlCngwrfFe
|
|
+ vextracti32x4 xmm0,zmm4,0x3
|
|
+ jmp NEAR $L$_do_n_blocks_hEgxyDlCngwrfFe
|
|
+
|
|
+$L$_start_by8_hEgxyDlCngwrfFe:
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rsp]
|
|
+ vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7]
|
|
+ mov r8,0xaa
|
|
+ kmovq k2,r8
|
|
+ vpshufb zmm1,zmm0,zmm8
|
|
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
|
|
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
|
|
+ vpclmulqdq zmm3,zmm2,zmm25,0x0
|
|
+ vpxorq zmm4{k2},zmm4,zmm2
|
|
+ vpxord zmm9,zmm3,zmm4
|
|
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
|
|
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
|
|
+ vpclmulqdq zmm7,zmm6,zmm25,0x0
|
|
+ vpxorq zmm5{k2},zmm5,zmm6
|
|
+ vpxord zmm10,zmm7,zmm5
|
|
+
|
|
+$L$_main_loop_run_8_hEgxyDlCngwrfFe:
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ vmovdqu8 zmm2,ZMMWORD[64+rdi]
|
|
+ add rdi,0x80
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpternlogq zmm1,zmm9,zmm0,0x96
|
|
+ vpternlogq zmm2,zmm10,zmm0,0x96
|
|
+ vpsrldq zmm13,zmm9,0xf
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
|
|
+ vpslldq zmm15,zmm9,0x1
|
|
+ vpxord zmm15,zmm15,zmm14
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+ vpsrldq zmm13,zmm10,0xf
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
|
|
+ vpslldq zmm16,zmm10,0x1
|
|
+ vpxord zmm16,zmm16,zmm14
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesenclast zmm1,zmm1,zmm0
|
|
+ vaesenclast zmm2,zmm2,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+ vmovdqa32 zmm9,zmm15
|
|
+ vmovdqa32 zmm10,zmm16
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 ZMMWORD[64+rsi],zmm2
|
|
+ add rsi,0x80
|
|
+ sub rdx,0x80
|
|
+ cmp rdx,0x80
|
|
+ jae NEAR $L$_main_loop_run_8_hEgxyDlCngwrfFe
|
|
+ vextracti32x4 xmm0,zmm2,0x3
|
|
+ jmp NEAR $L$_do_n_blocks_hEgxyDlCngwrfFe
|
|
+
|
|
+$L$_steal_cipher_hEgxyDlCngwrfFe:
|
|
+ vmovdqa xmm2,xmm8
|
|
+ lea rax,[vpshufb_shf_table]
|
|
+ vmovdqu xmm10,XMMWORD[rdx*1+rax]
|
|
+ vpshufb xmm8,xmm8,xmm10
|
|
+ vmovdqu xmm3,XMMWORD[((-16))+rdx*1+rdi]
|
|
+ vmovdqu XMMWORD[(-16)+rdx*1+rsi],xmm8
|
|
+ lea rax,[vpshufb_shf_table]
|
|
+ add rax,16
|
|
+ sub rax,rdx
|
|
+ vmovdqu xmm10,XMMWORD[rax]
|
|
+ vpxor xmm10,xmm10,XMMWORD[mask1]
|
|
+ vpshufb xmm3,xmm3,xmm10
|
|
+ vpblendvb xmm3,xmm3,xmm2,xmm10
|
|
+ vpxor xmm8,xmm3,xmm0
|
|
+ vpxor xmm8,xmm8,XMMWORD[rcx]
|
|
+ vaesenc xmm8,xmm8,XMMWORD[16+rcx]
|
|
+ vaesenc xmm8,xmm8,XMMWORD[32+rcx]
|
|
+ vaesenc xmm8,xmm8,XMMWORD[48+rcx]
|
|
+ vaesenc xmm8,xmm8,XMMWORD[64+rcx]
|
|
+ vaesenc xmm8,xmm8,XMMWORD[80+rcx]
|
|
+ vaesenc xmm8,xmm8,XMMWORD[96+rcx]
|
|
+ vaesenc xmm8,xmm8,XMMWORD[112+rcx]
|
|
+ vaesenc xmm8,xmm8,XMMWORD[128+rcx]
|
|
+ vaesenc xmm8,xmm8,XMMWORD[144+rcx]
|
|
+ vaesenclast xmm8,xmm8,XMMWORD[160+rcx]
|
|
+ vpxor xmm8,xmm8,xmm0
|
|
+ vmovdqu XMMWORD[(-16)+rsi],xmm8
|
|
+$L$_ret_hEgxyDlCngwrfFe: ; common exit: reload saved registers from the frame, overwrite the save slots with zero, return
|
|
+ mov rbx,QWORD[288+rsp] ; reload rbx from its frame slot
|
|
+ xor r8,r8 ; r8 = 0, used to clear the integer slots
|
|
+ mov QWORD[288+rsp],r8 ; clear the rbx slot
|
|
+
|
|
+ vpxorq zmm0,zmm0,zmm0 ; zmm0 = 0, used to clear the vector save area
|
|
+ mov rdi,QWORD[((288 + 8))+rsp] ; reload rdi from the frame (rdi is loaded again from [8+rsp] after the frame is torn down)
|
|
+ mov QWORD[((288 + 8))+rsp],r8 ; clear that slot
|
|
+ mov rsi,QWORD[((288 + 16))+rsp] ; reload rsi from the frame (likewise loaded again from [16+rsp] below)
|
|
+ mov QWORD[((288 + 16))+rsp],r8 ; clear that slot
|
|
+
|
|
+ vmovdqa xmm6,XMMWORD[((128 + 0))+rsp] ; reload xmm6..xmm9 from rsp+128 .. rsp+191
|
|
+ vmovdqa xmm7,XMMWORD[((128 + 16))+rsp]
|
|
+ vmovdqa xmm8,XMMWORD[((128 + 32))+rsp]
|
|
+ vmovdqa xmm9,XMMWORD[((128 + 48))+rsp]
|
|
+
|
|
+
|
|
+ vmovdqa64 ZMMWORD[128+rsp],zmm0 ; zero those 64 bytes now that they have been read
|
|
+
|
|
+ vmovdqa xmm10,XMMWORD[((128 + 64))+rsp] ; reload xmm10..xmm13 from rsp+192 .. rsp+255
|
|
+ vmovdqa xmm11,XMMWORD[((128 + 80))+rsp]
|
|
+ vmovdqa xmm12,XMMWORD[((128 + 96))+rsp]
|
|
+ vmovdqa xmm13,XMMWORD[((128 + 112))+rsp]
|
|
+
|
|
+
|
|
+ vmovdqa64 ZMMWORD[(128 + 64)+rsp],zmm0 ; zero the next 64 bytes
|
|
+
|
|
+ vmovdqa xmm14,XMMWORD[((128 + 128))+rsp] ; reload xmm14 and xmm15 from rsp+256 .. rsp+287
|
|
+ vmovdqa xmm15,XMMWORD[((128 + 144))+rsp]
|
|
+
|
|
+
|
|
+
|
|
+ vmovdqa YMMWORD[(128 + 128)+rsp],ymm0 ; zero the last 32 bytes of the vector save area
|
|
+ mov rsp,rbp ; drop the aligned frame
|
|
+ pop rbp
|
|
+ vzeroupper ; clear upper vector state before returning to the caller
|
|
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
|
+ mov rsi,QWORD[16+rsp] ; second half of the Win64 epilogue: reload rsi from [16+rsp]
|
|
+ DB 0F3h,0C3h ;repret
|
|
+
|
|
+$L$_less_than_128_bytes_hEgxyDlCngwrfFe: ; short-input path; by its name rdx < 0x80 here (the entry check for this function is outside this chunk)
|
|
+ vpbroadcastq zmm25,r10 ; copy r10 into every qword of zmm25 (the vpclmulqdq multiplier in the num_blocks_is_N paths)
|
|
+ cmp rdx,0x10
|
|
+ jb NEAR $L$_ret_hEgxyDlCngwrfFe ; fewer than 16 bytes: go straight to the exit without processing anything
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rsp] ; replicate the 16 bytes stored at [rsp] into all four 128-bit lanes (presumably the initial tweak)
|
|
+ vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7] ; shuffle-control constant, consumed by the vpshufb at the top of each num_blocks_is_N path
|
|
+ mov r8d,0xaa
|
|
+ kmovq k2,r8 ; k2 = 0xaa: write mask selecting the odd-numbered qword lanes
|
|
+ mov r8,rdx
|
|
+ and r8,0x70 ; r8 = rdx & 0x70, i.e. the byte count rounded down to whole 16-byte blocks when rdx < 0x80
|
|
+ cmp r8,0x60 ; dispatch on the number of whole blocks
|
|
+ je NEAR $L$_num_blocks_is_6_hEgxyDlCngwrfFe
|
|
+ cmp r8,0x50
|
|
+ je NEAR $L$_num_blocks_is_5_hEgxyDlCngwrfFe
|
|
+ cmp r8,0x40
|
|
+ je NEAR $L$_num_blocks_is_4_hEgxyDlCngwrfFe
|
|
+ cmp r8,0x30
|
|
+ je NEAR $L$_num_blocks_is_3_hEgxyDlCngwrfFe
|
|
+ cmp r8,0x20
|
|
+ je NEAR $L$_num_blocks_is_2_hEgxyDlCngwrfFe
|
|
+ cmp r8,0x10
|
|
+ je NEAR $L$_num_blocks_is_1_hEgxyDlCngwrfFe ; no match above or here: fall through into the num_blocks_is_7 label that follows
|
|
+
|
|
+$L$_num_blocks_is_7_hEgxyDlCngwrfFe:
|
|
+ vpshufb zmm1,zmm0,zmm8
|
|
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
|
|
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
|
|
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
|
|
+ vpxorq zmm4{k2},zmm4,zmm2
|
|
+ vpxord zmm9,zmm3,zmm4
|
|
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
|
|
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
|
|
+ vpclmulqdq zmm7,zmm6,zmm25,0x00
|
|
+ vpxorq zmm5{k2},zmm5,zmm6
|
|
+ vpxord zmm10,zmm7,zmm5
|
|
+ mov r8,0x0000ffffffffffff
|
|
+ kmovq k1,r8
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ vmovdqu8 zmm2{k1},[64+rdi]
|
|
+
|
|
+ add rdi,0x70
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpternlogq zmm1,zmm9,zmm0,0x96
|
|
+ vpternlogq zmm2,zmm10,zmm0,0x96
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesenclast zmm1,zmm1,zmm0
|
|
+ vaesenclast zmm2,zmm2,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 ZMMWORD[64+rsi]{k1},zmm2
|
|
+ add rsi,0x70
|
|
+ vextracti32x4 xmm8,zmm2,0x2
|
|
+ vextracti32x4 xmm0,zmm10,0x3
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
|
|
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
|
|
+$L$_num_blocks_is_6_hEgxyDlCngwrfFe:
|
|
+ vpshufb zmm1,zmm0,zmm8
|
|
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
|
|
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
|
|
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
|
|
+ vpxorq zmm4{k2},zmm4,zmm2
|
|
+ vpxord zmm9,zmm3,zmm4
|
|
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
|
|
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
|
|
+ vpclmulqdq zmm7,zmm6,zmm25,0x00
|
|
+ vpxorq zmm5{k2},zmm5,zmm6
|
|
+ vpxord zmm10,zmm7,zmm5
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ vmovdqu8 ymm2,YMMWORD[64+rdi]
|
|
+ add rdi,96
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpternlogq zmm1,zmm9,zmm0,0x96
|
|
+ vpternlogq zmm2,zmm10,zmm0,0x96
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesenclast zmm1,zmm1,zmm0
|
|
+ vaesenclast zmm2,zmm2,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 YMMWORD[64+rsi],ymm2
|
|
+ add rsi,96
|
|
+
|
|
+ vextracti32x4 xmm8,ymm2,0x1
|
|
+ vextracti32x4 xmm0,zmm10,0x2
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
|
|
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
|
|
+$L$_num_blocks_is_5_hEgxyDlCngwrfFe:
|
|
+ vpshufb zmm1,zmm0,zmm8
|
|
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
|
|
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
|
|
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
|
|
+ vpxorq zmm4{k2},zmm4,zmm2
|
|
+ vpxord zmm9,zmm3,zmm4
|
|
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
|
|
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
|
|
+ vpclmulqdq zmm7,zmm6,zmm25,0x00
|
|
+ vpxorq zmm5{k2},zmm5,zmm6
|
|
+ vpxord zmm10,zmm7,zmm5
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ vmovdqu8 xmm2,XMMWORD[64+rdi]
|
|
+ add rdi,80
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpternlogq zmm1,zmm9,zmm0,0x96
|
|
+ vpternlogq zmm2,zmm10,zmm0,0x96
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesenclast zmm1,zmm1,zmm0
|
|
+ vaesenclast zmm2,zmm2,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 XMMWORD[64+rsi],xmm2
|
|
+ add rsi,80
|
|
+
|
|
+ vmovdqa xmm8,xmm2
|
|
+ vextracti32x4 xmm0,zmm10,0x1
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
|
|
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
|
|
+$L$_num_blocks_is_4_hEgxyDlCngwrfFe:
|
|
+ vpshufb zmm1,zmm0,zmm8
|
|
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
|
|
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
|
|
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
|
|
+ vpxorq zmm4{k2},zmm4,zmm2
|
|
+ vpxord zmm9,zmm3,zmm4
|
|
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
|
|
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
|
|
+ vpclmulqdq zmm7,zmm6,zmm25,0x00
|
|
+ vpxorq zmm5{k2},zmm5,zmm6
|
|
+ vpxord zmm10,zmm7,zmm5
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ add rdi,64
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpternlogq zmm1,zmm9,zmm0,0x96
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesenclast zmm1,zmm1,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ add rsi,64
|
|
+ vextracti32x4 xmm8,zmm1,0x3
|
|
+ vmovdqa xmm0,xmm10
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
|
|
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
|
|
+$L$_num_blocks_is_3_hEgxyDlCngwrfFe:
|
|
+ vpshufb zmm1,zmm0,zmm8
|
|
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
|
|
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
|
|
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
|
|
+ vpxorq zmm4{k2},zmm4,zmm2
|
|
+ vpxord zmm9,zmm3,zmm4
|
|
+ mov r8,0x0000ffffffffffff
|
|
+ kmovq k1,r8
|
|
+ vmovdqu8 zmm1{k1},[rdi]
|
|
+ add rdi,48
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpternlogq zmm1,zmm9,zmm0,0x96
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesenclast zmm1,zmm1,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vmovdqu8 ZMMWORD[rsi]{k1},zmm1
|
|
+ add rsi,48
|
|
+ vextracti32x4 xmm8,zmm1,2
|
|
+ vextracti32x4 xmm0,zmm9,3
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_ret_hEgxyDlCngwrfFe
|
|
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe
|
|
+$L$_num_blocks_is_2_hEgxyDlCngwrfFe: ; two whole blocks; zmm0, zmm8, zmm25 and k2 were set up by the less_than_128_bytes dispatcher
|
|
+ vpshufb zmm1,zmm0,zmm8 ; byte-shuffle the broadcast [rsp] value with the shufb_15_7 control
|
|
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] ; per-qword left shift of zmm0 by the counts in const_dq3210
|
|
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] ; per-qword right shift of the shuffled copy by the counts in const_dq5678
|
|
+ vpclmulqdq zmm3,zmm2,zmm25,0x00 ; carry-less multiply of the low qword of each 128-bit lane by the constant in zmm25
|
|
+ vpxorq zmm4{k2},zmm4,zmm2 ; fold the shifted-out bits into the qwords selected by k2 (0xaa)
|
|
+ vpxord zmm9,zmm3,zmm4 ; zmm9 = four 128-bit values xored in before and after AES below (presumably consecutive XTS tweaks)
|
|
+
|
|
+ vmovdqu8 ymm1,YMMWORD[rdi] ; load two 16-byte input blocks
|
|
+ add rdi,32
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[rcx] ; round key 0, replicated to both lanes
|
|
+ vpternlogq ymm1,ymm9,ymm0,0x96 ; imm 0x96 is a three-way xor: ymm1 = ymm1 ^ ymm9 ^ ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[16+rcx] ; nine middle rounds, each broadcasting the next key from rcx+16 .. rcx+144
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[32+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[48+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[64+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[80+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[96+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[112+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[128+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[144+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[160+rcx] ; final round key at rcx+160
|
|
+ vaesenclast ymm1,ymm1,ymm0
|
|
+ vpxorq ymm1,ymm1,ymm9 ; xor with the same zmm9 lanes again after the cipher
|
|
+ vmovdqu8 YMMWORD[rsi],ymm1 ; store both output blocks
|
|
+ add rsi,32
|
|
+
|
|
+ vextracti32x4 xmm8,ymm1,1 ; xmm8 = second (last) output block, handed to steal_cipher
|
|
+ vextracti32x4 xmm0,zmm9,2 ; xmm0 = lane 2 of zmm9, the first lane not consumed by these two blocks
|
|
+ and rdx,0xf ; leftover byte count
|
|
+ je NEAR $L$_ret_hEgxyDlCngwrfFe ; length was a multiple of 16: done
|
|
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe ; otherwise finish with ciphertext stealing
|
|
+$L$_num_blocks_is_1_hEgxyDlCngwrfFe: ; one whole block; zmm0, zmm8, zmm25 and k2 were set up by the less_than_128_bytes dispatcher
|
|
+ vpshufb zmm1,zmm0,zmm8 ; byte-shuffle the broadcast [rsp] value with the shufb_15_7 control
|
|
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] ; per-qword left shift of zmm0 by the counts in const_dq3210
|
|
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] ; per-qword right shift of the shuffled copy by the counts in const_dq5678
|
|
+ vpclmulqdq zmm3,zmm2,zmm25,0x00 ; carry-less multiply of the low qword of each 128-bit lane by the constant in zmm25
|
|
+ vpxorq zmm4{k2},zmm4,zmm2 ; fold the shifted-out bits into the qwords selected by k2 (0xaa)
|
|
+ vpxord zmm9,zmm3,zmm4 ; zmm9 = four 128-bit values xored in before and after AES below (presumably consecutive XTS tweaks)
|
|
+
|
|
+ vmovdqu8 xmm1,XMMWORD[rdi] ; load the single 16-byte input block
|
|
+ add rdi,16
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[rcx] ; round key 0 (ymm-wide ops are used although only the low 128 bits are stored)
|
|
+ vpternlogq ymm1,ymm9,ymm0,0x96 ; imm 0x96 is a three-way xor: ymm1 = ymm1 ^ ymm9 ^ ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[16+rcx] ; nine middle rounds, each broadcasting the next key from rcx+16 .. rcx+144
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[32+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[48+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[64+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[80+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[96+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[112+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[128+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[144+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[160+rcx] ; final round key at rcx+160
|
|
+ vaesenclast ymm1,ymm1,ymm0
|
|
+ vpxorq ymm1,ymm1,ymm9 ; xor with the same zmm9 lanes again after the cipher
|
|
+ vmovdqu8 XMMWORD[rsi],xmm1 ; store only the low 16 bytes, the one output block
|
|
+ add rsi,16
|
|
+
|
|
+ vmovdqa xmm8,xmm1 ; xmm8 = the output block just written, handed to steal_cipher
|
|
+ vextracti32x4 xmm0,zmm9,1 ; xmm0 = lane 1 of zmm9, the first lane not consumed by this block
|
|
+ and rdx,0xf ; leftover byte count
|
|
+ je NEAR $L$_ret_hEgxyDlCngwrfFe ; length was a multiple of 16: done
|
|
+ jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe ; otherwise finish with ciphertext stealing
|
|
+
|
|
+global aesni_xts_128_decrypt_avx512
|
|
+
|
|
+
|
|
+ALIGN 32
|
|
+aesni_xts_128_decrypt_avx512: ; Win64 entry point: arguments arrive in rcx, rdx, r8, r9 and on the stack at [40+rsp] and [48+rsp]
|
|
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
|
|
+ mov QWORD[16+rsp],rsi ; rdi and rsi are callee-saved on Win64; both go into the caller-provided slots above the return address
|
|
+ mov rax,rsp
|
|
+$L$SEH_begin_aesni_xts_128_decrypt_avx512:
|
|
+ mov rdi,rcx ; remap the six arguments into rdi, rsi, rdx, rcx, r8, r9, the registers the body uses
|
|
+ mov rsi,rdx
|
|
+ mov rdx,r8
|
|
+ mov rcx,r9
|
|
+ mov r8,QWORD[40+rsp] ; fifth argument, read from the stack
|
|
+ mov r9,QWORD[48+rsp] ; sixth argument, read from the stack
|
|
+
|
|
+
|
|
+
|
|
+DB 243,15,30,250 ; bytes F3 0F 1E FA = endbr64
|
|
+ push rbp
|
|
+ mov rbp,rsp ; rbp keeps the unaligned stack pointer for the epilogue
|
|
+ sub rsp,312
|
|
+ and rsp,0xffffffffffffffc0 ; round rsp down to a 64-byte boundary (the frame is accessed with aligned vector moves)
|
|
+ mov QWORD[288+rsp],rbx ; save rbx in the frame
|
|
+ mov QWORD[((288 + 8))+rsp],rdi ; store the current rdi and rsi (first two arguments after the remap) in the frame
|
|
+ mov QWORD[((288 + 16))+rsp],rsi
|
|
+ vmovdqa XMMWORD[(128 + 0)+rsp],xmm6 ; save xmm6..xmm15 (callee-saved on Win64) at rsp+128 .. rsp+287
|
|
+ vmovdqa XMMWORD[(128 + 16)+rsp],xmm7
|
|
+ vmovdqa XMMWORD[(128 + 32)+rsp],xmm8
|
|
+ vmovdqa XMMWORD[(128 + 48)+rsp],xmm9
|
|
+ vmovdqa XMMWORD[(128 + 64)+rsp],xmm10
|
|
+ vmovdqa XMMWORD[(128 + 80)+rsp],xmm11
|
|
+ vmovdqa XMMWORD[(128 + 96)+rsp],xmm12
|
|
+ vmovdqa XMMWORD[(128 + 112)+rsp],xmm13
|
|
+ vmovdqa XMMWORD[(128 + 128)+rsp],xmm14
|
|
+ vmovdqa XMMWORD[(128 + 144)+rsp],xmm15
|
|
+ mov r10,0x87 ; constant broadcast into zmm25 below (presumably the XTS reduction constant)
|
|
+ vmovdqu xmm1,XMMWORD[r9] ; load the 16 bytes at [r9] (sixth argument)
|
|
+ vpxor xmm1,xmm1,XMMWORD[r8] ; AES-encrypt them with the 11 round keys at r8 (fifth argument): key 0 xor ...
|
|
+ vaesenc xmm1,xmm1,XMMWORD[16+r8] ; ... nine middle rounds ...
|
|
+ vaesenc xmm1,xmm1,XMMWORD[32+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[48+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[64+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[80+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[96+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[112+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[128+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[144+r8]
|
|
+ vaesenclast xmm1,xmm1,XMMWORD[160+r8] ; ... and the final round (encryption even in the decrypt entry point)
|
|
+ vmovdqa XMMWORD[rsp],xmm1 ; keep the result at [rsp] (presumably the initial tweak)
|
|
+ mov QWORD[((8 + 40))+rbp],rdi ; rbp+48 is the slot the fifth argument was read from; it now receives rdi (purpose not visible here)
|
|
+ mov QWORD[((8 + 48))+rbp],rsi ; likewise rbp+56, the sixth-argument slot, receives rsi
|
|
+
|
|
+ cmp rdx,0x80
|
|
+ jb NEAR $L$_less_than_128_bytes_amivrujEyduiFoi ; fewer than 128 bytes: short-input path (unsigned compare)
|
|
+ vpbroadcastq zmm25,r10 ; zmm25 = 0x87 in every qword
|
|
+ cmp rdx,0x100
|
|
+ jge NEAR $L$_start_by16_amivrujEyduiFoi ; at least 256 bytes. NOTE(review): jge is a signed test on a byte count; jae would be the unsigned form
|
|
+ jmp NEAR $L$_start_by8_amivrujEyduiFoi ; 128..255 bytes
|
|
+
|
|
+$L$_do_n_blocks_amivrujEyduiFoi: ; tail dispatch on the remaining byte count in rdx
|
|
+ cmp rdx,0x0
|
|
+ je NEAR $L$_ret_amivrujEyduiFoi ; nothing left: exit
|
|
+ cmp rdx,0x70 ; NOTE(review): the jge tests below are signed although rdx is a byte count; jae would be the unsigned form
|
|
+ jge NEAR $L$_remaining_num_blocks_is_7_amivrujEyduiFoi
|
|
+ cmp rdx,0x60
|
|
+ jge NEAR $L$_remaining_num_blocks_is_6_amivrujEyduiFoi
|
|
+ cmp rdx,0x50
|
|
+ jge NEAR $L$_remaining_num_blocks_is_5_amivrujEyduiFoi
|
|
+ cmp rdx,0x40
|
|
+ jge NEAR $L$_remaining_num_blocks_is_4_amivrujEyduiFoi
|
|
+ cmp rdx,0x30
|
|
+ jge NEAR $L$_remaining_num_blocks_is_3_amivrujEyduiFoi
|
|
+ cmp rdx,0x20
|
|
+ jge NEAR $L$_remaining_num_blocks_is_2_amivrujEyduiFoi
|
|
+ cmp rdx,0x10
|
|
+ jge NEAR $L$_remaining_num_blocks_is_1_amivrujEyduiFoi
|
|
+
|
|
+
|
|
+ vmovdqu xmm1,xmm5 ; fall-through: 1..15 bytes remain; xmm5 is set outside this chunk (presumably the last full input block, to be reprocessed)
|
|
+
|
|
+ vpxor xmm1,xmm1,xmm9 ; xor with xmm9 (set outside this chunk; presumably the tweak for this block)
|
|
+ vmovdqu xmm0,XMMWORD[rcx] ; round key 0 at [rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx] ; nine vaesdec rounds with the keys at rcx+16 .. rcx+144
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx] ; final round key at rcx+160
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9 ; xor with xmm9 again after the cipher
|
|
+ vmovdqu XMMWORD[(-16)+rsi],xmm1 ; overwrite the 16 bytes just below rsi
|
|
+ vmovdqa xmm8,xmm1 ; xmm8 = the block just written (the same hand-off the remaining_num_blocks paths make before steal_cipher)
|
|
+
|
|
+
|
|
+ mov r8,0x1
|
|
+ kmovq k1,r8 ; k1 = 1: write mask for qword lane 0 only
|
|
+ vpsllq xmm13,xmm9,0x3f ; move bit 0 of each qword of xmm9 up to bit 63
|
|
+ vpsraq xmm14,xmm13,0x3f ; arithmetic shift back: all-ones qword where bit 0 was set, else zero
|
|
+ vpandq xmm5,xmm14,xmm25 ; keep the zmm25 constant only in qwords whose bit 0 was set
|
|
+ vpxorq xmm9{k1},xmm9,xmm5 ; xor it into the low qword of xmm9 only
|
|
+ vpsrldq xmm10,xmm9,0x8 ; xmm10 = high qword of xmm9 moved down to the low qword
|
|
+DB 98,211,181,8,115,194,1 ; hand-encoded EVEX instruction; the bytes appear to decode to vpshrdq xmm0,xmm9,xmm10,1 - TODO confirm
|
|
+ vpslldq xmm13,xmm13,0x8 ; move the low qword's bit-63 flag into the high qword
|
|
+ vpxorq xmm0,xmm0,xmm13 ; merge it in; xmm0 is the value steal_cipher receives (presumably xmm9 shifted right by one bit as a 128-bit quantity)
|
|
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+$L$_remaining_num_blocks_is_7_amivrujEyduiFoi:
|
|
+ mov r8,0xffffffffffffffff
|
|
+ shr r8,0x10
|
|
+ kmovq k1,r8
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ vmovdqu8 zmm2{k1},[64+rdi]
|
|
+ add rdi,0x70
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_done_7_remain_amivrujEyduiFoi
|
|
+ vextracti32x4 xmm12,zmm10,0x2
|
|
+ vextracti32x4 xmm13,zmm10,0x3
|
|
+ vinserti32x4 zmm10,zmm10,xmm13,0x2
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpxorq zmm1,zmm1,zmm0
|
|
+ vpxorq zmm2,zmm2,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesdeclast zmm1,zmm1,zmm0
|
|
+ vaesdeclast zmm2,zmm2,zmm0
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vmovdqa32 zmm9,zmm15
|
|
+ vmovdqa32 zmm10,zmm16
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 ZMMWORD[64+rsi]{k1},zmm2
|
|
+ add rsi,0x70
|
|
+ vextracti32x4 xmm8,zmm2,0x2
|
|
+ vmovdqa xmm0,xmm12
|
|
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+$L$_done_7_remain_amivrujEyduiFoi:
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpxorq zmm1,zmm1,zmm0
|
|
+ vpxorq zmm2,zmm2,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesdeclast zmm1,zmm1,zmm0
|
|
+ vaesdeclast zmm2,zmm2,zmm0
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vmovdqa32 zmm9,zmm15
|
|
+ vmovdqa32 zmm10,zmm16
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 ZMMWORD[64+rsi]{k1},zmm2
|
|
+ jmp NEAR $L$_ret_amivrujEyduiFoi
|
|
+
|
|
+$L$_remaining_num_blocks_is_6_amivrujEyduiFoi:
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ vmovdqu8 ymm2,YMMWORD[64+rdi]
|
|
+ add rdi,0x60
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_done_6_remain_amivrujEyduiFoi
|
|
+ vextracti32x4 xmm12,zmm10,0x1
|
|
+ vextracti32x4 xmm13,zmm10,0x2
|
|
+ vinserti32x4 zmm10,zmm10,xmm13,0x1
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpxorq zmm1,zmm1,zmm0
|
|
+ vpxorq zmm2,zmm2,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesdeclast zmm1,zmm1,zmm0
|
|
+ vaesdeclast zmm2,zmm2,zmm0
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vmovdqa32 zmm9,zmm15
|
|
+ vmovdqa32 zmm10,zmm16
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 YMMWORD[64+rsi],ymm2
|
|
+ add rsi,0x60
|
|
+ vextracti32x4 xmm8,zmm2,0x1
|
|
+ vmovdqa xmm0,xmm12
|
|
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+$L$_done_6_remain_amivrujEyduiFoi:
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpxorq zmm1,zmm1,zmm0
|
|
+ vpxorq zmm2,zmm2,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesdeclast zmm1,zmm1,zmm0
|
|
+ vaesdeclast zmm2,zmm2,zmm0
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vmovdqa32 zmm9,zmm15
|
|
+ vmovdqa32 zmm10,zmm16
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 YMMWORD[64+rsi],ymm2
|
|
+ jmp NEAR $L$_ret_amivrujEyduiFoi
|
|
+
|
|
+$L$_remaining_num_blocks_is_5_amivrujEyduiFoi:
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ vmovdqu xmm2,XMMWORD[64+rdi]
|
|
+ add rdi,0x50
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_done_5_remain_amivrujEyduiFoi
|
|
+ vmovdqa xmm12,xmm10
|
|
+ vextracti32x4 xmm10,zmm10,0x1
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpxorq zmm1,zmm1,zmm0
|
|
+ vpxorq zmm2,zmm2,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesdeclast zmm1,zmm1,zmm0
|
|
+ vaesdeclast zmm2,zmm2,zmm0
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vmovdqa32 zmm9,zmm15
|
|
+ vmovdqa32 zmm10,zmm16
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu XMMWORD[64+rsi],xmm2
|
|
+ add rsi,0x50
|
|
+ vmovdqa xmm8,xmm2
|
|
+ vmovdqa xmm0,xmm12
|
|
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+$L$_done_5_remain_amivrujEyduiFoi:
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpxorq zmm1,zmm1,zmm0
|
|
+ vpxorq zmm2,zmm2,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesdeclast zmm1,zmm1,zmm0
|
|
+ vaesdeclast zmm2,zmm2,zmm0
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vmovdqa32 zmm9,zmm15
|
|
+ vmovdqa32 zmm10,zmm16
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 XMMWORD[64+rsi],xmm2
|
|
+ jmp NEAR $L$_ret_amivrujEyduiFoi
|
|
+
|
|
+$L$_remaining_num_blocks_is_4_amivrujEyduiFoi:
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ add rdi,0x40
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_done_4_remain_amivrujEyduiFoi
|
|
+ vextracti32x4 xmm12,zmm9,0x3
|
|
+ vinserti32x4 zmm9,zmm9,xmm10,0x3
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpxorq zmm1,zmm1,zmm0
|
|
+ vpxorq zmm2,zmm2,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesdeclast zmm1,zmm1,zmm0
|
|
+ vaesdeclast zmm2,zmm2,zmm0
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vmovdqa32 zmm9,zmm15
|
|
+ vmovdqa32 zmm10,zmm16
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ add rsi,0x40
|
|
+ vextracti32x4 xmm8,zmm1,0x3
|
|
+ vmovdqa xmm0,xmm12
|
|
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+$L$_done_4_remain_amivrujEyduiFoi:
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpxorq zmm1,zmm1,zmm0
|
|
+ vpxorq zmm2,zmm2,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesdeclast zmm1,zmm1,zmm0
|
|
+ vaesdeclast zmm2,zmm2,zmm0
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vmovdqa32 zmm9,zmm15
|
|
+ vmovdqa32 zmm10,zmm16
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ jmp NEAR $L$_ret_amivrujEyduiFoi
|
|
+
|
|
+$L$_remaining_num_blocks_is_3_amivrujEyduiFoi:
|
|
+ vmovdqu xmm1,XMMWORD[rdi]
|
|
+ vmovdqu xmm2,XMMWORD[16+rdi]
|
|
+ vmovdqu xmm3,XMMWORD[32+rdi]
|
|
+ add rdi,0x30
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_done_3_remain_amivrujEyduiFoi
|
|
+ vextracti32x4 xmm13,zmm9,0x2
|
|
+ vextracti32x4 xmm10,zmm9,0x1
|
|
+ vextracti32x4 xmm11,zmm9,0x3
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vpxor xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vaesdeclast xmm3,xmm3,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ vmovdqu XMMWORD[32+rsi],xmm3
|
|
+ add rsi,0x30
|
|
+ vmovdqa xmm8,xmm3
|
|
+ vmovdqa xmm0,xmm13
|
|
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+$L$_done_3_remain_amivrujEyduiFoi:
|
|
+ vextracti32x4 xmm10,zmm9,0x1
|
|
+ vextracti32x4 xmm11,zmm9,0x2
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vpxor xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vaesdeclast xmm3,xmm3,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ vmovdqu XMMWORD[32+rsi],xmm3
|
|
+ jmp NEAR $L$_ret_amivrujEyduiFoi
|
|
+
|
|
+$L$_remaining_num_blocks_is_2_amivrujEyduiFoi:
|
|
+ vmovdqu xmm1,XMMWORD[rdi]
|
|
+ vmovdqu xmm2,XMMWORD[16+rdi]
|
|
+ add rdi,0x20
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_done_2_remain_amivrujEyduiFoi
|
|
+ vextracti32x4 xmm10,zmm9,0x2
|
|
+ vextracti32x4 xmm12,zmm9,0x1
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ add rsi,0x20
|
|
+ vmovdqa xmm8,xmm2
|
|
+ vmovdqa xmm0,xmm12
|
|
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+$L$_done_2_remain_amivrujEyduiFoi:
|
|
+ vextracti32x4 xmm10,zmm9,0x1
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ jmp NEAR $L$_ret_amivrujEyduiFoi
|
|
+
|
|
+$L$_remaining_num_blocks_is_1_amivrujEyduiFoi: ; tail case: one whole 16-byte block left, optionally followed by 1..15 extra bytes
|
|
+ vmovdqu xmm1,XMMWORD[rdi] ; xmm1 = the remaining whole input block
|
|
+ add rdi,0x10 ; rdi now points just past that block (start of any partial tail)
|
|
+ and rdx,0xf ; rdx = number of trailing partial bytes; ZF is set when there are none
|
|
+ je NEAR $L$_done_1_remain_amivrujEyduiFoi ; no partial tail: take the plain single-block path
|
|
+ vextracti32x4 xmm11,zmm9,0x1 ; xmm11 = 128-bit lane 1 of zmm9 (presumably the tweak following the one in xmm9 -- confirm against the tweak setup)
|
|
+ vpxor xmm1,xmm1,xmm11 ; XOR the lane-1 tweak into the block before the cipher rounds
|
|
+ vmovdqu xmm0,XMMWORD[rcx] ; round key at offset 0 (rcx = key schedule base)
|
|
+ vpxor xmm1,xmm1,xmm0 ; initial AddRoundKey
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx] ; round keys at offsets 16..144 feed nine vaesdec rounds
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx] ; last round key (offset 160)
|
|
+ vaesdeclast xmm1,xmm1,xmm0 ; final decryption round
|
|
+ vpxor xmm1,xmm1,xmm11 ; XOR the same lane-1 tweak back out
|
|
+ vmovdqu XMMWORD[rsi],xmm1 ; provisional store; steal_cipher later rewrites these 16 bytes via [-16+rsi]
|
|
+ add rsi,0x10 ; rsi now points just past the stored block
|
|
+ vmovdqa xmm8,xmm1 ; hand-off to steal_cipher: xmm8 = block just produced
|
|
+ vmovdqa xmm0,xmm9 ; hand-off to steal_cipher: xmm0 = lane-0 tweak (low 128 bits of zmm9)
|
|
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi ; rdx (1..15) is still live for the steal routine
|
|
+
|
|
+$L$_done_1_remain_amivrujEyduiFoi:
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ jmp NEAR $L$_ret_amivrujEyduiFoi
|
|
+
|
|
+$L$_start_by16_amivrujEyduiFoi:
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rsp]
|
|
+ vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7]
|
|
+ mov r8,0xaa
|
|
+ kmovq k2,r8
|
|
+
|
|
+
|
|
+ vpshufb zmm1,zmm0,zmm8
|
|
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
|
|
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
|
|
+ vpclmulqdq zmm3,zmm2,zmm25,0x0
|
|
+ vpxorq zmm4{k2},zmm4,zmm2
|
|
+ vpxord zmm9,zmm3,zmm4
|
|
+
|
|
+
|
|
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
|
|
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
|
|
+ vpclmulqdq zmm7,zmm6,zmm25,0x0
|
|
+ vpxorq zmm5{k2},zmm5,zmm6
|
|
+ vpxord zmm10,zmm7,zmm5
|
|
+
|
|
+
|
|
+ vpsrldq zmm13,zmm9,0xf
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
|
|
+ vpslldq zmm11,zmm9,0x1
|
|
+ vpxord zmm11,zmm11,zmm14
|
|
+
|
|
+ vpsrldq zmm15,zmm10,0xf
|
|
+ vpclmulqdq zmm16,zmm15,zmm25,0x0
|
|
+ vpslldq zmm12,zmm10,0x1
|
|
+ vpxord zmm12,zmm12,zmm16
|
|
+
|
|
+$L$_main_loop_run_16_amivrujEyduiFoi:
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ vmovdqu8 zmm2,ZMMWORD[64+rdi]
|
|
+ vmovdqu8 zmm3,ZMMWORD[128+rdi]
|
|
+ vmovdqu8 zmm4,ZMMWORD[192+rdi]
|
|
+ vmovdqu8 xmm5,XMMWORD[240+rdi]
|
|
+ add rdi,0x100
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+ vpxorq zmm3,zmm3,zmm11
|
|
+ vpxorq zmm4,zmm4,zmm12
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpxorq zmm1,zmm1,zmm0
|
|
+ vpxorq zmm2,zmm2,zmm0
|
|
+ vpxorq zmm3,zmm3,zmm0
|
|
+ vpxorq zmm4,zmm4,zmm0
|
|
+ vpsrldq zmm13,zmm11,0xf
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
|
|
+ vpslldq zmm15,zmm11,0x1
|
|
+ vpxord zmm15,zmm15,zmm14
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+ vaesdec zmm3,zmm3,zmm0
|
|
+ vaesdec zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+ vaesdec zmm3,zmm3,zmm0
|
|
+ vaesdec zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+ vaesdec zmm3,zmm3,zmm0
|
|
+ vaesdec zmm4,zmm4,zmm0
|
|
+ vpsrldq zmm13,zmm12,0xf
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
|
|
+ vpslldq zmm16,zmm12,0x1
|
|
+ vpxord zmm16,zmm16,zmm14
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+ vaesdec zmm3,zmm3,zmm0
|
|
+ vaesdec zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+ vaesdec zmm3,zmm3,zmm0
|
|
+ vaesdec zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+ vaesdec zmm3,zmm3,zmm0
|
|
+ vaesdec zmm4,zmm4,zmm0
|
|
+ vpsrldq zmm13,zmm15,0xf
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
|
|
+ vpslldq zmm17,zmm15,0x1
|
|
+ vpxord zmm17,zmm17,zmm14
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+ vaesdec zmm3,zmm3,zmm0
|
|
+ vaesdec zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+ vaesdec zmm3,zmm3,zmm0
|
|
+ vaesdec zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+ vaesdec zmm3,zmm3,zmm0
|
|
+ vaesdec zmm4,zmm4,zmm0
|
|
+ vpsrldq zmm13,zmm16,0xf
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
|
|
+ vpslldq zmm18,zmm16,0x1
|
|
+ vpxord zmm18,zmm18,zmm14
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesdeclast zmm1,zmm1,zmm0
|
|
+ vaesdeclast zmm2,zmm2,zmm0
|
|
+ vaesdeclast zmm3,zmm3,zmm0
|
|
+ vaesdeclast zmm4,zmm4,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+ vpxorq zmm3,zmm3,zmm11
|
|
+ vpxorq zmm4,zmm4,zmm12
|
|
+
|
|
+ vmovdqa32 zmm9,zmm15
|
|
+ vmovdqa32 zmm10,zmm16
|
|
+ vmovdqa32 zmm11,zmm17
|
|
+ vmovdqa32 zmm12,zmm18
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 ZMMWORD[64+rsi],zmm2
|
|
+ vmovdqu8 ZMMWORD[128+rsi],zmm3
|
|
+ vmovdqu8 ZMMWORD[192+rsi],zmm4
|
|
+ add rsi,0x100
|
|
+ sub rdx,0x100
|
|
+ cmp rdx,0x100
|
|
+ jge NEAR $L$_main_loop_run_16_amivrujEyduiFoi
|
|
+
|
|
+ cmp rdx,0x80
|
|
+ jge NEAR $L$_main_loop_run_8_amivrujEyduiFoi
|
|
+ jmp NEAR $L$_do_n_blocks_amivrujEyduiFoi
|
|
+
|
|
+$L$_start_by8_amivrujEyduiFoi:
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rsp]
|
|
+ vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7]
|
|
+ mov r8,0xaa
|
|
+ kmovq k2,r8
|
|
+
|
|
+
|
|
+ vpshufb zmm1,zmm0,zmm8
|
|
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
|
|
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
|
|
+ vpclmulqdq zmm3,zmm2,zmm25,0x0
|
|
+ vpxorq zmm4{k2},zmm4,zmm2
|
|
+ vpxord zmm9,zmm3,zmm4
|
|
+
|
|
+
|
|
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
|
|
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
|
|
+ vpclmulqdq zmm7,zmm6,zmm25,0x0
|
|
+ vpxorq zmm5{k2},zmm5,zmm6
|
|
+ vpxord zmm10,zmm7,zmm5
|
|
+
|
|
+$L$_main_loop_run_8_amivrujEyduiFoi:
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ vmovdqu8 zmm2,ZMMWORD[64+rdi]
|
|
+ vmovdqu8 xmm5,XMMWORD[112+rdi]
|
|
+ add rdi,0x80
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpxorq zmm1,zmm1,zmm0
|
|
+ vpxorq zmm2,zmm2,zmm0
|
|
+ vpsrldq zmm13,zmm9,0xf
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
|
|
+ vpslldq zmm15,zmm9,0x1
|
|
+ vpxord zmm15,zmm15,zmm14
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+ vpsrldq zmm13,zmm10,0xf
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
|
|
+ vpslldq zmm16,zmm10,0x1
|
|
+ vpxord zmm16,zmm16,zmm14
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesdeclast zmm1,zmm1,zmm0
|
|
+ vaesdeclast zmm2,zmm2,zmm0
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vmovdqa32 zmm9,zmm15
|
|
+ vmovdqa32 zmm10,zmm16
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 ZMMWORD[64+rsi],zmm2
|
|
+ add rsi,0x80
|
|
+ sub rdx,0x80
|
|
+ cmp rdx,0x80
|
|
+ jge NEAR $L$_main_loop_run_8_amivrujEyduiFoi
|
|
+ jmp NEAR $L$_do_n_blocks_amivrujEyduiFoi
|
|
+
|
|
+$L$_steal_cipher_amivrujEyduiFoi: ; partial-tail handler. In: xmm8 = last block produced, xmm0 = tweak for the rebuilt block, rdx = tail byte count, rdi/rsi = just past the whole blocks
|
|
+
|
|
+ vmovdqa xmm2,xmm8 ; keep an unshuffled copy of the last produced block for the blend below
|
|
+
|
|
+
|
|
+ lea rax,[vpshufb_shf_table] ; rax = base of the shuffle-mask table (contents defined outside this view)
|
|
+ vmovdqu xmm10,XMMWORD[rdx*1+rax] ; 16-byte shuffle mask selected by the tail length
|
|
+ vpshufb xmm8,xmm8,xmm10 ; permute the last block so the bytes destined for the partial output land at the top (presumably -- depends on table layout)
|
|
+
|
|
+
|
|
+ vmovdqu xmm3,XMMWORD[((-16))+rdx*1+rdi] ; load the 16 input bytes that END at rdi+rdx, i.e. the window covering the partial tail
|
|
+ vmovdqu XMMWORD[(-16)+rdx*1+rsi],xmm8 ; store 16 bytes ending at rsi+rdx; the part below rsi is overwritten again at $L$_done
|
|
+
|
|
+
|
|
+ lea rax,[vpshufb_shf_table]
|
|
+ add rax,16
|
|
+ sub rax,rdx ; rax = table + 16 - tail length
|
|
+ vmovdqu xmm10,XMMWORD[rax] ; complementary shuffle mask
|
|
+ vpxor xmm10,xmm10,XMMWORD[mask1] ; adjust the mask with the mask1 constant (value defined outside this view)
|
|
+ vpshufb xmm3,xmm3,xmm10 ; move the tail input bytes into their position in the rebuilt block
|
|
+
|
|
+ vpblendvb xmm3,xmm3,xmm2,xmm10 ; per byte: take xmm2 (saved last block) where the mask's top bit is set, else keep xmm3
|
|
+
|
|
+
|
|
+ vpxor xmm8,xmm3,xmm0 ; XOR in the tweak supplied by the caller
|
|
+
|
|
+
|
|
+ vpxor xmm8,xmm8,XMMWORD[rcx] ; initial AddRoundKey with the round key at offset 0
|
|
+ vaesdec xmm8,xmm8,XMMWORD[16+rcx] ; nine vaesdec rounds, round keys at offsets 16..144
|
|
+ vaesdec xmm8,xmm8,XMMWORD[32+rcx]
|
|
+ vaesdec xmm8,xmm8,XMMWORD[48+rcx]
|
|
+ vaesdec xmm8,xmm8,XMMWORD[64+rcx]
|
|
+ vaesdec xmm8,xmm8,XMMWORD[80+rcx]
|
|
+ vaesdec xmm8,xmm8,XMMWORD[96+rcx]
|
|
+ vaesdec xmm8,xmm8,XMMWORD[112+rcx]
|
|
+ vaesdec xmm8,xmm8,XMMWORD[128+rcx]
|
|
+ vaesdec xmm8,xmm8,XMMWORD[144+rcx]
|
|
+ vaesdeclast xmm8,xmm8,XMMWORD[160+rcx] ; final round with the round key at offset 160
|
|
+
|
|
+ vpxor xmm8,xmm8,xmm0 ; XOR the same tweak back out
|
|
+
|
|
+$L$_done_amivrujEyduiFoi: ; shared exit: xmm8 = final whole output block, rsi = just past where it belongs
|
|
+
|
|
+ vmovdqu XMMWORD[(-16)+rsi],xmm8 ; write the final whole block into the 16 bytes ending at rsi
|
|
+$L$_ret_amivrujEyduiFoi: ; common epilogue: restore saved registers, zero their stack save slots, drop the frame, return
|
|
+ mov rbx,QWORD[288+rsp] ; restore rbx from its save slot
|
|
+ xor r8,r8 ; r8 = 0, used to overwrite the GPR save slots
|
|
+ mov QWORD[288+rsp],r8 ; zero the rbx slot
|
|
+
|
|
+ vpxorq zmm0,zmm0,zmm0 ; zmm0 = 0, used to overwrite the XMM save area
|
|
+ mov rdi,QWORD[((288 + 8))+rsp] ; restore rdi from the frame (reloaded again from [8+rsp] after the frame is dropped)
|
|
+ mov QWORD[((288 + 8))+rsp],r8 ; zero the rdi slot
|
|
+ mov rsi,QWORD[((288 + 16))+rsp] ; restore rsi from the frame (reloaded again from [16+rsp] after the frame is dropped)
|
|
+ mov QWORD[((288 + 16))+rsp],r8 ; zero the rsi slot
|
|
+
|
|
+ vmovdqa xmm6,XMMWORD[((128 + 0))+rsp] ; restore xmm6..xmm9 BEFORE the 64-byte wipe of the same area below
|
|
+ vmovdqa xmm7,XMMWORD[((128 + 16))+rsp]
|
|
+ vmovdqa xmm8,XMMWORD[((128 + 32))+rsp]
|
|
+ vmovdqa xmm9,XMMWORD[((128 + 48))+rsp]
|
|
+
|
|
+
|
|
+ vmovdqa64 ZMMWORD[128+rsp],zmm0 ; zero bytes 128..191 of the frame (xmm6..xmm9 save slots); aligned store, so rsp+128 must be 64-byte aligned
|
|
+
|
|
+ vmovdqa xmm10,XMMWORD[((128 + 64))+rsp] ; restore xmm10..xmm13 before wiping their slots
|
|
+ vmovdqa xmm11,XMMWORD[((128 + 80))+rsp]
|
|
+ vmovdqa xmm12,XMMWORD[((128 + 96))+rsp]
|
|
+ vmovdqa xmm13,XMMWORD[((128 + 112))+rsp]
|
|
+
|
|
+
|
|
+ vmovdqa64 ZMMWORD[(128 + 64)+rsp],zmm0 ; zero bytes 192..255 of the frame (xmm10..xmm13 save slots)
|
|
+
|
|
+ vmovdqa xmm14,XMMWORD[((128 + 128))+rsp] ; restore xmm14/xmm15 before wiping their slots
|
|
+ vmovdqa xmm15,XMMWORD[((128 + 144))+rsp]
|
|
+
|
|
+
|
|
+
|
|
+ vmovdqa YMMWORD[(128 + 128)+rsp],ymm0 ; zero bytes 256..287 (xmm14/xmm15 save slots). NOTE(review): [rsp]..[rsp+127] is not cleared by this epilogue
|
|
+ mov rsp,rbp ; discard the local frame
|
|
+ pop rbp
|
|
+ vzeroupper ; clear upper YMM/ZMM state before returning to the caller
|
|
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue: reload rdi from the slot above the return address
|
|
+ mov rsi,QWORD[16+rsp] ; likewise rsi, from the next slot
|
|
+ DB 0F3h,0C3h ;repret (F3 C3 = "rep ret" emitted as raw bytes)
|
|
+
|
|
+$L$_less_than_128_bytes_amivrujEyduiFoi: ; short-input dispatcher (per the label, reached with rdx < 0x80 -- the branch site is outside this view)
|
|
+ cmp rdx,0x10 ; fewer than 16 bytes cannot form a single block
|
|
+ jb NEAR $L$_ret_amivrujEyduiFoi ; unsigned compare: return without producing any output
|
|
+
|
|
+ mov r8,rdx
|
|
+ and r8,0x70 ; r8 = 16 * number of whole blocks (bits 4..6 of the length); rdx itself is left intact
|
|
+ cmp r8,0x60
|
|
+ je NEAR $L$_num_blocks_is_6_amivrujEyduiFoi ; six whole blocks
|
|
+ cmp r8,0x50
|
|
+ je NEAR $L$_num_blocks_is_5_amivrujEyduiFoi ; five whole blocks
|
|
+ cmp r8,0x40
|
|
+ je NEAR $L$_num_blocks_is_4_amivrujEyduiFoi ; four whole blocks
|
|
+ cmp r8,0x30
|
|
+ je NEAR $L$_num_blocks_is_3_amivrujEyduiFoi ; three whole blocks
|
|
+ cmp r8,0x20
|
|
+ je NEAR $L$_num_blocks_is_2_amivrujEyduiFoi ; two whole blocks
|
|
+ cmp r8,0x10
|
|
+ je NEAR $L$_num_blocks_is_1_amivrujEyduiFoi ; one whole block; any other value (0x70) falls through to the seven-block path that follows
|
|
+
|
|
+$L$_num_blocks_is_7_amivrujEyduiFoi:
|
|
+ vmovdqa xmm9,XMMWORD[rsp]
|
|
+ mov rax,QWORD[rsp]
|
|
+ mov rbx,QWORD[8+rsp]
|
|
+ vmovdqu xmm1,XMMWORD[rdi]
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[16+rsp],rax
|
|
+ mov QWORD[((16 + 8))+rsp],rbx
|
|
+ vmovdqa xmm10,XMMWORD[16+rsp]
|
|
+ vmovdqu xmm2,XMMWORD[16+rdi]
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[32+rsp],rax
|
|
+ mov QWORD[((32 + 8))+rsp],rbx
|
|
+ vmovdqa xmm11,XMMWORD[32+rsp]
|
|
+ vmovdqu xmm3,XMMWORD[32+rdi]
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[48+rsp],rax
|
|
+ mov QWORD[((48 + 8))+rsp],rbx
|
|
+ vmovdqa xmm12,XMMWORD[48+rsp]
|
|
+ vmovdqu xmm4,XMMWORD[48+rdi]
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[64+rsp],rax
|
|
+ mov QWORD[((64 + 8))+rsp],rbx
|
|
+ vmovdqa xmm13,XMMWORD[64+rsp]
|
|
+ vmovdqu xmm5,XMMWORD[64+rdi]
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[80+rsp],rax
|
|
+ mov QWORD[((80 + 8))+rsp],rbx
|
|
+ vmovdqa xmm14,XMMWORD[80+rsp]
|
|
+ vmovdqu xmm6,XMMWORD[80+rdi]
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[96+rsp],rax
|
|
+ mov QWORD[((96 + 8))+rsp],rbx
|
|
+ vmovdqa xmm15,XMMWORD[96+rsp]
|
|
+ vmovdqu xmm7,XMMWORD[96+rdi]
|
|
+ add rdi,0x70
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_done_7_amivrujEyduiFoi
|
|
+
|
|
+$L$_steal_cipher_7_amivrujEyduiFoi:
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[16+rsp],rax
|
|
+ mov QWORD[24+rsp],rbx
|
|
+ vmovdqa64 xmm16,xmm15
|
|
+ vmovdqa xmm15,XMMWORD[16+rsp]
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vpxor xmm5,xmm5,xmm13
|
|
+ vpxor xmm6,xmm6,xmm14
|
|
+ vpxor xmm7,xmm7,xmm15
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vpxor xmm3,xmm3,xmm0
|
|
+ vpxor xmm4,xmm4,xmm0
|
|
+ vpxor xmm5,xmm5,xmm0
|
|
+ vpxor xmm6,xmm6,xmm0
|
|
+ vpxor xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vaesdeclast xmm3,xmm3,xmm0
|
|
+ vaesdeclast xmm4,xmm4,xmm0
|
|
+ vaesdeclast xmm5,xmm5,xmm0
|
|
+ vaesdeclast xmm6,xmm6,xmm0
|
|
+ vaesdeclast xmm7,xmm7,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vpxor xmm5,xmm5,xmm13
|
|
+ vpxor xmm6,xmm6,xmm14
|
|
+ vpxor xmm7,xmm7,xmm15
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ vmovdqu XMMWORD[32+rsi],xmm3
|
|
+ vmovdqu XMMWORD[48+rsi],xmm4
|
|
+ vmovdqu XMMWORD[64+rsi],xmm5
|
|
+ vmovdqu XMMWORD[80+rsi],xmm6
|
|
+ add rsi,0x70
|
|
+ vmovdqa64 xmm0,xmm16
|
|
+ vmovdqa xmm8,xmm7
|
|
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+$L$_done_7_amivrujEyduiFoi:
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vpxor xmm5,xmm5,xmm13
|
|
+ vpxor xmm6,xmm6,xmm14
|
|
+ vpxor xmm7,xmm7,xmm15
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vpxor xmm3,xmm3,xmm0
|
|
+ vpxor xmm4,xmm4,xmm0
|
|
+ vpxor xmm5,xmm5,xmm0
|
|
+ vpxor xmm6,xmm6,xmm0
|
|
+ vpxor xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vaesdeclast xmm3,xmm3,xmm0
|
|
+ vaesdeclast xmm4,xmm4,xmm0
|
|
+ vaesdeclast xmm5,xmm5,xmm0
|
|
+ vaesdeclast xmm6,xmm6,xmm0
|
|
+ vaesdeclast xmm7,xmm7,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vpxor xmm5,xmm5,xmm13
|
|
+ vpxor xmm6,xmm6,xmm14
|
|
+ vpxor xmm7,xmm7,xmm15
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ vmovdqu XMMWORD[32+rsi],xmm3
|
|
+ vmovdqu XMMWORD[48+rsi],xmm4
|
|
+ vmovdqu XMMWORD[64+rsi],xmm5
|
|
+ vmovdqu XMMWORD[80+rsi],xmm6
|
|
+ add rsi,0x70
|
|
+ vmovdqa xmm8,xmm7
|
|
+ jmp NEAR $L$_done_amivrujEyduiFoi
|
|
+
|
|
+$L$_num_blocks_is_6_amivrujEyduiFoi:
|
|
+ vmovdqa xmm9,XMMWORD[rsp]
|
|
+ mov rax,QWORD[rsp]
|
|
+ mov rbx,QWORD[8+rsp]
|
|
+ vmovdqu xmm1,XMMWORD[rdi]
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[16+rsp],rax
|
|
+ mov QWORD[((16 + 8))+rsp],rbx
|
|
+ vmovdqa xmm10,XMMWORD[16+rsp]
|
|
+ vmovdqu xmm2,XMMWORD[16+rdi]
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[32+rsp],rax
|
|
+ mov QWORD[((32 + 8))+rsp],rbx
|
|
+ vmovdqa xmm11,XMMWORD[32+rsp]
|
|
+ vmovdqu xmm3,XMMWORD[32+rdi]
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[48+rsp],rax
|
|
+ mov QWORD[((48 + 8))+rsp],rbx
|
|
+ vmovdqa xmm12,XMMWORD[48+rsp]
|
|
+ vmovdqu xmm4,XMMWORD[48+rdi]
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[64+rsp],rax
|
|
+ mov QWORD[((64 + 8))+rsp],rbx
|
|
+ vmovdqa xmm13,XMMWORD[64+rsp]
|
|
+ vmovdqu xmm5,XMMWORD[64+rdi]
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[80+rsp],rax
|
|
+ mov QWORD[((80 + 8))+rsp],rbx
|
|
+ vmovdqa xmm14,XMMWORD[80+rsp]
|
|
+ vmovdqu xmm6,XMMWORD[80+rdi]
|
|
+ add rdi,0x60
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_done_6_amivrujEyduiFoi
|
|
+
|
|
+$L$_steal_cipher_6_amivrujEyduiFoi:
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[16+rsp],rax
|
|
+ mov QWORD[24+rsp],rbx
|
|
+ vmovdqa64 xmm15,xmm14
|
|
+ vmovdqa xmm14,XMMWORD[16+rsp]
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vpxor xmm5,xmm5,xmm13
|
|
+ vpxor xmm6,xmm6,xmm14
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vpxor xmm3,xmm3,xmm0
|
|
+ vpxor xmm4,xmm4,xmm0
|
|
+ vpxor xmm5,xmm5,xmm0
|
|
+ vpxor xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vaesdeclast xmm3,xmm3,xmm0
|
|
+ vaesdeclast xmm4,xmm4,xmm0
|
|
+ vaesdeclast xmm5,xmm5,xmm0
|
|
+ vaesdeclast xmm6,xmm6,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vpxor xmm5,xmm5,xmm13
|
|
+ vpxor xmm6,xmm6,xmm14
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ vmovdqu XMMWORD[32+rsi],xmm3
|
|
+ vmovdqu XMMWORD[48+rsi],xmm4
|
|
+ vmovdqu XMMWORD[64+rsi],xmm5
|
|
+ add rsi,0x60
|
|
+ vmovdqa xmm0,xmm15
|
|
+ vmovdqa xmm8,xmm6
|
|
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+$L$_done_6_amivrujEyduiFoi:
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vpxor xmm5,xmm5,xmm13
|
|
+ vpxor xmm6,xmm6,xmm14
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vpxor xmm3,xmm3,xmm0
|
|
+ vpxor xmm4,xmm4,xmm0
|
|
+ vpxor xmm5,xmm5,xmm0
|
|
+ vpxor xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vaesdeclast xmm3,xmm3,xmm0
|
|
+ vaesdeclast xmm4,xmm4,xmm0
|
|
+ vaesdeclast xmm5,xmm5,xmm0
|
|
+ vaesdeclast xmm6,xmm6,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vpxor xmm5,xmm5,xmm13
|
|
+ vpxor xmm6,xmm6,xmm14
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ vmovdqu XMMWORD[32+rsi],xmm3
|
|
+ vmovdqu XMMWORD[48+rsi],xmm4
|
|
+ vmovdqu XMMWORD[64+rsi],xmm5
|
|
+ add rsi,0x60
|
|
+ vmovdqa xmm8,xmm6
|
|
+ jmp NEAR $L$_done_amivrujEyduiFoi
|
|
+
|
|
+$L$_num_blocks_is_5_amivrujEyduiFoi: ;tail path: load 5 input blocks from rdi and derive 5 tweaks (xmm9..xmm13)
|
|
+ vmovdqa xmm9,XMMWORD[rsp] ;xmm9 = 16 bytes at [rsp] (presumably the current tweak; written outside this view)
|
|
+ mov rax,QWORD[rsp] ;rax = low qword of the same value
|
|
+ mov rbx,QWORD[8+rsp] ;rbx = high qword of the same value
|
|
+ vmovdqu xmm1,XMMWORD[rdi] ;xmm1 = input block 1
|
|
+ xor r11,r11 ;r11 = 0 unless the shift below carries out
|
|
+ shl rax,1 ;shift the 128-bit value rbx:rax left by one bit
|
|
+ adc rbx,rbx ;rax's old top bit enters rbx; CF = bit shifted out of rbx
|
|
+ cmovc r11,r10 ;on carry-out take r10 (set outside this view; presumably 0x87 - confirm)
|
|
+ xor rax,r11 ;fold it into the low qword
|
|
+ mov QWORD[16+rsp],rax ;spill the doubled value to the stack...
|
|
+ mov QWORD[((16 + 8))+rsp],rbx
|
|
+ vmovdqa xmm10,XMMWORD[16+rsp] ;...and reload it as xmm10 = tweak 2
|
|
+ vmovdqu xmm2,XMMWORD[16+rdi] ;xmm2 = input block 2
|
|
+ xor r11,r11 ;same doubling step again, producing tweak 3
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[32+rsp],rax
|
|
+ mov QWORD[((32 + 8))+rsp],rbx
|
|
+ vmovdqa xmm11,XMMWORD[32+rsp] ;xmm11 = tweak 3
|
|
+ vmovdqu xmm3,XMMWORD[32+rdi] ;xmm3 = input block 3
|
|
+ xor r11,r11 ;doubling step, producing tweak 4
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[48+rsp],rax
|
|
+ mov QWORD[((48 + 8))+rsp],rbx
|
|
+ vmovdqa xmm12,XMMWORD[48+rsp] ;xmm12 = tweak 4
|
|
+ vmovdqu xmm4,XMMWORD[48+rdi] ;xmm4 = input block 4
|
|
+ xor r11,r11 ;doubling step, producing tweak 5
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[64+rsp],rax
|
|
+ mov QWORD[((64 + 8))+rsp],rbx
|
|
+ vmovdqa xmm13,XMMWORD[64+rsp] ;xmm13 = tweak 5
|
|
+ vmovdqu xmm5,XMMWORD[64+rdi] ;xmm5 = input block 5
|
|
+ add rdi,0x50 ;advance the input pointer by 5*16 bytes
|
|
+ and rdx,0xf ;rdx = rdx mod 16 (leftover bytes beyond whole blocks)
|
|
+ je NEAR $L$_done_5_amivrujEyduiFoi ;no leftover bytes: plain 5-block path; otherwise fall through
|
|
+
|
|
+$L$_steal_cipher_5_amivrujEyduiFoi:
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[16+rsp],rax
|
|
+ mov QWORD[24+rsp],rbx
|
|
+ vmovdqa64 xmm14,xmm13
|
|
+ vmovdqa xmm13,XMMWORD[16+rsp]
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vpxor xmm5,xmm5,xmm13
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vpxor xmm3,xmm3,xmm0
|
|
+ vpxor xmm4,xmm4,xmm0
|
|
+ vpxor xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vaesdeclast xmm3,xmm3,xmm0
|
|
+ vaesdeclast xmm4,xmm4,xmm0
|
|
+ vaesdeclast xmm5,xmm5,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vpxor xmm5,xmm5,xmm13
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ vmovdqu XMMWORD[32+rsi],xmm3
|
|
+ vmovdqu XMMWORD[48+rsi],xmm4
|
|
+ add rsi,0x50
|
|
+ vmovdqa xmm0,xmm14
|
|
+ vmovdqa xmm8,xmm5
|
|
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+$L$_done_5_amivrujEyduiFoi:
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vpxor xmm5,xmm5,xmm13
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vpxor xmm3,xmm3,xmm0
|
|
+ vpxor xmm4,xmm4,xmm0
|
|
+ vpxor xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vaesdeclast xmm3,xmm3,xmm0
|
|
+ vaesdeclast xmm4,xmm4,xmm0
|
|
+ vaesdeclast xmm5,xmm5,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vpxor xmm5,xmm5,xmm13
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ vmovdqu XMMWORD[32+rsi],xmm3
|
|
+ vmovdqu XMMWORD[48+rsi],xmm4
|
|
+ add rsi,0x50
|
|
+ vmovdqa xmm8,xmm5
|
|
+ jmp NEAR $L$_done_amivrujEyduiFoi
|
|
+
|
|
+$L$_num_blocks_is_4_amivrujEyduiFoi: ;tail path: load 4 input blocks from rdi and derive 4 tweaks (xmm9..xmm12)
|
|
+ vmovdqa xmm9,XMMWORD[rsp] ;xmm9 = 16 bytes at [rsp] (presumably the current tweak; written outside this view)
|
|
+ mov rax,QWORD[rsp] ;rax = low qword of the same value
|
|
+ mov rbx,QWORD[8+rsp] ;rbx = high qword of the same value
|
|
+ vmovdqu xmm1,XMMWORD[rdi] ;xmm1 = input block 1
|
|
+ xor r11,r11 ;r11 = 0 unless the shift below carries out
|
|
+ shl rax,1 ;shift the 128-bit value rbx:rax left by one bit
|
|
+ adc rbx,rbx ;rax's old top bit enters rbx; CF = bit shifted out of rbx
|
|
+ cmovc r11,r10 ;on carry-out take r10 (set outside this view; presumably 0x87 - confirm)
|
|
+ xor rax,r11 ;fold it into the low qword
|
|
+ mov QWORD[16+rsp],rax ;spill the doubled value to the stack...
|
|
+ mov QWORD[((16 + 8))+rsp],rbx
|
|
+ vmovdqa xmm10,XMMWORD[16+rsp] ;...and reload it as xmm10 = tweak 2
|
|
+ vmovdqu xmm2,XMMWORD[16+rdi] ;xmm2 = input block 2
|
|
+ xor r11,r11 ;same doubling step again, producing tweak 3
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[32+rsp],rax
|
|
+ mov QWORD[((32 + 8))+rsp],rbx
|
|
+ vmovdqa xmm11,XMMWORD[32+rsp] ;xmm11 = tweak 3
|
|
+ vmovdqu xmm3,XMMWORD[32+rdi] ;xmm3 = input block 3
|
|
+ xor r11,r11 ;doubling step, producing tweak 4
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[48+rsp],rax
|
|
+ mov QWORD[((48 + 8))+rsp],rbx
|
|
+ vmovdqa xmm12,XMMWORD[48+rsp] ;xmm12 = tweak 4
|
|
+ vmovdqu xmm4,XMMWORD[48+rdi] ;xmm4 = input block 4
|
|
+ add rdi,0x40 ;advance the input pointer by 4*16 bytes
|
|
+ and rdx,0xf ;rdx = rdx mod 16 (leftover bytes beyond whole blocks)
|
|
+ je NEAR $L$_done_4_amivrujEyduiFoi ;no leftover bytes: plain 4-block path; otherwise fall through
|
|
+
|
|
+$L$_steal_cipher_4_amivrujEyduiFoi:
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[16+rsp],rax
|
|
+ mov QWORD[24+rsp],rbx
|
|
+ vmovdqa64 xmm13,xmm12
|
|
+ vmovdqa xmm12,XMMWORD[16+rsp]
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vpxor xmm3,xmm3,xmm0
|
|
+ vpxor xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vaesdeclast xmm3,xmm3,xmm0
|
|
+ vaesdeclast xmm4,xmm4,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ vmovdqu XMMWORD[32+rsi],xmm3
|
|
+ add rsi,0x40
|
|
+ vmovdqa xmm0,xmm13
|
|
+ vmovdqa xmm8,xmm4
|
|
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+$L$_done_4_amivrujEyduiFoi:
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vpxor xmm3,xmm3,xmm0
|
|
+ vpxor xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vaesdeclast xmm3,xmm3,xmm0
|
|
+ vaesdeclast xmm4,xmm4,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ vmovdqu XMMWORD[32+rsi],xmm3
|
|
+ add rsi,0x40
|
|
+ vmovdqa xmm8,xmm4
|
|
+ jmp NEAR $L$_done_amivrujEyduiFoi
|
|
+
|
|
+$L$_num_blocks_is_3_amivrujEyduiFoi: ;tail path: load 3 input blocks from rdi and derive 3 tweaks (xmm9..xmm11)
|
|
+ vmovdqa xmm9,XMMWORD[rsp] ;xmm9 = 16 bytes at [rsp] (presumably the current tweak; written outside this view)
|
|
+ mov rax,QWORD[rsp] ;rax = low qword of the same value
|
|
+ mov rbx,QWORD[8+rsp] ;rbx = high qword of the same value
|
|
+ vmovdqu xmm1,XMMWORD[rdi] ;xmm1 = input block 1
|
|
+ xor r11,r11 ;r11 = 0 unless the shift below carries out
|
|
+ shl rax,1 ;shift the 128-bit value rbx:rax left by one bit
|
|
+ adc rbx,rbx ;rax's old top bit enters rbx; CF = bit shifted out of rbx
|
|
+ cmovc r11,r10 ;on carry-out take r10 (set outside this view; presumably 0x87 - confirm)
|
|
+ xor rax,r11 ;fold it into the low qword
|
|
+ mov QWORD[16+rsp],rax ;spill the doubled value to the stack...
|
|
+ mov QWORD[((16 + 8))+rsp],rbx
|
|
+ vmovdqa xmm10,XMMWORD[16+rsp] ;...and reload it as xmm10 = tweak 2
|
|
+ vmovdqu xmm2,XMMWORD[16+rdi] ;xmm2 = input block 2
|
|
+ xor r11,r11 ;same doubling step again, producing tweak 3
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[32+rsp],rax
|
|
+ mov QWORD[((32 + 8))+rsp],rbx
|
|
+ vmovdqa xmm11,XMMWORD[32+rsp] ;xmm11 = tweak 3
|
|
+ vmovdqu xmm3,XMMWORD[32+rdi] ;xmm3 = input block 3
|
|
+ add rdi,0x30 ;advance the input pointer by 3*16 bytes
|
|
+ and rdx,0xf ;rdx = rdx mod 16 (leftover bytes beyond whole blocks)
|
|
+ je NEAR $L$_done_3_amivrujEyduiFoi ;no leftover bytes: plain 3-block path; otherwise fall through
|
|
+
|
|
+$L$_steal_cipher_3_amivrujEyduiFoi:
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[16+rsp],rax
|
|
+ mov QWORD[24+rsp],rbx
|
|
+ vmovdqa64 xmm12,xmm11
|
|
+ vmovdqa xmm11,XMMWORD[16+rsp]
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vpxor xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vaesdeclast xmm3,xmm3,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ add rsi,0x30
|
|
+ vmovdqa xmm0,xmm12
|
|
+ vmovdqa xmm8,xmm3
|
|
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+$L$_done_3_amivrujEyduiFoi:
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vpxor xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vaesdeclast xmm3,xmm3,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ add rsi,0x30
|
|
+ vmovdqa xmm8,xmm3
|
|
+ jmp NEAR $L$_done_amivrujEyduiFoi
|
|
+
|
|
+$L$_num_blocks_is_2_amivrujEyduiFoi: ;tail path: load 2 input blocks from rdi and derive 2 tweaks (xmm9, xmm10)
|
|
+ vmovdqa xmm9,XMMWORD[rsp] ;xmm9 = 16 bytes at [rsp] (presumably the current tweak; written outside this view)
|
|
+ mov rax,QWORD[rsp] ;rax = low qword of the same value
|
|
+ mov rbx,QWORD[8+rsp] ;rbx = high qword of the same value
|
|
+ vmovdqu xmm1,XMMWORD[rdi] ;xmm1 = input block 1
|
|
+ xor r11,r11 ;r11 = 0 unless the shift below carries out
|
|
+ shl rax,1 ;shift the 128-bit value rbx:rax left by one bit
|
|
+ adc rbx,rbx ;rax's old top bit enters rbx; CF = bit shifted out of rbx
|
|
+ cmovc r11,r10 ;on carry-out take r10 (set outside this view; presumably 0x87 - confirm)
|
|
+ xor rax,r11 ;fold it into the low qword
|
|
+ mov QWORD[16+rsp],rax ;spill the doubled value to the stack...
|
|
+ mov QWORD[((16 + 8))+rsp],rbx
|
|
+ vmovdqa xmm10,XMMWORD[16+rsp] ;...and reload it as xmm10 = tweak 2
|
|
+ vmovdqu xmm2,XMMWORD[16+rdi] ;xmm2 = input block 2
|
|
+ add rdi,0x20 ;advance the input pointer by 2*16 bytes
|
|
+ and rdx,0xf ;rdx = rdx mod 16 (leftover bytes beyond whole blocks)
|
|
+ je NEAR $L$_done_2_amivrujEyduiFoi ;no leftover bytes: plain 2-block path; otherwise fall through
|
|
+
|
|
+$L$_steal_cipher_2_amivrujEyduiFoi: ;2 whole blocks plus leftover bytes: block 2 is processed with one extra-doubled tweak
|
|
+ xor r11,r11 ;double rbx:rax once more (same shift/conditional-xor step as for the earlier tweaks)
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[16+rsp],rax ;spill the new value to [16+rsp]
|
|
+ mov QWORD[24+rsp],rbx
|
|
+ vmovdqa64 xmm11,xmm10 ;keep the previous tweak 2 in xmm11
|
|
+ vmovdqa xmm10,XMMWORD[16+rsp] ;xmm10 = the newly doubled tweak, used for block 2 below
|
|
+ vpxor xmm1,xmm1,xmm9 ;XOR each block with its tweak before the cipher rounds
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vmovdqu xmm0,XMMWORD[rcx] ;round key 0 from the key schedule at rcx
|
|
+ vpxor xmm1,xmm1,xmm0 ;initial AddRoundKey
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx] ;round keys 1..9: one vaesdec per block per key
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx] ;round key 10: final round
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9 ;XOR each block with its tweak again after the cipher
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vmovdqu XMMWORD[rsi],xmm1 ;store only block 1 here
|
|
+ add rsi,0x20 ;rsi advances by 32 although 16 bytes were stored; block 2 is handed on in xmm8
|
|
+ vmovdqa xmm0,xmm11 ;xmm0 = the saved (pre-doubling) tweak for the shared stealing code
|
|
+ vmovdqa xmm8,xmm2 ;xmm8 = processed block 2 (presumably written out by the jump target - not visible here)
|
|
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+$L$_done_2_amivrujEyduiFoi: ;2 whole blocks, no leftover bytes: xmm1/xmm2 are processed with tweaks xmm9/xmm10
|
|
+ vpxor xmm1,xmm1,xmm9 ;XOR each block with its tweak before the cipher rounds
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vmovdqu xmm0,XMMWORD[rcx] ;round key 0 from the key schedule at rcx
|
|
+ vpxor xmm1,xmm1,xmm0 ;initial AddRoundKey
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx] ;round keys 1..9: one vaesdec per block per key
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx] ;round key 10: final round
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9 ;XOR each block with its tweak again after the cipher
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vmovdqu XMMWORD[rsi],xmm1 ;store only block 1 here
|
|
+ add rsi,0x20 ;rsi advances by 32 although 16 bytes were stored; block 2 is handed on in xmm8
|
|
+ vmovdqa xmm8,xmm2 ;xmm8 = processed block 2 (presumably written out at the jump target - not visible here)
|
|
+ jmp NEAR $L$_done_amivrujEyduiFoi
|
|
+
|
|
+$L$_num_blocks_is_1_amivrujEyduiFoi: ;tail path: load 1 input block from rdi and its tweak
|
|
+ vmovdqa xmm9,XMMWORD[rsp] ;xmm9 = 16 bytes at [rsp] (presumably the current tweak; written outside this view)
|
|
+ mov rax,QWORD[rsp] ;rbx:rax = the same value, kept in GPRs for a possible doubling step below
|
|
+ mov rbx,QWORD[8+rsp]
|
|
+ vmovdqu xmm1,XMMWORD[rdi] ;xmm1 = input block 1
|
|
+ add rdi,0x10 ;advance the input pointer by 16 bytes
|
|
+ and rdx,0xf ;rdx = rdx mod 16 (leftover bytes beyond the whole block)
|
|
+ je NEAR $L$_done_1_amivrujEyduiFoi ;no leftover bytes: plain 1-block path; otherwise fall through
|
|
+
|
|
+$L$_steal_cipher_1_amivrujEyduiFoi: ;1 whole block plus leftover bytes: the block is processed with a doubled tweak
|
|
+ xor r11,r11 ;r11 = 0 unless the shift below carries out
|
|
+ shl rax,1 ;shift the 128-bit value rbx:rax left by one bit
|
|
+ adc rbx,rbx ;rax's old top bit enters rbx; CF = bit shifted out of rbx
|
|
+ cmovc r11,r10 ;on carry-out take r10 (set outside this view; presumably 0x87 - confirm)
|
|
+ xor rax,r11 ;fold it into the low qword
|
|
+ mov QWORD[16+rsp],rax ;spill the doubled value to [16+rsp]
|
|
+ mov QWORD[24+rsp],rbx
|
|
+ vmovdqa64 xmm10,xmm9 ;keep the original tweak in xmm10
|
|
+ vmovdqa xmm9,XMMWORD[16+rsp] ;xmm9 = the doubled tweak, used for this block
|
|
+ vpxor xmm1,xmm1,xmm9 ;XOR the block with its tweak before the cipher rounds
|
|
+ vmovdqu xmm0,XMMWORD[rcx] ;round key 0 from the key schedule at rcx
|
|
+ vpxor xmm1,xmm1,xmm0 ;initial AddRoundKey
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx] ;round keys 1..9: one vaesdec per key
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx] ;round key 10: final round
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9 ;XOR the block with its tweak again after the cipher
|
|
+ add rsi,0x10 ;rsi advances by 16 with nothing stored here; the block is handed on in xmm8
|
|
+ vmovdqa xmm0,xmm10 ;xmm0 = the saved original tweak for the shared stealing code
|
|
+ vmovdqa xmm8,xmm1 ;xmm8 = processed block (presumably written out by the jump target - not visible here)
|
|
+ jmp NEAR $L$_steal_cipher_amivrujEyduiFoi
|
|
+
|
|
+$L$_done_1_amivrujEyduiFoi: ;1 whole block, no leftover bytes: xmm1 is processed with tweak xmm9
|
|
+ vpxor xmm1,xmm1,xmm9 ;XOR the block with its tweak before the cipher rounds
|
|
+ vmovdqu xmm0,XMMWORD[rcx] ;round key 0 from the key schedule at rcx
|
|
+ vpxor xmm1,xmm1,xmm0 ;initial AddRoundKey
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx] ;round keys 1..9: one vaesdec per key
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx] ;round key 10: final round
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9 ;XOR the block with its tweak again after the cipher
|
|
+ add rsi,0x10 ;rsi advances by 16 with nothing stored here; the block is handed on in xmm8
|
|
+ vmovdqa xmm8,xmm1 ;xmm8 = processed block (presumably written out at the jump target - not visible here)
|
|
+ jmp NEAR $L$_done_amivrujEyduiFoi
|
|
+
|
|
+global aesni_xts_256_encrypt_avx512 ;exported entry point; takes six arguments in Win64 order (see the register remap below)
|
|
+
|
|
+
|
|
+ALIGN 32
|
|
+aesni_xts_256_encrypt_avx512:
|
|
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
|
|
+ mov QWORD[16+rsp],rsi ;caller's rdi/rsi are parked in the slots just above the return address
|
|
+ mov rax,rsp ;rax = rsp at entry
|
|
+$L$SEH_begin_aesni_xts_256_encrypt_avx512:
|
|
+ mov rdi,rcx ;remap Win64 argument registers: arg1 -> rdi (read as the input pointer below)
|
|
+ mov rsi,rdx ;arg2 -> rsi (written as the output pointer below)
|
|
+ mov rdx,r8 ;arg3 -> rdx (compared against byte counts below)
|
|
+ mov rcx,r9 ;arg4 -> rcx (key schedule used for the data blocks)
|
|
+ mov r8,QWORD[40+rsp] ;arg5 (stack) -> r8 (key schedule used to encrypt the initial tweak)
|
|
+ mov r9,QWORD[48+rsp] ;arg6 (stack) -> r9 (pointer to the 16-byte initial tweak input)
|
|
+
|
|
+
|
|
+
|
|
+DB 243,15,30,250 ;bytes F3 0F 1E FA = endbr64
|
|
+ push rbp
|
|
+ mov rbp,rsp ;rbp = entry rsp - 8
|
|
+ sub rsp,312 ;reserve the local frame...
|
|
+ and rsp,0xffffffffffffffc0 ;...and align rsp down to 64 bytes
|
|
+ mov QWORD[288+rsp],rbx ;store rbx, rdi, rsi in the frame at 288/296/304
|
|
+ mov QWORD[((288 + 8))+rsp],rdi
|
|
+ mov QWORD[((288 + 16))+rsp],rsi
|
|
+ vmovdqa XMMWORD[(128 + 0)+rsp],xmm6 ;save xmm6..xmm15 (callee-saved under the Microsoft x64 ABI) at 128..287
|
|
+ vmovdqa XMMWORD[(128 + 16)+rsp],xmm7
|
|
+ vmovdqa XMMWORD[(128 + 32)+rsp],xmm8
|
|
+ vmovdqa XMMWORD[(128 + 48)+rsp],xmm9
|
|
+ vmovdqa XMMWORD[(128 + 64)+rsp],xmm10
|
|
+ vmovdqa XMMWORD[(128 + 80)+rsp],xmm11
|
|
+ vmovdqa XMMWORD[(128 + 96)+rsp],xmm12
|
|
+ vmovdqa XMMWORD[(128 + 112)+rsp],xmm13
|
|
+ vmovdqa XMMWORD[(128 + 128)+rsp],xmm14
|
|
+ vmovdqa XMMWORD[(128 + 144)+rsp],xmm15
|
|
+ mov r10,0x87 ;r10 = 0x87, broadcast into zmm25 below for the wide paths
|
|
+ vmovdqu xmm1,XMMWORD[r9] ;load the 16-byte initial tweak input from [r9]
|
|
+ vpxor xmm1,xmm1,XMMWORD[r8] ;AES-encrypt it with the 15 round keys at r8: round key 0...
|
|
+ vaesenc xmm1,xmm1,XMMWORD[16+r8] ;...13 middle rounds...
|
|
+ vaesenc xmm1,xmm1,XMMWORD[32+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[48+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[64+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[80+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[96+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[112+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[128+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[144+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[160+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[176+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[192+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[208+r8]
|
|
+ vaesenclast xmm1,xmm1,XMMWORD[224+r8] ;...and the final round (14 rounds total)
|
|
+ vmovdqa XMMWORD[rsp],xmm1 ;[rsp] = encrypted initial tweak
|
|
+ mov QWORD[((8 + 40))+rbp],rdi ;[rbp+48]/[rbp+56] are the entry-time [rsp+40]/[rsp+48] slots; purpose not evident from this view
|
|
+ mov QWORD[((8 + 48))+rbp],rsi
|
|
+
|
|
+ cmp rdx,0x80 ;dispatch on rdx (NOTE(review): jl/jge are signed comparisons)
|
|
+ jl NEAR $L$_less_than_128_bytes_wcpqaDvsGlbjGoe ;rdx < 128
|
|
+ vpbroadcastq zmm25,r10 ;zmm25 = 0x87 in every qword lane
|
|
+ cmp rdx,0x100
|
|
+ jge NEAR $L$_start_by16_wcpqaDvsGlbjGoe ;rdx >= 256
|
|
+ cmp rdx,0x80
|
|
+ jge NEAR $L$_start_by8_wcpqaDvsGlbjGoe ;always taken here: rdx >= 128 was established by the jl above
|
|
+
|
|
+$L$_do_n_blocks_wcpqaDvsGlbjGoe: ;dispatch on the remaining byte count in rdx (under 128 on this path, presumably)
|
|
+ cmp rdx,0x0
|
|
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe ;nothing left: return
|
|
+ cmp rdx,0x70
|
|
+ jge NEAR $L$_remaining_num_blocks_is_7_wcpqaDvsGlbjGoe ;rdx >= 112: 7 whole blocks
|
|
+ cmp rdx,0x60
|
|
+ jge NEAR $L$_remaining_num_blocks_is_6_wcpqaDvsGlbjGoe ;rdx >= 96: 6 whole blocks
|
|
+ cmp rdx,0x50
|
|
+ jge NEAR $L$_remaining_num_blocks_is_5_wcpqaDvsGlbjGoe ;rdx >= 80: 5 whole blocks
|
|
+ cmp rdx,0x40
|
|
+ jge NEAR $L$_remaining_num_blocks_is_4_wcpqaDvsGlbjGoe ;rdx >= 64: 4 whole blocks
|
|
+ cmp rdx,0x30
|
|
+ jge NEAR $L$_remaining_num_blocks_is_3_wcpqaDvsGlbjGoe ;rdx >= 48: 3 whole blocks
|
|
+ cmp rdx,0x20
|
|
+ jge NEAR $L$_remaining_num_blocks_is_2_wcpqaDvsGlbjGoe ;rdx >= 32: 2 whole blocks
|
|
+ cmp rdx,0x10
|
|
+ jge NEAR $L$_remaining_num_blocks_is_1_wcpqaDvsGlbjGoe ;rdx >= 16: 1 whole block
|
|
+ vmovdqa xmm8,xmm0 ;1..15 bytes left: xmm8 = xmm0 (presumably the last output block from the preceding path - confirm)
|
|
+ vmovdqa xmm0,xmm9 ;xmm0 = low 128 bits of zmm9 (presumably the next tweak - confirm)
|
|
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
|
|
+
|
|
+$L$_remaining_num_blocks_is_7_wcpqaDvsGlbjGoe:
|
|
+ mov r8,0x0000ffffffffffff
|
|
+ kmovq k1,r8
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ vmovdqu8 zmm2{k1},[64+rdi]
|
|
+ add rdi,0x70
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpternlogq zmm1,zmm9,zmm0,0x96
|
|
+ vpternlogq zmm2,zmm10,zmm0,0x96
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
|
|
+ vaesenclast zmm1,zmm1,zmm0
|
|
+ vaesenclast zmm2,zmm2,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 ZMMWORD[64+rsi]{k1},zmm2
|
|
+ add rsi,0x70
|
|
+ vextracti32x4 xmm8,zmm2,0x2
|
|
+ vextracti32x4 xmm0,zmm10,0x3
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
|
|
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
|
|
+
|
|
+$L$_remaining_num_blocks_is_6_wcpqaDvsGlbjGoe:
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ vmovdqu8 ymm2,YMMWORD[64+rdi]
|
|
+ add rdi,0x60
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpternlogq zmm1,zmm9,zmm0,0x96
|
|
+ vpternlogq zmm2,zmm10,zmm0,0x96
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
|
|
+ vaesenclast zmm1,zmm1,zmm0
|
|
+ vaesenclast zmm2,zmm2,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 YMMWORD[64+rsi],ymm2
|
|
+ add rsi,0x60
|
|
+ vextracti32x4 xmm8,zmm2,0x1
|
|
+ vextracti32x4 xmm0,zmm10,0x2
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
|
|
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
|
|
+
|
|
+$L$_remaining_num_blocks_is_5_wcpqaDvsGlbjGoe:
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ vmovdqu xmm2,XMMWORD[64+rdi]
|
|
+ add rdi,0x50
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpternlogq zmm1,zmm9,zmm0,0x96
|
|
+ vpternlogq zmm2,zmm10,zmm0,0x96
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
|
|
+ vaesenclast zmm1,zmm1,zmm0
|
|
+ vaesenclast zmm2,zmm2,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu XMMWORD[64+rsi],xmm2
|
|
+ add rsi,0x50
|
|
+ vmovdqa xmm8,xmm2
|
|
+ vextracti32x4 xmm0,zmm10,0x1
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
|
|
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
|
|
+
|
|
+$L$_remaining_num_blocks_is_4_wcpqaDvsGlbjGoe:
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ add rdi,0x40
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpternlogq zmm1,zmm9,zmm0,0x96
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
|
|
+ vaesenclast zmm1,zmm1,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ add rsi,0x40
|
|
+ vextracti32x4 xmm8,zmm1,0x3
|
|
+ vmovdqa64 xmm0,xmm10
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
|
|
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
|
|
+$L$_remaining_num_blocks_is_3_wcpqaDvsGlbjGoe: ; tail path: processes 48 bytes (three 16-byte blocks) from [rdi] to [rsi]
|
|
+ mov r8,-1
|
|
+ shr r8,0x10 ; r8 = 0x0000ffffffffffff (low 48 bits set)
|
|
+ kmovq k1,r8 ; k1 = byte mask covering bytes 0..47 of a zmm register
|
|
+ vmovdqu8 zmm1{k1},[rdi] ; masked load of 48 input bytes; merge-masking leaves bytes 48..63 of zmm1 unchanged
|
|
+ add rdi,0x30 ; advance input pointer by 48
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx] ; 16-byte round key at [rcx] replicated into all four 128-bit lanes
|
|
+ vpternlogq zmm1,zmm9,zmm0,0x96 ; imm 0x96 = three-input XOR: zmm1 = zmm1 ^ zmm9 ^ zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] ; 13 vaesenc rounds follow, one per 16-byte key at [rcx+16] .. [rcx+208]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx] ; last round key, at [rcx+224]
|
|
+ vaesenclast zmm1,zmm1,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9 ; XOR with zmm9 again after the final round (same value as before round 0)
|
|
+ vmovdqu8 ZMMWORD[rsi]{k1},zmm1 ; masked store: only bytes 0..47 are written to the output
|
|
+ add rsi,0x30 ; advance output pointer by 48
|
|
+ vextracti32x4 xmm8,zmm1,0x2 ; xmm8 = lane 2 of zmm1 = the third (last) block just written; read by $L$_steal_cipher
|
|
+ vextracti32x4 xmm0,zmm9,0x3 ; xmm0 = lane 3 of zmm9 (the unused fourth mask); read by $L$_steal_cipher
|
|
+ and rdx,0xf ; rdx = remaining length modulo 16
|
|
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe ; no partial block left: go to the epilogue
|
|
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe ; 1..15 trailing bytes: handle them in $L$_steal_cipher
|
|
+$L$_remaining_num_blocks_is_2_wcpqaDvsGlbjGoe:
|
|
+ vmovdqu8 ymm1,YMMWORD[rdi]
|
|
+ add rdi,0x20
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[rcx]
|
|
+ vpternlogq ymm1,ymm9,ymm0,0x96
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[16+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[32+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[48+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[64+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[80+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[96+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[112+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[128+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[144+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[160+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[176+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[192+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[208+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[224+rcx]
|
|
+ vaesenclast ymm1,ymm1,ymm0
|
|
+ vpxorq ymm1,ymm1,ymm9
|
|
+ vmovdqu YMMWORD[rsi],ymm1
|
|
+ add rsi,0x20
|
|
+ vextracti32x4 xmm8,zmm1,0x1
|
|
+ vextracti32x4 xmm0,zmm9,0x2
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
|
|
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
|
|
+$L$_remaining_num_blocks_is_1_wcpqaDvsGlbjGoe: ; tail path: processes one 16-byte block from [rdi] to [rsi]
|
|
+ vmovdqu xmm1,XMMWORD[rdi] ; load the 16 input bytes
|
|
+ add rdi,0x10 ; advance input pointer by 16
|
|
+ vpxor xmm1,xmm1,xmm9 ; XOR with the low lane of zmm9 before encryption
|
|
+ vpxor xmm1,xmm1,XMMWORD[rcx] ; XOR with the 16-byte round key at [rcx]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[16+rcx] ; 13 vaesenc rounds, keys read directly from [rcx+16] .. [rcx+208]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[32+rcx]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[48+rcx]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[64+rcx]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[80+rcx]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[96+rcx]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[112+rcx]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[128+rcx]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[144+rcx]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[160+rcx]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[176+rcx]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[192+rcx]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[208+rcx]
|
|
+ vaesenclast xmm1,xmm1,XMMWORD[224+rcx] ; final round with the key at [rcx+224]
|
|
+ vpxor xmm1,xmm1,xmm9 ; XOR with the same xmm9 value again after the final round
|
|
+ vmovdqu XMMWORD[rsi],xmm1 ; store the 16 output bytes
|
|
+ add rsi,0x10 ; advance output pointer by 16
|
|
+ vmovdqa xmm8,xmm1 ; xmm8 = block just written; read by $L$_steal_cipher
|
|
+ vextracti32x4 xmm0,zmm9,0x1 ; xmm0 = lane 1 of zmm9 (the next, unused mask); read by $L$_steal_cipher
|
|
+ and rdx,0xf ; rdx = remaining length modulo 16
|
|
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe ; no partial block left: go to the epilogue
|
|
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe ; 1..15 trailing bytes: handle them in $L$_steal_cipher
|
|
+
|
|
+
|
|
+$L$_start_by16_wcpqaDvsGlbjGoe:
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rsp]
|
|
+ vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7]
|
|
+ mov r8,0xaa
|
|
+ kmovq k2,r8
|
|
+ vpshufb zmm1,zmm0,zmm8
|
|
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
|
|
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
|
|
+ vpclmulqdq zmm3,zmm2,zmm25,0x0
|
|
+ vpxorq zmm4{k2},zmm4,zmm2
|
|
+ vpxord zmm9,zmm3,zmm4
|
|
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
|
|
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
|
|
+ vpclmulqdq zmm7,zmm6,zmm25,0x0
|
|
+ vpxorq zmm5{k2},zmm5,zmm6
|
|
+ vpxord zmm10,zmm7,zmm5
|
|
+ vpsrldq zmm13,zmm9,0xf
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
|
|
+ vpslldq zmm11,zmm9,0x1
|
|
+ vpxord zmm11,zmm11,zmm14
|
|
+ vpsrldq zmm15,zmm10,0xf
|
|
+ vpclmulqdq zmm16,zmm15,zmm25,0x0
|
|
+ vpslldq zmm12,zmm10,0x1
|
|
+ vpxord zmm12,zmm12,zmm16
|
|
+
|
|
+$L$_main_loop_run_16_wcpqaDvsGlbjGoe:
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ vmovdqu8 zmm2,ZMMWORD[64+rdi]
|
|
+ vmovdqu8 zmm3,ZMMWORD[128+rdi]
|
|
+ vmovdqu8 zmm4,ZMMWORD[192+rdi]
|
|
+ add rdi,0x100
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+ vpxorq zmm3,zmm3,zmm11
|
|
+ vpxorq zmm4,zmm4,zmm12
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpxorq zmm1,zmm1,zmm0
|
|
+ vpxorq zmm2,zmm2,zmm0
|
|
+ vpxorq zmm3,zmm3,zmm0
|
|
+ vpxorq zmm4,zmm4,zmm0
|
|
+ vpsrldq zmm13,zmm11,0xf
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
|
|
+ vpslldq zmm15,zmm11,0x1
|
|
+ vpxord zmm15,zmm15,zmm14
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+ vaesenc zmm3,zmm3,zmm0
|
|
+ vaesenc zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+ vaesenc zmm3,zmm3,zmm0
|
|
+ vaesenc zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+ vaesenc zmm3,zmm3,zmm0
|
|
+ vaesenc zmm4,zmm4,zmm0
|
|
+ vpsrldq zmm13,zmm12,0xf
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
|
|
+ vpslldq zmm16,zmm12,0x1
|
|
+ vpxord zmm16,zmm16,zmm14
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+ vaesenc zmm3,zmm3,zmm0
|
|
+ vaesenc zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+ vaesenc zmm3,zmm3,zmm0
|
|
+ vaesenc zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+ vaesenc zmm3,zmm3,zmm0
|
|
+ vaesenc zmm4,zmm4,zmm0
|
|
+ vpsrldq zmm13,zmm15,0xf
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
|
|
+ vpslldq zmm17,zmm15,0x1
|
|
+ vpxord zmm17,zmm17,zmm14
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+ vaesenc zmm3,zmm3,zmm0
|
|
+ vaesenc zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+ vaesenc zmm3,zmm3,zmm0
|
|
+ vaesenc zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+ vaesenc zmm3,zmm3,zmm0
|
|
+ vaesenc zmm4,zmm4,zmm0
|
|
+ vpsrldq zmm13,zmm16,0xf
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
|
|
+ vpslldq zmm18,zmm16,0x1
|
|
+ vpxord zmm18,zmm18,zmm14
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+ vaesenc zmm3,zmm3,zmm0
|
|
+ vaesenc zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+ vaesenc zmm3,zmm3,zmm0
|
|
+ vaesenc zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+ vaesenc zmm3,zmm3,zmm0
|
|
+ vaesenc zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+ vaesenc zmm3,zmm3,zmm0
|
|
+ vaesenc zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
|
|
+ vaesenclast zmm1,zmm1,zmm0
|
|
+ vaesenclast zmm2,zmm2,zmm0
|
|
+ vaesenclast zmm3,zmm3,zmm0
|
|
+ vaesenclast zmm4,zmm4,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+ vpxorq zmm3,zmm3,zmm11
|
|
+ vpxorq zmm4,zmm4,zmm12
|
|
+
|
|
+ vmovdqa32 zmm9,zmm15
|
|
+ vmovdqa32 zmm10,zmm16
|
|
+ vmovdqa32 zmm11,zmm17
|
|
+ vmovdqa32 zmm12,zmm18
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 ZMMWORD[64+rsi],zmm2
|
|
+ vmovdqu8 ZMMWORD[128+rsi],zmm3
|
|
+ vmovdqu8 ZMMWORD[192+rsi],zmm4
|
|
+ add rsi,0x100
|
|
+ sub rdx,0x100
|
|
+ cmp rdx,0x100
|
|
+ jae NEAR $L$_main_loop_run_16_wcpqaDvsGlbjGoe
|
|
+ cmp rdx,0x80
|
|
+ jae NEAR $L$_main_loop_run_8_wcpqaDvsGlbjGoe
|
|
+ vextracti32x4 xmm0,zmm4,0x3
|
|
+ jmp NEAR $L$_do_n_blocks_wcpqaDvsGlbjGoe
|
|
+
|
|
+$L$_start_by8_wcpqaDvsGlbjGoe: ; builds zmm9 and zmm10, the two XOR masks consumed by $L$_main_loop_run_8 directly below
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rsp] ; zmm0 = the 16 bytes at [rsp] replicated into all four 128-bit lanes
|
|
+ vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7] ; zmm8 = byte-shuffle control from table shufb_15_7 (defined outside this chunk)
|
|
+ mov r8,0xaa ; 0xaa = 10101010b
|
|
+ kmovq k2,r8 ; k2 selects qword elements 1,3,5,7, i.e. the upper qword of each 128-bit lane
|
|
+ vpshufb zmm1,zmm0,zmm8 ; zmm1 = bytes of zmm0 rearranged per zmm8
|
|
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] ; per-qword left shift of zmm0; counts from table const_dq3210 (NOTE(review): presumably 3,2,1,0-style per-lane counts - table not visible here)
|
|
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] ; per-qword right shift of zmm1; counts from table const_dq5678
|
|
+ vpclmulqdq zmm3,zmm2,zmm25,0x0 ; carry-less multiply, per lane, of the low qword of zmm2 by the low qword of zmm25
|
|
+ vpxorq zmm4{k2},zmm4,zmm2 ; XOR zmm2 into the upper qword of each lane of zmm4 only
|
|
+ vpxord zmm9,zmm3,zmm4 ; zmm9 = zmm3 ^ zmm4: mask applied to input bytes 0..63 in the loop below
|
|
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654] ; same construction with the second pair of shift tables
|
|
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
|
|
+ vpclmulqdq zmm7,zmm6,zmm25,0x0
|
|
+ vpxorq zmm5{k2},zmm5,zmm6
|
|
+ vpxord zmm10,zmm7,zmm5 ; zmm10 = zmm7 ^ zmm5: mask applied to input bytes 64..127 in the loop below
|
|
+
|
|
+$L$_main_loop_run_8_wcpqaDvsGlbjGoe:
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ vmovdqu8 zmm2,ZMMWORD[64+rdi]
|
|
+ add rdi,0x80
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpternlogq zmm1,zmm9,zmm0,0x96
|
|
+ vpternlogq zmm2,zmm10,zmm0,0x96
|
|
+ vpsrldq zmm13,zmm9,0xf
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
|
|
+ vpslldq zmm15,zmm9,0x1
|
|
+ vpxord zmm15,zmm15,zmm14
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+ vpsrldq zmm13,zmm10,0xf
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
|
|
+ vpslldq zmm16,zmm10,0x1
|
|
+ vpxord zmm16,zmm16,zmm14
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
|
|
+ vaesenclast zmm1,zmm1,zmm0
|
|
+ vaesenclast zmm2,zmm2,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+ vmovdqa32 zmm9,zmm15
|
|
+ vmovdqa32 zmm10,zmm16
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 ZMMWORD[64+rsi],zmm2
|
|
+ add rsi,0x80
|
|
+ sub rdx,0x80
|
|
+ cmp rdx,0x80
|
|
+ jae NEAR $L$_main_loop_run_8_wcpqaDvsGlbjGoe
|
|
+ vextracti32x4 xmm0,zmm2,0x3
|
|
+ jmp NEAR $L$_do_n_blocks_wcpqaDvsGlbjGoe
|
|
+
|
|
+$L$_steal_cipher_wcpqaDvsGlbjGoe: ; partial-tail path; at the jump sites visible in this file section rdx = length & 0xf and is non-zero, xmm8 = last full output block, xmm0 = next XOR mask
|
|
+ vmovdqa xmm2,xmm8 ; keep an unshuffled copy of the last full output block
|
|
+ lea rax,[vpshufb_shf_table]
|
|
+ vmovdqu xmm10,XMMWORD[rdx*1+rax] ; shuffle control read at byte offset rdx into vpshufb_shf_table (table defined outside this chunk)
|
|
+ vpshufb xmm8,xmm8,xmm10 ; rearrange the last output block per that control
|
|
+ vmovdqu xmm3,XMMWORD[((-16))+rdx*1+rdi] ; 16 input bytes ending at rdi+rdx; read before the overlapping store on the next line
|
|
+ vmovdqu XMMWORD[(-16)+rdx*1+rsi],xmm8 ; 16-byte store ending at rsi+rdx; its first 16-rdx bytes overlap [rsi-16..] and are rewritten by the final store below
|
|
+ lea rax,[vpshufb_shf_table]
|
|
+ add rax,16
|
|
+ sub rax,rdx ; rax = vpshufb_shf_table + 16 - rdx
|
|
+ vmovdqu xmm10,XMMWORD[rax] ; second shuffle control, at table offset 16 - rdx
|
|
+ vpxor xmm10,xmm10,XMMWORD[mask1] ; modify the control with constant mask1 (defined outside this chunk)
|
|
+ vpshufb xmm3,xmm3,xmm10 ; rearrange the loaded input bytes
|
|
+ vpblendvb xmm3,xmm3,xmm2,xmm10 ; per byte: take xmm2 (saved output block) where the top bit of xmm10 is set, else keep xmm3
|
|
+ vpxor xmm8,xmm3,xmm0 ; XOR the combined block with xmm0 before encryption
|
|
+ vpxor xmm8,xmm8,XMMWORD[rcx] ; XOR with the 16-byte round key at [rcx]
|
|
+ vaesenc xmm8,xmm8,XMMWORD[16+rcx] ; 13 vaesenc rounds, keys at [rcx+16] .. [rcx+208]
|
|
+ vaesenc xmm8,xmm8,XMMWORD[32+rcx]
|
|
+ vaesenc xmm8,xmm8,XMMWORD[48+rcx]
|
|
+ vaesenc xmm8,xmm8,XMMWORD[64+rcx]
|
|
+ vaesenc xmm8,xmm8,XMMWORD[80+rcx]
|
|
+ vaesenc xmm8,xmm8,XMMWORD[96+rcx]
|
|
+ vaesenc xmm8,xmm8,XMMWORD[112+rcx]
|
|
+ vaesenc xmm8,xmm8,XMMWORD[128+rcx]
|
|
+ vaesenc xmm8,xmm8,XMMWORD[144+rcx]
|
|
+ vaesenc xmm8,xmm8,XMMWORD[160+rcx]
|
|
+ vaesenc xmm8,xmm8,XMMWORD[176+rcx]
|
|
+ vaesenc xmm8,xmm8,XMMWORD[192+rcx]
|
|
+ vaesenc xmm8,xmm8,XMMWORD[208+rcx]
|
|
+ vaesenclast xmm8,xmm8,XMMWORD[224+rcx] ; final round with the key at [rcx+224]
|
|
+ vpxor xmm8,xmm8,xmm0 ; XOR with xmm0 again after the final round
|
|
+ vmovdqu XMMWORD[(-16)+rsi],xmm8 ; overwrite the last full output block at [rsi-16]; execution then falls through into $L$_ret
|
|
+$L$_ret_wcpqaDvsGlbjGoe: ; common exit: reload saved registers from the stack frame, zero the slots they were read from, tear down the frame and return
|
|
+ mov rbx,QWORD[288+rsp] ; reload rbx from its slot at [rsp+288]
|
|
+ xor r8,r8 ; r8 = 0, used to clear the 8-byte slots
|
|
+ mov QWORD[288+rsp],r8 ; clear the rbx slot
|
|
+
|
|
+ vpxorq zmm0,zmm0,zmm0 ; zmm0 = 0, used to clear the vector save area
|
|
+ mov rdi,QWORD[((288 + 8))+rsp] ; reload rdi from [rsp+296] (it is loaded again from [rsp+8] after the frame is released)
|
|
+ mov QWORD[((288 + 8))+rsp],r8 ; clear that slot
|
|
+ mov rsi,QWORD[((288 + 16))+rsp] ; reload rsi from [rsp+304]
|
|
+ mov QWORD[((288 + 16))+rsp],r8 ; clear that slot
|
|
+
|
|
+ vmovdqa xmm6,XMMWORD[((128 + 0))+rsp] ; reload xmm6..xmm9 from [rsp+128 .. rsp+191]
|
|
+ vmovdqa xmm7,XMMWORD[((128 + 16))+rsp]
|
|
+ vmovdqa xmm8,XMMWORD[((128 + 32))+rsp]
|
|
+ vmovdqa xmm9,XMMWORD[((128 + 48))+rsp]
|
|
+
|
|
+
|
|
+ vmovdqa64 ZMMWORD[128+rsp],zmm0 ; zero the 64 bytes just read (aligned store)
|
|
+
|
|
+ vmovdqa xmm10,XMMWORD[((128 + 64))+rsp] ; reload xmm10..xmm13 from [rsp+192 .. rsp+255]
|
|
+ vmovdqa xmm11,XMMWORD[((128 + 80))+rsp]
|
|
+ vmovdqa xmm12,XMMWORD[((128 + 96))+rsp]
|
|
+ vmovdqa xmm13,XMMWORD[((128 + 112))+rsp]
|
|
+
|
|
+
|
|
+ vmovdqa64 ZMMWORD[(128 + 64)+rsp],zmm0 ; zero the 64 bytes just read
|
|
+
|
|
+ vmovdqa xmm14,XMMWORD[((128 + 128))+rsp] ; reload xmm14..xmm15 from [rsp+256 .. rsp+287]
|
|
+ vmovdqa xmm15,XMMWORD[((128 + 144))+rsp]
|
|
+
|
|
+
|
|
+
|
|
+ vmovdqa YMMWORD[(128 + 128)+rsp],ymm0 ; zero the 32 bytes just read
|
|
+ mov rsp,rbp ; release the frame
|
|
+ pop rbp
|
|
+ vzeroupper ; clear the upper bits of the vector registers before returning
|
|
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
|
+ mov rsi,QWORD[16+rsp]
|
|
+ DB 0F3h,0C3h ;repret
|
|
+
|
|
+$L$_less_than_128_bytes_wcpqaDvsGlbjGoe: ; short-input dispatcher; NOTE(review): the jump to this label is outside this chunk - presumably taken when rdx < 0x80
|
|
+ vpbroadcastq zmm25,r10 ; zmm25 = r10 in every qword; used as the vpclmulqdq multiplier in the $L$_num_blocks_is_N paths
|
|
+ cmp rdx,0x10
|
|
+ jb NEAR $L$_ret_wcpqaDvsGlbjGoe ; fewer than 16 bytes: go straight to the epilogue, nothing is read or written
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rsp] ; zmm0 = the 16 bytes at [rsp] replicated into all four 128-bit lanes
|
|
+ vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7] ; zmm8 = byte-shuffle control from table shufb_15_7 (defined outside this chunk)
|
|
+ mov r8d,0xaa ; 0xaa = 10101010b
|
|
+ kmovq k2,r8 ; k2 selects qword elements 1,3,5,7
|
|
+ mov r8,rdx
|
|
+ and r8,0x70 ; r8 = rdx with only bits 4..6 kept, i.e. 16 * (whole-block count mod 8)
|
|
+ cmp r8,0x60
|
|
+ je NEAR $L$_num_blocks_is_6_wcpqaDvsGlbjGoe
|
|
+ cmp r8,0x50
|
|
+ je NEAR $L$_num_blocks_is_5_wcpqaDvsGlbjGoe
|
|
+ cmp r8,0x40
|
|
+ je NEAR $L$_num_blocks_is_4_wcpqaDvsGlbjGoe
|
|
+ cmp r8,0x30
|
|
+ je NEAR $L$_num_blocks_is_3_wcpqaDvsGlbjGoe
|
|
+ cmp r8,0x20
|
|
+ je NEAR $L$_num_blocks_is_2_wcpqaDvsGlbjGoe
|
|
+ cmp r8,0x10
|
|
+ je NEAR $L$_num_blocks_is_1_wcpqaDvsGlbjGoe ; no match above: execution falls through into $L$_num_blocks_is_7
|
|
+
|
|
+$L$_num_blocks_is_7_wcpqaDvsGlbjGoe:
|
|
+ vpshufb zmm1,zmm0,zmm8
|
|
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
|
|
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
|
|
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
|
|
+ vpxorq zmm4{k2},zmm4,zmm2
|
|
+ vpxord zmm9,zmm3,zmm4
|
|
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
|
|
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
|
|
+ vpclmulqdq zmm7,zmm6,zmm25,0x00
|
|
+ vpxorq zmm5{k2},zmm5,zmm6
|
|
+ vpxord zmm10,zmm7,zmm5
|
|
+ mov r8,0x0000ffffffffffff
|
|
+ kmovq k1,r8
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ vmovdqu8 zmm2{k1},[64+rdi]
|
|
+
|
|
+ add rdi,0x70
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpternlogq zmm1,zmm9,zmm0,0x96
|
|
+ vpternlogq zmm2,zmm10,zmm0,0x96
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
|
|
+ vaesenclast zmm1,zmm1,zmm0
|
|
+ vaesenclast zmm2,zmm2,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 ZMMWORD[64+rsi]{k1},zmm2
|
|
+ add rsi,0x70
|
|
+ vextracti32x4 xmm8,zmm2,0x2
|
|
+ vextracti32x4 xmm0,zmm10,0x3
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
|
|
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
|
|
+$L$_num_blocks_is_6_wcpqaDvsGlbjGoe:
|
|
+ vpshufb zmm1,zmm0,zmm8
|
|
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
|
|
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
|
|
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
|
|
+ vpxorq zmm4{k2},zmm4,zmm2
|
|
+ vpxord zmm9,zmm3,zmm4
|
|
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
|
|
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
|
|
+ vpclmulqdq zmm7,zmm6,zmm25,0x00
|
|
+ vpxorq zmm5{k2},zmm5,zmm6
|
|
+ vpxord zmm10,zmm7,zmm5
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ vmovdqu8 ymm2,YMMWORD[64+rdi]
|
|
+ add rdi,96
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpternlogq zmm1,zmm9,zmm0,0x96
|
|
+ vpternlogq zmm2,zmm10,zmm0,0x96
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
|
|
+ vaesenclast zmm1,zmm1,zmm0
|
|
+ vaesenclast zmm2,zmm2,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 YMMWORD[64+rsi],ymm2
|
|
+ add rsi,96
|
|
+
|
|
+ vextracti32x4 xmm8,ymm2,0x1
|
|
+ vextracti32x4 xmm0,zmm10,0x2
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
|
|
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
|
|
+$L$_num_blocks_is_5_wcpqaDvsGlbjGoe:
|
|
+ vpshufb zmm1,zmm0,zmm8
|
|
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
|
|
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
|
|
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
|
|
+ vpxorq zmm4{k2},zmm4,zmm2
|
|
+ vpxord zmm9,zmm3,zmm4
|
|
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
|
|
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
|
|
+ vpclmulqdq zmm7,zmm6,zmm25,0x00
|
|
+ vpxorq zmm5{k2},zmm5,zmm6
|
|
+ vpxord zmm10,zmm7,zmm5
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ vmovdqu8 xmm2,XMMWORD[64+rdi]
|
|
+ add rdi,80
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpternlogq zmm1,zmm9,zmm0,0x96
|
|
+ vpternlogq zmm2,zmm10,zmm0,0x96
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vaesenc zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
|
|
+ vaesenclast zmm1,zmm1,zmm0
|
|
+ vaesenclast zmm2,zmm2,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 XMMWORD[64+rsi],xmm2
|
|
+ add rsi,80
|
|
+
|
|
+ vmovdqa xmm8,xmm2
|
|
+ vextracti32x4 xmm0,zmm10,0x1
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
|
|
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
|
|
+$L$_num_blocks_is_4_wcpqaDvsGlbjGoe:
|
|
+ vpshufb zmm1,zmm0,zmm8
|
|
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
|
|
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
|
|
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
|
|
+ vpxorq zmm4{k2},zmm4,zmm2
|
|
+ vpxord zmm9,zmm3,zmm4
|
|
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
|
|
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
|
|
+ vpclmulqdq zmm7,zmm6,zmm25,0x00
|
|
+ vpxorq zmm5{k2},zmm5,zmm6
|
|
+ vpxord zmm10,zmm7,zmm5
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ add rdi,64
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpternlogq zmm1,zmm9,zmm0,0x96
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
|
|
+ vaesenclast zmm1,zmm1,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ add rsi,64
|
|
+ vextracti32x4 xmm8,zmm1,0x3
|
|
+ vmovdqa xmm0,xmm10
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
|
|
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
|
|
+$L$_num_blocks_is_3_wcpqaDvsGlbjGoe:
|
|
+ vpshufb zmm1,zmm0,zmm8
|
|
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
|
|
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
|
|
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
|
|
+ vpxorq zmm4{k2},zmm4,zmm2
|
|
+ vpxord zmm9,zmm3,zmm4
|
|
+ mov r8,0x0000ffffffffffff
|
|
+ kmovq k1,r8
|
|
+ vmovdqu8 zmm1{k1},[rdi]
|
|
+ add rdi,48
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpternlogq zmm1,zmm9,zmm0,0x96
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
|
|
+ vaesenc zmm1,zmm1,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
|
|
+ vaesenclast zmm1,zmm1,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vmovdqu8 ZMMWORD[rsi]{k1},zmm1
|
|
+ add rsi,48
|
|
+ vextracti32x4 xmm8,zmm1,2
|
|
+ vextracti32x4 xmm0,zmm9,3
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
|
|
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
|
|
+$L$_num_blocks_is_2_wcpqaDvsGlbjGoe:
|
|
+ vpshufb zmm1,zmm0,zmm8
|
|
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
|
|
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
|
|
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
|
|
+ vpxorq zmm4{k2},zmm4,zmm2
|
|
+ vpxord zmm9,zmm3,zmm4
|
|
+
|
|
+ vmovdqu8 ymm1,YMMWORD[rdi]
|
|
+ add rdi,32
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[rcx]
|
|
+ vpternlogq ymm1,ymm9,ymm0,0x96
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[16+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[32+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[48+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[64+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[80+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[96+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[112+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[128+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[144+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[160+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[176+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[192+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[208+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[224+rcx]
|
|
+ vaesenclast ymm1,ymm1,ymm0
|
|
+ vpxorq ymm1,ymm1,ymm9
|
|
+ vmovdqu8 YMMWORD[rsi],ymm1
|
|
+ add rsi,32
|
|
+
|
|
+ vextracti32x4 xmm8,ymm1,1
|
|
+ vextracti32x4 xmm0,zmm9,2
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
|
|
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
|
|
+$L$_num_blocks_is_1_wcpqaDvsGlbjGoe:
|
|
+ vpshufb zmm1,zmm0,zmm8
|
|
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210]
|
|
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678]
|
|
+ vpclmulqdq zmm3,zmm2,zmm25,0x00
|
|
+ vpxorq zmm4{k2},zmm4,zmm2
|
|
+ vpxord zmm9,zmm3,zmm4
|
|
+
|
|
+ vmovdqu8 xmm1,XMMWORD[rdi]
|
|
+ add rdi,16
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[rcx]
|
|
+ vpternlogq ymm1,ymm9,ymm0,0x96
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[16+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[32+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[48+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[64+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[80+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[96+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[112+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[128+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[144+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[160+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[176+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[192+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[208+rcx]
|
|
+ vaesenc ymm1,ymm1,ymm0
|
|
+ vbroadcasti32x4 ymm0,YMMWORD[224+rcx]
|
|
+ vaesenclast ymm1,ymm1,ymm0
|
|
+ vpxorq ymm1,ymm1,ymm9
|
|
+ vmovdqu8 XMMWORD[rsi],xmm1
|
|
+ add rsi,16
|
|
+
|
|
+ vmovdqa xmm8,xmm1
|
|
+ vextracti32x4 xmm0,zmm9,1
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_ret_wcpqaDvsGlbjGoe
|
|
+ jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe
|
|
+
|
|
+global aesni_xts_256_decrypt_avx512
|
|
+
|
|
+
|
|
+ALIGN 32
|
|
+aesni_xts_256_decrypt_avx512:
|
|
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
|
|
+ mov QWORD[16+rsp],rsi
|
|
+ mov rax,rsp
|
|
+$L$SEH_begin_aesni_xts_256_decrypt_avx512:
|
|
+ mov rdi,rcx
|
|
+ mov rsi,rdx
|
|
+ mov rdx,r8
|
|
+ mov rcx,r9
|
|
+ mov r8,QWORD[40+rsp]
|
|
+ mov r9,QWORD[48+rsp]
|
|
+
|
|
+
|
|
+
|
|
+DB 243,15,30,250
|
|
+ push rbp
|
|
+ mov rbp,rsp
|
|
+ sub rsp,312
|
|
+ and rsp,0xffffffffffffffc0
|
|
+ mov QWORD[288+rsp],rbx
|
|
+ mov QWORD[((288 + 8))+rsp],rdi
|
|
+ mov QWORD[((288 + 16))+rsp],rsi
|
|
+ vmovdqa XMMWORD[(128 + 0)+rsp],xmm6
|
|
+ vmovdqa XMMWORD[(128 + 16)+rsp],xmm7
|
|
+ vmovdqa XMMWORD[(128 + 32)+rsp],xmm8
|
|
+ vmovdqa XMMWORD[(128 + 48)+rsp],xmm9
|
|
+ vmovdqa XMMWORD[(128 + 64)+rsp],xmm10
|
|
+ vmovdqa XMMWORD[(128 + 80)+rsp],xmm11
|
|
+ vmovdqa XMMWORD[(128 + 96)+rsp],xmm12
|
|
+ vmovdqa XMMWORD[(128 + 112)+rsp],xmm13
|
|
+ vmovdqa XMMWORD[(128 + 128)+rsp],xmm14
|
|
+ vmovdqa XMMWORD[(128 + 144)+rsp],xmm15
|
|
+ mov r10,0x87
|
|
+ vmovdqu xmm1,XMMWORD[r9]
|
|
+ vpxor xmm1,xmm1,XMMWORD[r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[16+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[32+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[48+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[64+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[80+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[96+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[112+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[128+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[144+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[160+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[176+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[192+r8]
|
|
+ vaesenc xmm1,xmm1,XMMWORD[208+r8]
|
|
+ vaesenclast xmm1,xmm1,XMMWORD[224+r8]
|
|
+ vmovdqa XMMWORD[rsp],xmm1
|
|
+ mov QWORD[((8 + 40))+rbp],rdi
|
|
+ mov QWORD[((8 + 48))+rbp],rsi
|
|
+
|
|
+ cmp rdx,0x80
|
|
+ jb NEAR $L$_less_than_128_bytes_EmbgEptodyewbFa
|
|
+ vpbroadcastq zmm25,r10
|
|
+ cmp rdx,0x100
|
|
+ jge NEAR $L$_start_by16_EmbgEptodyewbFa
|
|
+ jmp NEAR $L$_start_by8_EmbgEptodyewbFa
|
|
+
|
|
+$L$_do_n_blocks_EmbgEptodyewbFa:
|
|
+ cmp rdx,0x0
|
|
+ je NEAR $L$_ret_EmbgEptodyewbFa
|
|
+ cmp rdx,0x70
|
|
+ jge NEAR $L$_remaining_num_blocks_is_7_EmbgEptodyewbFa
|
|
+ cmp rdx,0x60
|
|
+ jge NEAR $L$_remaining_num_blocks_is_6_EmbgEptodyewbFa
|
|
+ cmp rdx,0x50
|
|
+ jge NEAR $L$_remaining_num_blocks_is_5_EmbgEptodyewbFa
|
|
+ cmp rdx,0x40
|
|
+ jge NEAR $L$_remaining_num_blocks_is_4_EmbgEptodyewbFa
|
|
+ cmp rdx,0x30
|
|
+ jge NEAR $L$_remaining_num_blocks_is_3_EmbgEptodyewbFa
|
|
+ cmp rdx,0x20
|
|
+ jge NEAR $L$_remaining_num_blocks_is_2_EmbgEptodyewbFa
|
|
+ cmp rdx,0x10
|
|
+ jge NEAR $L$_remaining_num_blocks_is_1_EmbgEptodyewbFa
|
|
+
|
|
+
|
|
+ vmovdqu xmm1,xmm5
|
|
+
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[176+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[192+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[208+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[224+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vmovdqu XMMWORD[(-16)+rsi],xmm1
|
|
+ vmovdqa xmm8,xmm1
|
|
+
|
|
+
|
|
+ mov r8,0x1
|
|
+ kmovq k1,r8
|
|
+ vpsllq xmm13,xmm9,0x3f
|
|
+ vpsraq xmm14,xmm13,0x3f
|
|
+ vpandq xmm5,xmm14,xmm25
|
|
+ vpxorq xmm9{k1},xmm9,xmm5
|
|
+ vpsrldq xmm10,xmm9,0x8
|
|
+DB 98,211,181,8,115,194,1
|
|
+ vpslldq xmm13,xmm13,0x8
|
|
+ vpxorq xmm0,xmm0,xmm13
|
|
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+$L$_remaining_num_blocks_is_7_EmbgEptodyewbFa:
|
|
+ mov r8,0xffffffffffffffff
|
|
+ shr r8,0x10
|
|
+ kmovq k1,r8
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ vmovdqu8 zmm2{k1},[64+rdi]
|
|
+ add rdi,0x70
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_done_7_remain_EmbgEptodyewbFa
|
|
+ vextracti32x4 xmm12,zmm10,0x2
|
|
+ vextracti32x4 xmm13,zmm10,0x3
|
|
+ vinserti32x4 zmm10,zmm10,xmm13,0x2
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpxorq zmm1,zmm1,zmm0
|
|
+ vpxorq zmm2,zmm2,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
|
|
+ vaesdeclast zmm1,zmm1,zmm0
|
|
+ vaesdeclast zmm2,zmm2,zmm0
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vmovdqa32 zmm9,zmm15
|
|
+ vmovdqa32 zmm10,zmm16
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 ZMMWORD[64+rsi]{k1},zmm2
|
|
+ add rsi,0x70
|
|
+ vextracti32x4 xmm8,zmm2,0x2
|
|
+ vmovdqa xmm0,xmm12
|
|
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+$L$_done_7_remain_EmbgEptodyewbFa:
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpxorq zmm1,zmm1,zmm0
|
|
+ vpxorq zmm2,zmm2,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
|
|
+ vaesdeclast zmm1,zmm1,zmm0
|
|
+ vaesdeclast zmm2,zmm2,zmm0
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vmovdqa32 zmm9,zmm15
|
|
+ vmovdqa32 zmm10,zmm16
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 ZMMWORD[64+rsi]{k1},zmm2
|
|
+ jmp NEAR $L$_ret_EmbgEptodyewbFa
|
|
+
|
|
+$L$_remaining_num_blocks_is_6_EmbgEptodyewbFa:
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ vmovdqu8 ymm2,YMMWORD[64+rdi]
|
|
+ add rdi,0x60
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_done_6_remain_EmbgEptodyewbFa
|
|
+ vextracti32x4 xmm12,zmm10,0x1
|
|
+ vextracti32x4 xmm13,zmm10,0x2
|
|
+ vinserti32x4 zmm10,zmm10,xmm13,0x1
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpxorq zmm1,zmm1,zmm0
|
|
+ vpxorq zmm2,zmm2,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
|
|
+ vaesdeclast zmm1,zmm1,zmm0
|
|
+ vaesdeclast zmm2,zmm2,zmm0
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vmovdqa32 zmm9,zmm15
|
|
+ vmovdqa32 zmm10,zmm16
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 YMMWORD[64+rsi],ymm2
|
|
+ add rsi,0x60
|
|
+ vextracti32x4 xmm8,zmm2,0x1
|
|
+ vmovdqa xmm0,xmm12
|
|
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+$L$_done_6_remain_EmbgEptodyewbFa:
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpxorq zmm1,zmm1,zmm0
|
|
+ vpxorq zmm2,zmm2,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
|
|
+ vaesdeclast zmm1,zmm1,zmm0
|
|
+ vaesdeclast zmm2,zmm2,zmm0
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vmovdqa32 zmm9,zmm15
|
|
+ vmovdqa32 zmm10,zmm16
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 YMMWORD[64+rsi],ymm2
|
|
+ jmp NEAR $L$_ret_EmbgEptodyewbFa
|
|
+
|
|
+$L$_remaining_num_blocks_is_5_EmbgEptodyewbFa:
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ vmovdqu xmm2,XMMWORD[64+rdi]
|
|
+ add rdi,0x50
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_done_5_remain_EmbgEptodyewbFa
|
|
+ vmovdqa xmm12,xmm10
|
|
+ vextracti32x4 xmm10,zmm10,0x1
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpxorq zmm1,zmm1,zmm0
|
|
+ vpxorq zmm2,zmm2,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
|
|
+ vaesdeclast zmm1,zmm1,zmm0
|
|
+ vaesdeclast zmm2,zmm2,zmm0
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vmovdqa32 zmm9,zmm15
|
|
+ vmovdqa32 zmm10,zmm16
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu XMMWORD[64+rsi],xmm2
|
|
+ add rsi,0x50
|
|
+ vmovdqa xmm8,xmm2
|
|
+ vmovdqa xmm0,xmm12
|
|
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+$L$_done_5_remain_EmbgEptodyewbFa:
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpxorq zmm1,zmm1,zmm0
|
|
+ vpxorq zmm2,zmm2,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
|
|
+ vaesdeclast zmm1,zmm1,zmm0
|
|
+ vaesdeclast zmm2,zmm2,zmm0
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vmovdqa32 zmm9,zmm15
|
|
+ vmovdqa32 zmm10,zmm16
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ vmovdqu8 XMMWORD[64+rsi],xmm2
|
|
+ jmp NEAR $L$_ret_EmbgEptodyewbFa
|
|
+
|
|
+$L$_remaining_num_blocks_is_4_EmbgEptodyewbFa:
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi]
|
|
+ add rdi,0x40
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_done_4_remain_EmbgEptodyewbFa
|
|
+ vextracti32x4 xmm12,zmm9,0x3
|
|
+ vinserti32x4 zmm9,zmm9,xmm10,0x3
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpxorq zmm1,zmm1,zmm0
|
|
+ vpxorq zmm2,zmm2,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
|
|
+ vaesdeclast zmm1,zmm1,zmm0
|
|
+ vaesdeclast zmm2,zmm2,zmm0
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vmovdqa32 zmm9,zmm15
|
|
+ vmovdqa32 zmm10,zmm16
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ add rsi,0x40
|
|
+ vextracti32x4 xmm8,zmm1,0x3
|
|
+ vmovdqa xmm0,xmm12
|
|
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+$L$_done_4_remain_EmbgEptodyewbFa:
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx]
|
|
+ vpxorq zmm1,zmm1,zmm0
|
|
+ vpxorq zmm2,zmm2,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx]
|
|
+ vaesdeclast zmm1,zmm1,zmm0
|
|
+ vaesdeclast zmm2,zmm2,zmm0
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vmovdqa32 zmm9,zmm15
|
|
+ vmovdqa32 zmm10,zmm16
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1
|
|
+ jmp NEAR $L$_ret_EmbgEptodyewbFa
|
|
+
|
|
+$L$_remaining_num_blocks_is_3_EmbgEptodyewbFa:
|
|
+ vmovdqu xmm1,XMMWORD[rdi]
|
|
+ vmovdqu xmm2,XMMWORD[16+rdi]
|
|
+ vmovdqu xmm3,XMMWORD[32+rdi]
|
|
+ add rdi,0x30
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_done_3_remain_EmbgEptodyewbFa
|
|
+ vextracti32x4 xmm13,zmm9,0x2
|
|
+ vextracti32x4 xmm10,zmm9,0x1
|
|
+ vextracti32x4 xmm11,zmm9,0x3
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vpxor xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[176+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[192+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[208+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[224+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vaesdeclast xmm3,xmm3,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ vmovdqu XMMWORD[32+rsi],xmm3
|
|
+ add rsi,0x30
|
|
+ vmovdqa xmm8,xmm3
|
|
+ vmovdqa xmm0,xmm13
|
|
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+$L$_done_3_remain_EmbgEptodyewbFa:
|
|
+ vextracti32x4 xmm10,zmm9,0x1
|
|
+ vextracti32x4 xmm11,zmm9,0x2
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vpxor xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[176+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[192+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[208+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[224+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vaesdeclast xmm3,xmm3,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ vmovdqu XMMWORD[32+rsi],xmm3
|
|
+ jmp NEAR $L$_ret_EmbgEptodyewbFa
|
|
+
|
|
+$L$_remaining_num_blocks_is_2_EmbgEptodyewbFa:
|
|
+ vmovdqu xmm1,XMMWORD[rdi]
|
|
+ vmovdqu xmm2,XMMWORD[16+rdi]
|
|
+ add rdi,0x20
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_done_2_remain_EmbgEptodyewbFa
|
|
+ vextracti32x4 xmm10,zmm9,0x2
|
|
+ vextracti32x4 xmm12,zmm9,0x1
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[176+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[192+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[208+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[224+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ add rsi,0x20
|
|
+ vmovdqa xmm8,xmm2
|
|
+ vmovdqa xmm0,xmm12
|
|
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+$L$_done_2_remain_EmbgEptodyewbFa:
|
|
+ vextracti32x4 xmm10,zmm9,0x1
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[176+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[192+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[208+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[224+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ jmp NEAR $L$_ret_EmbgEptodyewbFa
|
|
+
|
|
+$L$_remaining_num_blocks_is_1_EmbgEptodyewbFa:
|
|
+ vmovdqu xmm1,XMMWORD[rdi]
|
|
+ add rdi,0x10
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_done_1_remain_EmbgEptodyewbFa
|
|
+ vextracti32x4 xmm11,zmm9,0x1
|
|
+ vpxor xmm1,xmm1,xmm11
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[176+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[192+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[208+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[224+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vpxor xmm1,xmm1,xmm11
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ add rsi,0x10
|
|
+ vmovdqa xmm8,xmm1
|
|
+ vmovdqa xmm0,xmm9
|
|
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+$L$_done_1_remain_EmbgEptodyewbFa:
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[176+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[192+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[208+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[224+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ jmp NEAR $L$_ret_EmbgEptodyewbFa
|
|
+
|
|
+$L$_start_by16_EmbgEptodyewbFa: ; builds the four ZMM whitening vectors zmm9..zmm12 (16 x 128-bit lanes) consumed by the 16-block loop that follows
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rsp] ; replicate the 16 bytes at [rsp] into all four 128-bit lanes (presumably the initial tweak -- confirm against the prologue)
|
|
+ vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7] ; byte-shuffle control; table is defined elsewhere in the file
|
|
+ mov r8,0xaa
|
|
+ kmovq k2,r8 ; k2 = 10101010b: for qword elements this selects the upper qword of every 128-bit lane
|
|
+
|
|
+ ; --- zmm9: first vector of four per-lane values ---
|
|
+ vpshufb zmm1,zmm0,zmm8 ; zmm1 = bytes of zmm0 rearranged per shufb_15_7
|
|
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] ; per-qword left shift by the counts stored in const_dq3210
|
|
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] ; per-qword right shift of the shuffled copy (presumably recovers the bits the left shift pushed out)
|
|
+ vpclmulqdq zmm3,zmm2,zmm25,0x0 ; carry-less multiply of the low qwords of zmm2 and zmm25 per lane; zmm25 is loaded outside this section (presumably the reduction constant)
|
|
+ vpxorq zmm4{k2},zmm4,zmm2 ; merge-masked: only the upper qword of each lane gets zmm2 XORed in
|
|
+ vpxord zmm9,zmm3,zmm4 ; zmm9 = shifted value combined with the carry-less product
|
|
+
|
|
+ ; --- zmm10: same construction with the const_dq7654 / const_dq1234 shift counts ---
|
|
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
|
|
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
|
|
+ vpclmulqdq zmm7,zmm6,zmm25,0x0
|
|
+ vpxorq zmm5{k2},zmm5,zmm6
|
|
+ vpxord zmm10,zmm7,zmm5
|
|
+
|
|
+ ; --- zmm11 / zmm12: derived from zmm9 / zmm10 by a one-byte shift per lane plus a folded-back top byte ---
|
|
+ vpsrldq zmm13,zmm9,0xf ; isolate the top byte of each 128-bit lane of zmm9
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0 ; multiply that byte by the low qword of zmm25
|
|
+ vpslldq zmm11,zmm9,0x1 ; shift each lane left by one byte (8 bit positions)
|
|
+ vpxord zmm11,zmm11,zmm14 ; zmm11 = shifted lanes with the product folded in
|
|
+
|
|
+ vpsrldq zmm15,zmm10,0xf ; same steps for zmm10, producing zmm12
|
|
+ vpclmulqdq zmm16,zmm15,zmm25,0x0
|
|
+ vpslldq zmm12,zmm10,0x1
|
|
+ vpxord zmm12,zmm12,zmm16
|
|
+
|
|
+$L$_main_loop_run_16_EmbgEptodyewbFa: ; main loop: decrypts 0x100 bytes (16 blocks) per iteration from [rdi] to [rsi]; rdx = bytes remaining, rcx = round keys
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi] ; load 4 x 64 bytes of input
|
|
+ vmovdqu8 zmm2,ZMMWORD[64+rdi]
|
|
+ vmovdqu8 zmm3,ZMMWORD[128+rdi]
|
|
+ vmovdqu8 zmm4,ZMMWORD[192+rdi]
|
|
+ vmovdqu8 xmm5,XMMWORD[240+rdi] ; copy of the last 16 input bytes of this batch; not read inside this loop (NOTE(review): presumably used by the tail code -- confirm)
|
|
+ add rdi,0x100
|
|
+ vpxorq zmm1,zmm1,zmm9 ; pre-whitening with the four per-lane vectors zmm9..zmm12
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+ vpxorq zmm3,zmm3,zmm11
|
|
+ vpxorq zmm4,zmm4,zmm12
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx] ; round key 0 replicated to all four lanes
|
|
+ vpxorq zmm1,zmm1,zmm0
|
|
+ vpxorq zmm2,zmm2,zmm0
|
|
+ vpxorq zmm3,zmm3,zmm0
|
|
+ vpxorq zmm4,zmm4,zmm0
|
|
+ vpsrldq zmm13,zmm11,0xf ; next-iteration vectors are computed between AES rounds; here: top byte of each lane of zmm11
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0 ; carry-less multiply by the low qword of zmm25 (set outside this section)
|
|
+ vpslldq zmm15,zmm11,0x1 ; each lane shifted left by one byte
|
|
+ vpxord zmm15,zmm15,zmm14 ; zmm15 = value derived from zmm11; becomes zmm9 at the end of the iteration
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] ; rounds 1..13 use the keys at 16..208+rcx
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+ vaesdec zmm3,zmm3,zmm0
|
|
+ vaesdec zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+ vaesdec zmm3,zmm3,zmm0
|
|
+ vaesdec zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+ vaesdec zmm3,zmm3,zmm0
|
|
+ vaesdec zmm4,zmm4,zmm0
|
|
+ vpsrldq zmm13,zmm12,0xf ; same derivation from zmm12
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
|
|
+ vpslldq zmm16,zmm12,0x1
|
|
+ vpxord zmm16,zmm16,zmm14 ; zmm16 becomes zmm10 at the end of the iteration
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+ vaesdec zmm3,zmm3,zmm0
|
|
+ vaesdec zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+ vaesdec zmm3,zmm3,zmm0
|
|
+ vaesdec zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+ vaesdec zmm3,zmm3,zmm0
|
|
+ vaesdec zmm4,zmm4,zmm0
|
|
+ vpsrldq zmm13,zmm15,0xf ; same derivation from the freshly computed zmm15
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
|
|
+ vpslldq zmm17,zmm15,0x1
|
|
+ vpxord zmm17,zmm17,zmm14 ; zmm17 becomes zmm11 at the end of the iteration
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+ vaesdec zmm3,zmm3,zmm0
|
|
+ vaesdec zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+ vaesdec zmm3,zmm3,zmm0
|
|
+ vaesdec zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+ vaesdec zmm3,zmm3,zmm0
|
|
+ vaesdec zmm4,zmm4,zmm0
|
|
+ vpsrldq zmm13,zmm16,0xf ; same derivation from the freshly computed zmm16
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
|
|
+ vpslldq zmm18,zmm16,0x1
|
|
+ vpxord zmm18,zmm18,zmm14 ; zmm18 becomes zmm12 at the end of the iteration
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+ vaesdec zmm3,zmm3,zmm0
|
|
+ vaesdec zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+ vaesdec zmm3,zmm3,zmm0
|
|
+ vaesdec zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+ vaesdec zmm3,zmm3,zmm0
|
|
+ vaesdec zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+ vaesdec zmm3,zmm3,zmm0
|
|
+ vaesdec zmm4,zmm4,zmm0
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx] ; last round key
|
|
+ vaesdeclast zmm1,zmm1,zmm0
|
|
+ vaesdeclast zmm2,zmm2,zmm0
|
|
+ vaesdeclast zmm3,zmm3,zmm0
|
|
+ vaesdeclast zmm4,zmm4,zmm0
|
|
+ vpxorq zmm1,zmm1,zmm9 ; post-whitening with the same vectors used before round 0
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+ vpxorq zmm3,zmm3,zmm11
|
|
+ vpxorq zmm4,zmm4,zmm12
|
|
+ ; rotate in the vectors computed above for the next 16 blocks
|
|
+ vmovdqa32 zmm9,zmm15
|
|
+ vmovdqa32 zmm10,zmm16
|
|
+ vmovdqa32 zmm11,zmm17
|
|
+ vmovdqa32 zmm12,zmm18
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1 ; write 4 x 64 bytes of output
|
|
+ vmovdqu8 ZMMWORD[64+rsi],zmm2
|
|
+ vmovdqu8 ZMMWORD[128+rsi],zmm3
|
|
+ vmovdqu8 ZMMWORD[192+rsi],zmm4
|
|
+ add rsi,0x100
|
|
+ sub rdx,0x100
|
|
+ cmp rdx,0x100
|
|
+ jge NEAR $L$_main_loop_run_16_EmbgEptodyewbFa ; NOTE(review): jge is a signed compare on a byte count; matches the unsigned form only while rdx is below 2^63
|
|
+
|
|
+ cmp rdx,0x80
|
|
+ jge NEAR $L$_main_loop_run_8_EmbgEptodyewbFa ; at least 0x80 bytes left: continue in the 8-block loop, which whitens with zmm9/zmm10
|
|
+ jmp NEAR $L$_do_n_blocks_EmbgEptodyewbFa ; otherwise hand the remainder to the tail code (label defined outside this section)
|
|
+
|
|
+$L$_start_by8_EmbgEptodyewbFa: ; builds the two ZMM whitening vectors zmm9/zmm10 (8 x 128-bit lanes) consumed by the 8-block loop that follows
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rsp] ; replicate the 16 bytes at [rsp] into all four lanes (presumably the initial tweak -- confirm against the prologue)
|
|
+ vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7] ; byte-shuffle control; table is defined elsewhere in the file
|
|
+ mov r8,0xaa
|
|
+ kmovq k2,r8 ; k2 = 10101010b: selects the upper qword of every 128-bit lane for qword-masked ops
|
|
+
|
|
+ ; --- zmm9 ---
|
|
+ vpshufb zmm1,zmm0,zmm8 ; zmm1 = bytes of zmm0 rearranged per shufb_15_7
|
|
+ vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] ; per-qword left shift by the counts in const_dq3210
|
|
+ vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] ; per-qword right shift of the shuffled copy
|
|
+ vpclmulqdq zmm3,zmm2,zmm25,0x0 ; carry-less multiply with the low qword of zmm25 (loaded outside this section)
|
|
+ vpxorq zmm4{k2},zmm4,zmm2 ; merge-masked XOR into the upper qword of each lane only
|
|
+ vpxord zmm9,zmm3,zmm4
|
|
+
|
|
+ ; --- zmm10: same construction with const_dq7654 / const_dq1234 ---
|
|
+ vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654]
|
|
+ vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234]
|
|
+ vpclmulqdq zmm7,zmm6,zmm25,0x0
|
|
+ vpxorq zmm5{k2},zmm5,zmm6
|
|
+ vpxord zmm10,zmm7,zmm5
|
|
+
|
|
+$L$_main_loop_run_8_EmbgEptodyewbFa: ; loop: decrypts 0x80 bytes (8 blocks) per iteration from [rdi] to [rsi]; rdx = bytes remaining, rcx = round keys
|
|
+ vmovdqu8 zmm1,ZMMWORD[rdi] ; load 2 x 64 bytes of input
|
|
+ vmovdqu8 zmm2,ZMMWORD[64+rdi]
|
|
+ vmovdqu8 xmm5,XMMWORD[112+rdi] ; copy of the last 16 input bytes of this batch; not read inside this loop (NOTE(review): presumably used by the tail code -- confirm)
|
|
+ add rdi,0x80
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9 ; pre-whitening with zmm9/zmm10
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[rcx] ; round key 0 replicated to all four lanes
|
|
+ vpxorq zmm1,zmm1,zmm0
|
|
+ vpxorq zmm2,zmm2,zmm0
|
|
+ vpsrldq zmm13,zmm9,0xf ; next-iteration zmm9: top byte of each lane ...
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0 ; ... multiplied (carry-less) by the low qword of zmm25 ...
|
|
+ vpslldq zmm15,zmm9,0x1 ; ... lanes shifted left by one byte ...
|
|
+ vpxord zmm15,zmm15,zmm14 ; ... and combined; zmm15 replaces zmm9 at the end of the iteration
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] ; rounds 1..13 use the keys at 16..208+rcx
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[32+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[48+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+ vpsrldq zmm13,zmm10,0xf ; next-iteration zmm10, same derivation
|
|
+ vpclmulqdq zmm14,zmm13,zmm25,0x0
|
|
+ vpslldq zmm16,zmm10,0x1
|
|
+ vpxord zmm16,zmm16,zmm14 ; zmm16 replaces zmm10 at the end of the iteration
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[64+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[80+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[96+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[112+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[128+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[144+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[160+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[176+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[192+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx]
|
|
+ vaesdec zmm1,zmm1,zmm0
|
|
+ vaesdec zmm2,zmm2,zmm0
|
|
+
|
|
+
|
|
+ vbroadcasti32x4 zmm0,ZMMWORD[224+rcx] ; last round key
|
|
+ vaesdeclast zmm1,zmm1,zmm0
|
|
+ vaesdeclast zmm2,zmm2,zmm0
|
|
+
|
|
+ vpxorq zmm1,zmm1,zmm9 ; post-whitening with the same vectors used before round 0
|
|
+ vpxorq zmm2,zmm2,zmm10
|
|
+
|
|
+ ; rotate in the vectors computed above for the next 8 blocks
|
|
+ vmovdqa32 zmm9,zmm15
|
|
+ vmovdqa32 zmm10,zmm16
|
|
+ vmovdqu8 ZMMWORD[rsi],zmm1 ; write 2 x 64 bytes of output
|
|
+ vmovdqu8 ZMMWORD[64+rsi],zmm2
|
|
+ add rsi,0x80
|
|
+ sub rdx,0x80
|
|
+ cmp rdx,0x80
|
|
+ jge NEAR $L$_main_loop_run_8_EmbgEptodyewbFa ; NOTE(review): signed compare on a byte count; matches the unsigned form only while rdx is below 2^63
|
|
+ jmp NEAR $L$_do_n_blocks_EmbgEptodyewbFa ; fewer than 0x80 bytes left: tail code (label defined outside this section)
|
|
+
|
|
+$L$_steal_cipher_EmbgEptodyewbFa: ; handles a trailing partial block of rdx bytes; in: xmm8 = last full decrypted block, xmm0 = whitening value for the rebuilt block, rdi/rsi just past the full blocks
|
|
+ ; callers in this file reach here with rdx already reduced to (length and 0xf), non-zero
|
|
+ vmovdqa xmm2,xmm8 ; keep an unshuffled copy of xmm8 for the blend below
|
|
+
|
|
+ ; emit the partial tail: shuffle xmm8 by the table entry at offset rdx (table defined elsewhere)
|
|
+ lea rax,[vpshufb_shf_table]
|
|
+ vmovdqu xmm10,XMMWORD[rdx*1+rax]
|
|
+ vpshufb xmm8,xmm8,xmm10
|
|
+
|
|
+
|
|
+ vmovdqu xmm3,XMMWORD[((-16))+rdx*1+rdi] ; 16 input bytes ending at rdi+rdx, i.e. including the rdx trailing bytes
|
|
+ vmovdqu XMMWORD[(-16)+rdx*1+rsi],xmm8 ; 16 output bytes ending at rsi+rdx; the part below rsi is overwritten later by the store at $L$_done
|
|
+
|
|
+ ; second shuffle control: table entry at offset 16-rdx, XORed with mask1 (both defined elsewhere)
|
|
+ lea rax,[vpshufb_shf_table]
|
|
+ add rax,16
|
|
+ sub rax,rdx
|
|
+ vmovdqu xmm10,XMMWORD[rax]
|
|
+ vpxor xmm10,xmm10,XMMWORD[mask1]
|
|
+ vpshufb xmm3,xmm3,xmm10 ; realign the loaded input bytes
|
|
+
|
|
+ vpblendvb xmm3,xmm3,xmm2,xmm10 ; per byte: take xmm2 where the top bit of the xmm10 byte is set, else keep xmm3
|
|
+
|
|
+
|
|
+ vpxor xmm8,xmm3,xmm0 ; pre-whitening of the rebuilt 16-byte block
|
|
+
|
|
+ ; one-block decrypt, round keys taken straight from memory at rcx
|
|
+ vpxor xmm8,xmm8,XMMWORD[rcx]
|
|
+ vaesdec xmm8,xmm8,XMMWORD[16+rcx]
|
|
+ vaesdec xmm8,xmm8,XMMWORD[32+rcx]
|
|
+ vaesdec xmm8,xmm8,XMMWORD[48+rcx]
|
|
+ vaesdec xmm8,xmm8,XMMWORD[64+rcx]
|
|
+ vaesdec xmm8,xmm8,XMMWORD[80+rcx]
|
|
+ vaesdec xmm8,xmm8,XMMWORD[96+rcx]
|
|
+ vaesdec xmm8,xmm8,XMMWORD[112+rcx]
|
|
+ vaesdec xmm8,xmm8,XMMWORD[128+rcx]
|
|
+ vaesdec xmm8,xmm8,XMMWORD[144+rcx]
|
|
+ vaesdec xmm8,xmm8,XMMWORD[160+rcx]
|
|
+ vaesdec xmm8,xmm8,XMMWORD[176+rcx]
|
|
+ vaesdec xmm8,xmm8,XMMWORD[192+rcx]
|
|
+ vaesdec xmm8,xmm8,XMMWORD[208+rcx]
|
|
+ vaesdeclast xmm8,xmm8,XMMWORD[224+rcx]
|
|
+
|
|
+ vpxor xmm8,xmm8,xmm0 ; post-whitening; execution falls through to $L$_done, which stores xmm8
|
|
+
|
|
+$L$_done_EmbgEptodyewbFa: ; common exit for paths that still hold their last output block in xmm8
|
|
+
|
|
+ vmovdqu XMMWORD[(-16)+rsi],xmm8 ; rsi was already advanced past this block by the caller path
|
|
+$L$_ret_EmbgEptodyewbFa: ; epilogue: restore saved registers, zero the stack slots they were read from, tear down the frame
|
|
+ mov rbx,QWORD[288+rsp] ; restore rbx from its save slot
|
|
+ xor r8,r8 ; r8 = 0, used to overwrite the GPR save slots
|
|
+ mov QWORD[288+rsp],r8
|
|
+
|
|
+ vpxorq zmm0,zmm0,zmm0 ; zmm0 = 0, used to overwrite the XMM save area
|
|
+ mov rdi,QWORD[((288 + 8))+rsp] ; restore rdi / rsi from the frame and clear their slots
|
|
+ mov QWORD[((288 + 8))+rsp],r8
|
|
+ mov rsi,QWORD[((288 + 16))+rsp]
|
|
+ mov QWORD[((288 + 16))+rsp],r8
|
|
+
|
|
+ vmovdqa xmm6,XMMWORD[((128 + 0))+rsp] ; restore xmm6..xmm15 (callee-saved in the Microsoft x64 convention) in groups
|
|
+ vmovdqa xmm7,XMMWORD[((128 + 16))+rsp]
|
|
+ vmovdqa xmm8,XMMWORD[((128 + 32))+rsp]
|
|
+ vmovdqa xmm9,XMMWORD[((128 + 48))+rsp]
|
|
+
|
|
+
|
|
+ vmovdqa64 ZMMWORD[128+rsp],zmm0 ; zero the 64 bytes just read (128..191)
|
|
+
|
|
+ vmovdqa xmm10,XMMWORD[((128 + 64))+rsp]
|
|
+ vmovdqa xmm11,XMMWORD[((128 + 80))+rsp]
|
|
+ vmovdqa xmm12,XMMWORD[((128 + 96))+rsp]
|
|
+ vmovdqa xmm13,XMMWORD[((128 + 112))+rsp]
|
|
+
|
|
+
|
|
+ vmovdqa64 ZMMWORD[(128 + 64)+rsp],zmm0 ; zero bytes 192..255
|
|
+
|
|
+ vmovdqa xmm14,XMMWORD[((128 + 128))+rsp]
|
|
+ vmovdqa xmm15,XMMWORD[((128 + 144))+rsp]
|
|
+
|
|
+
|
|
+ ; NOTE(review): only 128+rsp..311+rsp is cleared in this epilogue; the scratch the short paths write at 16+rsp..111+rsp is not -- confirm intended
|
|
+ vmovdqa YMMWORD[(128 + 128)+rsp],ymm0 ; zero the last 32 bytes of the XMM save area (256..287)
|
|
+ mov rsp,rbp ; drop the frame
|
|
+ pop rbp
|
|
+ vzeroupper ; clear upper YMM/ZMM state before returning to the caller
|
|
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
|
+ mov rsi,QWORD[16+rsp]
|
|
+ DB 0F3h,0C3h ;repret
|
|
+
|
|
+$L$_less_than_128_bytes_EmbgEptodyewbFa: ; short-input dispatch on the number of whole 16-byte blocks in rdx (assumes rdx is below 0x80, as the label name suggests)
|
|
+ cmp rdx,0x10
|
|
+ jb NEAR $L$_ret_EmbgEptodyewbFa ; fewer than 16 bytes: return without writing any output
|
|
+
|
|
+ mov r8,rdx
|
|
+ and r8,0x70 ; r8 = 16 * (number of whole blocks); the low 4 bits (partial tail) are ignored here
|
|
+ cmp r8,0x60
|
|
+ je NEAR $L$_num_blocks_is_6_EmbgEptodyewbFa
|
|
+ cmp r8,0x50
|
|
+ je NEAR $L$_num_blocks_is_5_EmbgEptodyewbFa
|
|
+ cmp r8,0x40
|
|
+ je NEAR $L$_num_blocks_is_4_EmbgEptodyewbFa
|
|
+ cmp r8,0x30
|
|
+ je NEAR $L$_num_blocks_is_3_EmbgEptodyewbFa
|
|
+ cmp r8,0x20
|
|
+ je NEAR $L$_num_blocks_is_2_EmbgEptodyewbFa
|
|
+ cmp r8,0x10
|
|
+ je NEAR $L$_num_blocks_is_1_EmbgEptodyewbFa ; no match (r8 = 0x70) falls through into the 7-block path that follows
|
|
+
|
|
+$L$_num_blocks_is_7_EmbgEptodyewbFa: ; 7 whole blocks: load them into xmm1..xmm7 and derive their whitening values xmm9..xmm15 from the 16 bytes at [rsp]
|
|
+ vmovdqa xmm9,XMMWORD[rsp] ; value for block 0, taken as is
|
|
+ mov rax,QWORD[rsp] ; rbx:rax = the same 128-bit value in GPRs (rax = low qword)
|
|
+ mov rbx,QWORD[8+rsp]
|
|
+ vmovdqu xmm1,XMMWORD[rdi]
|
|
+ xor r11,r11 ; step: r11 = 0 is the value folded in when no bit is carried out
|
|
+ shl rax,1 ; shift rbx:rax left by one bit: low half first, CF = bit shifted out
|
|
+ adc rbx,rbx ; high half takes that bit; CF = bit 127 shifted out
|
|
+ cmovc r11,r10 ; on carry-out select r10 (set outside this section; presumably the GF(2^128) reduction constant -- confirm)
|
|
+ xor rax,r11 ; fold it into the low qword
|
|
+ mov QWORD[16+rsp],rax ; spill to stack scratch so it can be reloaded as a vector
|
|
+ mov QWORD[((16 + 8))+rsp],rbx
|
|
+ vmovdqa xmm10,XMMWORD[16+rsp] ; value for block 1
|
|
+ vmovdqu xmm2,XMMWORD[16+rdi]
|
|
+ xor r11,r11 ; same step repeated for block 2
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[32+rsp],rax
|
|
+ mov QWORD[((32 + 8))+rsp],rbx
|
|
+ vmovdqa xmm11,XMMWORD[32+rsp]
|
|
+ vmovdqu xmm3,XMMWORD[32+rdi]
|
|
+ xor r11,r11 ; block 3
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[48+rsp],rax
|
|
+ mov QWORD[((48 + 8))+rsp],rbx
|
|
+ vmovdqa xmm12,XMMWORD[48+rsp]
|
|
+ vmovdqu xmm4,XMMWORD[48+rdi]
|
|
+ xor r11,r11 ; block 4
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[64+rsp],rax
|
|
+ mov QWORD[((64 + 8))+rsp],rbx
|
|
+ vmovdqa xmm13,XMMWORD[64+rsp]
|
|
+ vmovdqu xmm5,XMMWORD[64+rdi]
|
|
+ xor r11,r11 ; block 5
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[80+rsp],rax
|
|
+ mov QWORD[((80 + 8))+rsp],rbx
|
|
+ vmovdqa xmm14,XMMWORD[80+rsp]
|
|
+ vmovdqu xmm6,XMMWORD[80+rdi]
|
|
+ xor r11,r11 ; block 6
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[96+rsp],rax
|
|
+ mov QWORD[((96 + 8))+rsp],rbx
|
|
+ vmovdqa xmm15,XMMWORD[96+rsp]
|
|
+ vmovdqu xmm7,XMMWORD[96+rdi]
|
|
+ add rdi,0x70 ; input pointer now just past the 7 whole blocks
|
|
+ and rdx,0xf ; rdx = size of the partial tail (0..15)
|
|
+ je NEAR $L$_done_7_EmbgEptodyewbFa ; no tail: plain 7-block path; otherwise fall through to the stealing variant
|
|
+
|
|
+$L$_steal_cipher_7_EmbgEptodyewbFa: ; 7 whole blocks plus a partial tail: block 6 is whitened with one extra step value, and the value it would normally use is handed to $L$_steal_cipher
|
|
+ xor r11,r11 ; one more shift-and-fold step on rbx:rax (same step as used for blocks 1..6)
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[16+rsp],rax ; reuses the scratch slot at 16+rsp; its earlier content is already held in xmm10
|
|
+ mov QWORD[24+rsp],rbx
|
|
+ vmovdqa64 xmm16,xmm15 ; keep the value originally derived for block 6
|
|
+ vmovdqa xmm15,XMMWORD[16+rsp] ; block 6 now uses the extra value
|
|
+ vpxor xmm1,xmm1,xmm9 ; pre-whitening of all 7 blocks
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vpxor xmm5,xmm5,xmm13
|
|
+ vpxor xmm6,xmm6,xmm14
|
|
+ vpxor xmm7,xmm7,xmm15
|
|
+ vmovdqu xmm0,XMMWORD[rcx] ; round key 0
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vpxor xmm3,xmm3,xmm0
|
|
+ vpxor xmm4,xmm4,xmm0
|
|
+ vpxor xmm5,xmm5,xmm0
|
|
+ vpxor xmm6,xmm6,xmm0
|
|
+ vpxor xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx] ; rounds 1..13: keys at 16..208+rcx, applied to all 7 blocks
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[176+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[192+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[208+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[224+rcx] ; last round key
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vaesdeclast xmm3,xmm3,xmm0
|
|
+ vaesdeclast xmm4,xmm4,xmm0
|
|
+ vaesdeclast xmm5,xmm5,xmm0
|
|
+ vaesdeclast xmm6,xmm6,xmm0
|
|
+ vaesdeclast xmm7,xmm7,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9 ; post-whitening
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vpxor xmm5,xmm5,xmm13
|
|
+ vpxor xmm6,xmm6,xmm14
|
|
+ vpxor xmm7,xmm7,xmm15
|
|
+ vmovdqu XMMWORD[rsi],xmm1 ; store blocks 0..5 only; block 6 (xmm7) is passed on in xmm8
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ vmovdqu XMMWORD[32+rsi],xmm3
|
|
+ vmovdqu XMMWORD[48+rsi],xmm4
|
|
+ vmovdqu XMMWORD[64+rsi],xmm5
|
|
+ vmovdqu XMMWORD[80+rsi],xmm6
|
|
+ add rsi,0x70 ; output pointer just past all 7 block positions
|
|
+ vmovdqa64 xmm0,xmm16 ; xmm0 = whitening value saved above, as $L$_steal_cipher expects
|
|
+ vmovdqa xmm8,xmm7 ; xmm8 = last full decrypted block
|
|
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+$L$_done_7_EmbgEptodyewbFa: ; 7 whole blocks, no partial tail: decrypt xmm1..xmm7 with whitening values xmm9..xmm15
|
|
+ vpxor xmm1,xmm1,xmm9 ; pre-whitening
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vpxor xmm5,xmm5,xmm13
|
|
+ vpxor xmm6,xmm6,xmm14
|
|
+ vpxor xmm7,xmm7,xmm15
|
|
+ vmovdqu xmm0,XMMWORD[rcx] ; round key 0
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vpxor xmm3,xmm3,xmm0
|
|
+ vpxor xmm4,xmm4,xmm0
|
|
+ vpxor xmm5,xmm5,xmm0
|
|
+ vpxor xmm6,xmm6,xmm0
|
|
+ vpxor xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx] ; rounds 1..13: keys at 16..208+rcx, applied to all 7 blocks
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[176+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[192+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[208+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vaesdec xmm7,xmm7,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[224+rcx] ; last round key
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vaesdeclast xmm3,xmm3,xmm0
|
|
+ vaesdeclast xmm4,xmm4,xmm0
|
|
+ vaesdeclast xmm5,xmm5,xmm0
|
|
+ vaesdeclast xmm6,xmm6,xmm0
|
|
+ vaesdeclast xmm7,xmm7,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9 ; post-whitening
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vpxor xmm5,xmm5,xmm13
|
|
+ vpxor xmm6,xmm6,xmm14
|
|
+ vpxor xmm7,xmm7,xmm15
|
|
+ vmovdqu XMMWORD[rsi],xmm1 ; store blocks 0..5 here; block 6 is stored by $L$_done from xmm8
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ vmovdqu XMMWORD[32+rsi],xmm3
|
|
+ vmovdqu XMMWORD[48+rsi],xmm4
|
|
+ vmovdqu XMMWORD[64+rsi],xmm5
|
|
+ vmovdqu XMMWORD[80+rsi],xmm6
|
|
+ add rsi,0x70 ; $L$_done writes xmm8 at rsi-16, i.e. the 7th block position
|
|
+ vmovdqa xmm8,xmm7
|
|
+ jmp NEAR $L$_done_EmbgEptodyewbFa
|
|
+
|
|
+$L$_num_blocks_is_6_EmbgEptodyewbFa: ; 6 whole blocks: load them into xmm1..xmm6 and derive their whitening values xmm9..xmm14 from the 16 bytes at [rsp]
|
|
+ vmovdqa xmm9,XMMWORD[rsp] ; value for block 0, taken as is
|
|
+ mov rax,QWORD[rsp] ; rbx:rax = the same 128-bit value in GPRs (rax = low qword)
|
|
+ mov rbx,QWORD[8+rsp]
|
|
+ vmovdqu xmm1,XMMWORD[rdi]
|
|
+ xor r11,r11 ; step: r11 = 0 is the value folded in when no bit is carried out
|
|
+ shl rax,1 ; shift rbx:rax left by one bit: low half first, CF = bit shifted out
|
|
+ adc rbx,rbx ; high half takes that bit; CF = bit 127 shifted out
|
|
+ cmovc r11,r10 ; on carry-out select r10 (set outside this section; presumably the GF(2^128) reduction constant -- confirm)
|
|
+ xor rax,r11 ; fold it into the low qword
|
|
+ mov QWORD[16+rsp],rax ; spill to stack scratch so it can be reloaded as a vector
|
|
+ mov QWORD[((16 + 8))+rsp],rbx
|
|
+ vmovdqa xmm10,XMMWORD[16+rsp] ; value for block 1
|
|
+ vmovdqu xmm2,XMMWORD[16+rdi]
|
|
+ xor r11,r11 ; same step repeated for block 2
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[32+rsp],rax
|
|
+ mov QWORD[((32 + 8))+rsp],rbx
|
|
+ vmovdqa xmm11,XMMWORD[32+rsp]
|
|
+ vmovdqu xmm3,XMMWORD[32+rdi]
|
|
+ xor r11,r11 ; block 3
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[48+rsp],rax
|
|
+ mov QWORD[((48 + 8))+rsp],rbx
|
|
+ vmovdqa xmm12,XMMWORD[48+rsp]
|
|
+ vmovdqu xmm4,XMMWORD[48+rdi]
|
|
+ xor r11,r11 ; block 4
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[64+rsp],rax
|
|
+ mov QWORD[((64 + 8))+rsp],rbx
|
|
+ vmovdqa xmm13,XMMWORD[64+rsp]
|
|
+ vmovdqu xmm5,XMMWORD[64+rdi]
|
|
+ xor r11,r11 ; block 5
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[80+rsp],rax
|
|
+ mov QWORD[((80 + 8))+rsp],rbx
|
|
+ vmovdqa xmm14,XMMWORD[80+rsp]
|
|
+ vmovdqu xmm6,XMMWORD[80+rdi]
|
|
+ add rdi,0x60 ; input pointer now just past the 6 whole blocks
|
|
+ and rdx,0xf ; rdx = size of the partial tail (0..15)
|
|
+ je NEAR $L$_done_6_EmbgEptodyewbFa ; no tail: plain 6-block path; otherwise fall through to the stealing variant
|
|
+
|
|
+$L$_steal_cipher_6_EmbgEptodyewbFa: ; 6 whole blocks plus a partial tail: block 5 is whitened with one extra step value, and the value it would normally use is handed to $L$_steal_cipher
|
|
+ xor r11,r11 ; one more shift-and-fold step on rbx:rax (same step as used for blocks 1..5)
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[16+rsp],rax ; reuses the scratch slot at 16+rsp; its earlier content is already held in xmm10
|
|
+ mov QWORD[24+rsp],rbx
|
|
+ vmovdqa64 xmm15,xmm14 ; keep the value originally derived for block 5
|
|
+ vmovdqa xmm14,XMMWORD[16+rsp] ; block 5 now uses the extra value
|
|
+ vpxor xmm1,xmm1,xmm9 ; pre-whitening of all 6 blocks
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vpxor xmm5,xmm5,xmm13
|
|
+ vpxor xmm6,xmm6,xmm14
|
|
+ vmovdqu xmm0,XMMWORD[rcx] ; round key 0
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vpxor xmm3,xmm3,xmm0
|
|
+ vpxor xmm4,xmm4,xmm0
|
|
+ vpxor xmm5,xmm5,xmm0
|
|
+ vpxor xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx] ; rounds 1..13: keys at 16..208+rcx, applied to all 6 blocks
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[176+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[192+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[208+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[224+rcx] ; last round key
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vaesdeclast xmm3,xmm3,xmm0
|
|
+ vaesdeclast xmm4,xmm4,xmm0
|
|
+ vaesdeclast xmm5,xmm5,xmm0
|
|
+ vaesdeclast xmm6,xmm6,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9 ; post-whitening
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vpxor xmm5,xmm5,xmm13
|
|
+ vpxor xmm6,xmm6,xmm14
|
|
+ vmovdqu XMMWORD[rsi],xmm1 ; store blocks 0..4 only; block 5 (xmm6) is passed on in xmm8
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ vmovdqu XMMWORD[32+rsi],xmm3
|
|
+ vmovdqu XMMWORD[48+rsi],xmm4
|
|
+ vmovdqu XMMWORD[64+rsi],xmm5
|
|
+ add rsi,0x60 ; output pointer just past all 6 block positions
|
|
+ vmovdqa xmm0,xmm15 ; xmm0 = whitening value saved above, as $L$_steal_cipher expects
|
|
+ vmovdqa xmm8,xmm6 ; xmm8 = last full decrypted block
|
|
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+$L$_done_6_EmbgEptodyewbFa: ; 6 whole blocks, no partial tail: decrypt xmm1..xmm6 with whitening values xmm9..xmm14
|
|
+ vpxor xmm1,xmm1,xmm9 ; pre-whitening
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vpxor xmm5,xmm5,xmm13
|
|
+ vpxor xmm6,xmm6,xmm14
|
|
+ vmovdqu xmm0,XMMWORD[rcx] ; round key 0
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vpxor xmm3,xmm3,xmm0
|
|
+ vpxor xmm4,xmm4,xmm0
|
|
+ vpxor xmm5,xmm5,xmm0
|
|
+ vpxor xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx] ; rounds 1..13: keys at 16..208+rcx, applied to all 6 blocks
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[176+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[192+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[208+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vaesdec xmm6,xmm6,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[224+rcx] ; last round key
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vaesdeclast xmm3,xmm3,xmm0
|
|
+ vaesdeclast xmm4,xmm4,xmm0
|
|
+ vaesdeclast xmm5,xmm5,xmm0
|
|
+ vaesdeclast xmm6,xmm6,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9 ; post-whitening
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vpxor xmm5,xmm5,xmm13
|
|
+ vpxor xmm6,xmm6,xmm14
|
|
+ vmovdqu XMMWORD[rsi],xmm1 ; store blocks 0..4 here; block 5 is stored by $L$_done from xmm8
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ vmovdqu XMMWORD[32+rsi],xmm3
|
|
+ vmovdqu XMMWORD[48+rsi],xmm4
|
|
+ vmovdqu XMMWORD[64+rsi],xmm5
|
|
+ add rsi,0x60 ; $L$_done writes xmm8 at rsi-16, i.e. the 6th block position
|
|
+ vmovdqa xmm8,xmm6
|
|
+ jmp NEAR $L$_done_EmbgEptodyewbFa
|
|
+
|
|
+$L$_num_blocks_is_5_EmbgEptodyewbFa:
|
|
+ vmovdqa xmm9,XMMWORD[rsp]
|
|
+ mov rax,QWORD[rsp]
|
|
+ mov rbx,QWORD[8+rsp]
|
|
+ vmovdqu xmm1,XMMWORD[rdi]
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[16+rsp],rax
|
|
+ mov QWORD[((16 + 8))+rsp],rbx
|
|
+ vmovdqa xmm10,XMMWORD[16+rsp]
|
|
+ vmovdqu xmm2,XMMWORD[16+rdi]
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[32+rsp],rax
|
|
+ mov QWORD[((32 + 8))+rsp],rbx
|
|
+ vmovdqa xmm11,XMMWORD[32+rsp]
|
|
+ vmovdqu xmm3,XMMWORD[32+rdi]
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[48+rsp],rax
|
|
+ mov QWORD[((48 + 8))+rsp],rbx
|
|
+ vmovdqa xmm12,XMMWORD[48+rsp]
|
|
+ vmovdqu xmm4,XMMWORD[48+rdi]
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[64+rsp],rax
|
|
+ mov QWORD[((64 + 8))+rsp],rbx
|
|
+ vmovdqa xmm13,XMMWORD[64+rsp]
|
|
+ vmovdqu xmm5,XMMWORD[64+rdi]
|
|
+ add rdi,0x50
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_done_5_EmbgEptodyewbFa
|
|
+
|
|
+$L$_steal_cipher_5_EmbgEptodyewbFa:
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[16+rsp],rax
|
|
+ mov QWORD[24+rsp],rbx
|
|
+ vmovdqa64 xmm14,xmm13
|
|
+ vmovdqa xmm13,XMMWORD[16+rsp]
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vpxor xmm5,xmm5,xmm13
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vpxor xmm3,xmm3,xmm0
|
|
+ vpxor xmm4,xmm4,xmm0
|
|
+ vpxor xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[176+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[192+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[208+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[224+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vaesdeclast xmm3,xmm3,xmm0
|
|
+ vaesdeclast xmm4,xmm4,xmm0
|
|
+ vaesdeclast xmm5,xmm5,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vpxor xmm5,xmm5,xmm13
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ vmovdqu XMMWORD[32+rsi],xmm3
|
|
+ vmovdqu XMMWORD[48+rsi],xmm4
|
|
+ add rsi,0x50
|
|
+ vmovdqa xmm0,xmm14
|
|
+ vmovdqa xmm8,xmm5
|
|
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+$L$_done_5_EmbgEptodyewbFa:
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vpxor xmm5,xmm5,xmm13
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vpxor xmm3,xmm3,xmm0
|
|
+ vpxor xmm4,xmm4,xmm0
|
|
+ vpxor xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[176+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[192+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[208+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vaesdec xmm5,xmm5,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[224+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vaesdeclast xmm3,xmm3,xmm0
|
|
+ vaesdeclast xmm4,xmm4,xmm0
|
|
+ vaesdeclast xmm5,xmm5,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vpxor xmm5,xmm5,xmm13
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ vmovdqu XMMWORD[32+rsi],xmm3
|
|
+ vmovdqu XMMWORD[48+rsi],xmm4
|
|
+ add rsi,0x50
|
|
+ vmovdqa xmm8,xmm5
|
|
+ jmp NEAR $L$_done_EmbgEptodyewbFa
|
|
+
|
|
+$L$_num_blocks_is_4_EmbgEptodyewbFa:
|
|
+ vmovdqa xmm9,XMMWORD[rsp]
|
|
+ mov rax,QWORD[rsp]
|
|
+ mov rbx,QWORD[8+rsp]
|
|
+ vmovdqu xmm1,XMMWORD[rdi]
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[16+rsp],rax
|
|
+ mov QWORD[((16 + 8))+rsp],rbx
|
|
+ vmovdqa xmm10,XMMWORD[16+rsp]
|
|
+ vmovdqu xmm2,XMMWORD[16+rdi]
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[32+rsp],rax
|
|
+ mov QWORD[((32 + 8))+rsp],rbx
|
|
+ vmovdqa xmm11,XMMWORD[32+rsp]
|
|
+ vmovdqu xmm3,XMMWORD[32+rdi]
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[48+rsp],rax
|
|
+ mov QWORD[((48 + 8))+rsp],rbx
|
|
+ vmovdqa xmm12,XMMWORD[48+rsp]
|
|
+ vmovdqu xmm4,XMMWORD[48+rdi]
|
|
+ add rdi,0x40
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_done_4_EmbgEptodyewbFa
|
|
+
|
|
+$L$_steal_cipher_4_EmbgEptodyewbFa:
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[16+rsp],rax
|
|
+ mov QWORD[24+rsp],rbx
|
|
+ vmovdqa64 xmm13,xmm12
|
|
+ vmovdqa xmm12,XMMWORD[16+rsp]
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vpxor xmm3,xmm3,xmm0
|
|
+ vpxor xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[176+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[192+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[208+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[224+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vaesdeclast xmm3,xmm3,xmm0
|
|
+ vaesdeclast xmm4,xmm4,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ vmovdqu XMMWORD[32+rsi],xmm3
|
|
+ add rsi,0x40
|
|
+ vmovdqa xmm0,xmm13
|
|
+ vmovdqa xmm8,xmm4
|
|
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+$L$_done_4_EmbgEptodyewbFa:
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vpxor xmm3,xmm3,xmm0
|
|
+ vpxor xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[176+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[192+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[208+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vaesdec xmm4,xmm4,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[224+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vaesdeclast xmm3,xmm3,xmm0
|
|
+ vaesdeclast xmm4,xmm4,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vpxor xmm4,xmm4,xmm12
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ vmovdqu XMMWORD[32+rsi],xmm3
|
|
+ add rsi,0x40
|
|
+ vmovdqa xmm8,xmm4
|
|
+ jmp NEAR $L$_done_EmbgEptodyewbFa
|
|
+
|
|
+$L$_num_blocks_is_3_EmbgEptodyewbFa:
|
|
+ vmovdqa xmm9,XMMWORD[rsp]
|
|
+ mov rax,QWORD[rsp]
|
|
+ mov rbx,QWORD[8+rsp]
|
|
+ vmovdqu xmm1,XMMWORD[rdi]
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[16+rsp],rax
|
|
+ mov QWORD[((16 + 8))+rsp],rbx
|
|
+ vmovdqa xmm10,XMMWORD[16+rsp]
|
|
+ vmovdqu xmm2,XMMWORD[16+rdi]
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[32+rsp],rax
|
|
+ mov QWORD[((32 + 8))+rsp],rbx
|
|
+ vmovdqa xmm11,XMMWORD[32+rsp]
|
|
+ vmovdqu xmm3,XMMWORD[32+rdi]
|
|
+ add rdi,0x30
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_done_3_EmbgEptodyewbFa
|
|
+
|
|
+$L$_steal_cipher_3_EmbgEptodyewbFa:
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[16+rsp],rax
|
|
+ mov QWORD[24+rsp],rbx
|
|
+ vmovdqa64 xmm12,xmm11
|
|
+ vmovdqa xmm11,XMMWORD[16+rsp]
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vpxor xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[176+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[192+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[208+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[224+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vaesdeclast xmm3,xmm3,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ add rsi,0x30
|
|
+ vmovdqa xmm0,xmm12
|
|
+ vmovdqa xmm8,xmm3
|
|
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+$L$_done_3_EmbgEptodyewbFa:
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vpxor xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[176+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[192+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[208+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vaesdec xmm3,xmm3,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[224+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vaesdeclast xmm3,xmm3,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vpxor xmm3,xmm3,xmm11
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ vmovdqu XMMWORD[16+rsi],xmm2
|
|
+ add rsi,0x30
|
|
+ vmovdqa xmm8,xmm3
|
|
+ jmp NEAR $L$_done_EmbgEptodyewbFa
|
|
+
|
|
+$L$_num_blocks_is_2_EmbgEptodyewbFa:
|
|
+ vmovdqa xmm9,XMMWORD[rsp]
|
|
+ mov rax,QWORD[rsp]
|
|
+ mov rbx,QWORD[8+rsp]
|
|
+ vmovdqu xmm1,XMMWORD[rdi]
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[16+rsp],rax
|
|
+ mov QWORD[((16 + 8))+rsp],rbx
|
|
+ vmovdqa xmm10,XMMWORD[16+rsp]
|
|
+ vmovdqu xmm2,XMMWORD[16+rdi]
|
|
+ add rdi,0x20
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_done_2_EmbgEptodyewbFa
|
|
+
|
|
+$L$_steal_cipher_2_EmbgEptodyewbFa:
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[16+rsp],rax
|
|
+ mov QWORD[24+rsp],rbx
|
|
+ vmovdqa64 xmm11,xmm10
|
|
+ vmovdqa xmm10,XMMWORD[16+rsp]
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[176+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[192+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[208+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[224+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ add rsi,0x20
|
|
+ vmovdqa xmm0,xmm11
|
|
+ vmovdqa xmm8,xmm2
|
|
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+$L$_done_2_EmbgEptodyewbFa:
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vpxor xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[176+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[192+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[208+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vaesdec xmm2,xmm2,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[224+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vaesdeclast xmm2,xmm2,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vpxor xmm2,xmm2,xmm10
|
|
+ vmovdqu XMMWORD[rsi],xmm1
|
|
+ add rsi,0x20
|
|
+ vmovdqa xmm8,xmm2
|
|
+ jmp NEAR $L$_done_EmbgEptodyewbFa
|
|
+
|
|
+$L$_num_blocks_is_1_EmbgEptodyewbFa:
|
|
+ vmovdqa xmm9,XMMWORD[rsp]
|
|
+ mov rax,QWORD[rsp]
|
|
+ mov rbx,QWORD[8+rsp]
|
|
+ vmovdqu xmm1,XMMWORD[rdi]
|
|
+ add rdi,0x10
|
|
+ and rdx,0xf
|
|
+ je NEAR $L$_done_1_EmbgEptodyewbFa
|
|
+
|
|
+$L$_steal_cipher_1_EmbgEptodyewbFa:
|
|
+ xor r11,r11
|
|
+ shl rax,1
|
|
+ adc rbx,rbx
|
|
+ cmovc r11,r10
|
|
+ xor rax,r11
|
|
+ mov QWORD[16+rsp],rax
|
|
+ mov QWORD[24+rsp],rbx
|
|
+ vmovdqa64 xmm10,xmm9
|
|
+ vmovdqa xmm9,XMMWORD[16+rsp]
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[176+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[192+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[208+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[224+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ add rsi,0x10
|
|
+ vmovdqa xmm0,xmm10
|
|
+ vmovdqa xmm8,xmm1
|
|
+ jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa
|
|
+
|
|
+$L$_done_1_EmbgEptodyewbFa:
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ vmovdqu xmm0,XMMWORD[rcx]
|
|
+ vpxor xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[16+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[32+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[48+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[64+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[80+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[96+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[112+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[128+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[144+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[160+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[176+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[192+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[208+rcx]
|
|
+ vaesdec xmm1,xmm1,xmm0
|
|
+ vmovdqu xmm0,XMMWORD[224+rcx]
|
|
+ vaesdeclast xmm1,xmm1,xmm0
|
|
+ vpxor xmm1,xmm1,xmm9
|
|
+ add rsi,0x10
|
|
+ vmovdqa xmm8,xmm1
|
|
+ jmp NEAR $L$_done_EmbgEptodyewbFa
|
|
+
|
|
+section .rdata rdata align=8 ; read-only constant tables follow
|
|
+ALIGN 16 ; first table starts on a 16-byte boundary
|
|
+
|
|
+vpshufb_shf_table: ; 32 bytes; name suggests a vpshufb control table - usage is outside this chunk, confirm at the reference site
|
|
+ DQ 0x8786858483828100,0x8f8e8d8c8b8a8988 ; bytes in memory order: 0x00,0x81,0x82,...,0x8f
|
|
+ DQ 0x0706050403020100,0x000e0d0c0b0a0908 ; bytes in memory order: 0x00,0x01,...,0x0e,0x00
|
|
+
|
|
+mask1: ; 16 bytes, every byte 0x80
|
|
+ DQ 0x8080808080808080,0x8080808080808080
|
|
+
|
|
+const_dq3210: ; 8 qwords (64 bytes); each value appears twice in a row
|
|
+ DQ 0,0,1,1,2,2,3,3
|
|
+const_dq5678: ; 8 qwords; note the stored order is descending (8,7,6,5)
|
|
+ DQ 8,8,7,7,6,6,5,5
|
|
+const_dq7654: ; 8 qwords; note the stored order is ascending (4,5,6,7)
|
|
+ DQ 4,4,5,5,6,6,7,7
|
|
+const_dq1234: ; 8 qwords; note the stored order is descending (4,3,2,1)
|
|
+ DQ 4,4,3,3,2,2,1,1
|
|
+
|
|
+shufb_15_7: ; 16 bytes: 15 at offset 0, 7 at offset 8, 0xff elsewhere; presumably a byte-shuffle control - confirm at the reference site
|
|
+DB 15,0xff,0xff,0xff,0xff,0xff,0xff,0xff,7,0xff,0xff
|
|
+DB 0xff,0xff,0xff,0xff,0xff
|
|
+
|
|
+section .text code align=64
|
|
+
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-2k-avxifma.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-2k-avxifma.nasm
|
|
new file mode 100644
|
|
index 0000000000..6c1ccfb458
|
|
--- /dev/null
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-2k-avxifma.nasm
|
|
@@ -0,0 +1,1276 @@
|
|
+default rel
|
|
+%define XMMWORD
|
|
+%define YMMWORD
|
|
+%define ZMMWORD
|
|
+section .text code align=64
|
|
+
|
|
+EXTERN OPENSSL_ia32cap_P
|
|
+global ossl_rsaz_avxifma_eligible ; exported entry point; result is returned in eax (nonzero = capability bit set)
|
|
+
|
|
+ALIGN 32
|
|
+ossl_rsaz_avxifma_eligible: ; reads no argument registers; writes eax, ecx and flags only
|
|
+ mov ecx,DWORD[((OPENSSL_ia32cap_P+20))] ; ecx = dword at byte offset 20 of OPENSSL_ia32cap_P
|
|
+ xor eax,eax ; default result: 0
|
|
+ and ecx,8388608 ; keep only bit 23 (8388608 = 1<<23); presumably the AVX-IFMA flag per the function name - confirm against the OPENSSL_ia32cap_P layout
|
|
+ cmp ecx,8388608 ; ZF=1 only when bit 23 was set
|
|
+ cmove eax,ecx ; eax = 8388608 when the bit is set, otherwise stays 0
|
|
+ DB 0F3h,0C3h ;repret (bytes F3 C3 encode "rep ret")
|
|
+
|
|
+section .text code align=64
|
|
+
|
|
+
|
|
+global ossl_rsaz_amm52x20_x1_avxifma256
|
|
+
|
|
+ALIGN 32
|
|
+ossl_rsaz_amm52x20_x1_avxifma256:
|
|
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
|
|
+ mov QWORD[16+rsp],rsi
|
|
+ mov rax,rsp
|
|
+$L$SEH_begin_ossl_rsaz_amm52x20_x1_avxifma256:
|
|
+ mov rdi,rcx
|
|
+ mov rsi,rdx
|
|
+ mov rdx,r8
|
|
+ mov rcx,r9
|
|
+ mov r8,QWORD[40+rsp]
|
|
+
|
|
+
|
|
+
|
|
+DB 243,15,30,250
|
|
+ push rbx
|
|
+
|
|
+ push rbp
|
|
+
|
|
+ push r12
|
|
+
|
|
+ push r13
|
|
+
|
|
+ push r14
|
|
+
|
|
+ push r15
|
|
+
|
|
+$L$ossl_rsaz_amm52x20_x1_avxifma256_body:
|
|
+
|
|
+
|
|
+ vpxor ymm0,ymm0,ymm0
|
|
+ vmovapd ymm3,ymm0
|
|
+ vmovapd ymm5,ymm0
|
|
+ vmovapd ymm6,ymm0
|
|
+ vmovapd ymm7,ymm0
|
|
+ vmovapd ymm8,ymm0
|
|
+
|
|
+ xor r9d,r9d
|
|
+
|
|
+ mov r11,rdx
|
|
+ mov rax,0xfffffffffffff
|
|
+
|
|
+
|
|
+ mov ebx,5
|
|
+
|
|
+ALIGN 32
|
|
+$L$loop5:
|
|
+ mov r13,QWORD[r11]
|
|
+
|
|
+ vpbroadcastq ymm1,QWORD[r11]
|
|
+ mov rdx,QWORD[rsi]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ mov r10,r12
|
|
+ adc r10,0
|
|
+
|
|
+ mov r13,r8
|
|
+ imul r13,r9
|
|
+ and r13,rax
|
|
+
|
|
+ vmovq xmm2,r13
|
|
+ vpbroadcastq ymm2,xmm2
|
|
+ mov rdx,QWORD[rcx]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ adc r10,r12
|
|
+
|
|
+ shr r9,52
|
|
+ sal r10,12
|
|
+ or r9,r10
|
|
+
|
|
+ lea rsp,[((-168))+rsp]
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm8
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm8
|
|
+
|
|
+
|
|
+ vmovdqu YMMWORD[rsp],ymm3
|
|
+ vmovdqu YMMWORD[32+rsp],ymm5
|
|
+ vmovdqu YMMWORD[64+rsp],ymm6
|
|
+ vmovdqu YMMWORD[96+rsp],ymm7
|
|
+ vmovdqu YMMWORD[128+rsp],ymm8
|
|
+ mov QWORD[160+rsp],0
|
|
+
|
|
+ vmovdqu ymm3,YMMWORD[8+rsp]
|
|
+ vmovdqu ymm5,YMMWORD[40+rsp]
|
|
+ vmovdqu ymm6,YMMWORD[72+rsp]
|
|
+ vmovdqu ymm7,YMMWORD[104+rsp]
|
|
+ vmovdqu ymm8,YMMWORD[136+rsp]
|
|
+
|
|
+ add r9,QWORD[8+rsp]
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm8
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm8
|
|
+ lea rsp,[168+rsp]
|
|
+ mov r13,QWORD[8+r11]
|
|
+
|
|
+ vpbroadcastq ymm1,QWORD[8+r11]
|
|
+ mov rdx,QWORD[rsi]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ mov r10,r12
|
|
+ adc r10,0
|
|
+
|
|
+ mov r13,r8
|
|
+ imul r13,r9
|
|
+ and r13,rax
|
|
+
|
|
+ vmovq xmm2,r13
|
|
+ vpbroadcastq ymm2,xmm2
|
|
+ mov rdx,QWORD[rcx]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ adc r10,r12
|
|
+
|
|
+ shr r9,52
|
|
+ sal r10,12
|
|
+ or r9,r10
|
|
+
|
|
+ lea rsp,[((-168))+rsp]
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm8
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm8
|
|
+
|
|
+
|
|
+ vmovdqu YMMWORD[rsp],ymm3
|
|
+ vmovdqu YMMWORD[32+rsp],ymm5
|
|
+ vmovdqu YMMWORD[64+rsp],ymm6
|
|
+ vmovdqu YMMWORD[96+rsp],ymm7
|
|
+ vmovdqu YMMWORD[128+rsp],ymm8
|
|
+ mov QWORD[160+rsp],0
|
|
+
|
|
+ vmovdqu ymm3,YMMWORD[8+rsp]
|
|
+ vmovdqu ymm5,YMMWORD[40+rsp]
|
|
+ vmovdqu ymm6,YMMWORD[72+rsp]
|
|
+ vmovdqu ymm7,YMMWORD[104+rsp]
|
|
+ vmovdqu ymm8,YMMWORD[136+rsp]
|
|
+
|
|
+ add r9,QWORD[8+rsp]
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm8
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm8
|
|
+ lea rsp,[168+rsp]
|
|
+ mov r13,QWORD[16+r11]
|
|
+
|
|
+ vpbroadcastq ymm1,QWORD[16+r11]
|
|
+ mov rdx,QWORD[rsi]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ mov r10,r12
|
|
+ adc r10,0
|
|
+
|
|
+ mov r13,r8
|
|
+ imul r13,r9
|
|
+ and r13,rax
|
|
+
|
|
+ vmovq xmm2,r13
|
|
+ vpbroadcastq ymm2,xmm2
|
|
+ mov rdx,QWORD[rcx]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ adc r10,r12
|
|
+
|
|
+ shr r9,52
|
|
+ sal r10,12
|
|
+ or r9,r10
|
|
+
|
|
+ lea rsp,[((-168))+rsp]
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm8
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm8
|
|
+
|
|
+
|
|
+ vmovdqu YMMWORD[rsp],ymm3
|
|
+ vmovdqu YMMWORD[32+rsp],ymm5
|
|
+ vmovdqu YMMWORD[64+rsp],ymm6
|
|
+ vmovdqu YMMWORD[96+rsp],ymm7
|
|
+ vmovdqu YMMWORD[128+rsp],ymm8
|
|
+ mov QWORD[160+rsp],0
|
|
+
|
|
+ vmovdqu ymm3,YMMWORD[8+rsp]
|
|
+ vmovdqu ymm5,YMMWORD[40+rsp]
|
|
+ vmovdqu ymm6,YMMWORD[72+rsp]
|
|
+ vmovdqu ymm7,YMMWORD[104+rsp]
|
|
+ vmovdqu ymm8,YMMWORD[136+rsp]
|
|
+
|
|
+ add r9,QWORD[8+rsp]
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm8
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm8
|
|
+ lea rsp,[168+rsp]
|
|
+ mov r13,QWORD[24+r11]
|
|
+
|
|
+ vpbroadcastq ymm1,QWORD[24+r11]
|
|
+ mov rdx,QWORD[rsi]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ mov r10,r12
|
|
+ adc r10,0
|
|
+
|
|
+ mov r13,r8
|
|
+ imul r13,r9
|
|
+ and r13,rax
|
|
+
|
|
+ vmovq xmm2,r13
|
|
+ vpbroadcastq ymm2,xmm2
|
|
+ mov rdx,QWORD[rcx]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ adc r10,r12
|
|
+
|
|
+ shr r9,52
|
|
+ sal r10,12
|
|
+ or r9,r10
|
|
+
|
|
+ lea rsp,[((-168))+rsp]
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm8
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm8
|
|
+
|
|
+
|
|
+ vmovdqu YMMWORD[rsp],ymm3
|
|
+ vmovdqu YMMWORD[32+rsp],ymm5
|
|
+ vmovdqu YMMWORD[64+rsp],ymm6
|
|
+ vmovdqu YMMWORD[96+rsp],ymm7
|
|
+ vmovdqu YMMWORD[128+rsp],ymm8
|
|
+ mov QWORD[160+rsp],0
|
|
+
|
|
+ vmovdqu ymm3,YMMWORD[8+rsp]
|
|
+ vmovdqu ymm5,YMMWORD[40+rsp]
|
|
+ vmovdqu ymm6,YMMWORD[72+rsp]
|
|
+ vmovdqu ymm7,YMMWORD[104+rsp]
|
|
+ vmovdqu ymm8,YMMWORD[136+rsp]
|
|
+
|
|
+ add r9,QWORD[8+rsp]
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm8
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm8
|
|
+ lea rsp,[168+rsp]
|
|
+ lea r11,[32+r11]
|
|
+ dec ebx
|
|
+ jne NEAR $L$loop5
|
|
+
|
|
+ vmovq xmm0,r9
|
|
+ vpbroadcastq ymm0,xmm0
|
|
+ vpblendd ymm3,ymm3,ymm0,3
|
|
+
|
|
+
|
|
+
|
|
+ vpsrlq ymm0,ymm3,52
|
|
+ vpsrlq ymm1,ymm5,52
|
|
+ vpsrlq ymm2,ymm6,52
|
|
+ vpsrlq ymm13,ymm7,52
|
|
+ vpsrlq ymm14,ymm8,52
|
|
+
|
|
+
|
|
+ vpermq ymm14,ymm14,144
|
|
+ vpermq ymm15,ymm13,3
|
|
+ vblendpd ymm14,ymm14,ymm15,1
|
|
+
|
|
+ vpermq ymm13,ymm13,144
|
|
+ vpermq ymm15,ymm2,3
|
|
+ vblendpd ymm13,ymm13,ymm15,1
|
|
+
|
|
+ vpermq ymm2,ymm2,144
|
|
+ vpermq ymm15,ymm1,3
|
|
+ vblendpd ymm2,ymm2,ymm15,1
|
|
+
|
|
+ vpermq ymm1,ymm1,144
|
|
+ vpermq ymm15,ymm0,3
|
|
+ vblendpd ymm1,ymm1,ymm15,1
|
|
+
|
|
+ vpermq ymm0,ymm0,144
|
|
+ vpand ymm0,ymm0,YMMWORD[$L$high64x3]
|
|
+
|
|
+
|
|
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
|
|
+
|
|
+
|
|
+ vpaddq ymm3,ymm3,ymm0
|
|
+ vpaddq ymm5,ymm5,ymm1
|
|
+ vpaddq ymm6,ymm6,ymm2
|
|
+ vpaddq ymm7,ymm7,ymm13
|
|
+ vpaddq ymm8,ymm8,ymm14
|
|
+
|
|
+
|
|
+
|
|
+ vpcmpgtq ymm0,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpcmpgtq ymm1,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpcmpgtq ymm2,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vpcmpgtq ymm13,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpcmpgtq ymm14,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r14d,ymm0
|
|
+ vmovmskpd r13d,ymm1
|
|
+ vmovmskpd r12d,ymm2
|
|
+ vmovmskpd r11d,ymm13
|
|
+ vmovmskpd r10d,ymm14
|
|
+
|
|
+
|
|
+ vpcmpeqq ymm0,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpcmpeqq ymm1,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpcmpeqq ymm2,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vpcmpeqq ymm13,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpcmpeqq ymm14,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r9d,ymm0
|
|
+ vmovmskpd r8d,ymm1
|
|
+ vmovmskpd ebx,ymm2
|
|
+ vmovmskpd ecx,ymm13
|
|
+ vmovmskpd edx,ymm14
|
|
+
|
|
+
|
|
+
|
|
+ shl r13b,4
|
|
+ or r14b,r13b
|
|
+ shl r11b,4
|
|
+ or r12b,r11b
|
|
+
|
|
+ add r14b,r14b
|
|
+ adc r12b,r12b
|
|
+ adc r10b,r10b
|
|
+
|
|
+ shl r8b,4
|
|
+ or r9b,r8b
|
|
+ shl cl,4
|
|
+ or bl,cl
|
|
+
|
|
+ add r14b,r9b
|
|
+ adc r12b,bl
|
|
+ adc r10b,dl
|
|
+
|
|
+ xor r14b,r9b
|
|
+ xor r12b,bl
|
|
+ xor r10b,dl
|
|
+
|
|
+ lea rdx,[$L$kmasklut]
|
|
+
|
|
+ mov r13b,r14b
|
|
+ and r14,0xf
|
|
+ vpsubq ymm0,ymm3,YMMWORD[$L$mask52x4]
|
|
+ shl r14,5
|
|
+ vmovapd r14,(%rdx), %ymm2
|
|
+ vblendvpd ymm3,ymm3,ymm0,ymm2
|
|
+
|
|
+ shr r13b,4
|
|
+ and r13,0xf
|
|
+ vpsubq ymm0,ymm5,YMMWORD[$L$mask52x4]
|
|
+ shl r13,5
|
|
+ vmovapd r13,(%rdx), %ymm2
|
|
+ vblendvpd ymm5,ymm5,ymm0,ymm2
|
|
+
|
|
+ mov r11b,r12b
|
|
+ and r12,0xf
|
|
+ vpsubq ymm0,ymm6,YMMWORD[$L$mask52x4]
|
|
+ shl r12,5
|
|
+ vmovapd r12,(%rdx), %ymm2
|
|
+ vblendvpd ymm6,ymm6,ymm0,ymm2
|
|
+
|
|
+ shr r11b,4
|
|
+ and r11,0xf
|
|
+ vpsubq ymm0,ymm7,YMMWORD[$L$mask52x4]
|
|
+ shl r11,5
|
|
+ vmovapd r11,(%rdx), %ymm2
|
|
+ vblendvpd ymm7,ymm7,ymm0,ymm2
|
|
+
|
|
+ and r10,0xf
|
|
+ vpsubq ymm0,ymm8,YMMWORD[$L$mask52x4]
|
|
+ shl r10,5
|
|
+ vmovapd r10,(%rdx), %ymm2
|
|
+ vblendvpd ymm8,ymm8,ymm0,ymm2
|
|
+
|
|
+
|
|
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
|
|
+
|
|
+ vmovdqu YMMWORD[rdi],ymm3
|
|
+ vmovdqu YMMWORD[32+rdi],ymm5
|
|
+ vmovdqu YMMWORD[64+rdi],ymm6
|
|
+ vmovdqu YMMWORD[96+rdi],ymm7
|
|
+ vmovdqu YMMWORD[128+rdi],ymm8
|
|
+
|
|
+ vzeroupper
|
|
+ mov r15,QWORD[rsp]
|
|
+
|
|
+ mov r14,QWORD[8+rsp]
|
|
+
|
|
+ mov r13,QWORD[16+rsp]
|
|
+
|
|
+ mov r12,QWORD[24+rsp]
|
|
+
|
|
+ mov rbp,QWORD[32+rsp]
|
|
+
|
|
+ mov rbx,QWORD[40+rsp]
|
|
+
|
|
+ lea rsp,[48+rsp]
|
|
+
|
|
+$L$ossl_rsaz_amm52x20_x1_avxifma256_epilogue:
|
|
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
|
+ mov rsi,QWORD[16+rsp]
|
|
+ DB 0F3h,0C3h ;repret
|
|
+
|
|
+$L$SEH_end_ossl_rsaz_amm52x20_x1_avxifma256:
|
|
+section .rdata rdata align=32
|
|
+ALIGN 32
|
|
+$L$mask52x4: ; four 64-bit lanes, each holding 2^52-1 (low 52 bits set)
|
|
+ DQ 0xfffffffffffff
|
|
+ DQ 0xfffffffffffff
|
|
+ DQ 0xfffffffffffff
|
|
+ DQ 0xfffffffffffff
|
|
+$L$high64x3: ; AND mask that clears lane 0 and keeps lanes 1-3
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+$L$kmasklut: ; 16 entries x 32 bytes: entry i has 64-bit lane j all-ones iff bit j of i is set
|
|
+; entry 0 (0000b)
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+; entry 1 (0001b)
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+; entry 2 (0010b)
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+; entry 3 (0011b)
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+; entry 4 (0100b)
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+; entry 5 (0101b)
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+; entry 6 (0110b)
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+; entry 7 (0111b)
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+; entry 8 (1000b)
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+; entry 9 (1001b)
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+; entry 10 (1010b)
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+; entry 11 (1011b)
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+; entry 12 (1100b)
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+; entry 13 (1101b)
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+; entry 14 (1110b)
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+; entry 15 (1111b)
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+section .text code align=64
|
|
+
|
|
+
|
|
+global ossl_rsaz_amm52x20_x2_avxifma256
|
|
+
|
|
+ALIGN 32
|
|
+ossl_rsaz_amm52x20_x2_avxifma256: ; 5 args in Win64 order; writes 2 x 160 bytes of 52-bit digits to [arg1]
|
|
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
|
|
+ mov QWORD[16+rsp],rsi
|
|
+ mov rax,rsp
|
|
+$L$SEH_begin_ossl_rsaz_amm52x20_x2_avxifma256:
|
|
+ mov rdi,rcx ; rdi = arg1: result pointer
|
|
+ mov rsi,rdx ; rsi = arg2: first operand (2 x 160 bytes)
|
|
+ mov rdx,r8 ; rdx = arg3: second operand, read one 64-bit word per iteration
|
|
+ mov rcx,r9 ; rcx = arg4: third operand (presumably the moduli - confirm with caller)
|
|
+ mov r8,QWORD[40+rsp] ; r8 = arg5: pointer to two 64-bit constants, [r8] and [8+r8]
|
|
+
|
|
+
|
|
+
|
|
+DB 243,15,30,250 ; endbr64
|
|
+ push rbx
|
|
+
|
|
+ push rbp
|
|
+
|
|
+ push r12
|
|
+
|
|
+ push r13
|
|
+
|
|
+ push r14
|
|
+
|
|
+ push r15
|
|
+
|
|
+$L$ossl_rsaz_amm52x20_x2_avxifma256_body: ; NOTE(review): ymm6-ymm15 are used below but xmm6-xmm15 are not saved here (the 3k file does save them) - confirm vs. Win64 callee-saved rules
|
|
+
|
|
+; zero the ten vector accumulators: ymm3,ymm5-ymm8 (first half) and ymm4,ymm9-ymm12 (second half)
|
|
+ vpxor ymm0,ymm0,ymm0
|
|
+ vmovapd ymm3,ymm0
|
|
+ vmovapd ymm5,ymm0
|
|
+ vmovapd ymm6,ymm0
|
|
+ vmovapd ymm7,ymm0
|
|
+ vmovapd ymm8,ymm0
|
|
+ vmovapd ymm4,ymm0
|
|
+ vmovapd ymm9,ymm0
|
|
+ vmovapd ymm10,ymm0
|
|
+ vmovapd ymm11,ymm0
|
|
+ vmovapd ymm12,ymm0
|
|
+
|
|
+ xor r9d,r9d ; r9 = scalar lowest-digit accumulator, first half
|
|
+ xor r15d,r15d ; r15 = scalar lowest-digit accumulator, second half
|
|
+
|
|
+ mov r11,rdx ; keep arg3 in r11; rdx is the implicit mulx source
|
|
+ mov rax,0xfffffffffffff ; rax = 2^52-1 digit mask
|
|
+
|
|
+ mov ebx,20 ; 20 iterations, one word of [r11] (and of [160+r11]) each
|
|
+
|
|
+ALIGN 32
|
|
+$L$loop20:
|
|
+ mov r13,QWORD[r11] ; r13 = current word of the first-half second operand
|
|
+
|
|
+ vpbroadcastq ymm1,QWORD[r11] ; ymm1 = same word in all four lanes
|
|
+ mov rdx,QWORD[rsi]
|
|
+ mulx r12,r13,r13 ; r12:r13 = [rsi] * r13
|
|
+ add r9,r13
|
|
+ mov r10,r12
|
|
+ adc r10,0 ; r10:r9 = running 128-bit low-digit sum
|
|
+
|
|
+ mov r13,QWORD[r8]
|
|
+ imul r13,r9
|
|
+ and r13,rax ; r13 = ([r8] * r9) mod 2^52
|
|
+
|
|
+ vmovq xmm2,r13
|
|
+ vpbroadcastq ymm2,xmm2 ; ymm2 = that multiplier in all four lanes
|
|
+ mov rdx,QWORD[rcx]
|
|
+ mulx r12,r13,r13 ; r12:r13 = [rcx] * r13
|
|
+ add r9,r13
|
|
+ adc r10,r12
|
|
+
|
|
+ shr r9,52
|
|
+ sal r10,12
|
|
+ or r9,r10 ; r9 = (r10:r9) >> 52
|
|
+
|
|
+ lea rsp,[((-168))+rsp] ; open a 168-byte scratch area below rsp
|
|
+ {vex} vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
|
|
+ {vex} vpmadd52luq ymm5,ymm1,YMMWORD[32+rsi]
|
|
+ {vex} vpmadd52luq ymm6,ymm1,YMMWORD[64+rsi]
|
|
+ {vex} vpmadd52luq ymm7,ymm1,YMMWORD[96+rsi]
|
|
+ {vex} vpmadd52luq ymm8,ymm1,YMMWORD[128+rsi]
|
|
+
|
|
+ {vex} vpmadd52luq ymm3,ymm2,YMMWORD[rcx]
|
|
+ {vex} vpmadd52luq ymm5,ymm2,YMMWORD[32+rcx]
|
|
+ {vex} vpmadd52luq ymm6,ymm2,YMMWORD[64+rcx]
|
|
+ {vex} vpmadd52luq ymm7,ymm2,YMMWORD[96+rcx]
|
|
+ {vex} vpmadd52luq ymm8,ymm2,YMMWORD[128+rcx]
|
|
+
|
|
+; spill the accumulators and reload them 8 bytes higher: moves every digit down one 64-bit lane
|
|
+ vmovdqu YMMWORD[rsp],ymm3
|
|
+ vmovdqu YMMWORD[32+rsp],ymm5
|
|
+ vmovdqu YMMWORD[64+rsp],ymm6
|
|
+ vmovdqu YMMWORD[96+rsp],ymm7
|
|
+ vmovdqu YMMWORD[128+rsp],ymm8
|
|
+ mov QWORD[160+rsp],0 ; zero shifted into the top lane
|
|
+
|
|
+ vmovdqu ymm3,YMMWORD[8+rsp]
|
|
+ vmovdqu ymm5,YMMWORD[40+rsp]
|
|
+ vmovdqu ymm6,YMMWORD[72+rsp]
|
|
+ vmovdqu ymm7,YMMWORD[104+rsp]
|
|
+ vmovdqu ymm8,YMMWORD[136+rsp]
|
|
+
|
|
+ add r9,QWORD[8+rsp] ; r9 += digit that is now in lane 0
|
|
+
|
|
+ {vex} vpmadd52huq ymm3,ymm1,YMMWORD[rsi]
|
|
+ {vex} vpmadd52huq ymm5,ymm1,YMMWORD[32+rsi]
|
|
+ {vex} vpmadd52huq ymm6,ymm1,YMMWORD[64+rsi]
|
|
+ {vex} vpmadd52huq ymm7,ymm1,YMMWORD[96+rsi]
|
|
+ {vex} vpmadd52huq ymm8,ymm1,YMMWORD[128+rsi]
|
|
+
|
|
+ {vex} vpmadd52huq ymm3,ymm2,YMMWORD[rcx]
|
|
+ {vex} vpmadd52huq ymm5,ymm2,YMMWORD[32+rcx]
|
|
+ {vex} vpmadd52huq ymm6,ymm2,YMMWORD[64+rcx]
|
|
+ {vex} vpmadd52huq ymm7,ymm2,YMMWORD[96+rcx]
|
|
+ {vex} vpmadd52huq ymm8,ymm2,YMMWORD[128+rcx]
|
|
+ lea rsp,[168+rsp] ; release the scratch area
|
|
+ mov r13,QWORD[160+r11] ; second half: operands at byte offset 160, constant at [8+r8]
|
|
+
|
|
+ vpbroadcastq ymm1,QWORD[160+r11]
|
|
+ mov rdx,QWORD[160+rsi]
|
|
+ mulx r12,r13,r13 ; r12:r13 = [160+rsi] * r13
|
|
+ add r15,r13
|
|
+ mov r10,r12
|
|
+ adc r10,0
|
|
+
|
|
+ mov r13,QWORD[8+r8]
|
|
+ imul r13,r15
|
|
+ and r13,rax ; r13 = ([8+r8] * r15) mod 2^52
|
|
+
|
|
+ vmovq xmm2,r13
|
|
+ vpbroadcastq ymm2,xmm2
|
|
+ mov rdx,QWORD[160+rcx]
|
|
+ mulx r12,r13,r13 ; r12:r13 = [160+rcx] * r13
|
|
+ add r15,r13
|
|
+ adc r10,r12
|
|
+
|
|
+ shr r15,52
|
|
+ sal r10,12
|
|
+ or r15,r10 ; r15 = (r10:r15) >> 52
|
|
+
|
|
+ lea rsp,[((-168))+rsp] ; open the scratch area again
|
|
+ {vex} vpmadd52luq ymm4,ymm1,YMMWORD[160+rsi]
|
|
+ {vex} vpmadd52luq ymm9,ymm1,YMMWORD[192+rsi]
|
|
+ {vex} vpmadd52luq ymm10,ymm1,YMMWORD[224+rsi]
|
|
+ {vex} vpmadd52luq ymm11,ymm1,YMMWORD[256+rsi]
|
|
+ {vex} vpmadd52luq ymm12,ymm1,YMMWORD[288+rsi]
|
|
+
|
|
+ {vex} vpmadd52luq ymm4,ymm2,YMMWORD[160+rcx]
|
|
+ {vex} vpmadd52luq ymm9,ymm2,YMMWORD[192+rcx]
|
|
+ {vex} vpmadd52luq ymm10,ymm2,YMMWORD[224+rcx]
|
|
+ {vex} vpmadd52luq ymm11,ymm2,YMMWORD[256+rcx]
|
|
+ {vex} vpmadd52luq ymm12,ymm2,YMMWORD[288+rcx]
|
|
+
|
|
+; same one-lane shift through the scratch area for the second-half accumulators
|
|
+ vmovdqu YMMWORD[rsp],ymm4
|
|
+ vmovdqu YMMWORD[32+rsp],ymm9
|
|
+ vmovdqu YMMWORD[64+rsp],ymm10
|
|
+ vmovdqu YMMWORD[96+rsp],ymm11
|
|
+ vmovdqu YMMWORD[128+rsp],ymm12
|
|
+ mov QWORD[160+rsp],0
|
|
+
|
|
+ vmovdqu ymm4,YMMWORD[8+rsp]
|
|
+ vmovdqu ymm9,YMMWORD[40+rsp]
|
|
+ vmovdqu ymm10,YMMWORD[72+rsp]
|
|
+ vmovdqu ymm11,YMMWORD[104+rsp]
|
|
+ vmovdqu ymm12,YMMWORD[136+rsp]
|
|
+
|
|
+ add r15,QWORD[8+rsp] ; r15 += digit that is now in lane 0
|
|
+
|
|
+ {vex} vpmadd52huq ymm4,ymm1,YMMWORD[160+rsi]
|
|
+ {vex} vpmadd52huq ymm9,ymm1,YMMWORD[192+rsi]
|
|
+ {vex} vpmadd52huq ymm10,ymm1,YMMWORD[224+rsi]
|
|
+ {vex} vpmadd52huq ymm11,ymm1,YMMWORD[256+rsi]
|
|
+ {vex} vpmadd52huq ymm12,ymm1,YMMWORD[288+rsi]
|
|
+
|
|
+ {vex} vpmadd52huq ymm4,ymm2,YMMWORD[160+rcx]
|
|
+ {vex} vpmadd52huq ymm9,ymm2,YMMWORD[192+rcx]
|
|
+ {vex} vpmadd52huq ymm10,ymm2,YMMWORD[224+rcx]
|
|
+ {vex} vpmadd52huq ymm11,ymm2,YMMWORD[256+rcx]
|
|
+ {vex} vpmadd52huq ymm12,ymm2,YMMWORD[288+rcx]
|
|
+ lea rsp,[168+rsp] ; release the scratch area
|
|
+ lea r11,[8+r11] ; advance to the next word of the second operand
|
|
+ dec ebx
|
|
+ jne NEAR $L$loop20
|
|
+; normalise the first half: place the scalar accumulator r9 into lane 0 of ymm3
|
|
+ vmovq xmm0,r9
|
|
+ vpbroadcastq ymm0,xmm0
|
|
+ vpblendd ymm3,ymm3,ymm0,3 ; imm 3 = low two dwords, i.e. 64-bit lane 0
|
|
+
|
|
+
|
|
+; carries = the bits above bit 51 of every lane
|
|
+ vpsrlq ymm0,ymm3,52
|
|
+ vpsrlq ymm1,ymm5,52
|
|
+ vpsrlq ymm2,ymm6,52
|
|
+ vpsrlq ymm13,ymm7,52
|
|
+ vpsrlq ymm14,ymm8,52
|
|
+
|
|
+; move every carry up one lane; lane 0 receives lane 3 of the previous register
|
|
+ vpermq ymm14,ymm14,144
|
|
+ vpermq ymm15,ymm13,3
|
|
+ vblendpd ymm14,ymm14,ymm15,1
|
|
+
|
|
+ vpermq ymm13,ymm13,144
|
|
+ vpermq ymm15,ymm2,3
|
|
+ vblendpd ymm13,ymm13,ymm15,1
|
|
+
|
|
+ vpermq ymm2,ymm2,144
|
|
+ vpermq ymm15,ymm1,3
|
|
+ vblendpd ymm2,ymm2,ymm15,1
|
|
+
|
|
+ vpermq ymm1,ymm1,144
|
|
+ vpermq ymm15,ymm0,3
|
|
+ vblendpd ymm1,ymm1,ymm15,1
|
|
+
|
|
+ vpermq ymm0,ymm0,144
|
|
+ vpand ymm0,ymm0,YMMWORD[$L$high64x3] ; lowest lane has no incoming carry
|
|
+
|
|
+; keep the low 52 bits of every digit
|
|
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
|
|
+
|
|
+; add the shifted carries
|
|
+ vpaddq ymm3,ymm3,ymm0
|
|
+ vpaddq ymm5,ymm5,ymm1
|
|
+ vpaddq ymm6,ymm6,ymm2
|
|
+ vpaddq ymm7,ymm7,ymm13
|
|
+ vpaddq ymm8,ymm8,ymm14
|
|
+
|
|
+
|
|
+; per-lane bit masks: digit > 2^52-1 (a carry is generated)
|
|
+ vpcmpgtq ymm0,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpcmpgtq ymm1,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpcmpgtq ymm2,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vpcmpgtq ymm13,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpcmpgtq ymm14,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r14d,ymm0
|
|
+ vmovmskpd r13d,ymm1
|
|
+ vmovmskpd r12d,ymm2
|
|
+ vmovmskpd r11d,ymm13
|
|
+ vmovmskpd r10d,ymm14
|
|
+
|
|
+; per-lane bit masks: digit == 2^52-1 (an incoming carry would ripple through)
|
|
+ vpcmpeqq ymm0,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpcmpeqq ymm1,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpcmpeqq ymm2,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vpcmpeqq ymm13,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpcmpeqq ymm14,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r9d,ymm0
|
|
+ vmovmskpd r8d,ymm1
|
|
+ vmovmskpd ebx,ymm2
|
|
+ vmovmskpd ecx,ymm13
|
|
+ vmovmskpd edx,ymm14
|
|
+
|
|
+
|
|
+; pack the 20 "greater" bits into r14b:r12b:r10b and shift them left by one digit
|
|
+ shl r13b,4
|
|
+ or r14b,r13b
|
|
+ shl r11b,4
|
|
+ or r12b,r11b
|
|
+
|
|
+ add r14b,r14b
|
|
+ adc r12b,r12b
|
|
+ adc r10b,r10b
|
|
+; pack the 20 "equal" bits into r9b:bl:dl
|
|
+ shl r8b,4
|
|
+ or r9b,r8b
|
|
+ shl cl,4
|
|
+ or bl,cl
|
|
+; multi-byte add then xor: leaves a 1 bit for every digit that must absorb a carry
|
|
+ add r14b,r9b
|
|
+ adc r12b,bl
|
|
+ adc r10b,dl
|
|
+
|
|
+ xor r14b,r9b
|
|
+ xor r12b,bl
|
|
+ xor r10b,dl
|
|
+
|
|
+ lea rdx,[$L$kmasklut]
|
|
+; per 4-bit nibble: load its 32-byte lane mask and take (digit - mask52) in the flagged lanes
|
|
+ mov r13b,r14b
|
|
+ and r14,0xf
|
|
+ vpsubq ymm0,ymm3,YMMWORD[$L$mask52x4]
|
|
+ shl r14,5 ; nibble * 32 = byte offset of the LUT entry
|
|
+ vmovapd ymm2,YMMWORD[r14*1+rdx]
|
|
+ vblendvpd ymm3,ymm3,ymm0,ymm2
|
|
+
|
|
+ shr r13b,4
|
|
+ and r13,0xf
|
|
+ vpsubq ymm0,ymm5,YMMWORD[$L$mask52x4]
|
|
+ shl r13,5
|
|
+ vmovapd ymm2,YMMWORD[r13*1+rdx]
|
|
+ vblendvpd ymm5,ymm5,ymm0,ymm2
|
|
+
|
|
+ mov r11b,r12b
|
|
+ and r12,0xf
|
|
+ vpsubq ymm0,ymm6,YMMWORD[$L$mask52x4]
|
|
+ shl r12,5
|
|
+ vmovapd ymm2,YMMWORD[r12*1+rdx]
|
|
+ vblendvpd ymm6,ymm6,ymm0,ymm2
|
|
+
|
|
+ shr r11b,4
|
|
+ and r11,0xf
|
|
+ vpsubq ymm0,ymm7,YMMWORD[$L$mask52x4]
|
|
+ shl r11,5
|
|
+ vmovapd ymm2,YMMWORD[r11*1+rdx]
|
|
+ vblendvpd ymm7,ymm7,ymm0,ymm2
|
|
+
|
|
+ and r10,0xf
|
|
+ vpsubq ymm0,ymm8,YMMWORD[$L$mask52x4]
|
|
+ shl r10,5
|
|
+ vmovapd ymm2,YMMWORD[r10*1+rdx]
|
|
+ vblendvpd ymm8,ymm8,ymm0,ymm2
|
|
+
|
|
+; final reduction of every digit to 52 bits
|
|
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
|
|
+; normalise the second half the same way (ymm4, ymm9-ymm12, scalar accumulator r15)
|
|
+ vmovq xmm0,r15
|
|
+ vpbroadcastq ymm0,xmm0
|
|
+ vpblendd ymm4,ymm4,ymm0,3
|
|
+
|
|
+
|
|
+
|
|
+ vpsrlq ymm0,ymm4,52
|
|
+ vpsrlq ymm1,ymm9,52
|
|
+ vpsrlq ymm2,ymm10,52
|
|
+ vpsrlq ymm13,ymm11,52
|
|
+ vpsrlq ymm14,ymm12,52
|
|
+
|
|
+
|
|
+ vpermq ymm14,ymm14,144
|
|
+ vpermq ymm15,ymm13,3
|
|
+ vblendpd ymm14,ymm14,ymm15,1
|
|
+
|
|
+ vpermq ymm13,ymm13,144
|
|
+ vpermq ymm15,ymm2,3
|
|
+ vblendpd ymm13,ymm13,ymm15,1
|
|
+
|
|
+ vpermq ymm2,ymm2,144
|
|
+ vpermq ymm15,ymm1,3
|
|
+ vblendpd ymm2,ymm2,ymm15,1
|
|
+
|
|
+ vpermq ymm1,ymm1,144
|
|
+ vpermq ymm15,ymm0,3
|
|
+ vblendpd ymm1,ymm1,ymm15,1
|
|
+
|
|
+ vpermq ymm0,ymm0,144
|
|
+ vpand ymm0,ymm0,YMMWORD[$L$high64x3]
|
|
+
|
|
+
|
|
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm11,ymm11,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm12,ymm12,YMMWORD[$L$mask52x4]
|
|
+
|
|
+
|
|
+ vpaddq ymm4,ymm4,ymm0
|
|
+ vpaddq ymm9,ymm9,ymm1
|
|
+ vpaddq ymm10,ymm10,ymm2
|
|
+ vpaddq ymm11,ymm11,ymm13
|
|
+ vpaddq ymm12,ymm12,ymm14
|
|
+
|
|
+
|
|
+
|
|
+ vpcmpgtq ymm0,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vpcmpgtq ymm1,ymm9,YMMWORD[$L$mask52x4]
|
|
+ vpcmpgtq ymm2,ymm10,YMMWORD[$L$mask52x4]
|
|
+ vpcmpgtq ymm13,ymm11,YMMWORD[$L$mask52x4]
|
|
+ vpcmpgtq ymm14,ymm12,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r14d,ymm0
|
|
+ vmovmskpd r13d,ymm1
|
|
+ vmovmskpd r12d,ymm2
|
|
+ vmovmskpd r11d,ymm13
|
|
+ vmovmskpd r10d,ymm14
|
|
+
|
|
+
|
|
+ vpcmpeqq ymm0,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vpcmpeqq ymm1,ymm9,YMMWORD[$L$mask52x4]
|
|
+ vpcmpeqq ymm2,ymm10,YMMWORD[$L$mask52x4]
|
|
+ vpcmpeqq ymm13,ymm11,YMMWORD[$L$mask52x4]
|
|
+ vpcmpeqq ymm14,ymm12,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r9d,ymm0
|
|
+ vmovmskpd r8d,ymm1
|
|
+ vmovmskpd ebx,ymm2
|
|
+ vmovmskpd ecx,ymm13
|
|
+ vmovmskpd edx,ymm14
|
|
+
|
|
+
|
|
+
|
|
+ shl r13b,4
|
|
+ or r14b,r13b
|
|
+ shl r11b,4
|
|
+ or r12b,r11b
|
|
+
|
|
+ add r14b,r14b
|
|
+ adc r12b,r12b
|
|
+ adc r10b,r10b
|
|
+
|
|
+ shl r8b,4
|
|
+ or r9b,r8b
|
|
+ shl cl,4
|
|
+ or bl,cl
|
|
+
|
|
+ add r14b,r9b
|
|
+ adc r12b,bl
|
|
+ adc r10b,dl
|
|
+
|
|
+ xor r14b,r9b
|
|
+ xor r12b,bl
|
|
+ xor r10b,dl
|
|
+
|
|
+ lea rdx,[$L$kmasklut]
|
|
+
|
|
+ mov r13b,r14b
|
|
+ and r14,0xf
|
|
+ vpsubq ymm0,ymm4,YMMWORD[$L$mask52x4]
|
|
+ shl r14,5
|
|
+ vmovapd ymm2,YMMWORD[r14*1+rdx]
|
|
+ vblendvpd ymm4,ymm4,ymm0,ymm2
|
|
+
|
|
+ shr r13b,4
|
|
+ and r13,0xf
|
|
+ vpsubq ymm0,ymm9,YMMWORD[$L$mask52x4]
|
|
+ shl r13,5
|
|
+ vmovapd ymm2,YMMWORD[r13*1+rdx]
|
|
+ vblendvpd ymm9,ymm9,ymm0,ymm2
|
|
+
|
|
+ mov r11b,r12b
|
|
+ and r12,0xf
|
|
+ vpsubq ymm0,ymm10,YMMWORD[$L$mask52x4]
|
|
+ shl r12,5
|
|
+ vmovapd ymm2,YMMWORD[r12*1+rdx]
|
|
+ vblendvpd ymm10,ymm10,ymm0,ymm2
|
|
+
|
|
+ shr r11b,4
|
|
+ and r11,0xf
|
|
+ vpsubq ymm0,ymm11,YMMWORD[$L$mask52x4]
|
|
+ shl r11,5
|
|
+ vmovapd ymm2,YMMWORD[r11*1+rdx]
|
|
+ vblendvpd ymm11,ymm11,ymm0,ymm2
|
|
+
|
|
+ and r10,0xf
|
|
+ vpsubq ymm0,ymm12,YMMWORD[$L$mask52x4]
|
|
+ shl r10,5
|
|
+ vmovapd ymm2,YMMWORD[r10*1+rdx]
|
|
+ vblendvpd ymm12,ymm12,ymm0,ymm2
|
|
+
|
|
+
|
|
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm11,ymm11,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm12,ymm12,YMMWORD[$L$mask52x4]
|
|
+; store the first half (bytes 0-159 of the result)
|
|
+ vmovdqu YMMWORD[rdi],ymm3
|
|
+ vmovdqu YMMWORD[32+rdi],ymm5
|
|
+ vmovdqu YMMWORD[64+rdi],ymm6
|
|
+ vmovdqu YMMWORD[96+rdi],ymm7
|
|
+ vmovdqu YMMWORD[128+rdi],ymm8
|
|
+; store the second half (bytes 160-319 of the result)
|
|
+ vmovdqu YMMWORD[160+rdi],ymm4
|
|
+ vmovdqu YMMWORD[192+rdi],ymm9
|
|
+ vmovdqu YMMWORD[224+rdi],ymm10
|
|
+ vmovdqu YMMWORD[256+rdi],ymm11
|
|
+ vmovdqu YMMWORD[288+rdi],ymm12
|
|
+
|
|
+ vzeroupper
|
|
+ mov r15,QWORD[rsp] ; reload the six pushed registers in reverse push order
|
|
+
|
|
+ mov r14,QWORD[8+rsp]
|
|
+
|
|
+ mov r13,QWORD[16+rsp]
|
|
+
|
|
+ mov r12,QWORD[24+rsp]
|
|
+
|
|
+ mov rbp,QWORD[32+rsp]
|
|
+
|
|
+ mov rbx,QWORD[40+rsp]
|
|
+
|
|
+ lea rsp,[48+rsp]
|
|
+
|
|
+$L$ossl_rsaz_amm52x20_x2_avxifma256_epilogue:
|
|
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
|
+ mov rsi,QWORD[16+rsp]
|
|
+ DB 0F3h,0C3h ;repret
|
|
+
|
|
+$L$SEH_end_ossl_rsaz_amm52x20_x2_avxifma256:
|
|
+section .text code align=64
|
|
+
|
|
+
|
|
+ALIGN 32
|
|
+global ossl_extract_multiplier_2x20_win5_avx
|
|
+
|
|
+ossl_extract_multiplier_2x20_win5_avx: ; copies two selected 160-byte halves out of a 32-entry table
|
|
+; register use (no argument remapping is done): rcx = output (320 bytes), rdx = table, r8 / r9 = wanted entry index for the first / second half
|
|
+DB 243,15,30,250 ; endbr64
|
|
+ vmovapd ymm14,YMMWORD[$L$ones] ; ymm14 = {1,1,1,1}
|
|
+ vmovq xmm10,r8
|
|
+ vpbroadcastq ymm12,xmm10 ; ymm12 = r8 in all four lanes
|
|
+ vmovq xmm10,r9
|
|
+ vpbroadcastq ymm13,xmm10 ; ymm13 = r9 in all four lanes
|
|
+ lea rax,[10240+rdx] ; rax = end of table (32 entries x 320 bytes)
|
|
+; NOTE(review): ymm6-ymm15 are written here with no save/restore - confirm against the Win64 callee-saved xmm6-xmm15 rule
|
|
+; clear the ten output registers ymm0-ymm9 and the running entry counter ymm11
|
|
+ vpxor xmm0,xmm0,xmm0
|
|
+ vmovapd ymm11,ymm0
|
|
+ vmovapd ymm1,ymm0
|
|
+ vmovapd ymm2,ymm0
|
|
+ vmovapd ymm3,ymm0
|
|
+ vmovapd ymm4,ymm0
|
|
+ vmovapd ymm5,ymm0
|
|
+ vmovapd ymm6,ymm0
|
|
+ vmovapd ymm7,ymm0
|
|
+ vmovapd ymm8,ymm0
|
|
+ vmovapd ymm9,ymm0
|
|
+
|
|
+ALIGN 32
|
|
+$L$loop: ; every entry is loaded on every call; selection is by blend mask only
|
|
+ vpcmpeqq ymm15,ymm12,ymm11 ; ymm15 = all-ones if counter == first index
|
|
+ vmovdqu ymm10,YMMWORD[rdx]
|
|
+ vblendvpd ymm0,ymm0,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[32+rdx]
|
|
+ vblendvpd ymm1,ymm1,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[64+rdx]
|
|
+ vblendvpd ymm2,ymm2,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[96+rdx]
|
|
+ vblendvpd ymm3,ymm3,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[128+rdx]
|
|
+ vblendvpd ymm4,ymm4,ymm10,ymm15
|
|
+ vpcmpeqq ymm15,ymm13,ymm11 ; ymm15 = all-ones if counter == second index
|
|
+ vmovdqu ymm10,YMMWORD[160+rdx]
|
|
+ vblendvpd ymm5,ymm5,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[192+rdx]
|
|
+ vblendvpd ymm6,ymm6,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[224+rdx]
|
|
+ vblendvpd ymm7,ymm7,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[256+rdx]
|
|
+ vblendvpd ymm8,ymm8,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[288+rdx]
|
|
+ vblendvpd ymm9,ymm9,ymm10,ymm15
|
|
+ vpaddq ymm11,ymm11,ymm14 ; counter += 1
|
|
+ add rdx,320 ; next table entry
|
|
+ cmp rax,rdx
|
|
+ jne NEAR $L$loop
|
|
+ vmovdqu YMMWORD[rcx],ymm0 ; write the 320-byte result
|
|
+ vmovdqu YMMWORD[32+rcx],ymm1
|
|
+ vmovdqu YMMWORD[64+rcx],ymm2
|
|
+ vmovdqu YMMWORD[96+rcx],ymm3
|
|
+ vmovdqu YMMWORD[128+rcx],ymm4
|
|
+ vmovdqu YMMWORD[160+rcx],ymm5
|
|
+ vmovdqu YMMWORD[192+rcx],ymm6
|
|
+ vmovdqu YMMWORD[224+rcx],ymm7
|
|
+ vmovdqu YMMWORD[256+rcx],ymm8
|
|
+ vmovdqu YMMWORD[288+rcx],ymm9
|
|
+ DB 0F3h,0C3h ;repret
|
|
+
|
|
+
|
|
+section .rdata rdata align=32
|
|
+ALIGN 32
|
|
+$L$ones: ; four 64-bit lanes of 1; loaded by ossl_extract_multiplier_2x20_win5_avx as its counter increment
|
|
+ DQ 1,1,1,1
|
|
+$L$zeros: ; four 64-bit lanes of 0
|
|
+ DQ 0,0,0,0
|
|
+EXTERN __imp_RtlVirtualUnwind
|
|
+
|
|
+ALIGN 16
|
|
+rsaz_def_handler: ; SEH language-specific handler named in the .xdata records of this file; r8 = context record, r9 = dispatcher context
|
|
+ push rsi
|
|
+ push rdi
|
|
+ push rbx
|
|
+ push rbp
|
|
+ push r12
|
|
+ push r13
|
|
+ push r14
|
|
+ push r15
|
|
+ pushfq
|
|
+ sub rsp,64 ; room for the stack arguments of the RtlVirtualUnwind call below
|
|
+; field names in the comments below follow the Win64 CONTEXT / DISPATCHER_CONTEXT layouts
|
|
+ mov rax,QWORD[120+r8] ; context->Rax (the prologue copies the entry rsp into rax)
|
|
+ mov rbx,QWORD[248+r8] ; context->Rip
|
|
+
|
|
+ mov rsi,QWORD[8+r9] ; disp->ImageBase
|
|
+ mov r11,QWORD[56+r9] ; disp->HandlerData: two RVAs (body label, epilogue label)
|
|
+
|
|
+ mov r10d,DWORD[r11]
|
|
+ lea r10,[r10*1+rsi] ; r10 = address of the body label
|
|
+ cmp rbx,r10
|
|
+ jb NEAR $L$common_seh_tail ; fault before the body label: rax still holds the entry rsp
|
|
+
|
|
+ mov rax,QWORD[152+r8] ; context->Rsp
|
|
+
|
|
+ mov r10d,DWORD[4+r11]
|
|
+ lea r10,[r10*1+rsi] ; r10 = address of the epilogue label
|
|
+ cmp rbx,r10
|
|
+ jae NEAR $L$common_seh_tail ; fault at/after the epilogue label: pushed registers already popped
|
|
+
|
|
+ lea rax,[48+rax] ; step over the six pushed registers
|
|
+; recover the six saved registers from the frame and write them into the context
|
|
+ mov rbx,QWORD[((-8))+rax]
|
|
+ mov rbp,QWORD[((-16))+rax]
|
|
+ mov r12,QWORD[((-24))+rax]
|
|
+ mov r13,QWORD[((-32))+rax]
|
|
+ mov r14,QWORD[((-40))+rax]
|
|
+ mov r15,QWORD[((-48))+rax]
|
|
+ mov QWORD[144+r8],rbx ; context->Rbx
|
|
+ mov QWORD[160+r8],rbp ; context->Rbp
|
|
+ mov QWORD[216+r8],r12 ; context->R12
|
|
+ mov QWORD[224+r8],r13 ; context->R13
|
|
+ mov QWORD[232+r8],r14 ; context->R14
|
|
+ mov QWORD[240+r8],r15 ; context->R15
|
|
+
|
|
+$L$common_seh_tail:
|
|
+ mov rdi,QWORD[8+rax] ; rdi/rsi as stored by the ";WIN64 prologue" at [8+rsp]/[16+rsp]
|
|
+ mov rsi,QWORD[16+rax]
|
|
+ mov QWORD[152+r8],rax ; context->Rsp
|
|
+ mov QWORD[168+r8],rsi ; context->Rsi
|
|
+ mov QWORD[176+r8],rdi ; context->Rdi
|
|
+
|
|
+ mov rdi,QWORD[40+r9] ; disp->ContextRecord (copy destination)
|
|
+ mov rsi,r8 ; copy source = the adjusted context
|
|
+ mov ecx,154 ; 154 qwords = 1232 bytes
|
|
+ DD 0xa548f3fc ; bytes FC F3 48 A5 = cld ; rep movsq
|
|
+
|
|
+ mov rsi,r9
|
|
+ xor rcx,rcx ; arg1 = 0 (handler type)
|
|
+ mov rdx,QWORD[8+rsi] ; arg2 = disp->ImageBase
|
|
+ mov r8,QWORD[rsi] ; arg3 = disp->ControlPc
|
|
+ mov r9,QWORD[16+rsi] ; arg4 = disp->FunctionEntry
|
|
+ mov r10,QWORD[40+rsi] ; arg5 = disp->ContextRecord
|
|
+ lea r11,[56+rsi] ; arg6 = &disp->HandlerData
|
|
+ lea r12,[24+rsi] ; arg7 = &disp->EstablisherFrame
|
|
+ mov QWORD[32+rsp],r10
|
|
+ mov QWORD[40+rsp],r11
|
|
+ mov QWORD[48+rsp],r12
|
|
+ mov QWORD[56+rsp],rcx ; arg8 = NULL
|
|
+ call QWORD[__imp_RtlVirtualUnwind]
|
|
+
|
|
+ mov eax,1 ; return 1 (ExceptionContinueSearch)
|
|
+ add rsp,64
|
|
+ popfq
|
|
+ pop r15
|
|
+ pop r14
|
|
+ pop r13
|
|
+ pop r12
|
|
+ pop rbp
|
|
+ pop rbx
|
|
+ pop rdi
|
|
+ pop rsi
|
|
+ DB 0F3h,0C3h ;repret
|
|
+
|
|
+
|
|
+section .pdata rdata align=4
|
|
+ALIGN 4
|
|
+ DD $L$SEH_begin_ossl_rsaz_amm52x20_x1_avxifma256 wrt ..imagebase ; function-table entry: begin RVA
|
|
+ DD $L$SEH_end_ossl_rsaz_amm52x20_x1_avxifma256 wrt ..imagebase ; end RVA
|
|
+ DD $L$SEH_info_ossl_rsaz_amm52x20_x1_avxifma256 wrt ..imagebase ; RVA of the matching .xdata record
|
|
+
|
|
+ DD $L$SEH_begin_ossl_rsaz_amm52x20_x2_avxifma256 wrt ..imagebase ; same triple for the x2 routine
|
|
+ DD $L$SEH_end_ossl_rsaz_amm52x20_x2_avxifma256 wrt ..imagebase
|
|
+ DD $L$SEH_info_ossl_rsaz_amm52x20_x2_avxifma256 wrt ..imagebase
|
|
+
|
|
+section .xdata rdata align=8
|
|
+ALIGN 8
|
|
+$L$SEH_info_ossl_rsaz_amm52x20_x1_avxifma256:
|
|
+DB 9,0,0,0 ; unwind-info header: first byte 9 = version 1 with the exception-handler flag; no unwind codes
|
|
+ DD rsaz_def_handler wrt ..imagebase ; handler RVA
|
|
+ DD $L$ossl_rsaz_amm52x20_x1_avxifma256_body wrt ..imagebase,$L$ossl_rsaz_amm52x20_x1_avxifma256_epilogue wrt ..imagebase ; handler data: the two RVAs rsaz_def_handler reads at [r11] and [4+r11]
|
|
+$L$SEH_info_ossl_rsaz_amm52x20_x2_avxifma256:
|
|
+DB 9,0,0,0 ; same header as above
|
|
+ DD rsaz_def_handler wrt ..imagebase ; handler RVA
|
|
+ DD $L$ossl_rsaz_amm52x20_x2_avxifma256_body wrt ..imagebase,$L$ossl_rsaz_amm52x20_x2_avxifma256_epilogue wrt ..imagebase ; handler data: body / epilogue RVAs
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-3k-avxifma.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-3k-avxifma.nasm
|
|
new file mode 100644
|
|
index 0000000000..c9e1700b25
|
|
--- /dev/null
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-3k-avxifma.nasm
|
|
@@ -0,0 +1,1927 @@
|
|
+default rel
|
|
+%define XMMWORD
|
|
+%define YMMWORD
|
|
+%define ZMMWORD
|
|
+section .text code align=64
|
|
+
|
|
+
|
|
+global ossl_rsaz_amm52x30_x1_avxifma256
|
|
+
|
|
+ALIGN 32
|
|
+ossl_rsaz_amm52x30_x1_avxifma256:
|
|
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
|
|
+ mov QWORD[16+rsp],rsi
|
|
+ mov rax,rsp
|
|
+$L$SEH_begin_ossl_rsaz_amm52x30_x1_avxifma256:
|
|
+ mov rdi,rcx
|
|
+ mov rsi,rdx
|
|
+ mov rdx,r8
|
|
+ mov rcx,r9
|
|
+ mov r8,QWORD[40+rsp]
|
|
+
|
|
+
|
|
+
|
|
+DB 243,15,30,250
|
|
+ push rbx
|
|
+
|
|
+ push rbp
|
|
+
|
|
+ push r12
|
|
+
|
|
+ push r13
|
|
+
|
|
+ push r14
|
|
+
|
|
+ push r15
|
|
+
|
|
+ lea rsp,[((-168))+rsp]
|
|
+ vmovapd XMMWORD[rsp],xmm6
|
|
+ vmovapd XMMWORD[16+rsp],xmm7
|
|
+ vmovapd XMMWORD[32+rsp],xmm8
|
|
+ vmovapd XMMWORD[48+rsp],xmm9
|
|
+ vmovapd XMMWORD[64+rsp],xmm10
|
|
+ vmovapd XMMWORD[80+rsp],xmm11
|
|
+ vmovapd XMMWORD[96+rsp],xmm12
|
|
+ vmovapd XMMWORD[112+rsp],xmm13
|
|
+ vmovapd XMMWORD[128+rsp],xmm14
|
|
+ vmovapd XMMWORD[144+rsp],xmm15
|
|
+$L$ossl_rsaz_amm52x30_x1_avxifma256_body:
|
|
+
|
|
+ vpxor ymm0,ymm0,ymm0
|
|
+ vmovapd ymm3,ymm0
|
|
+ vmovapd ymm4,ymm0
|
|
+ vmovapd ymm5,ymm0
|
|
+ vmovapd ymm6,ymm0
|
|
+ vmovapd ymm7,ymm0
|
|
+ vmovapd ymm8,ymm0
|
|
+ vmovapd ymm9,ymm0
|
|
+ vmovapd ymm10,ymm0
|
|
+
|
|
+ xor r9d,r9d
|
|
+
|
|
+ mov r11,rdx
|
|
+ mov rax,0xfffffffffffff
|
|
+
|
|
+
|
|
+ mov ebx,7
|
|
+
|
|
+ALIGN 32
|
|
+$L$loop7:
|
|
+ mov r13,QWORD[r11]
|
|
+
|
|
+ vpbroadcastq ymm1,QWORD[r11]
|
|
+ mov rdx,QWORD[rsi]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ mov r10,r12
|
|
+ adc r10,0
|
|
+
|
|
+ mov r13,r8
|
|
+ imul r13,r9
|
|
+ and r13,rax
|
|
+
|
|
+ vmovq xmm2,r13
|
|
+ vpbroadcastq ymm2,xmm2
|
|
+ mov rdx,QWORD[rcx]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ adc r10,r12
|
|
+
|
|
+ shr r9,52
|
|
+ sal r10,12
|
|
+ or r9,r10
|
|
+
|
|
+ lea rsp,[((-264))+rsp]
|
|
+
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+
|
|
+ vmovdqu YMMWORD[rsp],ymm3
|
|
+ vmovdqu YMMWORD[32+rsp],ymm4
|
|
+ vmovdqu YMMWORD[64+rsp],ymm5
|
|
+ vmovdqu YMMWORD[96+rsp],ymm6
|
|
+ vmovdqu YMMWORD[128+rsp],ymm7
|
|
+ vmovdqu YMMWORD[160+rsp],ymm8
|
|
+ vmovdqu YMMWORD[192+rsp],ymm9
|
|
+ vmovdqu YMMWORD[224+rsp],ymm10
|
|
+ mov QWORD[256+rsp],0
|
|
+
|
|
+ vmovdqu ymm3,YMMWORD[8+rsp]
|
|
+ vmovdqu ymm4,YMMWORD[40+rsp]
|
|
+ vmovdqu ymm5,YMMWORD[72+rsp]
|
|
+ vmovdqu ymm6,YMMWORD[104+rsp]
|
|
+ vmovdqu ymm7,YMMWORD[136+rsp]
|
|
+ vmovdqu ymm8,YMMWORD[168+rsp]
|
|
+ vmovdqu ymm9,YMMWORD[200+rsp]
|
|
+ vmovdqu ymm10,YMMWORD[232+rsp]
|
|
+
|
|
+ add r9,QWORD[8+rsp]
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+ lea rsp,[264+rsp]
|
|
+ mov r13,QWORD[8+r11]
|
|
+
|
|
+ vpbroadcastq ymm1,QWORD[8+r11]
|
|
+ mov rdx,QWORD[rsi]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ mov r10,r12
|
|
+ adc r10,0
|
|
+
|
|
+ mov r13,r8
|
|
+ imul r13,r9
|
|
+ and r13,rax
|
|
+
|
|
+ vmovq xmm2,r13
|
|
+ vpbroadcastq ymm2,xmm2
|
|
+ mov rdx,QWORD[rcx]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ adc r10,r12
|
|
+
|
|
+ shr r9,52
|
|
+ sal r10,12
|
|
+ or r9,r10
|
|
+
|
|
+ lea rsp,[((-264))+rsp]
|
|
+
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+
|
|
+ vmovdqu YMMWORD[rsp],ymm3
|
|
+ vmovdqu YMMWORD[32+rsp],ymm4
|
|
+ vmovdqu YMMWORD[64+rsp],ymm5
|
|
+ vmovdqu YMMWORD[96+rsp],ymm6
|
|
+ vmovdqu YMMWORD[128+rsp],ymm7
|
|
+ vmovdqu YMMWORD[160+rsp],ymm8
|
|
+ vmovdqu YMMWORD[192+rsp],ymm9
|
|
+ vmovdqu YMMWORD[224+rsp],ymm10
|
|
+ mov QWORD[256+rsp],0
|
|
+
|
|
+ vmovdqu ymm3,YMMWORD[8+rsp]
|
|
+ vmovdqu ymm4,YMMWORD[40+rsp]
|
|
+ vmovdqu ymm5,YMMWORD[72+rsp]
|
|
+ vmovdqu ymm6,YMMWORD[104+rsp]
|
|
+ vmovdqu ymm7,YMMWORD[136+rsp]
|
|
+ vmovdqu ymm8,YMMWORD[168+rsp]
|
|
+ vmovdqu ymm9,YMMWORD[200+rsp]
|
|
+ vmovdqu ymm10,YMMWORD[232+rsp]
|
|
+
|
|
+ add r9,QWORD[8+rsp]
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+ lea rsp,[264+rsp]
|
|
+ mov r13,QWORD[16+r11]
|
|
+
|
|
+ vpbroadcastq ymm1,QWORD[16+r11]
|
|
+ mov rdx,QWORD[rsi]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ mov r10,r12
|
|
+ adc r10,0
|
|
+
|
|
+ mov r13,r8
|
|
+ imul r13,r9
|
|
+ and r13,rax
|
|
+
|
|
+ vmovq xmm2,r13
|
|
+ vpbroadcastq ymm2,xmm2
|
|
+ mov rdx,QWORD[rcx]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ adc r10,r12
|
|
+
|
|
+ shr r9,52
|
|
+ sal r10,12
|
|
+ or r9,r10
|
|
+
|
|
+ lea rsp,[((-264))+rsp]
|
|
+
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+
|
|
+ vmovdqu YMMWORD[rsp],ymm3
|
|
+ vmovdqu YMMWORD[32+rsp],ymm4
|
|
+ vmovdqu YMMWORD[64+rsp],ymm5
|
|
+ vmovdqu YMMWORD[96+rsp],ymm6
|
|
+ vmovdqu YMMWORD[128+rsp],ymm7
|
|
+ vmovdqu YMMWORD[160+rsp],ymm8
|
|
+ vmovdqu YMMWORD[192+rsp],ymm9
|
|
+ vmovdqu YMMWORD[224+rsp],ymm10
|
|
+ mov QWORD[256+rsp],0
|
|
+
|
|
+ vmovdqu ymm3,YMMWORD[8+rsp]
|
|
+ vmovdqu ymm4,YMMWORD[40+rsp]
|
|
+ vmovdqu ymm5,YMMWORD[72+rsp]
|
|
+ vmovdqu ymm6,YMMWORD[104+rsp]
|
|
+ vmovdqu ymm7,YMMWORD[136+rsp]
|
|
+ vmovdqu ymm8,YMMWORD[168+rsp]
|
|
+ vmovdqu ymm9,YMMWORD[200+rsp]
|
|
+ vmovdqu ymm10,YMMWORD[232+rsp]
|
|
+
|
|
+ add r9,QWORD[8+rsp]
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+ lea rsp,[264+rsp]
|
|
+ mov r13,QWORD[24+r11]
|
|
+
|
|
+ vpbroadcastq ymm1,QWORD[24+r11]
|
|
+ mov rdx,QWORD[rsi]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ mov r10,r12
|
|
+ adc r10,0
|
|
+
|
|
+ mov r13,r8
|
|
+ imul r13,r9
|
|
+ and r13,rax
|
|
+
|
|
+ vmovq xmm2,r13
|
|
+ vpbroadcastq ymm2,xmm2
|
|
+ mov rdx,QWORD[rcx]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ adc r10,r12
|
|
+
|
|
+ shr r9,52
|
|
+ sal r10,12
|
|
+ or r9,r10
|
|
+
|
|
+ lea rsp,[((-264))+rsp]
|
|
+
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+
|
|
+ vmovdqu YMMWORD[rsp],ymm3
|
|
+ vmovdqu YMMWORD[32+rsp],ymm4
|
|
+ vmovdqu YMMWORD[64+rsp],ymm5
|
|
+ vmovdqu YMMWORD[96+rsp],ymm6
|
|
+ vmovdqu YMMWORD[128+rsp],ymm7
|
|
+ vmovdqu YMMWORD[160+rsp],ymm8
|
|
+ vmovdqu YMMWORD[192+rsp],ymm9
|
|
+ vmovdqu YMMWORD[224+rsp],ymm10
|
|
+ mov QWORD[256+rsp],0
|
|
+
|
|
+ vmovdqu ymm3,YMMWORD[8+rsp]
|
|
+ vmovdqu ymm4,YMMWORD[40+rsp]
|
|
+ vmovdqu ymm5,YMMWORD[72+rsp]
|
|
+ vmovdqu ymm6,YMMWORD[104+rsp]
|
|
+ vmovdqu ymm7,YMMWORD[136+rsp]
|
|
+ vmovdqu ymm8,YMMWORD[168+rsp]
|
|
+ vmovdqu ymm9,YMMWORD[200+rsp]
|
|
+ vmovdqu ymm10,YMMWORD[232+rsp]
|
|
+
|
|
+ add r9,QWORD[8+rsp]
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+ lea rsp,[264+rsp]
|
|
+ lea r11,[32+r11]
|
|
+ dec ebx
|
|
+ jne NEAR $L$loop7
|
|
+ mov r13,QWORD[r11]
|
|
+
|
|
+ vpbroadcastq ymm1,QWORD[r11]
|
|
+ mov rdx,QWORD[rsi]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ mov r10,r12
|
|
+ adc r10,0
|
|
+
|
|
+ mov r13,r8
|
|
+ imul r13,r9
|
|
+ and r13,rax
|
|
+
|
|
+ vmovq xmm2,r13
|
|
+ vpbroadcastq ymm2,xmm2
|
|
+ mov rdx,QWORD[rcx]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ adc r10,r12
|
|
+
|
|
+ shr r9,52
|
|
+ sal r10,12
|
|
+ or r9,r10
|
|
+
|
|
+ lea rsp,[((-264))+rsp]
|
|
+
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+
|
|
+ vmovdqu YMMWORD[rsp],ymm3
|
|
+ vmovdqu YMMWORD[32+rsp],ymm4
|
|
+ vmovdqu YMMWORD[64+rsp],ymm5
|
|
+ vmovdqu YMMWORD[96+rsp],ymm6
|
|
+ vmovdqu YMMWORD[128+rsp],ymm7
|
|
+ vmovdqu YMMWORD[160+rsp],ymm8
|
|
+ vmovdqu YMMWORD[192+rsp],ymm9
|
|
+ vmovdqu YMMWORD[224+rsp],ymm10
|
|
+ mov QWORD[256+rsp],0
|
|
+
|
|
+ vmovdqu ymm3,YMMWORD[8+rsp]
|
|
+ vmovdqu ymm4,YMMWORD[40+rsp]
|
|
+ vmovdqu ymm5,YMMWORD[72+rsp]
|
|
+ vmovdqu ymm6,YMMWORD[104+rsp]
|
|
+ vmovdqu ymm7,YMMWORD[136+rsp]
|
|
+ vmovdqu ymm8,YMMWORD[168+rsp]
|
|
+ vmovdqu ymm9,YMMWORD[200+rsp]
|
|
+ vmovdqu ymm10,YMMWORD[232+rsp]
|
|
+
|
|
+ add r9,QWORD[8+rsp]
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+ lea rsp,[264+rsp]
|
|
+ mov r13,QWORD[8+r11]
|
|
+
|
|
+ vpbroadcastq ymm1,QWORD[8+r11]
|
|
+ mov rdx,QWORD[rsi]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ mov r10,r12
|
|
+ adc r10,0
|
|
+
|
|
+ mov r13,r8
|
|
+ imul r13,r9
|
|
+ and r13,rax
|
|
+
|
|
+ vmovq xmm2,r13
|
|
+ vpbroadcastq ymm2,xmm2
|
|
+ mov rdx,QWORD[rcx]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ adc r10,r12
|
|
+
|
|
+ shr r9,52
|
|
+ sal r10,12
|
|
+ or r9,r10
|
|
+
|
|
+ lea rsp,[((-264))+rsp]
|
|
+
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+
|
|
+ vmovdqu YMMWORD[rsp],ymm3
|
|
+ vmovdqu YMMWORD[32+rsp],ymm4
|
|
+ vmovdqu YMMWORD[64+rsp],ymm5
|
|
+ vmovdqu YMMWORD[96+rsp],ymm6
|
|
+ vmovdqu YMMWORD[128+rsp],ymm7
|
|
+ vmovdqu YMMWORD[160+rsp],ymm8
|
|
+ vmovdqu YMMWORD[192+rsp],ymm9
|
|
+ vmovdqu YMMWORD[224+rsp],ymm10
|
|
+ mov QWORD[256+rsp],0
|
|
+
|
|
+ vmovdqu ymm3,YMMWORD[8+rsp]
|
|
+ vmovdqu ymm4,YMMWORD[40+rsp]
|
|
+ vmovdqu ymm5,YMMWORD[72+rsp]
|
|
+ vmovdqu ymm6,YMMWORD[104+rsp]
|
|
+ vmovdqu ymm7,YMMWORD[136+rsp]
|
|
+ vmovdqu ymm8,YMMWORD[168+rsp]
|
|
+ vmovdqu ymm9,YMMWORD[200+rsp]
|
|
+ vmovdqu ymm10,YMMWORD[232+rsp]
|
|
+
|
|
+ add r9,QWORD[8+rsp]
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+ lea rsp,[264+rsp]
|
|
+
|
|
+ vmovq xmm0,r9
|
|
+ vpbroadcastq ymm0,xmm0
|
|
+ vpblendd ymm3,ymm3,ymm0,3
|
|
+
|
|
+
|
|
+
|
|
+ vpsrlq ymm0,ymm3,52
|
|
+ vpsrlq ymm1,ymm4,52
|
|
+ vpsrlq ymm2,ymm5,52
|
|
+ vpsrlq ymm11,ymm6,52
|
|
+ vpsrlq ymm12,ymm7,52
|
|
+ vpsrlq ymm13,ymm8,52
|
|
+ vpsrlq ymm14,ymm9,52
|
|
+ vpsrlq ymm15,ymm10,52
|
|
+
|
|
+ lea rsp,[((-32))+rsp]
|
|
+ vmovupd YMMWORD[rsp],ymm3
|
|
+
|
|
+
|
|
+ vpermq ymm15,ymm15,144
|
|
+ vpermq ymm3,ymm14,3
|
|
+ vblendpd ymm15,ymm15,ymm3,1
|
|
+
|
|
+ vpermq ymm14,ymm14,144
|
|
+ vpermq ymm3,ymm13,3
|
|
+ vblendpd ymm14,ymm14,ymm3,1
|
|
+
|
|
+ vpermq ymm13,ymm13,144
|
|
+ vpermq ymm3,ymm12,3
|
|
+ vblendpd ymm13,ymm13,ymm3,1
|
|
+
|
|
+ vpermq ymm12,ymm12,144
|
|
+ vpermq ymm3,ymm11,3
|
|
+ vblendpd ymm12,ymm12,ymm3,1
|
|
+
|
|
+ vpermq ymm11,ymm11,144
|
|
+ vpermq ymm3,ymm2,3
|
|
+ vblendpd ymm11,ymm11,ymm3,1
|
|
+
|
|
+ vpermq ymm2,ymm2,144
|
|
+ vpermq ymm3,ymm1,3
|
|
+ vblendpd ymm2,ymm2,ymm3,1
|
|
+
|
|
+ vpermq ymm1,ymm1,144
|
|
+ vpermq ymm3,ymm0,3
|
|
+ vblendpd ymm1,ymm1,ymm3,1
|
|
+
|
|
+ vpermq ymm0,ymm0,144
|
|
+ vpand ymm0,ymm0,YMMWORD[$L$high64x3]
|
|
+
|
|
+ vmovupd ymm3,YMMWORD[rsp]
|
|
+ lea rsp,[32+rsp]
|
|
+
|
|
+
|
|
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
|
|
+
|
|
+
|
|
+ vpaddq ymm3,ymm3,ymm0
|
|
+ vpaddq ymm4,ymm4,ymm1
|
|
+ vpaddq ymm5,ymm5,ymm2
|
|
+ vpaddq ymm6,ymm6,ymm11
|
|
+ vpaddq ymm7,ymm7,ymm12
|
|
+ vpaddq ymm8,ymm8,ymm13
|
|
+ vpaddq ymm9,ymm9,ymm14
|
|
+ vpaddq ymm10,ymm10,ymm15
|
|
+
|
|
+
|
|
+
|
|
+ vpcmpgtq ymm0,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpcmpgtq ymm1,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r14d,ymm0
|
|
+ vmovmskpd r13d,ymm1
|
|
+ shl r13b,4
|
|
+ or r14b,r13b
|
|
+
|
|
+ vpcmpgtq ymm2,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpcmpgtq ymm11,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r13d,ymm2
|
|
+ vmovmskpd r12d,ymm11
|
|
+ shl r12b,4
|
|
+ or r13b,r12b
|
|
+
|
|
+ vpcmpgtq ymm12,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpcmpgtq ymm13,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r12d,ymm12
|
|
+ vmovmskpd r11d,ymm13
|
|
+ shl r11b,4
|
|
+ or r12b,r11b
|
|
+
|
|
+ vpcmpgtq ymm14,ymm9,YMMWORD[$L$mask52x4]
|
|
+ vpcmpgtq ymm15,ymm10,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r11d,ymm14
|
|
+ vmovmskpd r10d,ymm15
|
|
+ shl r10b,4
|
|
+ or r11b,r10b
|
|
+
|
|
+ add r14b,r14b
|
|
+ adc r13b,r13b
|
|
+ adc r12b,r12b
|
|
+ adc r11b,r11b
|
|
+
|
|
+
|
|
+ vpcmpeqq ymm0,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpcmpeqq ymm1,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r9d,ymm0
|
|
+ vmovmskpd r8d,ymm1
|
|
+ shl r8b,4
|
|
+ or r9b,r8b
|
|
+
|
|
+ vpcmpeqq ymm2,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpcmpeqq ymm11,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r8d,ymm2
|
|
+ vmovmskpd edx,ymm11
|
|
+ shl dl,4
|
|
+ or r8b,dl
|
|
+
|
|
+ vpcmpeqq ymm12,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpcmpeqq ymm13,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd edx,ymm12
|
|
+ vmovmskpd ecx,ymm13
|
|
+ shl cl,4
|
|
+ or dl,cl
|
|
+
|
|
+ vpcmpeqq ymm14,ymm9,YMMWORD[$L$mask52x4]
|
|
+ vpcmpeqq ymm15,ymm10,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd ecx,ymm14
|
|
+ vmovmskpd ebx,ymm15
|
|
+ shl bl,4
|
|
+ or cl,bl
|
|
+
|
|
+ add r14b,r9b
|
|
+ adc r13b,r8b
|
|
+ adc r12b,dl
|
|
+ adc r11b,cl
|
|
+
|
|
+ xor r14b,r9b
|
|
+ xor r13b,r8b
|
|
+ xor r12b,dl
|
|
+ xor r11b,cl
|
|
+
|
|
+ lea rdx,[$L$kmasklut]
|
|
+
|
|
+ mov r10b,r14b
|
|
+ and r14,0xf
|
|
+ vpsubq ymm0,ymm3,YMMWORD[$L$mask52x4]
|
|
+ shl r14,5
|
|
+ vmovapd r14,(%rdx), %ymm2
|
|
+ vblendvpd ymm3,ymm3,ymm0,ymm2
|
|
+
|
|
+ shr r10b,4
|
|
+ and r10,0xf
|
|
+ vpsubq ymm0,ymm4,YMMWORD[$L$mask52x4]
|
|
+ shl r10,5
|
|
+ vmovapd r10,(%rdx), %ymm2
|
|
+ vblendvpd ymm4,ymm4,ymm0,ymm2
|
|
+
|
|
+ mov r10b,r13b
|
|
+ and r13,0xf
|
|
+ vpsubq ymm0,ymm5,YMMWORD[$L$mask52x4]
|
|
+ shl r13,5
|
|
+ vmovapd r13,(%rdx), %ymm2
|
|
+ vblendvpd ymm5,ymm5,ymm0,ymm2
|
|
+
|
|
+ shr r10b,4
|
|
+ and r10,0xf
|
|
+ vpsubq ymm0,ymm6,YMMWORD[$L$mask52x4]
|
|
+ shl r10,5
|
|
+ vmovapd r10,(%rdx), %ymm2
|
|
+ vblendvpd ymm6,ymm6,ymm0,ymm2
|
|
+
|
|
+ mov r10b,r12b
|
|
+ and r12,0xf
|
|
+ vpsubq ymm0,ymm7,YMMWORD[$L$mask52x4]
|
|
+ shl r12,5
|
|
+ vmovapd r12,(%rdx), %ymm2
|
|
+ vblendvpd ymm7,ymm7,ymm0,ymm2
|
|
+
|
|
+ shr r10b,4
|
|
+ and r10,0xf
|
|
+ vpsubq ymm0,ymm8,YMMWORD[$L$mask52x4]
|
|
+ shl r10,5
|
|
+ vmovapd r10,(%rdx), %ymm2
|
|
+ vblendvpd ymm8,ymm8,ymm0,ymm2
|
|
+
|
|
+ mov r10b,r11b
|
|
+ and r11,0xf
|
|
+ vpsubq ymm0,ymm9,YMMWORD[$L$mask52x4]
|
|
+ shl r11,5
|
|
+ vmovapd r11,(%rdx), %ymm2
|
|
+ vblendvpd ymm9,ymm9,ymm0,ymm2
|
|
+
|
|
+ shr r10b,4
|
|
+ and r10,0xf
|
|
+ vpsubq ymm0,ymm10,YMMWORD[$L$mask52x4]
|
|
+ shl r10,5
|
|
+ vmovapd r10,(%rdx), %ymm2
|
|
+ vblendvpd ymm10,ymm10,ymm0,ymm2
|
|
+
|
|
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
|
|
+
|
|
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
|
|
+
|
|
+ vmovdqu YMMWORD[rdi],ymm3
|
|
+ vmovdqu YMMWORD[32+rdi],ymm4
|
|
+ vmovdqu YMMWORD[64+rdi],ymm5
|
|
+ vmovdqu YMMWORD[96+rdi],ymm6
|
|
+ vmovdqu YMMWORD[128+rdi],ymm7
|
|
+ vmovdqu YMMWORD[160+rdi],ymm8
|
|
+ vmovdqu YMMWORD[192+rdi],ymm9
|
|
+ vmovdqu YMMWORD[224+rdi],ymm10
|
|
+
|
|
+ vzeroupper
|
|
+ lea rax,[rsp]
|
|
+
|
|
+ vmovapd xmm6,XMMWORD[rax]
|
|
+ vmovapd xmm7,XMMWORD[16+rax]
|
|
+ vmovapd xmm8,XMMWORD[32+rax]
|
|
+ vmovapd xmm9,XMMWORD[48+rax]
|
|
+ vmovapd xmm10,XMMWORD[64+rax]
|
|
+ vmovapd xmm11,XMMWORD[80+rax]
|
|
+ vmovapd xmm12,XMMWORD[96+rax]
|
|
+ vmovapd xmm13,XMMWORD[112+rax]
|
|
+ vmovapd xmm14,XMMWORD[128+rax]
|
|
+ vmovapd xmm15,XMMWORD[144+rax]
|
|
+ lea rax,[168+rsp]
|
|
+ mov r15,QWORD[rax]
|
|
+
|
|
+ mov r14,QWORD[8+rax]
|
|
+
|
|
+ mov r13,QWORD[16+rax]
|
|
+
|
|
+ mov r12,QWORD[24+rax]
|
|
+
|
|
+ mov rbp,QWORD[32+rax]
|
|
+
|
|
+ mov rbx,QWORD[40+rax]
|
|
+
|
|
+ lea rsp,[48+rax]
|
|
+
|
|
+$L$ossl_rsaz_amm52x30_x1_avxifma256_epilogue:
|
|
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
|
+ mov rsi,QWORD[16+rsp]
|
|
+ DB 0F3h,0C3h ;repret
|
|
+
|
|
+$L$SEH_end_ossl_rsaz_amm52x30_x1_avxifma256:
|
|
+section .rdata rdata align=32 ; constant tables referenced by the ossl_rsaz_amm52x30_* code in this file
|
|
+ALIGN 32
|
|
+$L$mask52x4: ; four qword lanes, each 2^52-1 (low 52 bits set); operand of the vpand/vpcmpgtq/vpcmpeqq/vpsubq steps
|
|
+ DQ 0xfffffffffffff
|
|
+ DQ 0xfffffffffffff
|
|
+ DQ 0xfffffffffffff
|
|
+ DQ 0xfffffffffffff
|
|
+$L$high64x3: ; qword 0 clear, qwords 1-3 all ones; ANDed into ymm0 after vpermq to zero its lowest lane
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+$L$kmasklut: ; 16 entries of 32 bytes: entry i has qword j all-ones iff bit j of i is set; code masks an index to 4 bits, shifts it left by 5, and feeds the entry to vblendvpd
|
|
+
|
|
+ DQ 0x0 ; entry 0 (0b0000): no lane selected
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+
|
|
+ DQ 0xffffffffffffffff ; entry 1 (0b0001): lane 0
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+
|
|
+ DQ 0x0 ; entry 2 (0b0010): lane 1
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+
|
|
+ DQ 0xffffffffffffffff ; entry 3 (0b0011): lanes 0,1
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+
|
|
+ DQ 0x0 ; entry 4 (0b0100): lane 2
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+
|
|
+ DQ 0xffffffffffffffff ; entry 5 (0b0101): lanes 0,2
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+
|
|
+ DQ 0x0 ; entry 6 (0b0110): lanes 1,2
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+
|
|
+ DQ 0xffffffffffffffff ; entry 7 (0b0111): lanes 0,1,2
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+
|
|
+ DQ 0x0 ; entry 8 (0b1000): lane 3
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+
|
|
+ DQ 0xffffffffffffffff ; entry 9 (0b1001): lanes 0,3
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+
|
|
+ DQ 0x0 ; entry 10 (0b1010): lanes 1,3
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+
|
|
+ DQ 0xffffffffffffffff ; entry 11 (0b1011): lanes 0,1,3
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+
|
|
+ DQ 0x0 ; entry 12 (0b1100): lanes 2,3
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+
|
|
+ DQ 0xffffffffffffffff ; entry 13 (0b1101): lanes 0,2,3
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+
|
|
+ DQ 0x0 ; entry 14 (0b1110): lanes 1,2,3
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+
|
|
+ DQ 0xffffffffffffffff ; entry 15 (0b1111): all four lanes
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+section .text code align=64
|
|
+
|
|
+
|
|
+global ossl_rsaz_amm52x30_x2_avxifma256
|
|
+
|
|
+ALIGN 32
|
|
+ossl_rsaz_amm52x30_x2_avxifma256:
|
|
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
|
|
+ mov QWORD[16+rsp],rsi
|
|
+ mov rax,rsp
|
|
+$L$SEH_begin_ossl_rsaz_amm52x30_x2_avxifma256:
|
|
+ mov rdi,rcx
|
|
+ mov rsi,rdx
|
|
+ mov rdx,r8
|
|
+ mov rcx,r9
|
|
+ mov r8,QWORD[40+rsp]
|
|
+
|
|
+
|
|
+
|
|
+DB 243,15,30,250
|
|
+ push rbx
|
|
+
|
|
+ push rbp
|
|
+
|
|
+ push r12
|
|
+
|
|
+ push r13
|
|
+
|
|
+ push r14
|
|
+
|
|
+ push r15
|
|
+
|
|
+ lea rsp,[((-168))+rsp]
|
|
+ vmovapd XMMWORD[rsp],xmm6
|
|
+ vmovapd XMMWORD[16+rsp],xmm7
|
|
+ vmovapd XMMWORD[32+rsp],xmm8
|
|
+ vmovapd XMMWORD[48+rsp],xmm9
|
|
+ vmovapd XMMWORD[64+rsp],xmm10
|
|
+ vmovapd XMMWORD[80+rsp],xmm11
|
|
+ vmovapd XMMWORD[96+rsp],xmm12
|
|
+ vmovapd XMMWORD[112+rsp],xmm13
|
|
+ vmovapd XMMWORD[128+rsp],xmm14
|
|
+ vmovapd XMMWORD[144+rsp],xmm15
|
|
+$L$ossl_rsaz_amm52x30_x2_avxifma256_body:
|
|
+
|
|
+ vpxor ymm0,ymm0,ymm0
|
|
+ vmovapd ymm3,ymm0
|
|
+ vmovapd ymm4,ymm0
|
|
+ vmovapd ymm5,ymm0
|
|
+ vmovapd ymm6,ymm0
|
|
+ vmovapd ymm7,ymm0
|
|
+ vmovapd ymm8,ymm0
|
|
+ vmovapd ymm9,ymm0
|
|
+ vmovapd ymm10,ymm0
|
|
+
|
|
+ xor r9d,r9d
|
|
+
|
|
+ mov r11,rdx
|
|
+ mov rax,0xfffffffffffff
|
|
+
|
|
+ mov ebx,30
|
|
+
|
|
+ALIGN 32
|
|
+$L$loop30:
|
|
+ mov r13,QWORD[r11]
|
|
+
|
|
+ vpbroadcastq ymm1,QWORD[r11]
|
|
+ mov rdx,QWORD[rsi]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ mov r10,r12
|
|
+ adc r10,0
|
|
+
|
|
+ mov r13,QWORD[r8]
|
|
+ imul r13,r9
|
|
+ and r13,rax
|
|
+
|
|
+ vmovq xmm2,r13
|
|
+ vpbroadcastq ymm2,xmm2
|
|
+ mov rdx,QWORD[rcx]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ adc r10,r12
|
|
+
|
|
+ shr r9,52
|
|
+ sal r10,12
|
|
+ or r9,r10
|
|
+
|
|
+ lea rsp,[((-264))+rsp]
|
|
+
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+
|
|
+ vmovdqu YMMWORD[rsp],ymm3
|
|
+ vmovdqu YMMWORD[32+rsp],ymm4
|
|
+ vmovdqu YMMWORD[64+rsp],ymm5
|
|
+ vmovdqu YMMWORD[96+rsp],ymm6
|
|
+ vmovdqu YMMWORD[128+rsp],ymm7
|
|
+ vmovdqu YMMWORD[160+rsp],ymm8
|
|
+ vmovdqu YMMWORD[192+rsp],ymm9
|
|
+ vmovdqu YMMWORD[224+rsp],ymm10
|
|
+ mov QWORD[256+rsp],0
|
|
+
|
|
+ vmovdqu ymm3,YMMWORD[8+rsp]
|
|
+ vmovdqu ymm4,YMMWORD[40+rsp]
|
|
+ vmovdqu ymm5,YMMWORD[72+rsp]
|
|
+ vmovdqu ymm6,YMMWORD[104+rsp]
|
|
+ vmovdqu ymm7,YMMWORD[136+rsp]
|
|
+ vmovdqu ymm8,YMMWORD[168+rsp]
|
|
+ vmovdqu ymm9,YMMWORD[200+rsp]
|
|
+ vmovdqu ymm10,YMMWORD[232+rsp]
|
|
+
|
|
+ add r9,QWORD[8+rsp]
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10
|
|
+
|
|
+ lea rsp,[264+rsp]
|
|
+ lea r11,[8+r11]
|
|
+ dec ebx
|
|
+ jne NEAR $L$loop30
|
|
+
|
|
+ push r11
|
|
+ push rsi
|
|
+ push rcx
|
|
+ push r8
|
|
+
|
|
+ vmovq xmm0,r9
|
|
+ vpbroadcastq ymm0,xmm0
|
|
+ vpblendd ymm3,ymm3,ymm0,3
|
|
+
|
|
+
|
|
+
|
|
+ vpsrlq ymm0,ymm3,52
|
|
+ vpsrlq ymm1,ymm4,52
|
|
+ vpsrlq ymm2,ymm5,52
|
|
+ vpsrlq ymm11,ymm6,52
|
|
+ vpsrlq ymm12,ymm7,52
|
|
+ vpsrlq ymm13,ymm8,52
|
|
+ vpsrlq ymm14,ymm9,52
|
|
+ vpsrlq ymm15,ymm10,52
|
|
+
|
|
+ lea rsp,[((-32))+rsp]
|
|
+ vmovupd YMMWORD[rsp],ymm3
|
|
+
|
|
+
|
|
+ vpermq ymm15,ymm15,144
|
|
+ vpermq ymm3,ymm14,3
|
|
+ vblendpd ymm15,ymm15,ymm3,1
|
|
+
|
|
+ vpermq ymm14,ymm14,144
|
|
+ vpermq ymm3,ymm13,3
|
|
+ vblendpd ymm14,ymm14,ymm3,1
|
|
+
|
|
+ vpermq ymm13,ymm13,144
|
|
+ vpermq ymm3,ymm12,3
|
|
+ vblendpd ymm13,ymm13,ymm3,1
|
|
+
|
|
+ vpermq ymm12,ymm12,144
|
|
+ vpermq ymm3,ymm11,3
|
|
+ vblendpd ymm12,ymm12,ymm3,1
|
|
+
|
|
+ vpermq ymm11,ymm11,144
|
|
+ vpermq ymm3,ymm2,3
|
|
+ vblendpd ymm11,ymm11,ymm3,1
|
|
+
|
|
+ vpermq ymm2,ymm2,144
|
|
+ vpermq ymm3,ymm1,3
|
|
+ vblendpd ymm2,ymm2,ymm3,1
|
|
+
|
|
+ vpermq ymm1,ymm1,144
|
|
+ vpermq ymm3,ymm0,3
|
|
+ vblendpd ymm1,ymm1,ymm3,1
|
|
+
|
|
+ vpermq ymm0,ymm0,144
|
|
+ vpand ymm0,ymm0,YMMWORD[$L$high64x3]
|
|
+
|
|
+ vmovupd ymm3,YMMWORD[rsp]
|
|
+ lea rsp,[32+rsp]
|
|
+
|
|
+
|
|
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
|
|
+
|
|
+
|
|
+ vpaddq ymm3,ymm3,ymm0
|
|
+ vpaddq ymm4,ymm4,ymm1
|
|
+ vpaddq ymm5,ymm5,ymm2
|
|
+ vpaddq ymm6,ymm6,ymm11
|
|
+ vpaddq ymm7,ymm7,ymm12
|
|
+ vpaddq ymm8,ymm8,ymm13
|
|
+ vpaddq ymm9,ymm9,ymm14
|
|
+ vpaddq ymm10,ymm10,ymm15
|
|
+
|
|
+
|
|
+
|
|
+ vpcmpgtq ymm0,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpcmpgtq ymm1,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r14d,ymm0
|
|
+ vmovmskpd r13d,ymm1
|
|
+ shl r13b,4
|
|
+ or r14b,r13b
|
|
+
|
|
+ vpcmpgtq ymm2,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpcmpgtq ymm11,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r13d,ymm2
|
|
+ vmovmskpd r12d,ymm11
|
|
+ shl r12b,4
|
|
+ or r13b,r12b
|
|
+
|
|
+ vpcmpgtq ymm12,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpcmpgtq ymm13,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r12d,ymm12
|
|
+ vmovmskpd r11d,ymm13
|
|
+ shl r11b,4
|
|
+ or r12b,r11b
|
|
+
|
|
+ vpcmpgtq ymm14,ymm9,YMMWORD[$L$mask52x4]
|
|
+ vpcmpgtq ymm15,ymm10,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r11d,ymm14
|
|
+ vmovmskpd r10d,ymm15
|
|
+ shl r10b,4
|
|
+ or r11b,r10b
|
|
+
|
|
+ add r14b,r14b
|
|
+ adc r13b,r13b
|
|
+ adc r12b,r12b
|
|
+ adc r11b,r11b
|
|
+
|
|
+
|
|
+ vpcmpeqq ymm0,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpcmpeqq ymm1,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r9d,ymm0
|
|
+ vmovmskpd r8d,ymm1
|
|
+ shl r8b,4
|
|
+ or r9b,r8b
|
|
+
|
|
+ vpcmpeqq ymm2,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpcmpeqq ymm11,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r8d,ymm2
|
|
+ vmovmskpd edx,ymm11
|
|
+ shl dl,4
|
|
+ or r8b,dl
|
|
+
|
|
+ vpcmpeqq ymm12,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpcmpeqq ymm13,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd edx,ymm12
|
|
+ vmovmskpd ecx,ymm13
|
|
+ shl cl,4
|
|
+ or dl,cl
|
|
+
|
|
+ vpcmpeqq ymm14,ymm9,YMMWORD[$L$mask52x4]
|
|
+ vpcmpeqq ymm15,ymm10,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd ecx,ymm14
|
|
+ vmovmskpd ebx,ymm15
|
|
+ shl bl,4
|
|
+ or cl,bl
|
|
+
|
|
+ add r14b,r9b
|
|
+ adc r13b,r8b
|
|
+ adc r12b,dl
|
|
+ adc r11b,cl
|
|
+
|
|
+ xor r14b,r9b
|
|
+ xor r13b,r8b
|
|
+ xor r12b,dl
|
|
+ xor r11b,cl
|
|
+
|
|
+ lea rdx,[$L$kmasklut]
|
|
+
|
|
+ mov r10b,r14b
|
|
+ and r14,0xf
|
|
+ vpsubq ymm0,ymm3,YMMWORD[$L$mask52x4]
|
|
+ shl r14,5
|
|
+ vmovapd r14,(%rdx), %ymm2
|
|
+ vblendvpd ymm3,ymm3,ymm0,ymm2
|
|
+
|
|
+ shr r10b,4
|
|
+ and r10,0xf
|
|
+ vpsubq ymm0,ymm4,YMMWORD[$L$mask52x4]
|
|
+ shl r10,5
|
|
+ vmovapd r10,(%rdx), %ymm2
|
|
+ vblendvpd ymm4,ymm4,ymm0,ymm2
|
|
+
|
|
+ mov r10b,r13b
|
|
+ and r13,0xf
|
|
+ vpsubq ymm0,ymm5,YMMWORD[$L$mask52x4]
|
|
+ shl r13,5
|
|
+ vmovapd r13,(%rdx), %ymm2
|
|
+ vblendvpd ymm5,ymm5,ymm0,ymm2
|
|
+
|
|
+ shr r10b,4
|
|
+ and r10,0xf
|
|
+ vpsubq ymm0,ymm6,YMMWORD[$L$mask52x4]
|
|
+ shl r10,5
|
|
+ vmovapd r10,(%rdx), %ymm2
|
|
+ vblendvpd ymm6,ymm6,ymm0,ymm2
|
|
+
|
|
+ mov r10b,r12b
|
|
+ and r12,0xf
|
|
+ vpsubq ymm0,ymm7,YMMWORD[$L$mask52x4]
|
|
+ shl r12,5
|
|
+ vmovapd r12,(%rdx), %ymm2
|
|
+ vblendvpd ymm7,ymm7,ymm0,ymm2
|
|
+
|
|
+ shr r10b,4
|
|
+ and r10,0xf
|
|
+ vpsubq ymm0,ymm8,YMMWORD[$L$mask52x4]
|
|
+ shl r10,5
|
|
+ vmovapd r10,(%rdx), %ymm2
|
|
+ vblendvpd ymm8,ymm8,ymm0,ymm2
|
|
+
|
|
+ mov r10b,r11b
|
|
+ and r11,0xf
|
|
+ vpsubq ymm0,ymm9,YMMWORD[$L$mask52x4]
|
|
+ shl r11,5
|
|
+ vmovapd r11,(%rdx), %ymm2
|
|
+ vblendvpd ymm9,ymm9,ymm0,ymm2
|
|
+
|
|
+ shr r10b,4
|
|
+ and r10,0xf
|
|
+ vpsubq ymm0,ymm10,YMMWORD[$L$mask52x4]
|
|
+ shl r10,5
|
|
+ vmovapd r10,(%rdx), %ymm2
|
|
+ vblendvpd ymm10,ymm10,ymm0,ymm2
|
|
+
|
|
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
|
|
+
|
|
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
|
|
+ pop r8
|
|
+ pop rcx
|
|
+ pop rsi
|
|
+ pop r11
|
|
+
|
|
+ vmovdqu YMMWORD[rdi],ymm3
|
|
+ vmovdqu YMMWORD[32+rdi],ymm4
|
|
+ vmovdqu YMMWORD[64+rdi],ymm5
|
|
+ vmovdqu YMMWORD[96+rdi],ymm6
|
|
+ vmovdqu YMMWORD[128+rdi],ymm7
|
|
+ vmovdqu YMMWORD[160+rdi],ymm8
|
|
+ vmovdqu YMMWORD[192+rdi],ymm9
|
|
+ vmovdqu YMMWORD[224+rdi],ymm10
|
|
+
|
|
+ xor r15d,r15d
|
|
+
|
|
+ lea r11,[16+r11]
|
|
+ mov rax,0xfffffffffffff
|
|
+
|
|
+ mov ebx,30
|
|
+
|
|
+ vpxor ymm0,ymm0,ymm0
|
|
+ vmovapd ymm3,ymm0
|
|
+ vmovapd ymm4,ymm0
|
|
+ vmovapd ymm5,ymm0
|
|
+ vmovapd ymm6,ymm0
|
|
+ vmovapd ymm7,ymm0
|
|
+ vmovapd ymm8,ymm0
|
|
+ vmovapd ymm9,ymm0
|
|
+ vmovapd ymm10,ymm0
|
|
+ALIGN 32
|
|
+$L$loop40:
|
|
+ mov r13,QWORD[r11]
|
|
+
|
|
+ vpbroadcastq ymm1,QWORD[r11]
|
|
+ mov rdx,QWORD[256+rsi]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ mov r10,r12
|
|
+ adc r10,0
|
|
+
|
|
+ mov r13,QWORD[8+r8]
|
|
+ imul r13,r9
|
|
+ and r13,rax
|
|
+
|
|
+ vmovq xmm2,r13
|
|
+ vpbroadcastq ymm2,xmm2
|
|
+ mov rdx,QWORD[256+rcx]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ adc r10,r12
|
|
+
|
|
+ shr r9,52
|
|
+ sal r10,12
|
|
+ or r9,r10
|
|
+
|
|
+ lea rsp,[((-264))+rsp]
|
|
+
|
|
+{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 320(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 352(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 384(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 416(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 448(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 480(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52luq 256(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 320(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 352(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 384(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 416(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 448(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 480(%rcx), %ymm2, %ymm10
|
|
+
|
|
+
|
|
+ vmovdqu YMMWORD[rsp],ymm3
|
|
+ vmovdqu YMMWORD[32+rsp],ymm4
|
|
+ vmovdqu YMMWORD[64+rsp],ymm5
|
|
+ vmovdqu YMMWORD[96+rsp],ymm6
|
|
+ vmovdqu YMMWORD[128+rsp],ymm7
|
|
+ vmovdqu YMMWORD[160+rsp],ymm8
|
|
+ vmovdqu YMMWORD[192+rsp],ymm9
|
|
+ vmovdqu YMMWORD[224+rsp],ymm10
|
|
+ mov QWORD[256+rsp],0
|
|
+
|
|
+ vmovdqu ymm3,YMMWORD[8+rsp]
|
|
+ vmovdqu ymm4,YMMWORD[40+rsp]
|
|
+ vmovdqu ymm5,YMMWORD[72+rsp]
|
|
+ vmovdqu ymm6,YMMWORD[104+rsp]
|
|
+ vmovdqu ymm7,YMMWORD[136+rsp]
|
|
+ vmovdqu ymm8,YMMWORD[168+rsp]
|
|
+ vmovdqu ymm9,YMMWORD[200+rsp]
|
|
+ vmovdqu ymm10,YMMWORD[232+rsp]
|
|
+
|
|
+ add r9,QWORD[8+rsp]
|
|
+
|
|
+{vex} vpmadd52huq 256(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 320(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 352(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 384(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 416(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 448(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 480(%rsi), %ymm1, %ymm10
|
|
+
|
|
+{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 320(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 352(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 384(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 416(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 448(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 480(%rcx), %ymm2, %ymm10
|
|
+
|
|
+ lea rsp,[264+rsp]
|
|
+ lea r11,[8+r11]
|
|
+ dec ebx
|
|
+ jne NEAR $L$loop40
|
|
+
|
|
+ vmovq xmm0,r9
|
|
+ vpbroadcastq ymm0,xmm0
|
|
+ vpblendd ymm3,ymm3,ymm0,3
|
|
+
|
|
+
|
|
+
|
|
+ vpsrlq ymm0,ymm3,52
|
|
+ vpsrlq ymm1,ymm4,52
|
|
+ vpsrlq ymm2,ymm5,52
|
|
+ vpsrlq ymm11,ymm6,52
|
|
+ vpsrlq ymm12,ymm7,52
|
|
+ vpsrlq ymm13,ymm8,52
|
|
+ vpsrlq ymm14,ymm9,52
|
|
+ vpsrlq ymm15,ymm10,52
|
|
+
|
|
+ lea rsp,[((-32))+rsp]
|
|
+ vmovupd YMMWORD[rsp],ymm3
|
|
+
|
|
+
|
|
+ vpermq ymm15,ymm15,144
|
|
+ vpermq ymm3,ymm14,3
|
|
+ vblendpd ymm15,ymm15,ymm3,1
|
|
+
|
|
+ vpermq ymm14,ymm14,144
|
|
+ vpermq ymm3,ymm13,3
|
|
+ vblendpd ymm14,ymm14,ymm3,1
|
|
+
|
|
+ vpermq ymm13,ymm13,144
|
|
+ vpermq ymm3,ymm12,3
|
|
+ vblendpd ymm13,ymm13,ymm3,1
|
|
+
|
|
+ vpermq ymm12,ymm12,144
|
|
+ vpermq ymm3,ymm11,3
|
|
+ vblendpd ymm12,ymm12,ymm3,1
|
|
+
|
|
+ vpermq ymm11,ymm11,144
|
|
+ vpermq ymm3,ymm2,3
|
|
+ vblendpd ymm11,ymm11,ymm3,1
|
|
+
|
|
+ vpermq ymm2,ymm2,144
|
|
+ vpermq ymm3,ymm1,3
|
|
+ vblendpd ymm2,ymm2,ymm3,1
|
|
+
|
|
+ vpermq ymm1,ymm1,144
|
|
+ vpermq ymm3,ymm0,3
|
|
+ vblendpd ymm1,ymm1,ymm3,1
|
|
+
|
|
+ vpermq ymm0,ymm0,144
|
|
+ vpand ymm0,ymm0,YMMWORD[$L$high64x3]
|
|
+
|
|
+ vmovupd ymm3,YMMWORD[rsp]
|
|
+ lea rsp,[32+rsp]
|
|
+
|
|
+
|
|
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
|
|
+
|
|
+
|
|
+ vpaddq ymm3,ymm3,ymm0
|
|
+ vpaddq ymm4,ymm4,ymm1
|
|
+ vpaddq ymm5,ymm5,ymm2
|
|
+ vpaddq ymm6,ymm6,ymm11
|
|
+ vpaddq ymm7,ymm7,ymm12
|
|
+ vpaddq ymm8,ymm8,ymm13
|
|
+ vpaddq ymm9,ymm9,ymm14
|
|
+ vpaddq ymm10,ymm10,ymm15
|
|
+
|
|
+
|
|
+
|
|
+ vpcmpgtq ymm0,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpcmpgtq ymm1,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r14d,ymm0
|
|
+ vmovmskpd r13d,ymm1
|
|
+ shl r13b,4
|
|
+ or r14b,r13b
|
|
+
|
|
+ vpcmpgtq ymm2,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpcmpgtq ymm11,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r13d,ymm2
|
|
+ vmovmskpd r12d,ymm11
|
|
+ shl r12b,4
|
|
+ or r13b,r12b
|
|
+
|
|
+ vpcmpgtq ymm12,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpcmpgtq ymm13,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r12d,ymm12
|
|
+ vmovmskpd r11d,ymm13
|
|
+ shl r11b,4
|
|
+ or r12b,r11b
|
|
+
|
|
+ vpcmpgtq ymm14,ymm9,YMMWORD[$L$mask52x4]
|
|
+ vpcmpgtq ymm15,ymm10,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r11d,ymm14
|
|
+ vmovmskpd r10d,ymm15
|
|
+ shl r10b,4
|
|
+ or r11b,r10b
|
|
+
|
|
+ add r14b,r14b
|
|
+ adc r13b,r13b
|
|
+ adc r12b,r12b
|
|
+ adc r11b,r11b
|
|
+
|
|
+
|
|
+ vpcmpeqq ymm0,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpcmpeqq ymm1,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r9d,ymm0
|
|
+ vmovmskpd r8d,ymm1
|
|
+ shl r8b,4
|
|
+ or r9b,r8b
|
|
+
|
|
+ vpcmpeqq ymm2,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpcmpeqq ymm11,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r8d,ymm2
|
|
+ vmovmskpd edx,ymm11
|
|
+ shl dl,4
|
|
+ or r8b,dl
|
|
+
|
|
+ vpcmpeqq ymm12,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpcmpeqq ymm13,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd edx,ymm12
|
|
+ vmovmskpd ecx,ymm13
|
|
+ shl cl,4
|
|
+ or dl,cl
|
|
+
|
|
+ vpcmpeqq ymm14,ymm9,YMMWORD[$L$mask52x4]
|
|
+ vpcmpeqq ymm15,ymm10,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd ecx,ymm14
|
|
+ vmovmskpd ebx,ymm15
|
|
+ shl bl,4
|
|
+ or cl,bl
|
|
+
|
|
+ add r14b,r9b
|
|
+ adc r13b,r8b
|
|
+ adc r12b,dl
|
|
+ adc r11b,cl
|
|
+
|
|
+ xor r14b,r9b
|
|
+ xor r13b,r8b
|
|
+ xor r12b,dl
|
|
+ xor r11b,cl
|
|
+
|
|
+ lea rdx,[$L$kmasklut]
|
|
+
|
|
+ mov r10b,r14b
|
|
+ and r14,0xf
|
|
+ vpsubq ymm0,ymm3,YMMWORD[$L$mask52x4]
|
|
+ shl r14,5
|
|
+ vmovapd r14,(%rdx), %ymm2
|
|
+ vblendvpd ymm3,ymm3,ymm0,ymm2
|
|
+
|
|
+ shr r10b,4
|
|
+ and r10,0xf
|
|
+ vpsubq ymm0,ymm4,YMMWORD[$L$mask52x4]
|
|
+ shl r10,5
|
|
+ vmovapd r10,(%rdx), %ymm2
|
|
+ vblendvpd ymm4,ymm4,ymm0,ymm2
|
|
+
|
|
+ mov r10b,r13b
|
|
+ and r13,0xf
|
|
+ vpsubq ymm0,ymm5,YMMWORD[$L$mask52x4]
|
|
+ shl r13,5
|
|
+ vmovapd r13,(%rdx), %ymm2
|
|
+ vblendvpd ymm5,ymm5,ymm0,ymm2
|
|
+
|
|
+ shr r10b,4
|
|
+ and r10,0xf
|
|
+ vpsubq ymm0,ymm6,YMMWORD[$L$mask52x4]
|
|
+ shl r10,5
|
|
+ vmovapd r10,(%rdx), %ymm2
|
|
+ vblendvpd ymm6,ymm6,ymm0,ymm2
|
|
+
|
|
+ mov r10b,r12b
|
|
+ and r12,0xf
|
|
+ vpsubq ymm0,ymm7,YMMWORD[$L$mask52x4]
|
|
+ shl r12,5
|
|
+ vmovapd r12,(%rdx), %ymm2
|
|
+ vblendvpd ymm7,ymm7,ymm0,ymm2
|
|
+
|
|
+ shr r10b,4
|
|
+ and r10,0xf
|
|
+ vpsubq ymm0,ymm8,YMMWORD[$L$mask52x4]
|
|
+ shl r10,5
|
|
+ vmovapd r10,(%rdx), %ymm2
|
|
+ vblendvpd ymm8,ymm8,ymm0,ymm2
|
|
+
|
|
+ mov r10b,r11b
|
|
+ and r11,0xf
|
|
+ vpsubq ymm0,ymm9,YMMWORD[$L$mask52x4]
|
|
+ shl r11,5
|
|
+ vmovapd r11,(%rdx), %ymm2
|
|
+ vblendvpd ymm9,ymm9,ymm0,ymm2
|
|
+
|
|
+ shr r10b,4
|
|
+ and r10,0xf
|
|
+ vpsubq ymm0,ymm10,YMMWORD[$L$mask52x4]
|
|
+ shl r10,5
|
|
+ vmovapd r10,(%rdx), %ymm2
|
|
+ vblendvpd ymm10,ymm10,ymm0,ymm2
|
|
+
|
|
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
|
|
+
|
|
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
|
|
+
|
|
+ vmovdqu YMMWORD[256+rdi],ymm3
|
|
+ vmovdqu YMMWORD[288+rdi],ymm4
|
|
+ vmovdqu YMMWORD[320+rdi],ymm5
|
|
+ vmovdqu YMMWORD[352+rdi],ymm6
|
|
+ vmovdqu YMMWORD[384+rdi],ymm7
|
|
+ vmovdqu YMMWORD[416+rdi],ymm8
|
|
+ vmovdqu YMMWORD[448+rdi],ymm9
|
|
+ vmovdqu YMMWORD[480+rdi],ymm10
|
|
+
|
|
+ vzeroupper
|
|
+ lea rax,[rsp]
|
|
+
|
|
+ vmovapd xmm6,XMMWORD[rax]
|
|
+ vmovapd xmm7,XMMWORD[16+rax]
|
|
+ vmovapd xmm8,XMMWORD[32+rax]
|
|
+ vmovapd xmm9,XMMWORD[48+rax]
|
|
+ vmovapd xmm10,XMMWORD[64+rax]
|
|
+ vmovapd xmm11,XMMWORD[80+rax]
|
|
+ vmovapd xmm12,XMMWORD[96+rax]
|
|
+ vmovapd xmm13,XMMWORD[112+rax]
|
|
+ vmovapd xmm14,XMMWORD[128+rax]
|
|
+ vmovapd xmm15,XMMWORD[144+rax]
|
|
+ lea rax,[168+rsp]
|
|
+ mov r15,QWORD[rax]
|
|
+
|
|
+ mov r14,QWORD[8+rax]
|
|
+
|
|
+ mov r13,QWORD[16+rax]
|
|
+
|
|
+ mov r12,QWORD[24+rax]
|
|
+
|
|
+ mov rbp,QWORD[32+rax]
|
|
+
|
|
+ mov rbx,QWORD[40+rax]
|
|
+
|
|
+ lea rsp,[48+rax]
|
|
+
|
|
+$L$ossl_rsaz_amm52x30_x2_avxifma256_epilogue:
|
|
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
|
+ mov rsi,QWORD[16+rsp]
|
|
+ DB 0F3h,0C3h ;repret
|
|
+
|
|
+$L$SEH_end_ossl_rsaz_amm52x30_x2_avxifma256:
|
|
+section .text code align=64
|
|
+
|
|
+
|
|
+ALIGN 32
|
|
+global ossl_extract_multiplier_2x30_win5_avx
|
|
+;In: rcx = destination (512 bytes written), rdx = table read as 32 entries of 512 bytes, r8 / r9 = entry numbers for result bytes 0..255 / 256..511. Every entry is read and the wanted one is kept by compare+blend, so no load address depends on r8/r9. An entry number that matches no step leaves that half all-zero.
|
|
+ossl_extract_multiplier_2x30_win5_avx:
|
|
+;NOTE(review): ymm6-ymm13 are overwritten below and never restored, and there is no vzeroupper before the return. xmm6-xmm15 are callee-saved in the Microsoft x64 convention used by the WIN64 prologue/epilogue code elsewhere in this file - confirm, and fix in the generator if so.
|
|
+DB 243,15,30,250 ;endbr64
|
|
+ vmovapd ymm12,YMMWORD[$L$ones] ;ymm12 = {1,1,1,1}, the per-step increment
|
|
+ vmovq xmm8,r8
|
|
+ vpbroadcastq ymm10,xmm8 ;ymm10 = r8 in all four lanes
|
|
+ vmovq xmm8,r9
|
|
+ vpbroadcastq ymm11,xmm8 ;ymm11 = r9 in all four lanes
|
|
+ lea rax,[16384+rdx] ;rax = end of table (32 * 512 bytes)
|
|
+;Pass 1: select bytes 0..255 of entry number r8.
|
|
+
|
|
+ vpxor xmm0,xmm0,xmm0 ;zero ymm0 (the VEX form clears the upper half too)
|
|
+ vmovapd ymm9,ymm0 ;ymm9 = current entry number, starts at 0
|
|
+ vmovapd ymm1,ymm0 ;ymm0-ymm7 = result accumulators, all start at zero
|
|
+ vmovapd ymm2,ymm0
|
|
+ vmovapd ymm3,ymm0
|
|
+ vmovapd ymm4,ymm0
|
|
+ vmovapd ymm5,ymm0
|
|
+ vmovapd ymm6,ymm0
|
|
+ vmovapd ymm7,ymm0
|
|
+
|
|
+ALIGN 32
|
|
+$L$loop:
|
|
+ vpcmpeqq ymm13,ymm10,ymm9 ;ymm13 = all-ones when this is entry r8, else zero
|
|
+ vmovdqu ymm8,YMMWORD[rdx]
|
|
+
|
|
+ vblendvpd ymm0,ymm0,ymm8,ymm13 ;keep the loaded 32 bytes only when ymm13 is set
|
|
+ vmovdqu ymm8,YMMWORD[32+rdx]
|
|
+
|
|
+ vblendvpd ymm1,ymm1,ymm8,ymm13
|
|
+ vmovdqu ymm8,YMMWORD[64+rdx]
|
|
+
|
|
+ vblendvpd ymm2,ymm2,ymm8,ymm13
|
|
+ vmovdqu ymm8,YMMWORD[96+rdx]
|
|
+
|
|
+ vblendvpd ymm3,ymm3,ymm8,ymm13
|
|
+ vmovdqu ymm8,YMMWORD[128+rdx]
|
|
+
|
|
+ vblendvpd ymm4,ymm4,ymm8,ymm13
|
|
+ vmovdqu ymm8,YMMWORD[160+rdx]
|
|
+
|
|
+ vblendvpd ymm5,ymm5,ymm8,ymm13
|
|
+ vmovdqu ymm8,YMMWORD[192+rdx]
|
|
+
|
|
+ vblendvpd ymm6,ymm6,ymm8,ymm13
|
|
+ vmovdqu ymm8,YMMWORD[224+rdx]
|
|
+
|
|
+ vblendvpd ymm7,ymm7,ymm8,ymm13
|
|
+ vpaddq ymm9,ymm9,ymm12 ;next entry number
|
|
+ add rdx,512 ;next entry
|
|
+ cmp rax,rdx
|
|
+ jne NEAR $L$loop ;until the whole table has been read
|
|
+ vmovdqu YMMWORD[rcx],ymm0 ;store result bytes 0..255
|
|
+ vmovdqu YMMWORD[32+rcx],ymm1
|
|
+ vmovdqu YMMWORD[64+rcx],ymm2
|
|
+ vmovdqu YMMWORD[96+rcx],ymm3
|
|
+ vmovdqu YMMWORD[128+rcx],ymm4
|
|
+ vmovdqu YMMWORD[160+rcx],ymm5
|
|
+ vmovdqu YMMWORD[192+rcx],ymm6
|
|
+ vmovdqu YMMWORD[224+rcx],ymm7
|
|
+ lea rdx,[((-16384))+rax] ;rewind rdx to the start of the table
|
|
+;Pass 2: same scan for bytes 256..511 of entry number r9.
|
|
+
|
|
+ vpxor xmm0,xmm0,xmm0 ;zero ymm0 again
|
|
+ vmovapd ymm9,ymm0 ;restart the entry counter at 0
|
|
+ vmovapd ymm0,ymm0 ;register-to-itself move, has no effect
|
|
+ vmovapd ymm1,ymm0
|
|
+ vmovapd ymm2,ymm0
|
|
+ vmovapd ymm3,ymm0
|
|
+ vmovapd ymm4,ymm0
|
|
+ vmovapd ymm5,ymm0
|
|
+ vmovapd ymm6,ymm0
|
|
+ vmovapd ymm7,ymm0
|
|
+
|
|
+ALIGN 32
|
|
+$L$loop_8_15:
|
|
+ vpcmpeqq ymm13,ymm11,ymm9 ;ymm13 = all-ones when this is entry r9, else zero
|
|
+ vmovdqu ymm8,YMMWORD[256+rdx]
|
|
+
|
|
+ vblendvpd ymm0,ymm0,ymm8,ymm13
|
|
+ vmovdqu ymm8,YMMWORD[288+rdx]
|
|
+
|
|
+ vblendvpd ymm1,ymm1,ymm8,ymm13
|
|
+ vmovdqu ymm8,YMMWORD[320+rdx]
|
|
+
|
|
+ vblendvpd ymm2,ymm2,ymm8,ymm13
|
|
+ vmovdqu ymm8,YMMWORD[352+rdx]
|
|
+
|
|
+ vblendvpd ymm3,ymm3,ymm8,ymm13
|
|
+ vmovdqu ymm8,YMMWORD[384+rdx]
|
|
+
|
|
+ vblendvpd ymm4,ymm4,ymm8,ymm13
|
|
+ vmovdqu ymm8,YMMWORD[416+rdx]
|
|
+
|
|
+ vblendvpd ymm5,ymm5,ymm8,ymm13
|
|
+ vmovdqu ymm8,YMMWORD[448+rdx]
|
|
+
|
|
+ vblendvpd ymm6,ymm6,ymm8,ymm13
|
|
+ vmovdqu ymm8,YMMWORD[480+rdx]
|
|
+
|
|
+ vblendvpd ymm7,ymm7,ymm8,ymm13
|
|
+ vpaddq ymm9,ymm9,ymm12 ;next entry number
|
|
+ add rdx,512 ;next entry
|
|
+ cmp rax,rdx
|
|
+ jne NEAR $L$loop_8_15 ;until the whole table has been read
|
|
+ vmovdqu YMMWORD[256+rcx],ymm0 ;store result bytes 256..511
|
|
+ vmovdqu YMMWORD[288+rcx],ymm1
|
|
+ vmovdqu YMMWORD[320+rcx],ymm2
|
|
+ vmovdqu YMMWORD[352+rcx],ymm3
|
|
+ vmovdqu YMMWORD[384+rcx],ymm4
|
|
+ vmovdqu YMMWORD[416+rcx],ymm5
|
|
+ vmovdqu YMMWORD[448+rcx],ymm6
|
|
+ vmovdqu YMMWORD[480+rcx],ymm7
|
|
+
|
|
+ DB 0F3h,0C3h ;repret
|
|
+
|
|
+
|
|
+section .rdata rdata align=32
|
|
+ALIGN 32
|
|
+$L$ones: ;loaded into ymm12 by ossl_extract_multiplier_2x30_win5_avx as its entry-counter increment
|
|
+ DQ 1,1,1,1
|
|
+$L$zeros: ;four zero qwords; no reference to this label is visible nearby - TODO confirm whether it is used
|
|
+ DQ 0,0,0,0
|
|
+EXTERN __imp_RtlVirtualUnwind
|
|
+section .text code align=64 ;the handler below is code; without this switch it is assembled into the .rdata section opened for $L$ones/$L$zeros
|
|
+ALIGN 16
|
|
+rsaz_avx_handler: ;exception handler named by the .xdata records; reads r8 = CONTEXT* and r9 = DISPATCHER_CONTEXT*, returns eax = 1
|
|
+ push rsi ;save every general register the handler modifies
|
|
+ push rdi
|
|
+ push rbx
|
|
+ push rbp
|
|
+ push r12
|
|
+ push r13
|
|
+ push r14
|
|
+ push r15
|
|
+ pushfq
|
|
+ sub rsp,64 ;32 bytes of home space + 4 stack arguments for RtlVirtualUnwind
|
|
+
|
|
+ mov rax,QWORD[120+r8] ;context->Rax
|
|
+ mov rbx,QWORD[248+r8] ;context->Rip
|
|
+
|
|
+ mov rsi,QWORD[8+r9] ;disp->ImageBase
|
|
+ mov r11,QWORD[56+r9] ;disp->HandlerData (the two DD values after the handler address in .xdata)
|
|
+
|
|
+ mov r10d,DWORD[r11] ;HandlerData[0] = image-relative address of the function's _body label
|
|
+ lea r10,[r10*1+rsi]
|
|
+ cmp rbx,r10
|
|
+ jb NEAR $L$common_seh_tail ;Rip before _body: still in the prologue, use context->Rax as the frame base
|
|
+
|
|
+ mov r10d,DWORD[4+r11] ;HandlerData[1] = image-relative address of the _epilogue label
|
|
+ lea r10,[r10*1+rsi]
|
|
+ cmp rbx,r10
|
|
+ jae NEAR $L$common_seh_tail ;Rip at or past _epilogue: use context->Rax there as well
|
|
+
|
|
+ mov rax,QWORD[152+r8] ;inside the body: start from context->Rsp
|
|
+;NOTE(review): this assumes rsp is at its post-prologue value; the function bodies lower rsp temporarily around their scratch buffers, and a fault taken there would be unwound from the wrong base - confirm.
|
|
+ lea rsi,[rax] ;source = xmm6-xmm15 as saved at the bottom of the frame
|
|
+ lea rdi,[512+r8] ;destination = &context->Xmm6
|
|
+ mov ecx,20 ;20 qwords = 10 xmm registers
|
|
+ DD 0xa548f3fc ;cld; rep movsq
|
|
+
|
|
+ lea rax,[216+rax] ;168-byte xmm save area + 6 pushed registers = rsp at function entry
|
|
+
|
|
+ mov rbx,QWORD[((-8))+rax] ;reload the registers pushed by the prologue
|
|
+ mov rbp,QWORD[((-16))+rax]
|
|
+ mov r12,QWORD[((-24))+rax]
|
|
+ mov r13,QWORD[((-32))+rax]
|
|
+ mov r14,QWORD[((-40))+rax]
|
|
+ mov r15,QWORD[((-48))+rax]
|
|
+ mov QWORD[144+r8],rbx ;context->Rbx
|
|
+ mov QWORD[160+r8],rbp ;context->Rbp
|
|
+ mov QWORD[216+r8],r12 ;context->R12
|
|
+ mov QWORD[224+r8],r13 ;context->R13
|
|
+ mov QWORD[232+r8],r14 ;context->R14
|
|
+ mov QWORD[240+r8],r15 ;context->R15
|
|
+
|
|
+$L$common_seh_tail:
|
|
+ mov rdi,QWORD[8+rax] ;rdi/rsi as stored above the return address by the WIN64 prologue
|
|
+ mov rsi,QWORD[16+rax]
|
|
+ mov QWORD[152+r8],rax ;context->Rsp
|
|
+ mov QWORD[168+r8],rsi ;context->Rsi
|
|
+ mov QWORD[176+r8],rdi ;context->Rdi
|
|
+
|
|
+ mov rdi,QWORD[40+r9] ;destination = disp->ContextRecord
|
|
+ mov rsi,r8 ;source = the CONTEXT just patched
|
|
+ mov ecx,154 ;154 qwords = sizeof(CONTEXT)
|
|
+ DD 0xa548f3fc ;cld; rep movsq
|
|
+
|
|
+ mov rsi,r9 ;keep disp in rsi, r9 is reused as an argument below
|
|
+ xor rcx,rcx ;arg1 = 0 (UNW_FLAG_NHANDLER)
|
|
+ mov rdx,QWORD[8+rsi] ;arg2 = disp->ImageBase
|
|
+ mov r8,QWORD[rsi] ;arg3 = disp->ControlPc
|
|
+ mov r9,QWORD[16+rsi] ;arg4 = disp->FunctionEntry
|
|
+ mov r10,QWORD[40+rsi] ;disp->ContextRecord
|
|
+ lea r11,[56+rsi] ;&disp->HandlerData
|
|
+ lea r12,[24+rsi] ;&disp->EstablisherFrame
|
|
+ mov QWORD[32+rsp],r10 ;arg5
|
|
+ mov QWORD[40+rsp],r11 ;arg6
|
|
+ mov QWORD[48+rsp],r12 ;arg7
|
|
+ mov QWORD[56+rsp],rcx ;arg8 = NULL
|
|
+ call QWORD[__imp_RtlVirtualUnwind]
|
|
+
|
|
+ mov eax,1 ;ExceptionContinueSearch
|
|
+ add rsp,64
|
|
+ popfq
|
|
+ pop r15
|
|
+ pop r14
|
|
+ pop r13
|
|
+ pop r12
|
|
+ pop rbp
|
|
+ pop rbx
|
|
+ pop rdi
|
|
+ pop rsi
|
|
+ DB 0F3h,0C3h ;repret
|
|
+
|
|
+
|
|
+section .pdata rdata align=4 ;function table: one (begin, end, unwind-info) triple of image-relative addresses per function
|
|
+ALIGN 4
|
|
+ DD $L$SEH_begin_ossl_rsaz_amm52x30_x1_avxifma256 wrt ..imagebase
|
|
+ DD $L$SEH_end_ossl_rsaz_amm52x30_x1_avxifma256 wrt ..imagebase
|
|
+ DD $L$SEH_info_ossl_rsaz_amm52x30_x1_avxifma256 wrt ..imagebase
|
|
+;ossl_extract_multiplier_2x30_win5_avx has no entry in this table.
|
|
+ DD $L$SEH_begin_ossl_rsaz_amm52x30_x2_avxifma256 wrt ..imagebase
|
|
+ DD $L$SEH_end_ossl_rsaz_amm52x30_x2_avxifma256 wrt ..imagebase
|
|
+ DD $L$SEH_info_ossl_rsaz_amm52x30_x2_avxifma256 wrt ..imagebase
|
|
+
|
|
+section .xdata rdata align=8 ;unwind info records referenced from the .pdata table
|
|
+ALIGN 8
|
|
+$L$SEH_info_ossl_rsaz_amm52x30_x1_avxifma256:
|
|
+DB 9,0,0,0 ;UNWIND_INFO header: version 1, flags 1 (UNW_FLAG_EHANDLER), prologue size 0, no unwind codes, no frame register
|
|
+ DD rsaz_avx_handler wrt ..imagebase ;exception handler for this function
|
|
+ DD $L$ossl_rsaz_amm52x30_x1_avxifma256_body wrt ..imagebase,$L$ossl_rsaz_amm52x30_x1_avxifma256_epilogue wrt ..imagebase ;handler data: rsaz_avx_handler compares the faulting Rip against these two addresses
|
|
+$L$SEH_info_ossl_rsaz_amm52x30_x2_avxifma256:
|
|
+DB 9,0,0,0 ;same header as above
|
|
+ DD rsaz_avx_handler wrt ..imagebase ;exception handler for this function
|
|
+ DD $L$ossl_rsaz_amm52x30_x2_avxifma256_body wrt ..imagebase,$L$ossl_rsaz_amm52x30_x2_avxifma256_epilogue wrt ..imagebase ;handler data: body start and epilogue start
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-4k-avxifma.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-4k-avxifma.nasm
|
|
new file mode 100644
|
|
index 0000000000..ec91662d3b
|
|
--- /dev/null
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-4k-avxifma.nasm
|
|
@@ -0,0 +1,2081 @@
|
|
+default rel
|
|
+%define XMMWORD
|
|
+%define YMMWORD
|
|
+%define ZMMWORD
|
|
+section .text code align=64
|
|
+
|
|
+
|
|
+global ossl_rsaz_amm52x40_x1_avxifma256
|
|
+
|
|
+ALIGN 32
|
|
+ossl_rsaz_amm52x40_x1_avxifma256:
|
|
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
|
|
+ mov QWORD[16+rsp],rsi
|
|
+ mov rax,rsp
|
|
+$L$SEH_begin_ossl_rsaz_amm52x40_x1_avxifma256:
|
|
+ mov rdi,rcx
|
|
+ mov rsi,rdx
|
|
+ mov rdx,r8
|
|
+ mov rcx,r9
|
|
+ mov r8,QWORD[40+rsp]
|
|
+
|
|
+
|
|
+
|
|
+DB 243,15,30,250
|
|
+ push rbx
|
|
+
|
|
+ push rbp
|
|
+
|
|
+ push r12
|
|
+
|
|
+ push r13
|
|
+
|
|
+ push r14
|
|
+
|
|
+ push r15
|
|
+
|
|
+ lea rsp,[((-168))+rsp]
|
|
+ vmovapd XMMWORD[rsp],xmm6
|
|
+ vmovapd XMMWORD[16+rsp],xmm7
|
|
+ vmovapd XMMWORD[32+rsp],xmm8
|
|
+ vmovapd XMMWORD[48+rsp],xmm9
|
|
+ vmovapd XMMWORD[64+rsp],xmm10
|
|
+ vmovapd XMMWORD[80+rsp],xmm11
|
|
+ vmovapd XMMWORD[96+rsp],xmm12
|
|
+ vmovapd XMMWORD[112+rsp],xmm13
|
|
+ vmovapd XMMWORD[128+rsp],xmm14
|
|
+ vmovapd XMMWORD[144+rsp],xmm15
|
|
+$L$ossl_rsaz_amm52x40_x1_avxifma256_body:
|
|
+
|
|
+ vpxor ymm0,ymm0,ymm0
|
|
+ vmovapd ymm3,ymm0
|
|
+ vmovapd ymm4,ymm0
|
|
+ vmovapd ymm5,ymm0
|
|
+ vmovapd ymm6,ymm0
|
|
+ vmovapd ymm7,ymm0
|
|
+ vmovapd ymm8,ymm0
|
|
+ vmovapd ymm9,ymm0
|
|
+ vmovapd ymm10,ymm0
|
|
+ vmovapd ymm11,ymm0
|
|
+ vmovapd ymm12,ymm0
|
|
+
|
|
+ xor r9d,r9d
|
|
+
|
|
+ mov r11,rdx
|
|
+ mov rax,0xfffffffffffff
|
|
+
|
|
+
|
|
+ mov ebx,10
|
|
+
|
|
+ALIGN 32
|
|
+$L$loop10:
|
|
+ mov r13,QWORD[r11]
|
|
+
|
|
+ vpbroadcastq ymm1,QWORD[r11]
|
|
+ mov rdx,QWORD[rsi]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ mov r10,r12
|
|
+ adc r10,0
|
|
+
|
|
+ mov r13,r8
|
|
+ imul r13,r9
|
|
+ and r13,rax
|
|
+
|
|
+ vmovq xmm2,r13
|
|
+ vpbroadcastq ymm2,xmm2
|
|
+ mov rdx,QWORD[rcx]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ adc r10,r12
|
|
+
|
|
+ shr r9,52
|
|
+ sal r10,12
|
|
+ or r9,r10
|
|
+
|
|
+ lea rsp,[((-328))+rsp]
|
|
+
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10
|
|
+{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm11
|
|
+{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm12
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10
|
|
+{vex} vpmadd52luq 256(%rcx), %ymm2, %ymm11
|
|
+{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm12
|
|
+ vmovdqu YMMWORD[rsp],ymm3
|
|
+ vmovdqu YMMWORD[32+rsp],ymm4
|
|
+ vmovdqu YMMWORD[64+rsp],ymm5
|
|
+ vmovdqu YMMWORD[96+rsp],ymm6
|
|
+ vmovdqu YMMWORD[128+rsp],ymm7
|
|
+ vmovdqu YMMWORD[160+rsp],ymm8
|
|
+ vmovdqu YMMWORD[192+rsp],ymm9
|
|
+ vmovdqu YMMWORD[224+rsp],ymm10
|
|
+ vmovdqu YMMWORD[256+rsp],ymm11
|
|
+ vmovdqu YMMWORD[288+rsp],ymm12
|
|
+ mov QWORD[320+rsp],0
|
|
+
|
|
+ vmovdqu ymm3,YMMWORD[8+rsp]
|
|
+ vmovdqu ymm4,YMMWORD[40+rsp]
|
|
+ vmovdqu ymm5,YMMWORD[72+rsp]
|
|
+ vmovdqu ymm6,YMMWORD[104+rsp]
|
|
+ vmovdqu ymm7,YMMWORD[136+rsp]
|
|
+ vmovdqu ymm8,YMMWORD[168+rsp]
|
|
+ vmovdqu ymm9,YMMWORD[200+rsp]
|
|
+ vmovdqu ymm10,YMMWORD[232+rsp]
|
|
+ vmovdqu ymm11,YMMWORD[264+rsp]
|
|
+ vmovdqu ymm12,YMMWORD[296+rsp]
|
|
+
|
|
+ add r9,QWORD[8+rsp]
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10
|
|
+{vex} vpmadd52huq 256(%rsi), %ymm1, %ymm11
|
|
+{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm12
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10
|
|
+{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm11
|
|
+{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm12
|
|
+ lea rsp,[328+rsp]
|
|
+ mov r13,QWORD[8+r11]
|
|
+
|
|
+ vpbroadcastq ymm1,QWORD[8+r11]
|
|
+ mov rdx,QWORD[rsi]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ mov r10,r12
|
|
+ adc r10,0
|
|
+
|
|
+ mov r13,r8
|
|
+ imul r13,r9
|
|
+ and r13,rax
|
|
+
|
|
+ vmovq xmm2,r13
|
|
+ vpbroadcastq ymm2,xmm2
|
|
+ mov rdx,QWORD[rcx]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ adc r10,r12
|
|
+
|
|
+ shr r9,52
|
|
+ sal r10,12
|
|
+ or r9,r10
|
|
+
|
|
+ lea rsp,[((-328))+rsp]
|
|
+
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10
|
|
+{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm11
|
|
+{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm12
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10
|
|
+{vex} vpmadd52luq 256(%rcx), %ymm2, %ymm11
|
|
+{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm12
|
|
+ vmovdqu YMMWORD[rsp],ymm3
|
|
+ vmovdqu YMMWORD[32+rsp],ymm4
|
|
+ vmovdqu YMMWORD[64+rsp],ymm5
|
|
+ vmovdqu YMMWORD[96+rsp],ymm6
|
|
+ vmovdqu YMMWORD[128+rsp],ymm7
|
|
+ vmovdqu YMMWORD[160+rsp],ymm8
|
|
+ vmovdqu YMMWORD[192+rsp],ymm9
|
|
+ vmovdqu YMMWORD[224+rsp],ymm10
|
|
+ vmovdqu YMMWORD[256+rsp],ymm11
|
|
+ vmovdqu YMMWORD[288+rsp],ymm12
|
|
+ mov QWORD[320+rsp],0
|
|
+
|
|
+ vmovdqu ymm3,YMMWORD[8+rsp]
|
|
+ vmovdqu ymm4,YMMWORD[40+rsp]
|
|
+ vmovdqu ymm5,YMMWORD[72+rsp]
|
|
+ vmovdqu ymm6,YMMWORD[104+rsp]
|
|
+ vmovdqu ymm7,YMMWORD[136+rsp]
|
|
+ vmovdqu ymm8,YMMWORD[168+rsp]
|
|
+ vmovdqu ymm9,YMMWORD[200+rsp]
|
|
+ vmovdqu ymm10,YMMWORD[232+rsp]
|
|
+ vmovdqu ymm11,YMMWORD[264+rsp]
|
|
+ vmovdqu ymm12,YMMWORD[296+rsp]
|
|
+
|
|
+ add r9,QWORD[8+rsp]
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10
|
|
+{vex} vpmadd52huq 256(%rsi), %ymm1, %ymm11
|
|
+{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm12
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10
|
|
+{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm11
|
|
+{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm12
|
|
+ lea rsp,[328+rsp]
|
|
+ mov r13,QWORD[16+r11]
|
|
+
|
|
+ vpbroadcastq ymm1,QWORD[16+r11]
|
|
+ mov rdx,QWORD[rsi]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ mov r10,r12
|
|
+ adc r10,0
|
|
+
|
|
+ mov r13,r8
|
|
+ imul r13,r9
|
|
+ and r13,rax
|
|
+
|
|
+ vmovq xmm2,r13
|
|
+ vpbroadcastq ymm2,xmm2
|
|
+ mov rdx,QWORD[rcx]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ adc r10,r12
|
|
+
|
|
+ shr r9,52
|
|
+ sal r10,12
|
|
+ or r9,r10
|
|
+
|
|
+ lea rsp,[((-328))+rsp]
|
|
+
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10
|
|
+{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm11
|
|
+{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm12
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10
|
|
+{vex} vpmadd52luq 256(%rcx), %ymm2, %ymm11
|
|
+{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm12
|
|
+ vmovdqu YMMWORD[rsp],ymm3
|
|
+ vmovdqu YMMWORD[32+rsp],ymm4
|
|
+ vmovdqu YMMWORD[64+rsp],ymm5
|
|
+ vmovdqu YMMWORD[96+rsp],ymm6
|
|
+ vmovdqu YMMWORD[128+rsp],ymm7
|
|
+ vmovdqu YMMWORD[160+rsp],ymm8
|
|
+ vmovdqu YMMWORD[192+rsp],ymm9
|
|
+ vmovdqu YMMWORD[224+rsp],ymm10
|
|
+ vmovdqu YMMWORD[256+rsp],ymm11
|
|
+ vmovdqu YMMWORD[288+rsp],ymm12
|
|
+ mov QWORD[320+rsp],0
|
|
+
|
|
+ vmovdqu ymm3,YMMWORD[8+rsp]
|
|
+ vmovdqu ymm4,YMMWORD[40+rsp]
|
|
+ vmovdqu ymm5,YMMWORD[72+rsp]
|
|
+ vmovdqu ymm6,YMMWORD[104+rsp]
|
|
+ vmovdqu ymm7,YMMWORD[136+rsp]
|
|
+ vmovdqu ymm8,YMMWORD[168+rsp]
|
|
+ vmovdqu ymm9,YMMWORD[200+rsp]
|
|
+ vmovdqu ymm10,YMMWORD[232+rsp]
|
|
+ vmovdqu ymm11,YMMWORD[264+rsp]
|
|
+ vmovdqu ymm12,YMMWORD[296+rsp]
|
|
+
|
|
+ add r9,QWORD[8+rsp]
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10
|
|
+{vex} vpmadd52huq 256(%rsi), %ymm1, %ymm11
|
|
+{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm12
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10
|
|
+{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm11
|
|
+{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm12
|
|
+ lea rsp,[328+rsp]
|
|
+ mov r13,QWORD[24+r11]
|
|
+
|
|
+ vpbroadcastq ymm1,QWORD[24+r11]
|
|
+ mov rdx,QWORD[rsi]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ mov r10,r12
|
|
+ adc r10,0
|
|
+
|
|
+ mov r13,r8
|
|
+ imul r13,r9
|
|
+ and r13,rax
|
|
+
|
|
+ vmovq xmm2,r13
|
|
+ vpbroadcastq ymm2,xmm2
|
|
+ mov rdx,QWORD[rcx]
|
|
+ mulx r12,r13,r13
|
|
+ add r9,r13
|
|
+ adc r10,r12
|
|
+
|
|
+ shr r9,52
|
|
+ sal r10,12
|
|
+ or r9,r10
|
|
+
|
|
+ lea rsp,[((-328))+rsp]
|
|
+
|
|
+{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10
|
|
+{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm11
|
|
+{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm12
|
|
+
|
|
+{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10
|
|
+{vex} vpmadd52luq 256(%rcx), %ymm2, %ymm11
|
|
+{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm12
|
|
+ vmovdqu YMMWORD[rsp],ymm3
|
|
+ vmovdqu YMMWORD[32+rsp],ymm4
|
|
+ vmovdqu YMMWORD[64+rsp],ymm5
|
|
+ vmovdqu YMMWORD[96+rsp],ymm6
|
|
+ vmovdqu YMMWORD[128+rsp],ymm7
|
|
+ vmovdqu YMMWORD[160+rsp],ymm8
|
|
+ vmovdqu YMMWORD[192+rsp],ymm9
|
|
+ vmovdqu YMMWORD[224+rsp],ymm10
|
|
+ vmovdqu YMMWORD[256+rsp],ymm11
|
|
+ vmovdqu YMMWORD[288+rsp],ymm12
|
|
+ mov QWORD[320+rsp],0
|
|
+
|
|
+ vmovdqu ymm3,YMMWORD[8+rsp]
|
|
+ vmovdqu ymm4,YMMWORD[40+rsp]
|
|
+ vmovdqu ymm5,YMMWORD[72+rsp]
|
|
+ vmovdqu ymm6,YMMWORD[104+rsp]
|
|
+ vmovdqu ymm7,YMMWORD[136+rsp]
|
|
+ vmovdqu ymm8,YMMWORD[168+rsp]
|
|
+ vmovdqu ymm9,YMMWORD[200+rsp]
|
|
+ vmovdqu ymm10,YMMWORD[232+rsp]
|
|
+ vmovdqu ymm11,YMMWORD[264+rsp]
|
|
+ vmovdqu ymm12,YMMWORD[296+rsp]
|
|
+
|
|
+ add r9,QWORD[8+rsp]
|
|
+
|
|
+{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3
|
|
+{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4
|
|
+{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5
|
|
+{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6
|
|
+{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7
|
|
+{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8
|
|
+{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9
|
|
+{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10
|
|
+{vex} vpmadd52huq 256(%rsi), %ymm1, %ymm11
|
|
+{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm12
|
|
+
|
|
+{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3
|
|
+{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4
|
|
+{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5
|
|
+{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6
|
|
+{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7
|
|
+{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8
|
|
+{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9
|
|
+{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10
|
|
+{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm11
|
|
+{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm12
|
|
+ lea rsp,[328+rsp]
|
|
+ lea r11,[32+r11]
|
|
+ dec ebx
|
|
+ jne NEAR $L$loop10
|
|
+
|
|
+ vmovq xmm0,r9
|
|
+ vpbroadcastq ymm0,xmm0
|
|
+ vpblendd ymm3,ymm3,ymm0,3
|
|
+
|
|
+ lea rsp,[((-640))+rsp]
|
|
+ vmovupd YMMWORD[rsp],ymm3
|
|
+ vmovupd YMMWORD[32+rsp],ymm4
|
|
+ vmovupd YMMWORD[64+rsp],ymm5
|
|
+ vmovupd YMMWORD[96+rsp],ymm6
|
|
+ vmovupd YMMWORD[128+rsp],ymm7
|
|
+ vmovupd YMMWORD[160+rsp],ymm8
|
|
+ vmovupd YMMWORD[192+rsp],ymm9
|
|
+ vmovupd YMMWORD[224+rsp],ymm10
|
|
+ vmovupd YMMWORD[256+rsp],ymm11
|
|
+ vmovupd YMMWORD[288+rsp],ymm12
|
|
+
|
|
+
|
|
+
|
|
+ vpsrlq ymm3,ymm3,52
|
|
+ vpsrlq ymm4,ymm4,52
|
|
+ vpsrlq ymm5,ymm5,52
|
|
+ vpsrlq ymm6,ymm6,52
|
|
+ vpsrlq ymm7,ymm7,52
|
|
+ vpsrlq ymm8,ymm8,52
|
|
+ vpsrlq ymm9,ymm9,52
|
|
+ vpsrlq ymm10,ymm10,52
|
|
+ vpsrlq ymm11,ymm11,52
|
|
+ vpsrlq ymm12,ymm12,52
|
|
+
|
|
+
|
|
+ vpermq ymm12,ymm12,144
|
|
+ vpermq ymm13,ymm11,3
|
|
+ vblendpd ymm12,ymm12,ymm13,1
|
|
+
|
|
+ vpermq ymm11,ymm11,144
|
|
+ vpermq ymm13,ymm10,3
|
|
+ vblendpd ymm11,ymm11,ymm13,1
|
|
+
|
|
+ vpermq ymm10,ymm10,144
|
|
+ vpermq ymm13,ymm9,3
|
|
+ vblendpd ymm10,ymm10,ymm13,1
|
|
+
|
|
+ vpermq ymm9,ymm9,144
|
|
+ vpermq ymm13,ymm8,3
|
|
+ vblendpd ymm9,ymm9,ymm13,1
|
|
+
|
|
+ vpermq ymm8,ymm8,144
|
|
+ vpermq ymm13,ymm7,3
|
|
+ vblendpd ymm8,ymm8,ymm13,1
|
|
+
|
|
+ vpermq ymm7,ymm7,144
|
|
+ vpermq ymm13,ymm6,3
|
|
+ vblendpd ymm7,ymm7,ymm13,1
|
|
+
|
|
+ vpermq ymm6,ymm6,144
|
|
+ vpermq ymm13,ymm5,3
|
|
+ vblendpd ymm6,ymm6,ymm13,1
|
|
+
|
|
+ vpermq ymm5,ymm5,144
|
|
+ vpermq ymm13,ymm4,3
|
|
+ vblendpd ymm5,ymm5,ymm13,1
|
|
+
|
|
+ vpermq ymm4,ymm4,144
|
|
+ vpermq ymm13,ymm3,3
|
|
+ vblendpd ymm4,ymm4,ymm13,1
|
|
+
|
|
+ vpermq ymm3,ymm3,144
|
|
+ vpand ymm3,ymm3,YMMWORD[$L$high64x3]
|
|
+
|
|
+ vmovupd YMMWORD[320+rsp],ymm3
|
|
+ vmovupd YMMWORD[352+rsp],ymm4
|
|
+ vmovupd YMMWORD[384+rsp],ymm5
|
|
+ vmovupd YMMWORD[416+rsp],ymm6
|
|
+ vmovupd YMMWORD[448+rsp],ymm7
|
|
+ vmovupd YMMWORD[480+rsp],ymm8
|
|
+ vmovupd YMMWORD[512+rsp],ymm9
|
|
+ vmovupd YMMWORD[544+rsp],ymm10
|
|
+ vmovupd YMMWORD[576+rsp],ymm11
|
|
+ vmovupd YMMWORD[608+rsp],ymm12
|
|
+
|
|
+ vmovupd ymm3,YMMWORD[rsp]
|
|
+ vmovupd ymm4,YMMWORD[32+rsp]
|
|
+ vmovupd ymm5,YMMWORD[64+rsp]
|
|
+ vmovupd ymm6,YMMWORD[96+rsp]
|
|
+ vmovupd ymm7,YMMWORD[128+rsp]
|
|
+ vmovupd ymm8,YMMWORD[160+rsp]
|
|
+ vmovupd ymm9,YMMWORD[192+rsp]
|
|
+ vmovupd ymm10,YMMWORD[224+rsp]
|
|
+ vmovupd ymm11,YMMWORD[256+rsp]
|
|
+ vmovupd ymm12,YMMWORD[288+rsp]
|
|
+
|
|
+
|
|
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm11,ymm11,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm12,ymm12,YMMWORD[$L$mask52x4]
|
|
+
|
|
+
|
|
+ vpaddq ymm3,ymm3,YMMWORD[320+rsp]
|
|
+ vpaddq ymm4,ymm4,YMMWORD[352+rsp]
|
|
+ vpaddq ymm5,ymm5,YMMWORD[384+rsp]
|
|
+ vpaddq ymm6,ymm6,YMMWORD[416+rsp]
|
|
+ vpaddq ymm7,ymm7,YMMWORD[448+rsp]
|
|
+ vpaddq ymm8,ymm8,YMMWORD[480+rsp]
|
|
+ vpaddq ymm9,ymm9,YMMWORD[512+rsp]
|
|
+ vpaddq ymm10,ymm10,YMMWORD[544+rsp]
|
|
+ vpaddq ymm11,ymm11,YMMWORD[576+rsp]
|
|
+ vpaddq ymm12,ymm12,YMMWORD[608+rsp]
|
|
+
|
|
+ lea rsp,[640+rsp]
|
|
+
|
|
+
|
|
+
|
|
+ vpcmpgtq ymm13,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r14d,ymm13
|
|
+ vpcmpgtq ymm13,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r13d,ymm13
|
|
+ shl r13b,4
|
|
+ or r14b,r13b
|
|
+
|
|
+ vpcmpgtq ymm13,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r13d,ymm13
|
|
+ vpcmpgtq ymm13,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r12d,ymm13
|
|
+ shl r12b,4
|
|
+ or r13b,r12b
|
|
+
|
|
+ vpcmpgtq ymm13,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r12d,ymm13
|
|
+ vpcmpgtq ymm13,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r11d,ymm13
|
|
+ shl r11b,4
|
|
+ or r12b,r11b
|
|
+
|
|
+ vpcmpgtq ymm13,ymm9,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r11d,ymm13
|
|
+ vpcmpgtq ymm13,ymm10,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r10d,ymm13
|
|
+ shl r10b,4
|
|
+ or r11b,r10b
|
|
+
|
|
+ vpcmpgtq ymm13,ymm11,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r10d,ymm13
|
|
+ vpcmpgtq ymm13,ymm12,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r9d,ymm13
|
|
+ shl r9b,4
|
|
+ or r10b,r9b
|
|
+
|
|
+ add r14b,r14b
|
|
+ adc r13b,r13b
|
|
+ adc r12b,r12b
|
|
+ adc r11b,r11b
|
|
+ adc r10b,r10b
|
|
+
|
|
+
|
|
+ vpcmpeqq ymm13,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r9d,ymm13
|
|
+ vpcmpeqq ymm13,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r8d,ymm13
|
|
+ shl r8b,4
|
|
+ or r9b,r8b
|
|
+
|
|
+ vpcmpeqq ymm13,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r8d,ymm13
|
|
+ vpcmpeqq ymm13,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd edx,ymm13
|
|
+ shl dl,4
|
|
+ or r8b,dl
|
|
+
|
|
+ vpcmpeqq ymm13,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd edx,ymm13
|
|
+ vpcmpeqq ymm13,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd ecx,ymm13
|
|
+ shl cl,4
|
|
+ or dl,cl
|
|
+
|
|
+ vpcmpeqq ymm13,ymm9,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd ecx,ymm13
|
|
+ vpcmpeqq ymm13,ymm10,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd ebx,ymm13
|
|
+ shl bl,4
|
|
+ or cl,bl
|
|
+
|
|
+ vpcmpeqq ymm13,ymm11,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd ebx,ymm13
|
|
+ vpcmpeqq ymm13,ymm12,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd eax,ymm13
|
|
+ shl al,4
|
|
+ or bl,al
|
|
+
|
|
+ add r14b,r9b
|
|
+ adc r13b,r8b
|
|
+ adc r12b,dl
|
|
+ adc r11b,cl
|
|
+ adc r10b,bl
|
|
+
|
|
+ xor r14b,r9b
|
|
+ xor r13b,r8b
|
|
+ xor r12b,dl
|
|
+ xor r11b,cl
|
|
+ xor r10b,bl
|
|
+
|
|
+ push r9
|
|
+ push r8
|
|
+
|
|
+ lea r8,[$L$kmasklut]
|
|
+
|
|
+ mov r9b,r14b
|
|
+ and r14,0xf
|
|
+ vpsubq ymm13,ymm3,YMMWORD[$L$mask52x4]
|
|
+ shl r14,5
|
|
+ vmovapd r14,(%r8), %ymm14
|
|
+ vblendvpd ymm3,ymm3,ymm13,ymm14
|
|
+
|
|
+ shr r9b,4
|
|
+ and r9,0xf
|
|
+ vpsubq ymm13,ymm4,YMMWORD[$L$mask52x4]
|
|
+ shl r9,5
|
|
+ vmovapd r9,(%r8), %ymm14
|
|
+ vblendvpd ymm4,ymm4,ymm13,ymm14
|
|
+
|
|
+ mov r9b,r13b
|
|
+ and r13,0xf
|
|
+ vpsubq ymm13,ymm5,YMMWORD[$L$mask52x4]
|
|
+ shl r13,5
|
|
+ vmovapd r13,(%r8), %ymm14
|
|
+ vblendvpd ymm5,ymm5,ymm13,ymm14
|
|
+
|
|
+ shr r9b,4
|
|
+ and r9,0xf
|
|
+ vpsubq ymm13,ymm6,YMMWORD[$L$mask52x4]
|
|
+ shl r9,5
|
|
+ vmovapd r9,(%r8), %ymm14
|
|
+ vblendvpd ymm6,ymm6,ymm13,ymm14
|
|
+
|
|
+ mov r9b,r12b
|
|
+ and r12,0xf
|
|
+ vpsubq ymm13,ymm7,YMMWORD[$L$mask52x4]
|
|
+ shl r12,5
|
|
+ vmovapd r12,(%r8), %ymm14
|
|
+ vblendvpd ymm7,ymm7,ymm13,ymm14
|
|
+
|
|
+ shr r9b,4
|
|
+ and r9,0xf
|
|
+ vpsubq ymm13,ymm8,YMMWORD[$L$mask52x4]
|
|
+ shl r9,5
|
|
+ vmovapd r9,(%r8), %ymm14
|
|
+ vblendvpd ymm8,ymm8,ymm13,ymm14
|
|
+
|
|
+ mov r9b,r11b
|
|
+ and r11,0xf
|
|
+ vpsubq ymm13,ymm9,YMMWORD[$L$mask52x4]
|
|
+ shl r11,5
|
|
+ vmovapd r11,(%r8), %ymm14
|
|
+ vblendvpd ymm9,ymm9,ymm13,ymm14
|
|
+
|
|
+ shr r9b,4
|
|
+ and r9,0xf
|
|
+ vpsubq ymm13,ymm10,YMMWORD[$L$mask52x4]
|
|
+ shl r9,5
|
|
+ vmovapd r9,(%r8), %ymm14
|
|
+ vblendvpd ymm10,ymm10,ymm13,ymm14
|
|
+
|
|
+ mov r9b,r10b
|
|
+ and r10,0xf
|
|
+ vpsubq ymm13,ymm11,YMMWORD[$L$mask52x4]
|
|
+ shl r10,5
|
|
+ vmovapd r10,(%r8), %ymm14
|
|
+ vblendvpd ymm11,ymm11,ymm13,ymm14
|
|
+
|
|
+ shr r9b,4
|
|
+ and r9,0xf
|
|
+ vpsubq ymm13,ymm12,YMMWORD[$L$mask52x4]
|
|
+ shl r9,5
|
|
+ vmovapd r9,(%r8), %ymm14
|
|
+ vblendvpd ymm12,ymm12,ymm13,ymm14
|
|
+
|
|
+ pop r8
|
|
+ pop r9
|
|
+
|
|
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
|
|
+
|
|
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm11,ymm11,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm12,ymm12,YMMWORD[$L$mask52x4]
|
|
+
|
|
+ vmovdqu YMMWORD[rdi],ymm3
|
|
+ vmovdqu YMMWORD[32+rdi],ymm4
|
|
+ vmovdqu YMMWORD[64+rdi],ymm5
|
|
+ vmovdqu YMMWORD[96+rdi],ymm6
|
|
+ vmovdqu YMMWORD[128+rdi],ymm7
|
|
+ vmovdqu YMMWORD[160+rdi],ymm8
|
|
+ vmovdqu YMMWORD[192+rdi],ymm9
|
|
+ vmovdqu YMMWORD[224+rdi],ymm10
|
|
+ vmovdqu YMMWORD[256+rdi],ymm11
|
|
+ vmovdqu YMMWORD[288+rdi],ymm12
|
|
+
|
|
+ vzeroupper
|
|
+ lea rax,[rsp]
|
|
+
|
|
+ vmovapd xmm6,XMMWORD[rax]
|
|
+ vmovapd xmm7,XMMWORD[16+rax]
|
|
+ vmovapd xmm8,XMMWORD[32+rax]
|
|
+ vmovapd xmm9,XMMWORD[48+rax]
|
|
+ vmovapd xmm10,XMMWORD[64+rax]
|
|
+ vmovapd xmm11,XMMWORD[80+rax]
|
|
+ vmovapd xmm12,XMMWORD[96+rax]
|
|
+ vmovapd xmm13,XMMWORD[112+rax]
|
|
+ vmovapd xmm14,XMMWORD[128+rax]
|
|
+ vmovapd xmm15,XMMWORD[144+rax]
|
|
+ lea rax,[168+rsp]
|
|
+ mov r15,QWORD[rax]
|
|
+
|
|
+ mov r14,QWORD[8+rax]
|
|
+
|
|
+ mov r13,QWORD[16+rax]
|
|
+
|
|
+ mov r12,QWORD[24+rax]
|
|
+
|
|
+ mov rbp,QWORD[32+rax]
|
|
+
|
|
+ mov rbx,QWORD[40+rax]
|
|
+
|
|
+ lea rsp,[48+rax]
|
|
+
|
|
+$L$ossl_rsaz_amm52x40_x1_avxifma256_epilogue:
|
|
+
|
|
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
|
+ mov rsi,QWORD[16+rsp]
|
|
+ DB 0F3h,0C3h ;repret
|
|
+
|
|
+$L$SEH_end_ossl_rsaz_amm52x40_x1_avxifma256:
|
|
+section .rdata rdata align=32
|
|
+ALIGN 32
|
|
+$L$mask52x4: ; four qword lanes, each 0xfffffffffffff (2^52-1): per-limb 52-bit mask / compare constant
|
|
+ DQ 0xfffffffffffff ; lane 0
|
|
+ DQ 0xfffffffffffff ; lane 1
|
|
+ DQ 0xfffffffffffff ; lane 2
|
|
+ DQ 0xfffffffffffff ; lane 3
|
|
+$L$high64x3: ; lane 0 clear, lanes 1..3 all ones: vpand with this zeroes the lowest qword lane
|
|
+ DQ 0x0 ; lane 0 is dropped
|
|
+ DQ 0xffffffffffffffff ; lane 1 kept
|
|
+ DQ 0xffffffffffffffff ; lane 2 kept
|
|
+ DQ 0xffffffffffffffff ; lane 3 kept
|
|
+$L$kmasklut: ; 16 entries x 32 bytes; entry i has qword lane j all ones iff bit j of i is set (indexed as base + i*32)
|
|
+
|
|
+ DQ 0x0 ; entry 0x0: no lane selected
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+
|
|
+ DQ 0xffffffffffffffff ; entry 0x1: lane 0
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+
|
|
+ DQ 0x0 ; entry 0x2: lane 1
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+
|
|
+ DQ 0xffffffffffffffff ; entry 0x3: lanes 0,1
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+
|
|
+ DQ 0x0 ; entry 0x4: lane 2
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+
|
|
+ DQ 0xffffffffffffffff ; entry 0x5: lanes 0,2
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+
|
|
+ DQ 0x0 ; entry 0x6: lanes 1,2
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+
|
|
+ DQ 0xffffffffffffffff ; entry 0x7: lanes 0,1,2
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+
|
|
+ DQ 0x0 ; entry 0x8: lane 3
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+
|
|
+ DQ 0xffffffffffffffff ; entry 0x9: lanes 0,3
|
|
+ DQ 0x0
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+
|
|
+ DQ 0x0 ; entry 0xa: lanes 1,3
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+
|
|
+ DQ 0xffffffffffffffff ; entry 0xb: lanes 0,1,3
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+
|
|
+ DQ 0x0 ; entry 0xc: lanes 2,3
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+
|
|
+ DQ 0xffffffffffffffff ; entry 0xd: lanes 0,2,3
|
|
+ DQ 0x0
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+
|
|
+ DQ 0x0 ; entry 0xe: lanes 1,2,3
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+
|
|
+ DQ 0xffffffffffffffff ; entry 0xf: all four lanes
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+ DQ 0xffffffffffffffff
|
|
+section .text code align=64
|
|
+
|
|
+
|
|
+global ossl_rsaz_amm52x40_x2_avxifma256
|
|
+
|
|
+ALIGN 32
|
|
+ossl_rsaz_amm52x40_x2_avxifma256: ; Win64 entry: 4 register args (rcx,rdx,r8,r9) plus a 5th read from the stack
|
|
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
|
|
+ mov QWORD[16+rsp],rsi ; rdi/rsi are callee-saved on Win64; parked in the caller-provided home slots
|
|
+ mov rax,rsp
|
|
+$L$SEH_begin_ossl_rsaz_amm52x40_x2_avxifma256:
|
|
+ mov rdi,rcx ; arg1 -> rdi (destination of the vmovdqu stores later in the function)
|
|
+ mov rsi,rdx ; arg2 -> rsi
|
|
+ mov rdx,r8 ; arg3 -> rdx (copied to r11 before the first loop)
|
|
+ mov rcx,r9 ; arg4 -> rcx
|
|
+ mov r8,QWORD[40+rsp] ; arg5 is read from the stack at [40+rsp]
|
|
+
|
|
+
|
|
+
|
|
+DB 243,15,30,250 ; bytes F3 0F 1E FA = endbr64
|
|
+ push rbx ; save callee-saved GPRs (6 pushes = 48 bytes)
|
|
+
|
|
+ push rbp
|
|
+
|
|
+ push r12
|
|
+
|
|
+ push r13
|
|
+
|
|
+ push r14
|
|
+
|
|
+ push r15
|
|
+
|
|
+ lea rsp,[((-168))+rsp] ; 168 bytes: 160 for xmm6..xmm15 plus 8 that keep rsp 16-byte aligned for vmovapd
|
|
+ vmovapd XMMWORD[rsp],xmm6 ; xmm6..xmm15 are callee-saved on Win64
|
|
+ vmovapd XMMWORD[16+rsp],xmm7
|
|
+ vmovapd XMMWORD[32+rsp],xmm8
|
|
+ vmovapd XMMWORD[48+rsp],xmm9
|
|
+ vmovapd XMMWORD[64+rsp],xmm10
|
|
+ vmovapd XMMWORD[80+rsp],xmm11
|
|
+ vmovapd XMMWORD[96+rsp],xmm12
|
|
+ vmovapd XMMWORD[112+rsp],xmm13
|
|
+ vmovapd XMMWORD[128+rsp],xmm14
|
|
+ vmovapd XMMWORD[144+rsp],xmm15
|
|
+$L$ossl_rsaz_amm52x40_x2_avxifma256_body:
|
|
+
|
|
+ vpxor ymm0,ymm0,ymm0
|
|
+ vmovapd ymm3,ymm0
|
|
+ vmovapd ymm4,ymm0
|
|
+ vmovapd ymm5,ymm0
|
|
+ vmovapd ymm6,ymm0
|
|
+ vmovapd ymm7,ymm0
|
|
+ vmovapd ymm8,ymm0
|
|
+ vmovapd ymm9,ymm0
|
|
+ vmovapd ymm10,ymm0
|
|
+ vmovapd ymm11,ymm0
|
|
+ vmovapd ymm12,ymm0
|
|
+
|
|
+ xor r9d,r9d
|
|
+
|
|
+ mov r11,rdx
|
|
+ mov rax,0xfffffffffffff
|
|
+
|
|
+ mov ebx,40
|
|
+
|
|
+ALIGN 32
|
|
+$L$loop40: ; one pass per 64-bit digit read through r11; ebx counts the passes
|
|
+ mov r13,QWORD[r11] ; r13 = current digit at [r11]
|
|
+
|
|
+ vpbroadcastq ymm1,QWORD[r11] ; ymm1 = the same digit in all four qword lanes
|
|
+ mov rdx,QWORD[rsi] ; implicit mulx operand = qword 0 of the rsi array
|
|
+ mulx r12,r13,r13 ; r12:r13 = rdx * r13
|
|
+ add r9,r13 ; r9 accumulates the low half ...
|
|
+ mov r10,r12
|
|
+ adc r10,0 ; ... r10 = high half plus carry
|
|
+
|
|
+ mov r13,QWORD[r8]
|
|
+ imul r13,r9
|
|
+ and r13,rax ; r13 = ([r8] * r9) & rax (rax holds the 52-bit mask)
|
|
+
|
|
+ vmovq xmm2,r13
|
|
+ vpbroadcastq ymm2,xmm2 ; ymm2 = r13 in all four qword lanes
|
|
+ mov rdx,QWORD[rcx] ; implicit mulx operand = qword 0 of the rcx array
|
|
+ mulx r12,r13,r13 ; r12:r13 = rdx * r13
|
|
+ add r9,r13
|
|
+ adc r10,r12
|
|
+
|
|
+ shr r9,52
|
|
+ sal r10,12
|
|
+ or r9,r10 ; r9 = (r9 >> 52) | (r10 << 12)
|
|
+
|
|
+ lea rsp,[((-328))+rsp] ; scratch: 10 x 32-byte vectors + one qword
|
|
+
|
|
+ {vex} vpmadd52luq ymm3,ymm1,YMMWORD[rsi] ; NASM operand order (dst,src1,mem); {vex} forces the AVX-IFMA (VEX) encoding
|
|
+ {vex} vpmadd52luq ymm4,ymm1,YMMWORD[32+rsi]
|
|
+ {vex} vpmadd52luq ymm5,ymm1,YMMWORD[64+rsi]
|
|
+ {vex} vpmadd52luq ymm6,ymm1,YMMWORD[96+rsi]
|
|
+ {vex} vpmadd52luq ymm7,ymm1,YMMWORD[128+rsi]
|
|
+ {vex} vpmadd52luq ymm8,ymm1,YMMWORD[160+rsi]
|
|
+ {vex} vpmadd52luq ymm9,ymm1,YMMWORD[192+rsi]
|
|
+ {vex} vpmadd52luq ymm10,ymm1,YMMWORD[224+rsi]
|
|
+ {vex} vpmadd52luq ymm11,ymm1,YMMWORD[256+rsi]
|
|
+ {vex} vpmadd52luq ymm12,ymm1,YMMWORD[288+rsi]
|
|
+
|
|
+ {vex} vpmadd52luq ymm3,ymm2,YMMWORD[rcx] ; same accumulators, second product: rcx array times ymm2
|
|
+ {vex} vpmadd52luq ymm4,ymm2,YMMWORD[32+rcx]
|
|
+ {vex} vpmadd52luq ymm5,ymm2,YMMWORD[64+rcx]
|
|
+ {vex} vpmadd52luq ymm6,ymm2,YMMWORD[96+rcx]
|
|
+ {vex} vpmadd52luq ymm7,ymm2,YMMWORD[128+rcx]
|
|
+ {vex} vpmadd52luq ymm8,ymm2,YMMWORD[160+rcx]
|
|
+ {vex} vpmadd52luq ymm9,ymm2,YMMWORD[192+rcx]
|
|
+ {vex} vpmadd52luq ymm10,ymm2,YMMWORD[224+rcx]
|
|
+ {vex} vpmadd52luq ymm11,ymm2,YMMWORD[256+rcx]
|
|
+ {vex} vpmadd52luq ymm12,ymm2,YMMWORD[288+rcx]
|
|
+ vmovdqu YMMWORD[rsp],ymm3 ; spill the ten accumulators to the scratch area ...
|
|
+ vmovdqu YMMWORD[32+rsp],ymm4
|
|
+ vmovdqu YMMWORD[64+rsp],ymm5
|
|
+ vmovdqu YMMWORD[96+rsp],ymm6
|
|
+ vmovdqu YMMWORD[128+rsp],ymm7
|
|
+ vmovdqu YMMWORD[160+rsp],ymm8
|
|
+ vmovdqu YMMWORD[192+rsp],ymm9
|
|
+ vmovdqu YMMWORD[224+rsp],ymm10
|
|
+ vmovdqu YMMWORD[256+rsp],ymm11
|
|
+ vmovdqu YMMWORD[288+rsp],ymm12
|
|
+ mov QWORD[320+rsp],0 ; zero qword that becomes the new top lane after the shift
|
|
+
|
|
+ vmovdqu ymm3,YMMWORD[8+rsp] ; ... and reload 8 bytes higher: every value moves down one qword lane
|
|
+ vmovdqu ymm4,YMMWORD[40+rsp]
|
|
+ vmovdqu ymm5,YMMWORD[72+rsp]
|
|
+ vmovdqu ymm6,YMMWORD[104+rsp]
|
|
+ vmovdqu ymm7,YMMWORD[136+rsp]
|
|
+ vmovdqu ymm8,YMMWORD[168+rsp]
|
|
+ vmovdqu ymm9,YMMWORD[200+rsp]
|
|
+ vmovdqu ymm10,YMMWORD[232+rsp]
|
|
+ vmovdqu ymm11,YMMWORD[264+rsp]
|
|
+ vmovdqu ymm12,YMMWORD[296+rsp]
|
|
+
|
|
+ add r9,QWORD[8+rsp] ; fold the qword that is now lane 0 into the scalar accumulator
|
|
+
|
|
+ {vex} vpmadd52huq ymm3,ymm1,YMMWORD[rsi] ; high 52-bit product halves, added to the shifted accumulators
|
|
+ {vex} vpmadd52huq ymm4,ymm1,YMMWORD[32+rsi]
|
|
+ {vex} vpmadd52huq ymm5,ymm1,YMMWORD[64+rsi]
|
|
+ {vex} vpmadd52huq ymm6,ymm1,YMMWORD[96+rsi]
|
|
+ {vex} vpmadd52huq ymm7,ymm1,YMMWORD[128+rsi]
|
|
+ {vex} vpmadd52huq ymm8,ymm1,YMMWORD[160+rsi]
|
|
+ {vex} vpmadd52huq ymm9,ymm1,YMMWORD[192+rsi]
|
|
+ {vex} vpmadd52huq ymm10,ymm1,YMMWORD[224+rsi]
|
|
+ {vex} vpmadd52huq ymm11,ymm1,YMMWORD[256+rsi]
|
|
+ {vex} vpmadd52huq ymm12,ymm1,YMMWORD[288+rsi]
|
|
+
|
|
+ {vex} vpmadd52huq ymm3,ymm2,YMMWORD[rcx]
|
|
+ {vex} vpmadd52huq ymm4,ymm2,YMMWORD[32+rcx]
|
|
+ {vex} vpmadd52huq ymm5,ymm2,YMMWORD[64+rcx]
|
|
+ {vex} vpmadd52huq ymm6,ymm2,YMMWORD[96+rcx]
|
|
+ {vex} vpmadd52huq ymm7,ymm2,YMMWORD[128+rcx]
|
|
+ {vex} vpmadd52huq ymm8,ymm2,YMMWORD[160+rcx]
|
|
+ {vex} vpmadd52huq ymm9,ymm2,YMMWORD[192+rcx]
|
|
+ {vex} vpmadd52huq ymm10,ymm2,YMMWORD[224+rcx]
|
|
+ {vex} vpmadd52huq ymm11,ymm2,YMMWORD[256+rcx]
|
|
+ {vex} vpmadd52huq ymm12,ymm2,YMMWORD[288+rcx]
|
|
+ lea rsp,[328+rsp] ; release the scratch area
|
|
+ lea r11,[8+r11] ; advance to the next digit (lea leaves flags untouched)
|
|
+ dec ebx
|
|
+ jne NEAR $L$loop40
|
|
+
|
|
+ push r11
|
|
+ push rsi
|
|
+ push rcx
|
|
+ push r8
|
|
+
|
|
+ vmovq xmm0,r9
|
|
+ vpbroadcastq ymm0,xmm0
|
|
+ vpblendd ymm3,ymm3,ymm0,3
|
|
+
|
|
+ lea rsp,[((-640))+rsp]
|
|
+ vmovupd YMMWORD[rsp],ymm3
|
|
+ vmovupd YMMWORD[32+rsp],ymm4
|
|
+ vmovupd YMMWORD[64+rsp],ymm5
|
|
+ vmovupd YMMWORD[96+rsp],ymm6
|
|
+ vmovupd YMMWORD[128+rsp],ymm7
|
|
+ vmovupd YMMWORD[160+rsp],ymm8
|
|
+ vmovupd YMMWORD[192+rsp],ymm9
|
|
+ vmovupd YMMWORD[224+rsp],ymm10
|
|
+ vmovupd YMMWORD[256+rsp],ymm11
|
|
+ vmovupd YMMWORD[288+rsp],ymm12
|
|
+
|
|
+
|
|
+
|
|
+ vpsrlq ymm3,ymm3,52
|
|
+ vpsrlq ymm4,ymm4,52
|
|
+ vpsrlq ymm5,ymm5,52
|
|
+ vpsrlq ymm6,ymm6,52
|
|
+ vpsrlq ymm7,ymm7,52
|
|
+ vpsrlq ymm8,ymm8,52
|
|
+ vpsrlq ymm9,ymm9,52
|
|
+ vpsrlq ymm10,ymm10,52
|
|
+ vpsrlq ymm11,ymm11,52
|
|
+ vpsrlq ymm12,ymm12,52
|
|
+
|
|
+
|
|
+ vpermq ymm12,ymm12,144
|
|
+ vpermq ymm13,ymm11,3
|
|
+ vblendpd ymm12,ymm12,ymm13,1
|
|
+
|
|
+ vpermq ymm11,ymm11,144
|
|
+ vpermq ymm13,ymm10,3
|
|
+ vblendpd ymm11,ymm11,ymm13,1
|
|
+
|
|
+ vpermq ymm10,ymm10,144
|
|
+ vpermq ymm13,ymm9,3
|
|
+ vblendpd ymm10,ymm10,ymm13,1
|
|
+
|
|
+ vpermq ymm9,ymm9,144
|
|
+ vpermq ymm13,ymm8,3
|
|
+ vblendpd ymm9,ymm9,ymm13,1
|
|
+
|
|
+ vpermq ymm8,ymm8,144
|
|
+ vpermq ymm13,ymm7,3
|
|
+ vblendpd ymm8,ymm8,ymm13,1
|
|
+
|
|
+ vpermq ymm7,ymm7,144
|
|
+ vpermq ymm13,ymm6,3
|
|
+ vblendpd ymm7,ymm7,ymm13,1
|
|
+
|
|
+ vpermq ymm6,ymm6,144
|
|
+ vpermq ymm13,ymm5,3
|
|
+ vblendpd ymm6,ymm6,ymm13,1
|
|
+
|
|
+ vpermq ymm5,ymm5,144
|
|
+ vpermq ymm13,ymm4,3
|
|
+ vblendpd ymm5,ymm5,ymm13,1
|
|
+
|
|
+ vpermq ymm4,ymm4,144
|
|
+ vpermq ymm13,ymm3,3
|
|
+ vblendpd ymm4,ymm4,ymm13,1
|
|
+
|
|
+ vpermq ymm3,ymm3,144
|
|
+ vpand ymm3,ymm3,YMMWORD[$L$high64x3]
|
|
+
|
|
+ vmovupd YMMWORD[320+rsp],ymm3
|
|
+ vmovupd YMMWORD[352+rsp],ymm4
|
|
+ vmovupd YMMWORD[384+rsp],ymm5
|
|
+ vmovupd YMMWORD[416+rsp],ymm6
|
|
+ vmovupd YMMWORD[448+rsp],ymm7
|
|
+ vmovupd YMMWORD[480+rsp],ymm8
|
|
+ vmovupd YMMWORD[512+rsp],ymm9
|
|
+ vmovupd YMMWORD[544+rsp],ymm10
|
|
+ vmovupd YMMWORD[576+rsp],ymm11
|
|
+ vmovupd YMMWORD[608+rsp],ymm12
|
|
+
|
|
+ vmovupd ymm3,YMMWORD[rsp]
|
|
+ vmovupd ymm4,YMMWORD[32+rsp]
|
|
+ vmovupd ymm5,YMMWORD[64+rsp]
|
|
+ vmovupd ymm6,YMMWORD[96+rsp]
|
|
+ vmovupd ymm7,YMMWORD[128+rsp]
|
|
+ vmovupd ymm8,YMMWORD[160+rsp]
|
|
+ vmovupd ymm9,YMMWORD[192+rsp]
|
|
+ vmovupd ymm10,YMMWORD[224+rsp]
|
|
+ vmovupd ymm11,YMMWORD[256+rsp]
|
|
+ vmovupd ymm12,YMMWORD[288+rsp]
|
|
+
|
|
+
|
|
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm11,ymm11,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm12,ymm12,YMMWORD[$L$mask52x4]
|
|
+
|
|
+
|
|
+ vpaddq ymm3,ymm3,YMMWORD[320+rsp]
|
|
+ vpaddq ymm4,ymm4,YMMWORD[352+rsp]
|
|
+ vpaddq ymm5,ymm5,YMMWORD[384+rsp]
|
|
+ vpaddq ymm6,ymm6,YMMWORD[416+rsp]
|
|
+ vpaddq ymm7,ymm7,YMMWORD[448+rsp]
|
|
+ vpaddq ymm8,ymm8,YMMWORD[480+rsp]
|
|
+ vpaddq ymm9,ymm9,YMMWORD[512+rsp]
|
|
+ vpaddq ymm10,ymm10,YMMWORD[544+rsp]
|
|
+ vpaddq ymm11,ymm11,YMMWORD[576+rsp]
|
|
+ vpaddq ymm12,ymm12,YMMWORD[608+rsp]
|
|
+
|
|
+ lea rsp,[640+rsp]
|
|
+
|
|
+
|
|
+
|
|
+ vpcmpgtq ymm13,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r14d,ymm13
|
|
+ vpcmpgtq ymm13,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r13d,ymm13
|
|
+ shl r13b,4
|
|
+ or r14b,r13b
|
|
+
|
|
+ vpcmpgtq ymm13,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r13d,ymm13
|
|
+ vpcmpgtq ymm13,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r12d,ymm13
|
|
+ shl r12b,4
|
|
+ or r13b,r12b
|
|
+
|
|
+ vpcmpgtq ymm13,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r12d,ymm13
|
|
+ vpcmpgtq ymm13,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r11d,ymm13
|
|
+ shl r11b,4
|
|
+ or r12b,r11b
|
|
+
|
|
+ vpcmpgtq ymm13,ymm9,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r11d,ymm13
|
|
+ vpcmpgtq ymm13,ymm10,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r10d,ymm13
|
|
+ shl r10b,4
|
|
+ or r11b,r10b
|
|
+
|
|
+ vpcmpgtq ymm13,ymm11,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r10d,ymm13
|
|
+ vpcmpgtq ymm13,ymm12,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r9d,ymm13
|
|
+ shl r9b,4
|
|
+ or r10b,r9b
|
|
+
|
|
+ add r14b,r14b
|
|
+ adc r13b,r13b
|
|
+ adc r12b,r12b
|
|
+ adc r11b,r11b
|
|
+ adc r10b,r10b
|
|
+
|
|
+
|
|
+ vpcmpeqq ymm13,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r9d,ymm13
|
|
+ vpcmpeqq ymm13,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r8d,ymm13
|
|
+ shl r8b,4
|
|
+ or r9b,r8b
|
|
+
|
|
+ vpcmpeqq ymm13,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r8d,ymm13
|
|
+ vpcmpeqq ymm13,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd edx,ymm13
|
|
+ shl dl,4
|
|
+ or r8b,dl
|
|
+
|
|
+ vpcmpeqq ymm13,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd edx,ymm13
|
|
+ vpcmpeqq ymm13,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd ecx,ymm13
|
|
+ shl cl,4
|
|
+ or dl,cl
|
|
+
|
|
+ vpcmpeqq ymm13,ymm9,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd ecx,ymm13
|
|
+ vpcmpeqq ymm13,ymm10,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd ebx,ymm13
|
|
+ shl bl,4
|
|
+ or cl,bl
|
|
+
|
|
+ vpcmpeqq ymm13,ymm11,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd ebx,ymm13
|
|
+ vpcmpeqq ymm13,ymm12,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd eax,ymm13
|
|
+ shl al,4
|
|
+ or bl,al
|
|
+
|
|
+ add r14b,r9b
|
|
+ adc r13b,r8b
|
|
+ adc r12b,dl
|
|
+ adc r11b,cl
|
|
+ adc r10b,bl
|
|
+
|
|
+ xor r14b,r9b
|
|
+ xor r13b,r8b
|
|
+ xor r12b,dl
|
|
+ xor r11b,cl
|
|
+ xor r10b,bl
|
|
+
|
|
+ push r9 ; r9 and r8 are reused as scratch below and restored at the end of this section
|
|
+ push r8
|
|
+
|
|
+ lea r8,[$L$kmasklut] ; r8 = base of the 16-entry x 32-byte lane-mask table
|
|
+
|
|
+ mov r9b,r14b ; keep a copy: the high nibble of r14b drives ymm4 below
|
|
+ and r14,0xf ; low nibble = the four lane bits for ymm3
|
|
+ vpsubq ymm13,ymm3,YMMWORD[$L$mask52x4] ; candidate value: limb - 0xfffffffffffff
|
|
+ shl r14,5 ; nibble * 32 = byte offset of the table entry
|
|
+ vmovapd ymm14,YMMWORD[r14*1+r8] ; ymm14 = lane mask (was garbled AT&T text "r14,(%r8), %ymm14")
|
|
+ vblendvpd ymm3,ymm3,ymm13,ymm14 ; take the candidate only in the selected lanes
|
|
+
|
|
+ shr r9b,4
|
|
+ and r9,0xf ; high nibble of the saved byte = lane bits for ymm4
|
|
+ vpsubq ymm13,ymm4,YMMWORD[$L$mask52x4]
|
|
+ shl r9,5
|
|
+ vmovapd ymm14,YMMWORD[r9*1+r8]
|
|
+ vblendvpd ymm4,ymm4,ymm13,ymm14
|
|
+
|
|
+ mov r9b,r13b ; r13b: low nibble -> ymm5, high nibble -> ymm6
|
|
+ and r13,0xf
|
|
+ vpsubq ymm13,ymm5,YMMWORD[$L$mask52x4]
|
|
+ shl r13,5
|
|
+ vmovapd ymm14,YMMWORD[r13*1+r8]
|
|
+ vblendvpd ymm5,ymm5,ymm13,ymm14
|
|
+
|
|
+ shr r9b,4
|
|
+ and r9,0xf
|
|
+ vpsubq ymm13,ymm6,YMMWORD[$L$mask52x4]
|
|
+ shl r9,5
|
|
+ vmovapd ymm14,YMMWORD[r9*1+r8]
|
|
+ vblendvpd ymm6,ymm6,ymm13,ymm14
|
|
+
|
|
+ mov r9b,r12b ; r12b: low nibble -> ymm7, high nibble -> ymm8
|
|
+ and r12,0xf
|
|
+ vpsubq ymm13,ymm7,YMMWORD[$L$mask52x4]
|
|
+ shl r12,5
|
|
+ vmovapd ymm14,YMMWORD[r12*1+r8]
|
|
+ vblendvpd ymm7,ymm7,ymm13,ymm14
|
|
+
|
|
+ shr r9b,4
|
|
+ and r9,0xf
|
|
+ vpsubq ymm13,ymm8,YMMWORD[$L$mask52x4]
|
|
+ shl r9,5
|
|
+ vmovapd ymm14,YMMWORD[r9*1+r8]
|
|
+ vblendvpd ymm8,ymm8,ymm13,ymm14
|
|
+
|
|
+ mov r9b,r11b ; r11b: low nibble -> ymm9, high nibble -> ymm10
|
|
+ and r11,0xf
|
|
+ vpsubq ymm13,ymm9,YMMWORD[$L$mask52x4]
|
|
+ shl r11,5
|
|
+ vmovapd ymm14,YMMWORD[r11*1+r8]
|
|
+ vblendvpd ymm9,ymm9,ymm13,ymm14
|
|
+
|
|
+ shr r9b,4
|
|
+ and r9,0xf
|
|
+ vpsubq ymm13,ymm10,YMMWORD[$L$mask52x4]
|
|
+ shl r9,5
|
|
+ vmovapd ymm14,YMMWORD[r9*1+r8]
|
|
+ vblendvpd ymm10,ymm10,ymm13,ymm14
|
|
+
|
|
+ mov r9b,r10b ; r10b: low nibble -> ymm11, high nibble -> ymm12
|
|
+ and r10,0xf
|
|
+ vpsubq ymm13,ymm11,YMMWORD[$L$mask52x4]
|
|
+ shl r10,5
|
|
+ vmovapd ymm14,YMMWORD[r10*1+r8]
|
|
+ vblendvpd ymm11,ymm11,ymm13,ymm14
|
|
+
|
|
+ shr r9b,4
|
|
+ and r9,0xf
|
|
+ vpsubq ymm13,ymm12,YMMWORD[$L$mask52x4]
|
|
+ shl r9,5
|
|
+ vmovapd ymm14,YMMWORD[r9*1+r8]
|
|
+ vblendvpd ymm12,ymm12,ymm13,ymm14
|
|
+
|
|
+ pop r8 ; restore in reverse push order
|
|
+ pop r9
|
|
+
|
|
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
|
|
+
|
|
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm11,ymm11,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm12,ymm12,YMMWORD[$L$mask52x4]
|
|
+
|
|
+ pop r8
|
|
+ pop rcx
|
|
+ pop rsi
|
|
+ pop r11
|
|
+
|
|
+ vmovdqu YMMWORD[rdi],ymm3
|
|
+ vmovdqu YMMWORD[32+rdi],ymm4
|
|
+ vmovdqu YMMWORD[64+rdi],ymm5
|
|
+ vmovdqu YMMWORD[96+rdi],ymm6
|
|
+ vmovdqu YMMWORD[128+rdi],ymm7
|
|
+ vmovdqu YMMWORD[160+rdi],ymm8
|
|
+ vmovdqu YMMWORD[192+rdi],ymm9
|
|
+ vmovdqu YMMWORD[224+rdi],ymm10
|
|
+ vmovdqu YMMWORD[256+rdi],ymm11
|
|
+ vmovdqu YMMWORD[288+rdi],ymm12
|
|
+
|
|
+ xor r9d,r9d ; zero the scalar accumulator used by $L$loop40_1 (was r15d, which the loop never reads, leaving a stale mask byte in r9)
|
|
+
|
|
+ mov rax,0xfffffffffffff ; rax = 52-bit mask (eax was overwritten by vmovmskpd above)
|
|
+
|
|
+ mov ebx,40 ; pass counter for $L$loop40_1
|
|
+
|
|
+ vpxor ymm0,ymm0,ymm0
|
|
+ vmovapd ymm3,ymm0 ; clear the ten vector accumulators ymm3..ymm12
|
|
+ vmovapd ymm4,ymm0
|
|
+ vmovapd ymm5,ymm0
|
|
+ vmovapd ymm6,ymm0
|
|
+ vmovapd ymm7,ymm0
|
|
+ vmovapd ymm8,ymm0
|
|
+ vmovapd ymm9,ymm0
|
|
+ vmovapd ymm10,ymm0
|
|
+ vmovapd ymm11,ymm0
|
|
+ vmovapd ymm12,ymm0
|
|
+ALIGN 32
|
|
+$L$loop40_1: ; same pass as $L$loop40, on the data at byte offset 320 of the rsi/rcx arrays and the qword at [8+r8]
|
|
+ mov r13,QWORD[r11] ; r13 = current digit at [r11]
|
|
+
|
|
+ vpbroadcastq ymm1,QWORD[r11] ; ymm1 = the same digit in all four qword lanes
|
|
+ mov rdx,QWORD[320+rsi] ; implicit mulx operand = first qword of the second rsi block
|
|
+ mulx r12,r13,r13 ; r12:r13 = rdx * r13
|
|
+ add r9,r13 ; r9 accumulates the low half ...
|
|
+ mov r10,r12
|
|
+ adc r10,0 ; ... r10 = high half plus carry
|
|
+
|
|
+ mov r13,QWORD[8+r8]
|
|
+ imul r13,r9
|
|
+ and r13,rax ; r13 = ([8+r8] * r9) & rax (rax holds the 52-bit mask)
|
|
+
|
|
+ vmovq xmm2,r13
|
|
+ vpbroadcastq ymm2,xmm2 ; ymm2 = r13 in all four qword lanes
|
|
+ mov rdx,QWORD[320+rcx] ; implicit mulx operand = first qword of the second rcx block
|
|
+ mulx r12,r13,r13 ; r12:r13 = rdx * r13
|
|
+ add r9,r13
|
|
+ adc r10,r12
|
|
+
|
|
+ shr r9,52
|
|
+ sal r10,12
|
|
+ or r9,r10 ; r9 = (r9 >> 52) | (r10 << 12)
|
|
+
|
|
+ lea rsp,[((-328))+rsp] ; scratch: 10 x 32-byte vectors + one qword
|
|
+
|
|
+ {vex} vpmadd52luq ymm3,ymm1,YMMWORD[320+rsi] ; NASM operand order (dst,src1,mem); {vex} forces the AVX-IFMA (VEX) encoding
|
|
+ {vex} vpmadd52luq ymm4,ymm1,YMMWORD[352+rsi]
|
|
+ {vex} vpmadd52luq ymm5,ymm1,YMMWORD[384+rsi]
|
|
+ {vex} vpmadd52luq ymm6,ymm1,YMMWORD[416+rsi]
|
|
+ {vex} vpmadd52luq ymm7,ymm1,YMMWORD[448+rsi]
|
|
+ {vex} vpmadd52luq ymm8,ymm1,YMMWORD[480+rsi]
|
|
+ {vex} vpmadd52luq ymm9,ymm1,YMMWORD[512+rsi]
|
|
+ {vex} vpmadd52luq ymm10,ymm1,YMMWORD[544+rsi]
|
|
+ {vex} vpmadd52luq ymm11,ymm1,YMMWORD[576+rsi]
|
|
+ {vex} vpmadd52luq ymm12,ymm1,YMMWORD[608+rsi]
|
|
+
|
|
+ {vex} vpmadd52luq ymm3,ymm2,YMMWORD[320+rcx] ; same accumulators, second product: rcx block times ymm2
|
|
+ {vex} vpmadd52luq ymm4,ymm2,YMMWORD[352+rcx]
|
|
+ {vex} vpmadd52luq ymm5,ymm2,YMMWORD[384+rcx]
|
|
+ {vex} vpmadd52luq ymm6,ymm2,YMMWORD[416+rcx]
|
|
+ {vex} vpmadd52luq ymm7,ymm2,YMMWORD[448+rcx]
|
|
+ {vex} vpmadd52luq ymm8,ymm2,YMMWORD[480+rcx]
|
|
+ {vex} vpmadd52luq ymm9,ymm2,YMMWORD[512+rcx]
|
|
+ {vex} vpmadd52luq ymm10,ymm2,YMMWORD[544+rcx]
|
|
+ {vex} vpmadd52luq ymm11,ymm2,YMMWORD[576+rcx]
|
|
+ {vex} vpmadd52luq ymm12,ymm2,YMMWORD[608+rcx]
|
|
+ vmovdqu YMMWORD[rsp],ymm3 ; spill the ten accumulators to the scratch area ...
|
|
+ vmovdqu YMMWORD[32+rsp],ymm4
|
|
+ vmovdqu YMMWORD[64+rsp],ymm5
|
|
+ vmovdqu YMMWORD[96+rsp],ymm6
|
|
+ vmovdqu YMMWORD[128+rsp],ymm7
|
|
+ vmovdqu YMMWORD[160+rsp],ymm8
|
|
+ vmovdqu YMMWORD[192+rsp],ymm9
|
|
+ vmovdqu YMMWORD[224+rsp],ymm10
|
|
+ vmovdqu YMMWORD[256+rsp],ymm11
|
|
+ vmovdqu YMMWORD[288+rsp],ymm12
|
|
+ mov QWORD[320+rsp],0 ; zero qword that becomes the new top lane after the shift
|
|
+
|
|
+ vmovdqu ymm3,YMMWORD[8+rsp] ; ... and reload 8 bytes higher: every value moves down one qword lane
|
|
+ vmovdqu ymm4,YMMWORD[40+rsp]
|
|
+ vmovdqu ymm5,YMMWORD[72+rsp]
|
|
+ vmovdqu ymm6,YMMWORD[104+rsp]
|
|
+ vmovdqu ymm7,YMMWORD[136+rsp]
|
|
+ vmovdqu ymm8,YMMWORD[168+rsp]
|
|
+ vmovdqu ymm9,YMMWORD[200+rsp]
|
|
+ vmovdqu ymm10,YMMWORD[232+rsp]
|
|
+ vmovdqu ymm11,YMMWORD[264+rsp]
|
|
+ vmovdqu ymm12,YMMWORD[296+rsp]
|
|
+
|
|
+ add r9,QWORD[8+rsp] ; fold the qword that is now lane 0 into the scalar accumulator
|
|
+
|
|
+ {vex} vpmadd52huq ymm3,ymm1,YMMWORD[320+rsi] ; high 52-bit product halves, added to the shifted accumulators
|
|
+ {vex} vpmadd52huq ymm4,ymm1,YMMWORD[352+rsi]
|
|
+ {vex} vpmadd52huq ymm5,ymm1,YMMWORD[384+rsi]
|
|
+ {vex} vpmadd52huq ymm6,ymm1,YMMWORD[416+rsi]
|
|
+ {vex} vpmadd52huq ymm7,ymm1,YMMWORD[448+rsi]
|
|
+ {vex} vpmadd52huq ymm8,ymm1,YMMWORD[480+rsi]
|
|
+ {vex} vpmadd52huq ymm9,ymm1,YMMWORD[512+rsi]
|
|
+ {vex} vpmadd52huq ymm10,ymm1,YMMWORD[544+rsi]
|
|
+ {vex} vpmadd52huq ymm11,ymm1,YMMWORD[576+rsi]
|
|
+ {vex} vpmadd52huq ymm12,ymm1,YMMWORD[608+rsi]
|
|
+
|
|
+ {vex} vpmadd52huq ymm3,ymm2,YMMWORD[320+rcx]
|
|
+ {vex} vpmadd52huq ymm4,ymm2,YMMWORD[352+rcx]
|
|
+ {vex} vpmadd52huq ymm5,ymm2,YMMWORD[384+rcx]
|
|
+ {vex} vpmadd52huq ymm6,ymm2,YMMWORD[416+rcx]
|
|
+ {vex} vpmadd52huq ymm7,ymm2,YMMWORD[448+rcx]
|
|
+ {vex} vpmadd52huq ymm8,ymm2,YMMWORD[480+rcx]
|
|
+ {vex} vpmadd52huq ymm9,ymm2,YMMWORD[512+rcx]
|
|
+ {vex} vpmadd52huq ymm10,ymm2,YMMWORD[544+rcx]
|
|
+ {vex} vpmadd52huq ymm11,ymm2,YMMWORD[576+rcx]
|
|
+ {vex} vpmadd52huq ymm12,ymm2,YMMWORD[608+rcx]
|
|
+ lea rsp,[328+rsp] ; release the scratch area
|
|
+ lea r11,[8+r11] ; advance to the next digit (lea leaves flags untouched)
|
|
+ dec ebx
|
|
+ jne NEAR $L$loop40_1
|
|
+
|
|
+ vmovq xmm0,r9
|
|
+ vpbroadcastq ymm0,xmm0
|
|
+ vpblendd ymm3,ymm3,ymm0,3
|
|
+
|
|
+ lea rsp,[((-640))+rsp]
|
|
+ vmovupd YMMWORD[rsp],ymm3
|
|
+ vmovupd YMMWORD[32+rsp],ymm4
|
|
+ vmovupd YMMWORD[64+rsp],ymm5
|
|
+ vmovupd YMMWORD[96+rsp],ymm6
|
|
+ vmovupd YMMWORD[128+rsp],ymm7
|
|
+ vmovupd YMMWORD[160+rsp],ymm8
|
|
+ vmovupd YMMWORD[192+rsp],ymm9
|
|
+ vmovupd YMMWORD[224+rsp],ymm10
|
|
+ vmovupd YMMWORD[256+rsp],ymm11
|
|
+ vmovupd YMMWORD[288+rsp],ymm12
|
|
+
|
|
+
|
|
+
|
|
+ vpsrlq ymm3,ymm3,52
|
|
+ vpsrlq ymm4,ymm4,52
|
|
+ vpsrlq ymm5,ymm5,52
|
|
+ vpsrlq ymm6,ymm6,52
|
|
+ vpsrlq ymm7,ymm7,52
|
|
+ vpsrlq ymm8,ymm8,52
|
|
+ vpsrlq ymm9,ymm9,52
|
|
+ vpsrlq ymm10,ymm10,52
|
|
+ vpsrlq ymm11,ymm11,52
|
|
+ vpsrlq ymm12,ymm12,52
|
|
+
|
|
+
|
|
+ vpermq ymm12,ymm12,144
|
|
+ vpermq ymm13,ymm11,3
|
|
+ vblendpd ymm12,ymm12,ymm13,1
|
|
+
|
|
+ vpermq ymm11,ymm11,144
|
|
+ vpermq ymm13,ymm10,3
|
|
+ vblendpd ymm11,ymm11,ymm13,1
|
|
+
|
|
+ vpermq ymm10,ymm10,144
|
|
+ vpermq ymm13,ymm9,3
|
|
+ vblendpd ymm10,ymm10,ymm13,1
|
|
+
|
|
+ vpermq ymm9,ymm9,144
|
|
+ vpermq ymm13,ymm8,3
|
|
+ vblendpd ymm9,ymm9,ymm13,1
|
|
+
|
|
+ vpermq ymm8,ymm8,144
|
|
+ vpermq ymm13,ymm7,3
|
|
+ vblendpd ymm8,ymm8,ymm13,1
|
|
+
|
|
+ vpermq ymm7,ymm7,144
|
|
+ vpermq ymm13,ymm6,3
|
|
+ vblendpd ymm7,ymm7,ymm13,1
|
|
+
|
|
+ vpermq ymm6,ymm6,144
|
|
+ vpermq ymm13,ymm5,3
|
|
+ vblendpd ymm6,ymm6,ymm13,1
|
|
+
|
|
+ vpermq ymm5,ymm5,144
|
|
+ vpermq ymm13,ymm4,3
|
|
+ vblendpd ymm5,ymm5,ymm13,1
|
|
+
|
|
+ vpermq ymm4,ymm4,144
|
|
+ vpermq ymm13,ymm3,3
|
|
+ vblendpd ymm4,ymm4,ymm13,1
|
|
+
|
|
+ vpermq ymm3,ymm3,144
|
|
+ vpand ymm3,ymm3,YMMWORD[$L$high64x3]
|
|
+
|
|
+ vmovupd YMMWORD[320+rsp],ymm3
|
|
+ vmovupd YMMWORD[352+rsp],ymm4
|
|
+ vmovupd YMMWORD[384+rsp],ymm5
|
|
+ vmovupd YMMWORD[416+rsp],ymm6
|
|
+ vmovupd YMMWORD[448+rsp],ymm7
|
|
+ vmovupd YMMWORD[480+rsp],ymm8
|
|
+ vmovupd YMMWORD[512+rsp],ymm9
|
|
+ vmovupd YMMWORD[544+rsp],ymm10
|
|
+ vmovupd YMMWORD[576+rsp],ymm11
|
|
+ vmovupd YMMWORD[608+rsp],ymm12
|
|
+
|
|
+ vmovupd ymm3,YMMWORD[rsp]
|
|
+ vmovupd ymm4,YMMWORD[32+rsp]
|
|
+ vmovupd ymm5,YMMWORD[64+rsp]
|
|
+ vmovupd ymm6,YMMWORD[96+rsp]
|
|
+ vmovupd ymm7,YMMWORD[128+rsp]
|
|
+ vmovupd ymm8,YMMWORD[160+rsp]
|
|
+ vmovupd ymm9,YMMWORD[192+rsp]
|
|
+ vmovupd ymm10,YMMWORD[224+rsp]
|
|
+ vmovupd ymm11,YMMWORD[256+rsp]
|
|
+ vmovupd ymm12,YMMWORD[288+rsp]
|
|
+
|
|
+
|
|
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm11,ymm11,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm12,ymm12,YMMWORD[$L$mask52x4]
|
|
+
|
|
+
|
|
+ vpaddq ymm3,ymm3,YMMWORD[320+rsp]
|
|
+ vpaddq ymm4,ymm4,YMMWORD[352+rsp]
|
|
+ vpaddq ymm5,ymm5,YMMWORD[384+rsp]
|
|
+ vpaddq ymm6,ymm6,YMMWORD[416+rsp]
|
|
+ vpaddq ymm7,ymm7,YMMWORD[448+rsp]
|
|
+ vpaddq ymm8,ymm8,YMMWORD[480+rsp]
|
|
+ vpaddq ymm9,ymm9,YMMWORD[512+rsp]
|
|
+ vpaddq ymm10,ymm10,YMMWORD[544+rsp]
|
|
+ vpaddq ymm11,ymm11,YMMWORD[576+rsp]
|
|
+ vpaddq ymm12,ymm12,YMMWORD[608+rsp]
|
|
+
|
|
+ lea rsp,[640+rsp]
|
|
+
|
|
+
|
|
+
|
|
+ vpcmpgtq ymm13,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r14d,ymm13
|
|
+ vpcmpgtq ymm13,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r13d,ymm13
|
|
+ shl r13b,4
|
|
+ or r14b,r13b
|
|
+
|
|
+ vpcmpgtq ymm13,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r13d,ymm13
|
|
+ vpcmpgtq ymm13,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r12d,ymm13
|
|
+ shl r12b,4
|
|
+ or r13b,r12b
|
|
+
|
|
+ vpcmpgtq ymm13,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r12d,ymm13
|
|
+ vpcmpgtq ymm13,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r11d,ymm13
|
|
+ shl r11b,4
|
|
+ or r12b,r11b
|
|
+
|
|
+ vpcmpgtq ymm13,ymm9,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r11d,ymm13
|
|
+ vpcmpgtq ymm13,ymm10,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r10d,ymm13
|
|
+ shl r10b,4
|
|
+ or r11b,r10b
|
|
+
|
|
+ vpcmpgtq ymm13,ymm11,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r10d,ymm13
|
|
+ vpcmpgtq ymm13,ymm12,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r9d,ymm13
|
|
+ shl r9b,4
|
|
+ or r10b,r9b
|
|
+
|
|
+ add r14b,r14b
|
|
+ adc r13b,r13b
|
|
+ adc r12b,r12b
|
|
+ adc r11b,r11b
|
|
+ adc r10b,r10b
|
|
+
|
|
+
|
|
+ vpcmpeqq ymm13,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r9d,ymm13
|
|
+ vpcmpeqq ymm13,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r8d,ymm13
|
|
+ shl r8b,4
|
|
+ or r9b,r8b
|
|
+
|
|
+ vpcmpeqq ymm13,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd r8d,ymm13
|
|
+ vpcmpeqq ymm13,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd edx,ymm13
|
|
+ shl dl,4
|
|
+ or r8b,dl
|
|
+
|
|
+ vpcmpeqq ymm13,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd edx,ymm13
|
|
+ vpcmpeqq ymm13,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd ecx,ymm13
|
|
+ shl cl,4
|
|
+ or dl,cl
|
|
+
|
|
+ vpcmpeqq ymm13,ymm9,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd ecx,ymm13
|
|
+ vpcmpeqq ymm13,ymm10,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd ebx,ymm13
|
|
+ shl bl,4
|
|
+ or cl,bl
|
|
+
|
|
+ vpcmpeqq ymm13,ymm11,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd ebx,ymm13
|
|
+ vpcmpeqq ymm13,ymm12,YMMWORD[$L$mask52x4]
|
|
+ vmovmskpd eax,ymm13
|
|
+ shl al,4
|
|
+ or bl,al
|
|
+
|
|
+ add r14b,r9b
|
|
+ adc r13b,r8b
|
|
+ adc r12b,dl
|
|
+ adc r11b,cl
|
|
+ adc r10b,bl
|
|
+
|
|
+ xor r14b,r9b
|
|
+ xor r13b,r8b
|
|
+ xor r12b,dl
|
|
+ xor r11b,cl
|
|
+ xor r10b,bl
|
|
+
|
|
+ push r9 ; r9 and r8 are reused as scratch below and restored at the end of this section
|
|
+ push r8
|
|
+
|
|
+ lea r8,[$L$kmasklut] ; r8 = base of the 16-entry x 32-byte lane-mask table
|
|
+
|
|
+ mov r9b,r14b ; keep a copy: the high nibble of r14b drives ymm4 below
|
|
+ and r14,0xf ; low nibble = the four lane bits for ymm3
|
|
+ vpsubq ymm13,ymm3,YMMWORD[$L$mask52x4] ; candidate value: limb - 0xfffffffffffff
|
|
+ shl r14,5 ; nibble * 32 = byte offset of the table entry
|
|
+ vmovapd ymm14,YMMWORD[r14*1+r8] ; ymm14 = lane mask (was garbled AT&T text "r14,(%r8), %ymm14")
|
|
+ vblendvpd ymm3,ymm3,ymm13,ymm14 ; take the candidate only in the selected lanes
|
|
+
|
|
+ shr r9b,4
|
|
+ and r9,0xf ; high nibble of the saved byte = lane bits for ymm4
|
|
+ vpsubq ymm13,ymm4,YMMWORD[$L$mask52x4]
|
|
+ shl r9,5
|
|
+ vmovapd ymm14,YMMWORD[r9*1+r8]
|
|
+ vblendvpd ymm4,ymm4,ymm13,ymm14
|
|
+
|
|
+ mov r9b,r13b ; r13b: low nibble -> ymm5, high nibble -> ymm6
|
|
+ and r13,0xf
|
|
+ vpsubq ymm13,ymm5,YMMWORD[$L$mask52x4]
|
|
+ shl r13,5
|
|
+ vmovapd ymm14,YMMWORD[r13*1+r8]
|
|
+ vblendvpd ymm5,ymm5,ymm13,ymm14
|
|
+
|
|
+ shr r9b,4
|
|
+ and r9,0xf
|
|
+ vpsubq ymm13,ymm6,YMMWORD[$L$mask52x4]
|
|
+ shl r9,5
|
|
+ vmovapd ymm14,YMMWORD[r9*1+r8]
|
|
+ vblendvpd ymm6,ymm6,ymm13,ymm14
|
|
+
|
|
+ mov r9b,r12b ; r12b: low nibble -> ymm7, high nibble -> ymm8
|
|
+ and r12,0xf
|
|
+ vpsubq ymm13,ymm7,YMMWORD[$L$mask52x4]
|
|
+ shl r12,5
|
|
+ vmovapd ymm14,YMMWORD[r12*1+r8]
|
|
+ vblendvpd ymm7,ymm7,ymm13,ymm14
|
|
+
|
|
+ shr r9b,4
|
|
+ and r9,0xf
|
|
+ vpsubq ymm13,ymm8,YMMWORD[$L$mask52x4]
|
|
+ shl r9,5
|
|
+ vmovapd ymm14,YMMWORD[r9*1+r8]
|
|
+ vblendvpd ymm8,ymm8,ymm13,ymm14
|
|
+
|
|
+ mov r9b,r11b ; r11b: low nibble -> ymm9, high nibble -> ymm10
|
|
+ and r11,0xf
|
|
+ vpsubq ymm13,ymm9,YMMWORD[$L$mask52x4]
|
|
+ shl r11,5
|
|
+ vmovapd ymm14,YMMWORD[r11*1+r8]
|
|
+ vblendvpd ymm9,ymm9,ymm13,ymm14
|
|
+
|
|
+ shr r9b,4
|
|
+ and r9,0xf
|
|
+ vpsubq ymm13,ymm10,YMMWORD[$L$mask52x4]
|
|
+ shl r9,5
|
|
+ vmovapd ymm14,YMMWORD[r9*1+r8]
|
|
+ vblendvpd ymm10,ymm10,ymm13,ymm14
|
|
+
|
|
+ mov r9b,r10b ; r10b: low nibble -> ymm11, high nibble -> ymm12
|
|
+ and r10,0xf
|
|
+ vpsubq ymm13,ymm11,YMMWORD[$L$mask52x4]
|
|
+ shl r10,5
|
|
+ vmovapd ymm14,YMMWORD[r10*1+r8]
|
|
+ vblendvpd ymm11,ymm11,ymm13,ymm14
|
|
+
|
|
+ shr r9b,4
|
|
+ and r9,0xf
|
|
+ vpsubq ymm13,ymm12,YMMWORD[$L$mask52x4]
|
|
+ shl r9,5
|
|
+ vmovapd ymm14,YMMWORD[r9*1+r8]
|
|
+ vblendvpd ymm12,ymm12,ymm13,ymm14
|
|
+
|
|
+ pop r8 ; restore in reverse push order
|
|
+ pop r9
|
|
+
|
|
+ vpand ymm3,ymm3,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm4,ymm4,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm5,ymm5,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm6,ymm6,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm7,ymm7,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm8,ymm8,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm9,ymm9,YMMWORD[$L$mask52x4]
|
|
+
|
|
+ vpand ymm10,ymm10,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm11,ymm11,YMMWORD[$L$mask52x4]
|
|
+ vpand ymm12,ymm12,YMMWORD[$L$mask52x4]
|
|
+
|
|
+ vmovdqu YMMWORD[320+rdi],ymm3
|
|
+ vmovdqu YMMWORD[352+rdi],ymm4
|
|
+ vmovdqu YMMWORD[384+rdi],ymm5
|
|
+ vmovdqu YMMWORD[416+rdi],ymm6
|
|
+ vmovdqu YMMWORD[448+rdi],ymm7
|
|
+ vmovdqu YMMWORD[480+rdi],ymm8
|
|
+ vmovdqu YMMWORD[512+rdi],ymm9
|
|
+ vmovdqu YMMWORD[544+rdi],ymm10
|
|
+ vmovdqu YMMWORD[576+rdi],ymm11
|
|
+ vmovdqu YMMWORD[608+rdi],ymm12
|
|
+
|
|
+ vzeroupper ; clear the upper ymm halves before returning to the caller
|
|
+ lea rax,[rsp] ; rax = base of the xmm save area written in the prologue
|
|
+
|
|
+ vmovapd xmm6,XMMWORD[rax] ; restore the Win64 callee-saved xmm6..xmm15
|
|
+ vmovapd xmm7,XMMWORD[16+rax]
|
|
+ vmovapd xmm8,XMMWORD[32+rax]
|
|
+ vmovapd xmm9,XMMWORD[48+rax]
|
|
+ vmovapd xmm10,XMMWORD[64+rax]
|
|
+ vmovapd xmm11,XMMWORD[80+rax]
|
|
+ vmovapd xmm12,XMMWORD[96+rax]
|
|
+ vmovapd xmm13,XMMWORD[112+rax]
|
|
+ vmovapd xmm14,XMMWORD[128+rax]
|
|
+ vmovapd xmm15,XMMWORD[144+rax]
|
|
+ lea rax,[168+rsp] ; rax = address of the six pushed GPRs (above the 168-byte xmm area)
|
|
+ mov r15,QWORD[rax] ; reload in reverse push order: r15 was pushed last
|
|
+
|
|
+ mov r14,QWORD[8+rax]
|
|
+
|
|
+ mov r13,QWORD[16+rax]
|
|
+
|
|
+ mov r12,QWORD[24+rax]
|
|
+
|
|
+ mov rbp,QWORD[32+rax]
|
|
+
|
|
+ mov rbx,QWORD[40+rax]
|
|
+
|
|
+ lea rsp,[48+rax] ; drop the six saved GPRs; rsp now points at the return address
|
|
+
|
|
+$L$ossl_rsaz_amm52x40_x2_avxifma256_epilogue:
|
|
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
|
+ mov rsi,QWORD[16+rsp] ; rdi/rsi come back from the home slots filled at entry
|
|
+ DB 0F3h,0C3h ;repret
|
|
+
|
|
+$L$SEH_end_ossl_rsaz_amm52x40_x2_avxifma256:
|
|
+section .text code align=64
|
|
+
|
|
+
|
|
+ALIGN 32
|
|
+global ossl_extract_multiplier_2x40_win5_avx
|
|
+
|
|
+ossl_extract_multiplier_2x40_win5_avx:
|
|
+
|
|
+DB 243,15,30,250
|
|
+ vmovapd ymm14,YMMWORD[$L$ones]
|
|
+ vmovq xmm10,r8
|
|
+ vpbroadcastq ymm12,xmm10
|
|
+ vmovq xmm10,r9
|
|
+ vpbroadcastq ymm13,xmm10
|
|
+ lea rax,[20480+rdx]
|
|
+
|
|
+
|
|
+ mov r10,rdx
|
|
+
|
|
+
|
|
+ vpxor xmm0,xmm0,xmm0
|
|
+ vmovapd ymm1,ymm0
|
|
+ vmovapd ymm2,ymm0
|
|
+ vmovapd ymm3,ymm0
|
|
+ vmovapd ymm4,ymm0
|
|
+ vmovapd ymm5,ymm0
|
|
+ vmovapd ymm6,ymm0
|
|
+ vmovapd ymm7,ymm0
|
|
+ vmovapd ymm8,ymm0
|
|
+ vmovapd ymm9,ymm0
|
|
+ vpxor ymm11,ymm11,ymm11
|
|
+ALIGN 32
|
|
+$L$loop_0:
|
|
+ vpcmpeqq ymm15,ymm12,ymm11
|
|
+ vmovdqu ymm10,YMMWORD[rdx]
|
|
+
|
|
+ vblendvpd ymm0,ymm0,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[32+rdx]
|
|
+
|
|
+ vblendvpd ymm1,ymm1,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[64+rdx]
|
|
+
|
|
+ vblendvpd ymm2,ymm2,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[96+rdx]
|
|
+
|
|
+ vblendvpd ymm3,ymm3,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[128+rdx]
|
|
+
|
|
+ vblendvpd ymm4,ymm4,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[160+rdx]
|
|
+
|
|
+ vblendvpd ymm5,ymm5,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[192+rdx]
|
|
+
|
|
+ vblendvpd ymm6,ymm6,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[224+rdx]
|
|
+
|
|
+ vblendvpd ymm7,ymm7,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[256+rdx]
|
|
+
|
|
+ vblendvpd ymm8,ymm8,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[288+rdx]
|
|
+
|
|
+ vblendvpd ymm9,ymm9,ymm10,ymm15
|
|
+ vpaddq ymm11,ymm11,ymm14
|
|
+ add rdx,640
|
|
+ cmp rax,rdx
|
|
+ jne NEAR $L$loop_0
|
|
+ vmovdqu YMMWORD[rcx],ymm0
|
|
+ vmovdqu YMMWORD[32+rcx],ymm1
|
|
+ vmovdqu YMMWORD[64+rcx],ymm2
|
|
+ vmovdqu YMMWORD[96+rcx],ymm3
|
|
+ vmovdqu YMMWORD[128+rcx],ymm4
|
|
+ vmovdqu YMMWORD[160+rcx],ymm5
|
|
+ vmovdqu YMMWORD[192+rcx],ymm6
|
|
+ vmovdqu YMMWORD[224+rcx],ymm7
|
|
+ vmovdqu YMMWORD[256+rcx],ymm8
|
|
+ vmovdqu YMMWORD[288+rcx],ymm9
|
|
+ mov rdx,r10
|
|
+ vpxor ymm11,ymm11,ymm11
|
|
+ALIGN 32
|
|
+$L$loop_320:
|
|
+ vpcmpeqq ymm15,ymm13,ymm11
|
|
+ vmovdqu ymm10,YMMWORD[320+rdx]
|
|
+
|
|
+ vblendvpd ymm0,ymm0,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[352+rdx]
|
|
+
|
|
+ vblendvpd ymm1,ymm1,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[384+rdx]
|
|
+
|
|
+ vblendvpd ymm2,ymm2,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[416+rdx]
|
|
+
|
|
+ vblendvpd ymm3,ymm3,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[448+rdx]
|
|
+
|
|
+ vblendvpd ymm4,ymm4,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[480+rdx]
|
|
+
|
|
+ vblendvpd ymm5,ymm5,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[512+rdx]
|
|
+
|
|
+ vblendvpd ymm6,ymm6,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[544+rdx]
|
|
+
|
|
+ vblendvpd ymm7,ymm7,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[576+rdx]
|
|
+
|
|
+ vblendvpd ymm8,ymm8,ymm10,ymm15
|
|
+ vmovdqu ymm10,YMMWORD[608+rdx]
|
|
+
|
|
+ vblendvpd ymm9,ymm9,ymm10,ymm15
|
|
+ vpaddq ymm11,ymm11,ymm14
|
|
+ add rdx,640
|
|
+ cmp rax,rdx
|
|
+ jne NEAR $L$loop_320
|
|
+ vmovdqu YMMWORD[320+rcx],ymm0
|
|
+ vmovdqu YMMWORD[352+rcx],ymm1
|
|
+ vmovdqu YMMWORD[384+rcx],ymm2
|
|
+ vmovdqu YMMWORD[416+rcx],ymm3
|
|
+ vmovdqu YMMWORD[448+rcx],ymm4
|
|
+ vmovdqu YMMWORD[480+rcx],ymm5
|
|
+ vmovdqu YMMWORD[512+rcx],ymm6
|
|
+ vmovdqu YMMWORD[544+rcx],ymm7
|
|
+ vmovdqu YMMWORD[576+rcx],ymm8
|
|
+ vmovdqu YMMWORD[608+rcx],ymm9
|
|
+
|
|
+ DB 0F3h,0C3h ;repret
|
|
+
|
|
+
|
|
+section .rdata rdata align=32
|
|
+ALIGN 32
|
|
+$L$ones:
|
|
+ DQ 1,1,1,1
|
|
+$L$zeros:
|
|
+ DQ 0,0,0,0
|
|
+EXTERN __imp_RtlVirtualUnwind
|
|
+
|
|
+ALIGN 16
|
|
+rsaz_avx_handler:
|
|
+ push rsi
|
|
+ push rdi
|
|
+ push rbx
|
|
+ push rbp
|
|
+ push r12
|
|
+ push r13
|
|
+ push r14
|
|
+ push r15
|
|
+ pushfq
|
|
+ sub rsp,64
|
|
+
|
|
+ mov rax,QWORD[120+r8]
|
|
+ mov rbx,QWORD[248+r8]
|
|
+
|
|
+ mov rsi,QWORD[8+r9]
|
|
+ mov r11,QWORD[56+r9]
|
|
+
|
|
+ mov r10d,DWORD[r11]
|
|
+ lea r10,[r10*1+rsi]
|
|
+ cmp rbx,r10
|
|
+ jb NEAR $L$common_seh_tail
|
|
+
|
|
+ mov r10d,DWORD[4+r11]
|
|
+ lea r10,[r10*1+rsi]
|
|
+ cmp rbx,r10
|
|
+ jae NEAR $L$common_seh_tail
|
|
+
|
|
+ mov rax,QWORD[152+r8]
|
|
+
|
|
+ lea rsi,[rax]
|
|
+ lea rdi,[512+r8]
|
|
+ mov ecx,20
|
|
+ DD 0xa548f3fc
|
|
+
|
|
+ lea rax,[216+rax]
|
|
+
|
|
+ mov rbx,QWORD[((-8))+rax]
|
|
+ mov rbp,QWORD[((-16))+rax]
|
|
+ mov r12,QWORD[((-24))+rax]
|
|
+ mov r13,QWORD[((-32))+rax]
|
|
+ mov r14,QWORD[((-40))+rax]
|
|
+ mov r15,QWORD[((-48))+rax]
|
|
+ mov QWORD[144+r8],rbx
|
|
+ mov QWORD[160+r8],rbp
|
|
+ mov QWORD[216+r8],r12
|
|
+ mov QWORD[224+r8],r13
|
|
+ mov QWORD[232+r8],r14
|
|
+ mov QWORD[240+r8],r15
|
|
+
|
|
+$L$common_seh_tail:
|
|
+ mov rdi,QWORD[8+rax]
|
|
+ mov rsi,QWORD[16+rax]
|
|
+ mov QWORD[152+r8],rax
|
|
+ mov QWORD[168+r8],rsi
|
|
+ mov QWORD[176+r8],rdi
|
|
+
|
|
+ mov rdi,QWORD[40+r9]
|
|
+ mov rsi,r8
|
|
+ mov ecx,154
|
|
+ DD 0xa548f3fc
|
|
+
|
|
+ mov rsi,r9
|
|
+ xor rcx,rcx
|
|
+ mov rdx,QWORD[8+rsi]
|
|
+ mov r8,QWORD[rsi]
|
|
+ mov r9,QWORD[16+rsi]
|
|
+ mov r10,QWORD[40+rsi]
|
|
+ lea r11,[56+rsi]
|
|
+ lea r12,[24+rsi]
|
|
+ mov QWORD[32+rsp],r10
|
|
+ mov QWORD[40+rsp],r11
|
|
+ mov QWORD[48+rsp],r12
|
|
+ mov QWORD[56+rsp],rcx
|
|
+ call QWORD[__imp_RtlVirtualUnwind]
|
|
+
|
|
+ mov eax,1
|
|
+ add rsp,64
|
|
+ popfq
|
|
+ pop r15
|
|
+ pop r14
|
|
+ pop r13
|
|
+ pop r12
|
|
+ pop rbp
|
|
+ pop rbx
|
|
+ pop rdi
|
|
+ pop rsi
|
|
+ DB 0F3h,0C3h ;repret
|
|
+
|
|
+
|
|
+section .pdata rdata align=4
|
|
+ALIGN 4
|
|
+ DD $L$SEH_begin_ossl_rsaz_amm52x40_x1_avxifma256 wrt ..imagebase
|
|
+ DD $L$SEH_end_ossl_rsaz_amm52x40_x1_avxifma256 wrt ..imagebase
|
|
+ DD $L$SEH_info_ossl_rsaz_amm52x40_x1_avxifma256 wrt ..imagebase
|
|
+
|
|
+ DD $L$SEH_begin_ossl_rsaz_amm52x40_x2_avxifma256 wrt ..imagebase
|
|
+ DD $L$SEH_end_ossl_rsaz_amm52x40_x2_avxifma256 wrt ..imagebase
|
|
+ DD $L$SEH_info_ossl_rsaz_amm52x40_x2_avxifma256 wrt ..imagebase
|
|
+
|
|
+section .xdata rdata align=8
|
|
+ALIGN 8
|
|
+$L$SEH_info_ossl_rsaz_amm52x40_x1_avxifma256:
|
|
+DB 9,0,0,0
|
|
+ DD rsaz_avx_handler wrt ..imagebase
|
|
+ DD $L$ossl_rsaz_amm52x40_x1_avxifma256_body wrt ..imagebase,$L$ossl_rsaz_amm52x40_x1_avxifma256_epilogue wrt ..imagebase
|
|
+$L$SEH_info_ossl_rsaz_amm52x40_x2_avxifma256:
|
|
+DB 9,0,0,0
|
|
+ DD rsaz_avx_handler wrt ..imagebase
|
|
+ DD $L$ossl_rsaz_amm52x40_x2_avxifma256_body wrt ..imagebase,$L$ossl_rsaz_amm52x40_x2_avxifma256_epilogue wrt ..imagebase
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/md5/md5-x86_64.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/md5/md5-x86_64.nasm
|
|
index 9139d4c44a..f71708242f 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/md5/md5-x86_64.nasm
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/md5/md5-x86_64.nasm
|
|
@@ -210,7 +210,7 @@ $L$loop:
|
|
lea eax,[((-165796510))+r10*1+rax]
|
|
and r11d,ecx
|
|
mov r10d,DWORD[24+rsi]
|
|
- or r12d,r11d
|
|
+ add eax,r11d
|
|
mov r11d,ecx
|
|
add eax,r12d
|
|
mov r12d,ecx
|
|
@@ -221,7 +221,7 @@ $L$loop:
|
|
lea edx,[((-1069501632))+r10*1+rdx]
|
|
and r11d,ebx
|
|
mov r10d,DWORD[44+rsi]
|
|
- or r12d,r11d
|
|
+ add edx,r11d
|
|
mov r11d,ebx
|
|
add edx,r12d
|
|
mov r12d,ebx
|
|
@@ -232,7 +232,7 @@ $L$loop:
|
|
lea ecx,[643717713+r10*1+rcx]
|
|
and r11d,eax
|
|
mov r10d,DWORD[rsi]
|
|
- or r12d,r11d
|
|
+ add ecx,r11d
|
|
mov r11d,eax
|
|
add ecx,r12d
|
|
mov r12d,eax
|
|
@@ -243,7 +243,7 @@ $L$loop:
|
|
lea ebx,[((-373897302))+r10*1+rbx]
|
|
and r11d,edx
|
|
mov r10d,DWORD[20+rsi]
|
|
- or r12d,r11d
|
|
+ add ebx,r11d
|
|
mov r11d,edx
|
|
add ebx,r12d
|
|
mov r12d,edx
|
|
@@ -254,7 +254,7 @@ $L$loop:
|
|
lea eax,[((-701558691))+r10*1+rax]
|
|
and r11d,ecx
|
|
mov r10d,DWORD[40+rsi]
|
|
- or r12d,r11d
|
|
+ add eax,r11d
|
|
mov r11d,ecx
|
|
add eax,r12d
|
|
mov r12d,ecx
|
|
@@ -265,7 +265,7 @@ $L$loop:
|
|
lea edx,[38016083+r10*1+rdx]
|
|
and r11d,ebx
|
|
mov r10d,DWORD[60+rsi]
|
|
- or r12d,r11d
|
|
+ add edx,r11d
|
|
mov r11d,ebx
|
|
add edx,r12d
|
|
mov r12d,ebx
|
|
@@ -276,7 +276,7 @@ $L$loop:
|
|
lea ecx,[((-660478335))+r10*1+rcx]
|
|
and r11d,eax
|
|
mov r10d,DWORD[16+rsi]
|
|
- or r12d,r11d
|
|
+ add ecx,r11d
|
|
mov r11d,eax
|
|
add ecx,r12d
|
|
mov r12d,eax
|
|
@@ -287,7 +287,7 @@ $L$loop:
|
|
lea ebx,[((-405537848))+r10*1+rbx]
|
|
and r11d,edx
|
|
mov r10d,DWORD[36+rsi]
|
|
- or r12d,r11d
|
|
+ add ebx,r11d
|
|
mov r11d,edx
|
|
add ebx,r12d
|
|
mov r12d,edx
|
|
@@ -298,7 +298,7 @@ $L$loop:
|
|
lea eax,[568446438+r10*1+rax]
|
|
and r11d,ecx
|
|
mov r10d,DWORD[56+rsi]
|
|
- or r12d,r11d
|
|
+ add eax,r11d
|
|
mov r11d,ecx
|
|
add eax,r12d
|
|
mov r12d,ecx
|
|
@@ -309,7 +309,7 @@ $L$loop:
|
|
lea edx,[((-1019803690))+r10*1+rdx]
|
|
and r11d,ebx
|
|
mov r10d,DWORD[12+rsi]
|
|
- or r12d,r11d
|
|
+ add edx,r11d
|
|
mov r11d,ebx
|
|
add edx,r12d
|
|
mov r12d,ebx
|
|
@@ -320,7 +320,7 @@ $L$loop:
|
|
lea ecx,[((-187363961))+r10*1+rcx]
|
|
and r11d,eax
|
|
mov r10d,DWORD[32+rsi]
|
|
- or r12d,r11d
|
|
+ add ecx,r11d
|
|
mov r11d,eax
|
|
add ecx,r12d
|
|
mov r12d,eax
|
|
@@ -331,7 +331,7 @@ $L$loop:
|
|
lea ebx,[1163531501+r10*1+rbx]
|
|
and r11d,edx
|
|
mov r10d,DWORD[52+rsi]
|
|
- or r12d,r11d
|
|
+ add ebx,r11d
|
|
mov r11d,edx
|
|
add ebx,r12d
|
|
mov r12d,edx
|
|
@@ -342,7 +342,7 @@ $L$loop:
|
|
lea eax,[((-1444681467))+r10*1+rax]
|
|
and r11d,ecx
|
|
mov r10d,DWORD[8+rsi]
|
|
- or r12d,r11d
|
|
+ add eax,r11d
|
|
mov r11d,ecx
|
|
add eax,r12d
|
|
mov r12d,ecx
|
|
@@ -353,7 +353,7 @@ $L$loop:
|
|
lea edx,[((-51403784))+r10*1+rdx]
|
|
and r11d,ebx
|
|
mov r10d,DWORD[28+rsi]
|
|
- or r12d,r11d
|
|
+ add edx,r11d
|
|
mov r11d,ebx
|
|
add edx,r12d
|
|
mov r12d,ebx
|
|
@@ -364,7 +364,7 @@ $L$loop:
|
|
lea ecx,[1735328473+r10*1+rcx]
|
|
and r11d,eax
|
|
mov r10d,DWORD[48+rsi]
|
|
- or r12d,r11d
|
|
+ add ecx,r11d
|
|
mov r11d,eax
|
|
add ecx,r12d
|
|
mov r12d,eax
|
|
@@ -375,7 +375,7 @@ $L$loop:
|
|
lea ebx,[((-1926607734))+r10*1+rbx]
|
|
and r11d,edx
|
|
mov r10d,DWORD[20+rsi]
|
|
- or r12d,r11d
|
|
+ add ebx,r11d
|
|
mov r11d,edx
|
|
add ebx,r12d
|
|
mov r12d,edx
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/x86_64cpuid.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/x86_64cpuid.nasm
|
|
index 2ce22321d7..b58086af0a 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/x86_64cpuid.nasm
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/x86_64cpuid.nasm
|
|
@@ -2,14 +2,14 @@ default rel
|
|
%define XMMWORD
|
|
%define YMMWORD
|
|
%define ZMMWORD
|
|
+
|
|
EXTERN OPENSSL_cpuid_setup
|
|
|
|
section .CRT$XCU rdata align=8
|
|
DQ OPENSSL_cpuid_setup
|
|
|
|
|
|
-common OPENSSL_ia32cap_P 16
|
|
-
|
|
+common OPENSSL_ia32cap_P 40
|
|
section .text code align=64
|
|
|
|
|
|
@@ -175,6 +175,7 @@ $L$generic:
|
|
mov eax,7
|
|
xor ecx,ecx
|
|
cpuid
|
|
+ movd xmm1,eax
|
|
bt r9d,26
|
|
jc NEAR $L$notknights
|
|
and ebx,0xfff7ffff
|
|
@@ -185,9 +186,31 @@ $L$notknights:
|
|
jne NEAR $L$notskylakex
|
|
and ebx,0xfffeffff
|
|
|
|
+
|
|
$L$notskylakex:
|
|
mov DWORD[8+rdi],ebx
|
|
mov DWORD[12+rdi],ecx
|
|
+ mov DWORD[16+rdi],edx
|
|
+
|
|
+ movd eax,xmm1
|
|
+ cmp eax,0x1
|
|
+ jb NEAR $L$no_extended_info
|
|
+ mov eax,0x7
|
|
+ mov ecx,0x1
|
|
+ cpuid
|
|
+ mov DWORD[20+rdi],eax
|
|
+ mov DWORD[24+rdi],edx
|
|
+ mov DWORD[28+rdi],ebx
|
|
+ mov DWORD[32+rdi],ecx
|
|
+
|
|
+ and edx,0x80000
|
|
+ cmp edx,0x0
|
|
+ je NEAR $L$no_extended_info
|
|
+ mov eax,0x24
|
|
+ mov ecx,0x0
|
|
+ cpuid
|
|
+ mov DWORD[36+rdi],ebx
|
|
+
|
|
$L$no_extended_info:
|
|
|
|
bt r9d,27
|
|
@@ -206,6 +229,9 @@ DB 0x0f,0x01,0xd0
|
|
cmp eax,6
|
|
je NEAR $L$done
|
|
$L$clear_avx:
|
|
+ and DWORD[20+rdi],0xff7fffff
|
|
+
|
|
+
|
|
mov eax,0xefffe7ff
|
|
and r9d,eax
|
|
mov eax,0x3fdeffdf
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/crypto/params_idx.c b/CryptoPkg/Library/OpensslLib/OpensslGen/crypto/params_idx.c
|
|
index fd9b9ae658..e77a242602 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/crypto/params_idx.c
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/crypto/params_idx.c
|
|
@@ -33,6 +33,10 @@ int ossl_param_find_pidx(const char *s)
|
|
switch(s[2]) {
|
|
default:
|
|
break;
|
|
+ case 'd':
|
|
+ if (strcmp("itional-random", s + 3) == 0)
|
|
+ return PIDX_SIGNATURE_PARAM_ADD_RANDOM;
|
|
+ break;
|
|
case '\0':
|
|
return PIDX_KDF_PARAM_ARGON2_AD;
|
|
}
|
|
@@ -322,6 +326,10 @@ int ossl_param_find_pidx(const char *s)
|
|
case 's':
|
|
if (strcmp("c", s + 3) == 0)
|
|
return PIDX_OBJECT_PARAM_DESC;
|
|
+ break;
|
|
+ case 't':
|
|
+ if (strcmp("erministic", s + 3) == 0)
|
|
+ return PIDX_SIGNATURE_PARAM_DETERMINISTIC;
|
|
}
|
|
break;
|
|
case 'h':
|
|
@@ -936,8 +944,17 @@ int ossl_param_find_pidx(const char *s)
|
|
default:
|
|
break;
|
|
case '-':
|
|
- if (strcmp("check", s + 4) == 0)
|
|
- return PIDX_PKEY_PARAM_FIPS_KEY_CHECK;
|
|
+ switch(s[4]) {
|
|
+ default:
|
|
+ break;
|
|
+ case 'c':
|
|
+ if (strcmp("heck", s + 5) == 0)
|
|
+ return PIDX_PKEY_PARAM_FIPS_KEY_CHECK;
|
|
+ break;
|
|
+ case 'l':
|
|
+ if (strcmp("ength", s + 5) == 0)
|
|
+ return PIDX_SKEY_PARAM_KEY_LENGTH;
|
|
+ }
|
|
break;
|
|
case 'b':
|
|
if (strcmp("its", s + 4) == 0)
|
|
@@ -1060,8 +1077,17 @@ int ossl_param_find_pidx(const char *s)
|
|
}
|
|
break;
|
|
case 'e':
|
|
- if (strcmp("mcost", s + 2) == 0)
|
|
- return PIDX_KDF_PARAM_ARGON2_MEMCOST;
|
|
+ switch(s[2]) {
|
|
+ default:
|
|
+ break;
|
|
+ case 'm':
|
|
+ if (strcmp("cost", s + 3) == 0)
|
|
+ return PIDX_KDF_PARAM_ARGON2_MEMCOST;
|
|
+ break;
|
|
+ case 's':
|
|
+ if (strcmp("sage-encoding", s + 3) == 0)
|
|
+ return PIDX_SIGNATURE_PARAM_MESSAGE_ENCODING;
|
|
+ }
|
|
break;
|
|
case 'g':
|
|
switch(s[2]) {
|
|
@@ -1125,6 +1151,97 @@ int ossl_param_find_pidx(const char *s)
|
|
}
|
|
}
|
|
break;
|
|
+ case 'l':
|
|
+ switch(s[2]) {
|
|
+ default:
|
|
+ break;
|
|
+ case '-':
|
|
+ switch(s[3]) {
|
|
+ default:
|
|
+ break;
|
|
+ case 'd':
|
|
+ switch(s[4]) {
|
|
+ default:
|
|
+ break;
|
|
+ case 's':
|
|
+ switch(s[5]) {
|
|
+ default:
|
|
+ break;
|
|
+ case 'a':
|
|
+ switch(s[6]) {
|
|
+ default:
|
|
+ break;
|
|
+ case '.':
|
|
+ switch(s[7]) {
|
|
+ default:
|
|
+ break;
|
|
+ case 'i':
|
|
+ if (strcmp("nput_formats", s + 8) == 0)
|
|
+ return PIDX_PKEY_PARAM_ML_DSA_INPUT_FORMATS;
|
|
+ break;
|
|
+ case 'o':
|
|
+ if (strcmp("utput_formats", s + 8) == 0)
|
|
+ return PIDX_PKEY_PARAM_ML_DSA_OUTPUT_FORMATS;
|
|
+ break;
|
|
+ case 'p':
|
|
+ if (strcmp("refer_seed", s + 8) == 0)
|
|
+ return PIDX_PKEY_PARAM_ML_DSA_PREFER_SEED;
|
|
+ break;
|
|
+ case 'r':
|
|
+ if (strcmp("etain_seed", s + 8) == 0)
|
|
+ return PIDX_PKEY_PARAM_ML_DSA_RETAIN_SEED;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ break;
|
|
+ case 'k':
|
|
+ switch(s[4]) {
|
|
+ default:
|
|
+ break;
|
|
+ case 'e':
|
|
+ switch(s[5]) {
|
|
+ default:
|
|
+ break;
|
|
+ case 'm':
|
|
+ switch(s[6]) {
|
|
+ default:
|
|
+ break;
|
|
+ case '.':
|
|
+ switch(s[7]) {
|
|
+ default:
|
|
+ break;
|
|
+ case 'i':
|
|
+ switch(s[8]) {
|
|
+ default:
|
|
+ break;
|
|
+ case 'm':
|
|
+ if (strcmp("port_pct_type", s + 9) == 0)
|
|
+ return PIDX_PKEY_PARAM_ML_KEM_IMPORT_PCT_TYPE;
|
|
+ break;
|
|
+ case 'n':
|
|
+ if (strcmp("put_formats", s + 9) == 0)
|
|
+ return PIDX_PKEY_PARAM_ML_KEM_INPUT_FORMATS;
|
|
+ }
|
|
+ break;
|
|
+ case 'o':
|
|
+ if (strcmp("utput_formats", s + 8) == 0)
|
|
+ return PIDX_PKEY_PARAM_ML_KEM_OUTPUT_FORMATS;
|
|
+ break;
|
|
+ case 'p':
|
|
+ if (strcmp("refer_seed", s + 8) == 0)
|
|
+ return PIDX_PKEY_PARAM_ML_KEM_PREFER_SEED;
|
|
+ break;
|
|
+ case 'r':
|
|
+ if (strcmp("etain_seed", s + 8) == 0)
|
|
+ return PIDX_PKEY_PARAM_ML_KEM_RETAIN_SEED;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ break;
|
|
case 'o':
|
|
switch(s[2]) {
|
|
default:
|
|
@@ -1147,6 +1264,14 @@ int ossl_param_find_pidx(const char *s)
|
|
}
|
|
}
|
|
break;
|
|
+ case 'u':
|
|
+ switch(s[2]) {
|
|
+ default:
|
|
+ break;
|
|
+ case '\0':
|
|
+ return PIDX_SIGNATURE_PARAM_MU;
|
|
+ }
|
|
+ break;
|
|
case '\0':
|
|
return PIDX_PKEY_PARAM_EC_CHAR2_M;
|
|
}
|
|
@@ -1327,6 +1452,10 @@ int ossl_param_find_pidx(const char *s)
|
|
if (strcmp("ounter", s + 2) == 0)
|
|
return PIDX_PKEY_PARAM_FFC_PCOUNTER;
|
|
break;
|
|
+ case 'i':
|
|
+ if (strcmp("peline-tag", s + 2) == 0)
|
|
+ return PIDX_CIPHER_PARAM_PIPELINE_AEAD_TAG;
|
|
+ break;
|
|
case 'k':
|
|
if (strcmp("cs5", s + 2) == 0)
|
|
return PIDX_KDF_PARAM_PKCS5;
|
|
@@ -1473,6 +1602,10 @@ int ossl_param_find_pidx(const char *s)
|
|
return PIDX_DRBG_PARAM_RANDOM_DATA;
|
|
}
|
|
}
|
|
+ break;
|
|
+ case 'w':
|
|
+ if (strcmp("-bytes", s + 3) == 0)
|
|
+ return PIDX_SKEY_PARAM_RAW_BYTES;
|
|
}
|
|
break;
|
|
case 'e':
|
|
@@ -2064,7 +2197,7 @@ int ossl_param_find_pidx(const char *s)
|
|
break;
|
|
case 'e':
|
|
if (strcmp("d", s + 3) == 0)
|
|
- return PIDX_PKEY_PARAM_FFC_SEED;
|
|
+ return PIDX_PKEY_PARAM_SLH_DSA_SEED;
|
|
break;
|
|
case 'r':
|
|
if (strcmp("ial", s + 3) == 0)
|
|
@@ -2350,6 +2483,10 @@ int ossl_param_find_pidx(const char *s)
|
|
switch(s[4]) {
|
|
default:
|
|
break;
|
|
+ case '-':
|
|
+ if (strcmp("entropy", s + 5) == 0)
|
|
+ return PIDX_SIGNATURE_PARAM_TEST_ENTROPY;
|
|
+ break;
|
|
case '_':
|
|
switch(s[5]) {
|
|
default:
|
|
@@ -2502,7 +2639,7 @@ int ossl_param_find_pidx(const char *s)
|
|
break;
|
|
case 'd':
|
|
if (strcmp("tls", s + 9) == 0)
|
|
- return PIDX_CAPABILITY_TLS_GROUP_MAX_DTLS;
|
|
+ return PIDX_CAPABILITY_TLS_SIGALG_MAX_DTLS;
|
|
break;
|
|
case 't':
|
|
if (strcmp("ls", s + 9) == 0)
|
|
@@ -2525,7 +2662,7 @@ int ossl_param_find_pidx(const char *s)
|
|
break;
|
|
case 'd':
|
|
if (strcmp("tls", s + 9) == 0)
|
|
- return PIDX_CAPABILITY_TLS_GROUP_MIN_DTLS;
|
|
+ return PIDX_CAPABILITY_TLS_SIGALG_MIN_DTLS;
|
|
break;
|
|
case 't':
|
|
if (strcmp("ls", s + 9) == 0)
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/internal/param_names.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/internal/param_names.h
|
|
index 27bcea8137..10e995f20c 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/internal/param_names.h
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/internal/param_names.h
|
|
@@ -14,7 +14,7 @@
|
|
int ossl_param_find_pidx(const char *s);
|
|
|
|
/* Parameter name definitions - generated by util/perl/OpenSSL/paramnames.pm */
|
|
-#define NUM_PIDX 329
|
|
+#define NUM_PIDX 346
|
|
|
|
#define PIDX_ALG_PARAM_ALGORITHM_ID 0
|
|
#define PIDX_ALG_PARAM_ALGORITHM_ID_PARAMS 1
|
|
@@ -55,7 +55,9 @@ int ossl_param_find_pidx(const char *s);
|
|
#define PIDX_CAPABILITY_TLS_SIGALG_IANA_NAME 26
|
|
#define PIDX_CAPABILITY_TLS_SIGALG_KEYTYPE 27
|
|
#define PIDX_CAPABILITY_TLS_SIGALG_KEYTYPE_OID 28
|
|
+#define PIDX_CAPABILITY_TLS_SIGALG_MAX_DTLS 16
|
|
#define PIDX_CAPABILITY_TLS_SIGALG_MAX_TLS 17
|
|
+#define PIDX_CAPABILITY_TLS_SIGALG_MIN_DTLS 18
|
|
#define PIDX_CAPABILITY_TLS_SIGALG_MIN_TLS 19
|
|
#define PIDX_CAPABILITY_TLS_SIGALG_NAME 29
|
|
#define PIDX_CAPABILITY_TLS_SIGALG_OID 30
|
|
@@ -90,357 +92,378 @@ int ossl_param_find_pidx(const char *s);
|
|
#define PIDX_CIPHER_PARAM_MODE 55
|
|
#define PIDX_CIPHER_PARAM_NUM 56
|
|
#define PIDX_CIPHER_PARAM_PADDING 57
|
|
-#define PIDX_CIPHER_PARAM_RANDOM_KEY 58
|
|
-#define PIDX_CIPHER_PARAM_RC2_KEYBITS 59
|
|
-#define PIDX_CIPHER_PARAM_ROUNDS 60
|
|
-#define PIDX_CIPHER_PARAM_SPEED 61
|
|
-#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK 62
|
|
-#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD 63
|
|
-#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD_PACKLEN 64
|
|
-#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC 65
|
|
-#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_IN 66
|
|
-#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_LEN 67
|
|
-#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_INTERLEAVE 68
|
|
-#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_BUFSIZE 69
|
|
-#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_SEND_FRAGMENT 70
|
|
-#define PIDX_CIPHER_PARAM_TLS_MAC 71
|
|
-#define PIDX_CIPHER_PARAM_TLS_MAC_SIZE 72
|
|
-#define PIDX_CIPHER_PARAM_TLS_VERSION 73
|
|
-#define PIDX_CIPHER_PARAM_UPDATED_IV 74
|
|
-#define PIDX_CIPHER_PARAM_USE_BITS 75
|
|
-#define PIDX_CIPHER_PARAM_XTS_STANDARD 76
|
|
+#define PIDX_CIPHER_PARAM_PIPELINE_AEAD_TAG 58
|
|
+#define PIDX_CIPHER_PARAM_RANDOM_KEY 59
|
|
+#define PIDX_CIPHER_PARAM_RC2_KEYBITS 60
|
|
+#define PIDX_CIPHER_PARAM_ROUNDS 61
|
|
+#define PIDX_CIPHER_PARAM_SPEED 62
|
|
+#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK 63
|
|
+#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD 64
|
|
+#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD_PACKLEN 65
|
|
+#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC 66
|
|
+#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_IN 67
|
|
+#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_LEN 68
|
|
+#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_INTERLEAVE 69
|
|
+#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_BUFSIZE 70
|
|
+#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_SEND_FRAGMENT 71
|
|
+#define PIDX_CIPHER_PARAM_TLS_MAC 72
|
|
+#define PIDX_CIPHER_PARAM_TLS_MAC_SIZE 73
|
|
+#define PIDX_CIPHER_PARAM_TLS_VERSION 74
|
|
+#define PIDX_CIPHER_PARAM_UPDATED_IV 75
|
|
+#define PIDX_CIPHER_PARAM_USE_BITS 76
|
|
+#define PIDX_CIPHER_PARAM_XTS_STANDARD 77
|
|
#define PIDX_DECODER_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES
|
|
-#define PIDX_DIGEST_PARAM_ALGID_ABSENT 77
|
|
+#define PIDX_DIGEST_PARAM_ALGID_ABSENT 78
|
|
#define PIDX_DIGEST_PARAM_BLOCK_SIZE 45
|
|
-#define PIDX_DIGEST_PARAM_MICALG 78
|
|
-#define PIDX_DIGEST_PARAM_PAD_TYPE 79
|
|
-#define PIDX_DIGEST_PARAM_SIZE 80
|
|
-#define PIDX_DIGEST_PARAM_SSL3_MS 81
|
|
-#define PIDX_DIGEST_PARAM_XOF 82
|
|
-#define PIDX_DIGEST_PARAM_XOFLEN 83
|
|
+#define PIDX_DIGEST_PARAM_MICALG 79
|
|
+#define PIDX_DIGEST_PARAM_PAD_TYPE 80
|
|
+#define PIDX_DIGEST_PARAM_SIZE 81
|
|
+#define PIDX_DIGEST_PARAM_SSL3_MS 82
|
|
+#define PIDX_DIGEST_PARAM_XOF 83
|
|
+#define PIDX_DIGEST_PARAM_XOFLEN 84
|
|
#define PIDX_DRBG_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER
|
|
#define PIDX_DRBG_PARAM_DIGEST PIDX_ALG_PARAM_DIGEST
|
|
-#define PIDX_DRBG_PARAM_ENTROPY_REQUIRED 84
|
|
+#define PIDX_DRBG_PARAM_ENTROPY_REQUIRED 85
|
|
#define PIDX_DRBG_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR
|
|
#define PIDX_DRBG_PARAM_FIPS_DIGEST_CHECK PIDX_PKEY_PARAM_FIPS_DIGEST_CHECK
|
|
#define PIDX_DRBG_PARAM_MAC PIDX_ALG_PARAM_MAC
|
|
-#define PIDX_DRBG_PARAM_MAX_ADINLEN 85
|
|
-#define PIDX_DRBG_PARAM_MAX_ENTROPYLEN 86
|
|
-#define PIDX_DRBG_PARAM_MAX_LENGTH 87
|
|
-#define PIDX_DRBG_PARAM_MAX_NONCELEN 88
|
|
-#define PIDX_DRBG_PARAM_MAX_PERSLEN 89
|
|
-#define PIDX_DRBG_PARAM_MIN_ENTROPYLEN 90
|
|
-#define PIDX_DRBG_PARAM_MIN_LENGTH 91
|
|
-#define PIDX_DRBG_PARAM_MIN_NONCELEN 92
|
|
-#define PIDX_DRBG_PARAM_PREDICTION_RESISTANCE 93
|
|
+#define PIDX_DRBG_PARAM_MAX_ADINLEN 86
|
|
+#define PIDX_DRBG_PARAM_MAX_ENTROPYLEN 87
|
|
+#define PIDX_DRBG_PARAM_MAX_LENGTH 88
|
|
+#define PIDX_DRBG_PARAM_MAX_NONCELEN 89
|
|
+#define PIDX_DRBG_PARAM_MAX_PERSLEN 90
|
|
+#define PIDX_DRBG_PARAM_MIN_ENTROPYLEN 91
|
|
+#define PIDX_DRBG_PARAM_MIN_LENGTH 92
|
|
+#define PIDX_DRBG_PARAM_MIN_NONCELEN 93
|
|
+#define PIDX_DRBG_PARAM_PREDICTION_RESISTANCE 94
|
|
#define PIDX_DRBG_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES
|
|
-#define PIDX_DRBG_PARAM_RANDOM_DATA 94
|
|
-#define PIDX_DRBG_PARAM_RESEED_COUNTER 95
|
|
-#define PIDX_DRBG_PARAM_RESEED_REQUESTS 96
|
|
-#define PIDX_DRBG_PARAM_RESEED_TIME 97
|
|
-#define PIDX_DRBG_PARAM_RESEED_TIME_INTERVAL 98
|
|
-#define PIDX_DRBG_PARAM_SIZE 80
|
|
-#define PIDX_DRBG_PARAM_USE_DF 99
|
|
+#define PIDX_DRBG_PARAM_RANDOM_DATA 95
|
|
+#define PIDX_DRBG_PARAM_RESEED_COUNTER 96
|
|
+#define PIDX_DRBG_PARAM_RESEED_REQUESTS 97
|
|
+#define PIDX_DRBG_PARAM_RESEED_TIME 98
|
|
+#define PIDX_DRBG_PARAM_RESEED_TIME_INTERVAL 99
|
|
+#define PIDX_DRBG_PARAM_SIZE 81
|
|
+#define PIDX_DRBG_PARAM_USE_DF 100
|
|
#define PIDX_ENCODER_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER
|
|
-#define PIDX_ENCODER_PARAM_ENCRYPT_LEVEL 100
|
|
+#define PIDX_ENCODER_PARAM_ENCRYPT_LEVEL 101
|
|
#define PIDX_ENCODER_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES
|
|
-#define PIDX_ENCODER_PARAM_SAVE_PARAMETERS 101
|
|
-#define PIDX_EXCHANGE_PARAM_EC_ECDH_COFACTOR_MODE 102
|
|
+#define PIDX_ENCODER_PARAM_SAVE_PARAMETERS 102
|
|
+#define PIDX_EXCHANGE_PARAM_EC_ECDH_COFACTOR_MODE 103
|
|
#define PIDX_EXCHANGE_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR
|
|
#define PIDX_EXCHANGE_PARAM_FIPS_DIGEST_CHECK PIDX_PKEY_PARAM_FIPS_DIGEST_CHECK
|
|
#define PIDX_EXCHANGE_PARAM_FIPS_ECDH_COFACTOR_CHECK PIDX_PROV_PARAM_ECDH_COFACTOR_CHECK
|
|
#define PIDX_EXCHANGE_PARAM_FIPS_KEY_CHECK PIDX_PKEY_PARAM_FIPS_KEY_CHECK
|
|
-#define PIDX_EXCHANGE_PARAM_KDF_DIGEST 103
|
|
-#define PIDX_EXCHANGE_PARAM_KDF_DIGEST_PROPS 104
|
|
-#define PIDX_EXCHANGE_PARAM_KDF_OUTLEN 105
|
|
-#define PIDX_EXCHANGE_PARAM_KDF_TYPE 106
|
|
-#define PIDX_EXCHANGE_PARAM_KDF_UKM 107
|
|
-#define PIDX_EXCHANGE_PARAM_PAD 108
|
|
-#define PIDX_GEN_PARAM_ITERATION 109
|
|
-#define PIDX_GEN_PARAM_POTENTIAL 110
|
|
-#define PIDX_KDF_PARAM_ARGON2_AD 111
|
|
-#define PIDX_KDF_PARAM_ARGON2_LANES 112
|
|
-#define PIDX_KDF_PARAM_ARGON2_MEMCOST 113
|
|
-#define PIDX_KDF_PARAM_ARGON2_VERSION 114
|
|
-#define PIDX_KDF_PARAM_CEK_ALG 115
|
|
+#define PIDX_EXCHANGE_PARAM_KDF_DIGEST 104
|
|
+#define PIDX_EXCHANGE_PARAM_KDF_DIGEST_PROPS 105
|
|
+#define PIDX_EXCHANGE_PARAM_KDF_OUTLEN 106
|
|
+#define PIDX_EXCHANGE_PARAM_KDF_TYPE 107
|
|
+#define PIDX_EXCHANGE_PARAM_KDF_UKM 108
|
|
+#define PIDX_EXCHANGE_PARAM_PAD 109
|
|
+#define PIDX_GEN_PARAM_ITERATION 110
|
|
+#define PIDX_GEN_PARAM_POTENTIAL 111
|
|
+#define PIDX_KDF_PARAM_ARGON2_AD 112
|
|
+#define PIDX_KDF_PARAM_ARGON2_LANES 113
|
|
+#define PIDX_KDF_PARAM_ARGON2_MEMCOST 114
|
|
+#define PIDX_KDF_PARAM_ARGON2_VERSION 115
|
|
+#define PIDX_KDF_PARAM_CEK_ALG 116
|
|
#define PIDX_KDF_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER
|
|
-#define PIDX_KDF_PARAM_CONSTANT 116
|
|
-#define PIDX_KDF_PARAM_DATA 117
|
|
+#define PIDX_KDF_PARAM_CONSTANT 117
|
|
+#define PIDX_KDF_PARAM_DATA 118
|
|
#define PIDX_KDF_PARAM_DIGEST PIDX_ALG_PARAM_DIGEST
|
|
-#define PIDX_KDF_PARAM_EARLY_CLEAN 118
|
|
+#define PIDX_KDF_PARAM_EARLY_CLEAN 119
|
|
#define PIDX_KDF_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR
|
|
#define PIDX_KDF_PARAM_FIPS_DIGEST_CHECK PIDX_PKEY_PARAM_FIPS_DIGEST_CHECK
|
|
-#define PIDX_KDF_PARAM_FIPS_EMS_CHECK 119
|
|
+#define PIDX_KDF_PARAM_FIPS_EMS_CHECK 120
|
|
#define PIDX_KDF_PARAM_FIPS_KEY_CHECK PIDX_PKEY_PARAM_FIPS_KEY_CHECK
|
|
-#define PIDX_KDF_PARAM_HMACDRBG_ENTROPY 120
|
|
-#define PIDX_KDF_PARAM_HMACDRBG_NONCE 121
|
|
-#define PIDX_KDF_PARAM_INFO 122
|
|
-#define PIDX_KDF_PARAM_ITER 123
|
|
-#define PIDX_KDF_PARAM_KBKDF_R 124
|
|
-#define PIDX_KDF_PARAM_KBKDF_USE_L 125
|
|
-#define PIDX_KDF_PARAM_KBKDF_USE_SEPARATOR 126
|
|
-#define PIDX_KDF_PARAM_KEY 127
|
|
-#define PIDX_KDF_PARAM_LABEL 128
|
|
+#define PIDX_KDF_PARAM_HMACDRBG_ENTROPY 121
|
|
+#define PIDX_KDF_PARAM_HMACDRBG_NONCE 122
|
|
+#define PIDX_KDF_PARAM_INFO 123
|
|
+#define PIDX_KDF_PARAM_ITER 124
|
|
+#define PIDX_KDF_PARAM_KBKDF_R 125
|
|
+#define PIDX_KDF_PARAM_KBKDF_USE_L 126
|
|
+#define PIDX_KDF_PARAM_KBKDF_USE_SEPARATOR 127
|
|
+#define PIDX_KDF_PARAM_KEY 128
|
|
+#define PIDX_KDF_PARAM_LABEL 129
|
|
#define PIDX_KDF_PARAM_MAC PIDX_ALG_PARAM_MAC
|
|
-#define PIDX_KDF_PARAM_MAC_SIZE 129
|
|
+#define PIDX_KDF_PARAM_MAC_SIZE 130
|
|
#define PIDX_KDF_PARAM_MODE 55
|
|
-#define PIDX_KDF_PARAM_PASSWORD 130
|
|
-#define PIDX_KDF_PARAM_PKCS12_ID 131
|
|
-#define PIDX_KDF_PARAM_PKCS5 132
|
|
-#define PIDX_KDF_PARAM_PREFIX 133
|
|
+#define PIDX_KDF_PARAM_PASSWORD 131
|
|
+#define PIDX_KDF_PARAM_PKCS12_ID 132
|
|
+#define PIDX_KDF_PARAM_PKCS5 133
|
|
+#define PIDX_KDF_PARAM_PREFIX 134
|
|
#define PIDX_KDF_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES
|
|
-#define PIDX_KDF_PARAM_SALT 134
|
|
-#define PIDX_KDF_PARAM_SCRYPT_MAXMEM 135
|
|
-#define PIDX_KDF_PARAM_SCRYPT_N 136
|
|
-#define PIDX_KDF_PARAM_SCRYPT_P 137
|
|
-#define PIDX_KDF_PARAM_SCRYPT_R 124
|
|
-#define PIDX_KDF_PARAM_SECRET 138
|
|
-#define PIDX_KDF_PARAM_SEED 139
|
|
-#define PIDX_KDF_PARAM_SIZE 80
|
|
-#define PIDX_KDF_PARAM_SSHKDF_SESSION_ID 140
|
|
-#define PIDX_KDF_PARAM_SSHKDF_TYPE 141
|
|
-#define PIDX_KDF_PARAM_SSHKDF_XCGHASH 142
|
|
-#define PIDX_KDF_PARAM_THREADS 143
|
|
-#define PIDX_KDF_PARAM_UKM 144
|
|
-#define PIDX_KDF_PARAM_X942_ACVPINFO 145
|
|
-#define PIDX_KDF_PARAM_X942_PARTYUINFO 146
|
|
-#define PIDX_KDF_PARAM_X942_PARTYVINFO 147
|
|
-#define PIDX_KDF_PARAM_X942_SUPP_PRIVINFO 148
|
|
-#define PIDX_KDF_PARAM_X942_SUPP_PUBINFO 149
|
|
-#define PIDX_KDF_PARAM_X942_USE_KEYBITS 150
|
|
+#define PIDX_KDF_PARAM_SALT 135
|
|
+#define PIDX_KDF_PARAM_SCRYPT_MAXMEM 136
|
|
+#define PIDX_KDF_PARAM_SCRYPT_N 137
|
|
+#define PIDX_KDF_PARAM_SCRYPT_P 138
|
|
+#define PIDX_KDF_PARAM_SCRYPT_R 125
|
|
+#define PIDX_KDF_PARAM_SECRET 139
|
|
+#define PIDX_KDF_PARAM_SEED 140
|
|
+#define PIDX_KDF_PARAM_SIZE 81
|
|
+#define PIDX_KDF_PARAM_SSHKDF_SESSION_ID 141
|
|
+#define PIDX_KDF_PARAM_SSHKDF_TYPE 142
|
|
+#define PIDX_KDF_PARAM_SSHKDF_XCGHASH 143
|
|
+#define PIDX_KDF_PARAM_THREADS 144
|
|
+#define PIDX_KDF_PARAM_UKM 145
|
|
+#define PIDX_KDF_PARAM_X942_ACVPINFO 146
|
|
+#define PIDX_KDF_PARAM_X942_PARTYUINFO 147
|
|
+#define PIDX_KDF_PARAM_X942_PARTYVINFO 148
|
|
+#define PIDX_KDF_PARAM_X942_SUPP_PRIVINFO 149
|
|
+#define PIDX_KDF_PARAM_X942_SUPP_PUBINFO 150
|
|
+#define PIDX_KDF_PARAM_X942_USE_KEYBITS 151
|
|
#define PIDX_KEM_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR
|
|
#define PIDX_KEM_PARAM_FIPS_KEY_CHECK PIDX_PKEY_PARAM_FIPS_KEY_CHECK
|
|
-#define PIDX_KEM_PARAM_IKME 151
|
|
-#define PIDX_KEM_PARAM_OPERATION 152
|
|
-#define PIDX_LIBSSL_RECORD_LAYER_PARAM_BLOCK_PADDING 153
|
|
-#define PIDX_LIBSSL_RECORD_LAYER_PARAM_HS_PADDING 154
|
|
-#define PIDX_LIBSSL_RECORD_LAYER_PARAM_MAX_EARLY_DATA 155
|
|
-#define PIDX_LIBSSL_RECORD_LAYER_PARAM_MAX_FRAG_LEN 156
|
|
+#define PIDX_KEM_PARAM_IKME 152
|
|
+#define PIDX_KEM_PARAM_OPERATION 153
|
|
+#define PIDX_LIBSSL_RECORD_LAYER_PARAM_BLOCK_PADDING 154
|
|
+#define PIDX_LIBSSL_RECORD_LAYER_PARAM_HS_PADDING 155
|
|
+#define PIDX_LIBSSL_RECORD_LAYER_PARAM_MAX_EARLY_DATA 156
|
|
+#define PIDX_LIBSSL_RECORD_LAYER_PARAM_MAX_FRAG_LEN 157
|
|
#define PIDX_LIBSSL_RECORD_LAYER_PARAM_MODE 55
|
|
-#define PIDX_LIBSSL_RECORD_LAYER_PARAM_OPTIONS 157
|
|
-#define PIDX_LIBSSL_RECORD_LAYER_PARAM_READ_AHEAD 158
|
|
-#define PIDX_LIBSSL_RECORD_LAYER_PARAM_STREAM_MAC 159
|
|
-#define PIDX_LIBSSL_RECORD_LAYER_PARAM_TLSTREE 160
|
|
-#define PIDX_LIBSSL_RECORD_LAYER_PARAM_USE_ETM 161
|
|
-#define PIDX_LIBSSL_RECORD_LAYER_READ_BUFFER_LEN 162
|
|
-#define PIDX_MAC_PARAM_BLOCK_SIZE 163
|
|
+#define PIDX_LIBSSL_RECORD_LAYER_PARAM_OPTIONS 158
|
|
+#define PIDX_LIBSSL_RECORD_LAYER_PARAM_READ_AHEAD 159
|
|
+#define PIDX_LIBSSL_RECORD_LAYER_PARAM_STREAM_MAC 160
|
|
+#define PIDX_LIBSSL_RECORD_LAYER_PARAM_TLSTREE 161
|
|
+#define PIDX_LIBSSL_RECORD_LAYER_PARAM_USE_ETM 162
|
|
+#define PIDX_LIBSSL_RECORD_LAYER_READ_BUFFER_LEN 163
|
|
+#define PIDX_MAC_PARAM_BLOCK_SIZE 164
|
|
#define PIDX_MAC_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER
|
|
-#define PIDX_MAC_PARAM_CUSTOM 164
|
|
-#define PIDX_MAC_PARAM_C_ROUNDS 165
|
|
+#define PIDX_MAC_PARAM_CUSTOM 165
|
|
+#define PIDX_MAC_PARAM_C_ROUNDS 166
|
|
#define PIDX_MAC_PARAM_DIGEST PIDX_ALG_PARAM_DIGEST
|
|
-#define PIDX_MAC_PARAM_DIGEST_NOINIT 166
|
|
-#define PIDX_MAC_PARAM_DIGEST_ONESHOT 167
|
|
-#define PIDX_MAC_PARAM_D_ROUNDS 168
|
|
+#define PIDX_MAC_PARAM_DIGEST_NOINIT 167
|
|
+#define PIDX_MAC_PARAM_DIGEST_ONESHOT 168
|
|
+#define PIDX_MAC_PARAM_D_ROUNDS 169
|
|
#define PIDX_MAC_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR
|
|
#define PIDX_MAC_PARAM_FIPS_KEY_CHECK PIDX_PKEY_PARAM_FIPS_KEY_CHECK
|
|
#define PIDX_MAC_PARAM_FIPS_NO_SHORT_MAC PIDX_PROV_PARAM_NO_SHORT_MAC
|
|
#define PIDX_MAC_PARAM_IV 52
|
|
-#define PIDX_MAC_PARAM_KEY 127
|
|
+#define PIDX_MAC_PARAM_KEY 128
|
|
#define PIDX_MAC_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES
|
|
-#define PIDX_MAC_PARAM_SALT 134
|
|
-#define PIDX_MAC_PARAM_SIZE 80
|
|
-#define PIDX_MAC_PARAM_TLS_DATA_SIZE 169
|
|
-#define PIDX_MAC_PARAM_XOF 82
|
|
-#define PIDX_OBJECT_PARAM_DATA 117
|
|
-#define PIDX_OBJECT_PARAM_DATA_STRUCTURE 170
|
|
-#define PIDX_OBJECT_PARAM_DATA_TYPE 171
|
|
-#define PIDX_OBJECT_PARAM_DESC 172
|
|
-#define PIDX_OBJECT_PARAM_REFERENCE 173
|
|
-#define PIDX_OBJECT_PARAM_TYPE 141
|
|
-#define PIDX_PASSPHRASE_PARAM_INFO 122
|
|
+#define PIDX_MAC_PARAM_SALT 135
|
|
+#define PIDX_MAC_PARAM_SIZE 81
|
|
+#define PIDX_MAC_PARAM_TLS_DATA_SIZE 170
|
|
+#define PIDX_MAC_PARAM_XOF 83
|
|
+#define PIDX_OBJECT_PARAM_DATA 118
|
|
+#define PIDX_OBJECT_PARAM_DATA_STRUCTURE 171
|
|
+#define PIDX_OBJECT_PARAM_DATA_TYPE 172
|
|
+#define PIDX_OBJECT_PARAM_DESC 173
|
|
+#define PIDX_OBJECT_PARAM_INPUT_TYPE 174
|
|
+#define PIDX_OBJECT_PARAM_REFERENCE 175
|
|
+#define PIDX_OBJECT_PARAM_TYPE 142
|
|
+#define PIDX_PASSPHRASE_PARAM_INFO 123
|
|
#define PIDX_PKEY_PARAM_ALGORITHM_ID PIDX_ALG_PARAM_ALGORITHM_ID
|
|
#define PIDX_PKEY_PARAM_ALGORITHM_ID_PARAMS PIDX_ALG_PARAM_ALGORITHM_ID_PARAMS
|
|
-#define PIDX_PKEY_PARAM_BITS 174
|
|
+#define PIDX_PKEY_PARAM_BITS 176
|
|
#define PIDX_PKEY_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER
|
|
-#define PIDX_PKEY_PARAM_DEFAULT_DIGEST 175
|
|
-#define PIDX_PKEY_PARAM_DHKEM_IKM 176
|
|
-#define PIDX_PKEY_PARAM_DH_GENERATOR 177
|
|
-#define PIDX_PKEY_PARAM_DH_PRIV_LEN 178
|
|
+#define PIDX_PKEY_PARAM_DEFAULT_DIGEST 177
|
|
+#define PIDX_PKEY_PARAM_DHKEM_IKM 178
|
|
+#define PIDX_PKEY_PARAM_DH_GENERATOR 179
|
|
+#define PIDX_PKEY_PARAM_DH_PRIV_LEN 180
|
|
#define PIDX_PKEY_PARAM_DIGEST PIDX_ALG_PARAM_DIGEST
|
|
-#define PIDX_PKEY_PARAM_DIGEST_SIZE 179
|
|
-#define PIDX_PKEY_PARAM_DIST_ID 180
|
|
-#define PIDX_PKEY_PARAM_EC_A 181
|
|
-#define PIDX_PKEY_PARAM_EC_B 182
|
|
-#define PIDX_PKEY_PARAM_EC_CHAR2_M 183
|
|
-#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K1 184
|
|
-#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K2 185
|
|
-#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K3 186
|
|
-#define PIDX_PKEY_PARAM_EC_CHAR2_TP_BASIS 187
|
|
-#define PIDX_PKEY_PARAM_EC_CHAR2_TYPE 188
|
|
-#define PIDX_PKEY_PARAM_EC_COFACTOR 189
|
|
-#define PIDX_PKEY_PARAM_EC_DECODED_FROM_EXPLICIT_PARAMS 190
|
|
-#define PIDX_PKEY_PARAM_EC_ENCODING 191
|
|
-#define PIDX_PKEY_PARAM_EC_FIELD_TYPE 192
|
|
-#define PIDX_PKEY_PARAM_EC_GENERATOR 193
|
|
-#define PIDX_PKEY_PARAM_EC_GROUP_CHECK_TYPE 194
|
|
-#define PIDX_PKEY_PARAM_EC_INCLUDE_PUBLIC 195
|
|
-#define PIDX_PKEY_PARAM_EC_ORDER 196
|
|
-#define PIDX_PKEY_PARAM_EC_P 137
|
|
-#define PIDX_PKEY_PARAM_EC_POINT_CONVERSION_FORMAT 197
|
|
-#define PIDX_PKEY_PARAM_EC_PUB_X 198
|
|
-#define PIDX_PKEY_PARAM_EC_PUB_Y 199
|
|
-#define PIDX_PKEY_PARAM_EC_SEED 139
|
|
-#define PIDX_PKEY_PARAM_ENCODED_PUBLIC_KEY 200
|
|
+#define PIDX_PKEY_PARAM_DIGEST_SIZE 181
|
|
+#define PIDX_PKEY_PARAM_DIST_ID 182
|
|
+#define PIDX_PKEY_PARAM_EC_A 183
|
|
+#define PIDX_PKEY_PARAM_EC_B 184
|
|
+#define PIDX_PKEY_PARAM_EC_CHAR2_M 185
|
|
+#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K1 186
|
|
+#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K2 187
|
|
+#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K3 188
|
|
+#define PIDX_PKEY_PARAM_EC_CHAR2_TP_BASIS 189
|
|
+#define PIDX_PKEY_PARAM_EC_CHAR2_TYPE 190
|
|
+#define PIDX_PKEY_PARAM_EC_COFACTOR 191
|
|
+#define PIDX_PKEY_PARAM_EC_DECODED_FROM_EXPLICIT_PARAMS 192
|
|
+#define PIDX_PKEY_PARAM_EC_ENCODING 193
|
|
+#define PIDX_PKEY_PARAM_EC_FIELD_TYPE 194
|
|
+#define PIDX_PKEY_PARAM_EC_GENERATOR 195
|
|
+#define PIDX_PKEY_PARAM_EC_GROUP_CHECK_TYPE 196
|
|
+#define PIDX_PKEY_PARAM_EC_INCLUDE_PUBLIC 197
|
|
+#define PIDX_PKEY_PARAM_EC_ORDER 198
|
|
+#define PIDX_PKEY_PARAM_EC_P 138
|
|
+#define PIDX_PKEY_PARAM_EC_POINT_CONVERSION_FORMAT 199
|
|
+#define PIDX_PKEY_PARAM_EC_PUB_X 200
|
|
+#define PIDX_PKEY_PARAM_EC_PUB_Y 201
|
|
+#define PIDX_PKEY_PARAM_EC_SEED 140
|
|
+#define PIDX_PKEY_PARAM_ENCODED_PUBLIC_KEY 202
|
|
#define PIDX_PKEY_PARAM_ENGINE PIDX_ALG_PARAM_ENGINE
|
|
-#define PIDX_PKEY_PARAM_FFC_COFACTOR 201
|
|
+#define PIDX_PKEY_PARAM_FFC_COFACTOR 203
|
|
#define PIDX_PKEY_PARAM_FFC_DIGEST PIDX_PKEY_PARAM_DIGEST
|
|
#define PIDX_PKEY_PARAM_FFC_DIGEST_PROPS PIDX_PKEY_PARAM_PROPERTIES
|
|
-#define PIDX_PKEY_PARAM_FFC_G 202
|
|
-#define PIDX_PKEY_PARAM_FFC_GINDEX 203
|
|
-#define PIDX_PKEY_PARAM_FFC_H 204
|
|
-#define PIDX_PKEY_PARAM_FFC_P 137
|
|
-#define PIDX_PKEY_PARAM_FFC_PBITS 205
|
|
-#define PIDX_PKEY_PARAM_FFC_PCOUNTER 206
|
|
-#define PIDX_PKEY_PARAM_FFC_Q 207
|
|
-#define PIDX_PKEY_PARAM_FFC_QBITS 208
|
|
-#define PIDX_PKEY_PARAM_FFC_SEED 139
|
|
-#define PIDX_PKEY_PARAM_FFC_TYPE 141
|
|
-#define PIDX_PKEY_PARAM_FFC_VALIDATE_G 209
|
|
-#define PIDX_PKEY_PARAM_FFC_VALIDATE_LEGACY 210
|
|
-#define PIDX_PKEY_PARAM_FFC_VALIDATE_PQ 211
|
|
+#define PIDX_PKEY_PARAM_FFC_G 204
|
|
+#define PIDX_PKEY_PARAM_FFC_GINDEX 205
|
|
+#define PIDX_PKEY_PARAM_FFC_H 206
|
|
+#define PIDX_PKEY_PARAM_FFC_P 138
|
|
+#define PIDX_PKEY_PARAM_FFC_PBITS 207
|
|
+#define PIDX_PKEY_PARAM_FFC_PCOUNTER 208
|
|
+#define PIDX_PKEY_PARAM_FFC_Q 209
|
|
+#define PIDX_PKEY_PARAM_FFC_QBITS 210
|
|
+#define PIDX_PKEY_PARAM_FFC_SEED 140
|
|
+#define PIDX_PKEY_PARAM_FFC_TYPE 142
|
|
+#define PIDX_PKEY_PARAM_FFC_VALIDATE_G 211
|
|
+#define PIDX_PKEY_PARAM_FFC_VALIDATE_LEGACY 212
|
|
+#define PIDX_PKEY_PARAM_FFC_VALIDATE_PQ 213
|
|
#define PIDX_PKEY_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR
|
|
-#define PIDX_PKEY_PARAM_FIPS_DIGEST_CHECK 212
|
|
-#define PIDX_PKEY_PARAM_FIPS_KEY_CHECK 213
|
|
-#define PIDX_PKEY_PARAM_FIPS_SIGN_CHECK 214
|
|
-#define PIDX_PKEY_PARAM_GROUP_NAME 215
|
|
+#define PIDX_PKEY_PARAM_FIPS_DIGEST_CHECK 214
|
|
+#define PIDX_PKEY_PARAM_FIPS_KEY_CHECK 215
|
|
+#define PIDX_PKEY_PARAM_FIPS_SIGN_CHECK 216
|
|
+#define PIDX_PKEY_PARAM_GROUP_NAME 217
|
|
#define PIDX_PKEY_PARAM_IMPLICIT_REJECTION 8
|
|
-#define PIDX_PKEY_PARAM_MANDATORY_DIGEST 216
|
|
-#define PIDX_PKEY_PARAM_MASKGENFUNC 217
|
|
-#define PIDX_PKEY_PARAM_MAX_SIZE 218
|
|
-#define PIDX_PKEY_PARAM_MGF1_DIGEST 219
|
|
-#define PIDX_PKEY_PARAM_MGF1_PROPERTIES 220
|
|
-#define PIDX_PKEY_PARAM_PAD_MODE 221
|
|
-#define PIDX_PKEY_PARAM_PRIV_KEY 222
|
|
+#define PIDX_PKEY_PARAM_MANDATORY_DIGEST 218
|
|
+#define PIDX_PKEY_PARAM_MASKGENFUNC 219
|
|
+#define PIDX_PKEY_PARAM_MAX_SIZE 220
|
|
+#define PIDX_PKEY_PARAM_MGF1_DIGEST 221
|
|
+#define PIDX_PKEY_PARAM_MGF1_PROPERTIES 222
|
|
+#define PIDX_PKEY_PARAM_ML_DSA_INPUT_FORMATS 223
|
|
+#define PIDX_PKEY_PARAM_ML_DSA_OUTPUT_FORMATS 224
|
|
+#define PIDX_PKEY_PARAM_ML_DSA_PREFER_SEED 225
|
|
+#define PIDX_PKEY_PARAM_ML_DSA_RETAIN_SEED 226
|
|
+#define PIDX_PKEY_PARAM_ML_DSA_SEED 140
|
|
+#define PIDX_PKEY_PARAM_ML_KEM_IMPORT_PCT_TYPE 227
|
|
+#define PIDX_PKEY_PARAM_ML_KEM_INPUT_FORMATS 228
|
|
+#define PIDX_PKEY_PARAM_ML_KEM_OUTPUT_FORMATS 229
|
|
+#define PIDX_PKEY_PARAM_ML_KEM_PREFER_SEED 230
|
|
+#define PIDX_PKEY_PARAM_ML_KEM_RETAIN_SEED 231
|
|
+#define PIDX_PKEY_PARAM_ML_KEM_SEED 140
|
|
+#define PIDX_PKEY_PARAM_PAD_MODE 232
|
|
+#define PIDX_PKEY_PARAM_PRIV_KEY 233
|
|
#define PIDX_PKEY_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES
|
|
-#define PIDX_PKEY_PARAM_PUB_KEY 223
|
|
+#define PIDX_PKEY_PARAM_PUB_KEY 234
|
|
#define PIDX_PKEY_PARAM_RSA_BITS PIDX_PKEY_PARAM_BITS
|
|
-#define PIDX_PKEY_PARAM_RSA_COEFFICIENT 224
|
|
-#define PIDX_PKEY_PARAM_RSA_COEFFICIENT1 225
|
|
-#define PIDX_PKEY_PARAM_RSA_COEFFICIENT2 226
|
|
-#define PIDX_PKEY_PARAM_RSA_COEFFICIENT3 227
|
|
-#define PIDX_PKEY_PARAM_RSA_COEFFICIENT4 228
|
|
-#define PIDX_PKEY_PARAM_RSA_COEFFICIENT5 229
|
|
-#define PIDX_PKEY_PARAM_RSA_COEFFICIENT6 230
|
|
-#define PIDX_PKEY_PARAM_RSA_COEFFICIENT7 231
|
|
-#define PIDX_PKEY_PARAM_RSA_COEFFICIENT8 232
|
|
-#define PIDX_PKEY_PARAM_RSA_COEFFICIENT9 233
|
|
-#define PIDX_PKEY_PARAM_RSA_D 234
|
|
-#define PIDX_PKEY_PARAM_RSA_DERIVE_FROM_PQ 235
|
|
+#define PIDX_PKEY_PARAM_RSA_COEFFICIENT 235
|
|
+#define PIDX_PKEY_PARAM_RSA_COEFFICIENT1 236
|
|
+#define PIDX_PKEY_PARAM_RSA_COEFFICIENT2 237
|
|
+#define PIDX_PKEY_PARAM_RSA_COEFFICIENT3 238
|
|
+#define PIDX_PKEY_PARAM_RSA_COEFFICIENT4 239
|
|
+#define PIDX_PKEY_PARAM_RSA_COEFFICIENT5 240
|
|
+#define PIDX_PKEY_PARAM_RSA_COEFFICIENT6 241
|
|
+#define PIDX_PKEY_PARAM_RSA_COEFFICIENT7 242
|
|
+#define PIDX_PKEY_PARAM_RSA_COEFFICIENT8 243
|
|
+#define PIDX_PKEY_PARAM_RSA_COEFFICIENT9 244
|
|
+#define PIDX_PKEY_PARAM_RSA_D 245
|
|
+#define PIDX_PKEY_PARAM_RSA_DERIVE_FROM_PQ 246
|
|
#define PIDX_PKEY_PARAM_RSA_DIGEST PIDX_PKEY_PARAM_DIGEST
|
|
#define PIDX_PKEY_PARAM_RSA_DIGEST_PROPS PIDX_PKEY_PARAM_PROPERTIES
|
|
-#define PIDX_PKEY_PARAM_RSA_E 236
|
|
-#define PIDX_PKEY_PARAM_RSA_EXPONENT 237
|
|
-#define PIDX_PKEY_PARAM_RSA_EXPONENT1 238
|
|
-#define PIDX_PKEY_PARAM_RSA_EXPONENT10 239
|
|
-#define PIDX_PKEY_PARAM_RSA_EXPONENT2 240
|
|
-#define PIDX_PKEY_PARAM_RSA_EXPONENT3 241
|
|
-#define PIDX_PKEY_PARAM_RSA_EXPONENT4 242
|
|
-#define PIDX_PKEY_PARAM_RSA_EXPONENT5 243
|
|
-#define PIDX_PKEY_PARAM_RSA_EXPONENT6 244
|
|
-#define PIDX_PKEY_PARAM_RSA_EXPONENT7 245
|
|
-#define PIDX_PKEY_PARAM_RSA_EXPONENT8 246
|
|
-#define PIDX_PKEY_PARAM_RSA_EXPONENT9 247
|
|
-#define PIDX_PKEY_PARAM_RSA_FACTOR 248
|
|
-#define PIDX_PKEY_PARAM_RSA_FACTOR1 249
|
|
-#define PIDX_PKEY_PARAM_RSA_FACTOR10 250
|
|
-#define PIDX_PKEY_PARAM_RSA_FACTOR2 251
|
|
-#define PIDX_PKEY_PARAM_RSA_FACTOR3 252
|
|
-#define PIDX_PKEY_PARAM_RSA_FACTOR4 253
|
|
-#define PIDX_PKEY_PARAM_RSA_FACTOR5 254
|
|
-#define PIDX_PKEY_PARAM_RSA_FACTOR6 255
|
|
-#define PIDX_PKEY_PARAM_RSA_FACTOR7 256
|
|
-#define PIDX_PKEY_PARAM_RSA_FACTOR8 257
|
|
-#define PIDX_PKEY_PARAM_RSA_FACTOR9 258
|
|
+#define PIDX_PKEY_PARAM_RSA_E 247
|
|
+#define PIDX_PKEY_PARAM_RSA_EXPONENT 248
|
|
+#define PIDX_PKEY_PARAM_RSA_EXPONENT1 249
|
|
+#define PIDX_PKEY_PARAM_RSA_EXPONENT10 250
|
|
+#define PIDX_PKEY_PARAM_RSA_EXPONENT2 251
|
|
+#define PIDX_PKEY_PARAM_RSA_EXPONENT3 252
|
|
+#define PIDX_PKEY_PARAM_RSA_EXPONENT4 253
|
|
+#define PIDX_PKEY_PARAM_RSA_EXPONENT5 254
|
|
+#define PIDX_PKEY_PARAM_RSA_EXPONENT6 255
|
|
+#define PIDX_PKEY_PARAM_RSA_EXPONENT7 256
|
|
+#define PIDX_PKEY_PARAM_RSA_EXPONENT8 257
|
|
+#define PIDX_PKEY_PARAM_RSA_EXPONENT9 258
|
|
+#define PIDX_PKEY_PARAM_RSA_FACTOR 259
|
|
+#define PIDX_PKEY_PARAM_RSA_FACTOR1 260
|
|
+#define PIDX_PKEY_PARAM_RSA_FACTOR10 261
|
|
+#define PIDX_PKEY_PARAM_RSA_FACTOR2 262
|
|
+#define PIDX_PKEY_PARAM_RSA_FACTOR3 263
|
|
+#define PIDX_PKEY_PARAM_RSA_FACTOR4 264
|
|
+#define PIDX_PKEY_PARAM_RSA_FACTOR5 265
|
|
+#define PIDX_PKEY_PARAM_RSA_FACTOR6 266
|
|
+#define PIDX_PKEY_PARAM_RSA_FACTOR7 267
|
|
+#define PIDX_PKEY_PARAM_RSA_FACTOR8 268
|
|
+#define PIDX_PKEY_PARAM_RSA_FACTOR9 269
|
|
#define PIDX_PKEY_PARAM_RSA_MASKGENFUNC PIDX_PKEY_PARAM_MASKGENFUNC
|
|
#define PIDX_PKEY_PARAM_RSA_MGF1_DIGEST PIDX_PKEY_PARAM_MGF1_DIGEST
|
|
-#define PIDX_PKEY_PARAM_RSA_N 136
|
|
-#define PIDX_PKEY_PARAM_RSA_PRIMES 259
|
|
-#define PIDX_PKEY_PARAM_RSA_PSS_SALTLEN 260
|
|
-#define PIDX_PKEY_PARAM_RSA_TEST_P1 261
|
|
-#define PIDX_PKEY_PARAM_RSA_TEST_P2 262
|
|
-#define PIDX_PKEY_PARAM_RSA_TEST_Q1 263
|
|
-#define PIDX_PKEY_PARAM_RSA_TEST_Q2 264
|
|
-#define PIDX_PKEY_PARAM_RSA_TEST_XP 265
|
|
-#define PIDX_PKEY_PARAM_RSA_TEST_XP1 266
|
|
-#define PIDX_PKEY_PARAM_RSA_TEST_XP2 267
|
|
-#define PIDX_PKEY_PARAM_RSA_TEST_XQ 268
|
|
-#define PIDX_PKEY_PARAM_RSA_TEST_XQ1 269
|
|
-#define PIDX_PKEY_PARAM_RSA_TEST_XQ2 270
|
|
-#define PIDX_PKEY_PARAM_SECURITY_BITS 271
|
|
+#define PIDX_PKEY_PARAM_RSA_N 137
|
|
+#define PIDX_PKEY_PARAM_RSA_PRIMES 270
|
|
+#define PIDX_PKEY_PARAM_RSA_PSS_SALTLEN 271
|
|
+#define PIDX_PKEY_PARAM_RSA_TEST_P1 272
|
|
+#define PIDX_PKEY_PARAM_RSA_TEST_P2 273
|
|
+#define PIDX_PKEY_PARAM_RSA_TEST_Q1 274
|
|
+#define PIDX_PKEY_PARAM_RSA_TEST_Q2 275
|
|
+#define PIDX_PKEY_PARAM_RSA_TEST_XP 276
|
|
+#define PIDX_PKEY_PARAM_RSA_TEST_XP1 277
|
|
+#define PIDX_PKEY_PARAM_RSA_TEST_XP2 278
|
|
+#define PIDX_PKEY_PARAM_RSA_TEST_XQ 279
|
|
+#define PIDX_PKEY_PARAM_RSA_TEST_XQ1 280
|
|
+#define PIDX_PKEY_PARAM_RSA_TEST_XQ2 281
|
|
+#define PIDX_PKEY_PARAM_SECURITY_BITS 282
|
|
+#define PIDX_PKEY_PARAM_SLH_DSA_SEED 140
|
|
#define PIDX_PKEY_PARAM_USE_COFACTOR_ECDH PIDX_PKEY_PARAM_USE_COFACTOR_FLAG
|
|
-#define PIDX_PKEY_PARAM_USE_COFACTOR_FLAG 272
|
|
-#define PIDX_PROV_PARAM_BUILDINFO 273
|
|
-#define PIDX_PROV_PARAM_CORE_MODULE_FILENAME 274
|
|
-#define PIDX_PROV_PARAM_CORE_PROV_NAME 275
|
|
-#define PIDX_PROV_PARAM_CORE_VERSION 276
|
|
-#define PIDX_PROV_PARAM_DRBG_TRUNC_DIGEST 277
|
|
-#define PIDX_PROV_PARAM_DSA_SIGN_DISABLED 278
|
|
-#define PIDX_PROV_PARAM_ECDH_COFACTOR_CHECK 279
|
|
-#define PIDX_PROV_PARAM_HKDF_DIGEST_CHECK 280
|
|
-#define PIDX_PROV_PARAM_HKDF_KEY_CHECK 281
|
|
-#define PIDX_PROV_PARAM_HMAC_KEY_CHECK 282
|
|
-#define PIDX_PROV_PARAM_KBKDF_KEY_CHECK 283
|
|
-#define PIDX_PROV_PARAM_KMAC_KEY_CHECK 284
|
|
-#define PIDX_PROV_PARAM_NAME 285
|
|
-#define PIDX_PROV_PARAM_NO_SHORT_MAC 286
|
|
-#define PIDX_PROV_PARAM_PBKDF2_LOWER_BOUND_CHECK 287
|
|
-#define PIDX_PROV_PARAM_RSA_PKCS15_PAD_DISABLED 288
|
|
-#define PIDX_PROV_PARAM_RSA_PSS_SALTLEN_CHECK 289
|
|
-#define PIDX_PROV_PARAM_RSA_SIGN_X931_PAD_DISABLED 290
|
|
-#define PIDX_PROV_PARAM_SECURITY_CHECKS 291
|
|
-#define PIDX_PROV_PARAM_SELF_TEST_DESC 292
|
|
-#define PIDX_PROV_PARAM_SELF_TEST_PHASE 293
|
|
-#define PIDX_PROV_PARAM_SELF_TEST_TYPE 294
|
|
-#define PIDX_PROV_PARAM_SIGNATURE_DIGEST_CHECK 295
|
|
-#define PIDX_PROV_PARAM_SSHKDF_DIGEST_CHECK 296
|
|
-#define PIDX_PROV_PARAM_SSHKDF_KEY_CHECK 297
|
|
-#define PIDX_PROV_PARAM_SSKDF_DIGEST_CHECK 298
|
|
-#define PIDX_PROV_PARAM_SSKDF_KEY_CHECK 299
|
|
-#define PIDX_PROV_PARAM_STATUS 300
|
|
-#define PIDX_PROV_PARAM_TDES_ENCRYPT_DISABLED 301
|
|
-#define PIDX_PROV_PARAM_TLS13_KDF_DIGEST_CHECK 302
|
|
-#define PIDX_PROV_PARAM_TLS13_KDF_KEY_CHECK 303
|
|
-#define PIDX_PROV_PARAM_TLS1_PRF_DIGEST_CHECK 304
|
|
-#define PIDX_PROV_PARAM_TLS1_PRF_EMS_CHECK 305
|
|
-#define PIDX_PROV_PARAM_TLS1_PRF_KEY_CHECK 306
|
|
-#define PIDX_PROV_PARAM_VERSION 114
|
|
-#define PIDX_PROV_PARAM_X942KDF_KEY_CHECK 307
|
|
-#define PIDX_PROV_PARAM_X963KDF_DIGEST_CHECK 308
|
|
-#define PIDX_PROV_PARAM_X963KDF_KEY_CHECK 309
|
|
+#define PIDX_PKEY_PARAM_USE_COFACTOR_FLAG 283
|
|
+#define PIDX_PROV_PARAM_BUILDINFO 284
|
|
+#define PIDX_PROV_PARAM_CORE_MODULE_FILENAME 285
|
|
+#define PIDX_PROV_PARAM_CORE_PROV_NAME 286
|
|
+#define PIDX_PROV_PARAM_CORE_VERSION 287
|
|
+#define PIDX_PROV_PARAM_DRBG_TRUNC_DIGEST 288
|
|
+#define PIDX_PROV_PARAM_DSA_SIGN_DISABLED 289
|
|
+#define PIDX_PROV_PARAM_ECDH_COFACTOR_CHECK 290
|
|
+#define PIDX_PROV_PARAM_HKDF_DIGEST_CHECK 291
|
|
+#define PIDX_PROV_PARAM_HKDF_KEY_CHECK 292
|
|
+#define PIDX_PROV_PARAM_HMAC_KEY_CHECK 293
|
|
+#define PIDX_PROV_PARAM_KBKDF_KEY_CHECK 294
|
|
+#define PIDX_PROV_PARAM_KMAC_KEY_CHECK 295
|
|
+#define PIDX_PROV_PARAM_NAME 296
|
|
+#define PIDX_PROV_PARAM_NO_SHORT_MAC 297
|
|
+#define PIDX_PROV_PARAM_PBKDF2_LOWER_BOUND_CHECK 298
|
|
+#define PIDX_PROV_PARAM_RSA_PKCS15_PAD_DISABLED 299
|
|
+#define PIDX_PROV_PARAM_RSA_PSS_SALTLEN_CHECK 300
|
|
+#define PIDX_PROV_PARAM_RSA_SIGN_X931_PAD_DISABLED 301
|
|
+#define PIDX_PROV_PARAM_SECURITY_CHECKS 302
|
|
+#define PIDX_PROV_PARAM_SELF_TEST_DESC 303
|
|
+#define PIDX_PROV_PARAM_SELF_TEST_PHASE 304
|
|
+#define PIDX_PROV_PARAM_SELF_TEST_TYPE 305
|
|
+#define PIDX_PROV_PARAM_SIGNATURE_DIGEST_CHECK 306
|
|
+#define PIDX_PROV_PARAM_SSHKDF_DIGEST_CHECK 307
|
|
+#define PIDX_PROV_PARAM_SSHKDF_KEY_CHECK 308
|
|
+#define PIDX_PROV_PARAM_SSKDF_DIGEST_CHECK 309
|
|
+#define PIDX_PROV_PARAM_SSKDF_KEY_CHECK 310
|
|
+#define PIDX_PROV_PARAM_STATUS 311
|
|
+#define PIDX_PROV_PARAM_TDES_ENCRYPT_DISABLED 312
|
|
+#define PIDX_PROV_PARAM_TLS13_KDF_DIGEST_CHECK 313
|
|
+#define PIDX_PROV_PARAM_TLS13_KDF_KEY_CHECK 314
|
|
+#define PIDX_PROV_PARAM_TLS1_PRF_DIGEST_CHECK 315
|
|
+#define PIDX_PROV_PARAM_TLS1_PRF_EMS_CHECK 316
|
|
+#define PIDX_PROV_PARAM_TLS1_PRF_KEY_CHECK 317
|
|
+#define PIDX_PROV_PARAM_VERSION 115
|
|
+#define PIDX_PROV_PARAM_X942KDF_KEY_CHECK 318
|
|
+#define PIDX_PROV_PARAM_X963KDF_DIGEST_CHECK 319
|
|
+#define PIDX_PROV_PARAM_X963KDF_KEY_CHECK 320
|
|
#define PIDX_RAND_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR
|
|
-#define PIDX_RAND_PARAM_GENERATE 310
|
|
-#define PIDX_RAND_PARAM_MAX_REQUEST 311
|
|
-#define PIDX_RAND_PARAM_STATE 312
|
|
-#define PIDX_RAND_PARAM_STRENGTH 313
|
|
-#define PIDX_RAND_PARAM_TEST_ENTROPY 314
|
|
-#define PIDX_RAND_PARAM_TEST_NONCE 315
|
|
+#define PIDX_RAND_PARAM_GENERATE 321
|
|
+#define PIDX_RAND_PARAM_MAX_REQUEST 322
|
|
+#define PIDX_RAND_PARAM_STATE 323
|
|
+#define PIDX_RAND_PARAM_STRENGTH 324
|
|
+#define PIDX_RAND_PARAM_TEST_ENTROPY 325
|
|
+#define PIDX_RAND_PARAM_TEST_NONCE 326
|
|
+#define PIDX_SIGNATURE_PARAM_ADD_RANDOM 327
|
|
#define PIDX_SIGNATURE_PARAM_ALGORITHM_ID PIDX_PKEY_PARAM_ALGORITHM_ID
|
|
#define PIDX_SIGNATURE_PARAM_ALGORITHM_ID_PARAMS PIDX_PKEY_PARAM_ALGORITHM_ID_PARAMS
|
|
-#define PIDX_SIGNATURE_PARAM_CONTEXT_STRING 316
|
|
+#define PIDX_SIGNATURE_PARAM_CONTEXT_STRING 328
|
|
+#define PIDX_SIGNATURE_PARAM_DETERMINISTIC 329
|
|
#define PIDX_SIGNATURE_PARAM_DIGEST PIDX_PKEY_PARAM_DIGEST
|
|
#define PIDX_SIGNATURE_PARAM_DIGEST_SIZE PIDX_PKEY_PARAM_DIGEST_SIZE
|
|
#define PIDX_SIGNATURE_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR
|
|
#define PIDX_SIGNATURE_PARAM_FIPS_DIGEST_CHECK PIDX_PKEY_PARAM_FIPS_DIGEST_CHECK
|
|
#define PIDX_SIGNATURE_PARAM_FIPS_KEY_CHECK PIDX_PKEY_PARAM_FIPS_KEY_CHECK
|
|
-#define PIDX_SIGNATURE_PARAM_FIPS_RSA_PSS_SALTLEN_CHECK 289
|
|
+#define PIDX_SIGNATURE_PARAM_FIPS_RSA_PSS_SALTLEN_CHECK 300
|
|
#define PIDX_SIGNATURE_PARAM_FIPS_SIGN_CHECK PIDX_PKEY_PARAM_FIPS_SIGN_CHECK
|
|
-#define PIDX_SIGNATURE_PARAM_FIPS_SIGN_X931_PAD_CHECK 317
|
|
-#define PIDX_SIGNATURE_PARAM_FIPS_VERIFY_MESSAGE 318
|
|
-#define PIDX_SIGNATURE_PARAM_INSTANCE 319
|
|
-#define PIDX_SIGNATURE_PARAM_KAT 320
|
|
+#define PIDX_SIGNATURE_PARAM_FIPS_SIGN_X931_PAD_CHECK 330
|
|
+#define PIDX_SIGNATURE_PARAM_FIPS_VERIFY_MESSAGE 331
|
|
+#define PIDX_SIGNATURE_PARAM_INSTANCE 332
|
|
+#define PIDX_SIGNATURE_PARAM_KAT 333
|
|
+#define PIDX_SIGNATURE_PARAM_MESSAGE_ENCODING 334
|
|
#define PIDX_SIGNATURE_PARAM_MGF1_DIGEST PIDX_PKEY_PARAM_MGF1_DIGEST
|
|
#define PIDX_SIGNATURE_PARAM_MGF1_PROPERTIES PIDX_PKEY_PARAM_MGF1_PROPERTIES
|
|
-#define PIDX_SIGNATURE_PARAM_NONCE_TYPE 321
|
|
+#define PIDX_SIGNATURE_PARAM_MU 335
|
|
+#define PIDX_SIGNATURE_PARAM_NONCE_TYPE 336
|
|
#define PIDX_SIGNATURE_PARAM_PAD_MODE PIDX_PKEY_PARAM_PAD_MODE
|
|
#define PIDX_SIGNATURE_PARAM_PROPERTIES PIDX_PKEY_PARAM_PROPERTIES
|
|
-#define PIDX_SIGNATURE_PARAM_PSS_SALTLEN 260
|
|
-#define PIDX_SIGNATURE_PARAM_SIGNATURE 322
|
|
-#define PIDX_STORE_PARAM_ALIAS 323
|
|
+#define PIDX_SIGNATURE_PARAM_PSS_SALTLEN 271
|
|
+#define PIDX_SIGNATURE_PARAM_SIGNATURE 337
|
|
+#define PIDX_SIGNATURE_PARAM_TEST_ENTROPY 338
|
|
+#define PIDX_SKEY_PARAM_KEY_LENGTH 339
|
|
+#define PIDX_SKEY_PARAM_RAW_BYTES 340
|
|
+#define PIDX_STORE_PARAM_ALIAS 341
|
|
#define PIDX_STORE_PARAM_DIGEST 3
|
|
-#define PIDX_STORE_PARAM_EXPECT 324
|
|
-#define PIDX_STORE_PARAM_FINGERPRINT 325
|
|
-#define PIDX_STORE_PARAM_INPUT_TYPE 326
|
|
-#define PIDX_STORE_PARAM_ISSUER 285
|
|
+#define PIDX_STORE_PARAM_EXPECT 342
|
|
+#define PIDX_STORE_PARAM_FINGERPRINT 343
|
|
+#define PIDX_STORE_PARAM_INPUT_TYPE 174
|
|
+#define PIDX_STORE_PARAM_ISSUER 296
|
|
#define PIDX_STORE_PARAM_PROPERTIES 7
|
|
-#define PIDX_STORE_PARAM_SERIAL 327
|
|
-#define PIDX_STORE_PARAM_SUBJECT 328
|
|
+#define PIDX_STORE_PARAM_SERIAL 344
|
|
+#define PIDX_STORE_PARAM_SUBJECT 345
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/asn1.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/asn1.h
|
|
index 2425fa10cf..d6c943ac69 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/asn1.h
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/asn1.h
|
|
@@ -2,7 +2,7 @@
|
|
* WARNING: do not edit!
|
|
* Generated by Makefile from include/openssl/asn1.h.in
|
|
*
|
|
- * Copyright 1995-2023 The OpenSSL Project Authors. All Rights Reserved.
|
|
+ * Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved.
|
|
*
|
|
* Licensed under the Apache License 2.0 (the "License"). You may not use
|
|
* this file except in compliance with the License. You can obtain a copy
|
|
@@ -53,14 +53,14 @@ extern "C" {
|
|
# define V_ASN1_PRIMITIVE_TAG 0x1f
|
|
# define V_ASN1_PRIMATIVE_TAG /*compat*/ V_ASN1_PRIMITIVE_TAG
|
|
|
|
-# define V_ASN1_APP_CHOOSE -2/* let the recipient choose */
|
|
-# define V_ASN1_OTHER -3/* used in ASN1_TYPE */
|
|
-# define V_ASN1_ANY -4/* used in ASN1 template code */
|
|
+# define V_ASN1_APP_CHOOSE -2 /* let the recipient choose */
|
|
+# define V_ASN1_OTHER -3 /* used in ASN1_TYPE */
|
|
+# define V_ASN1_ANY -4 /* used in ASN1 template code */
|
|
|
|
# define V_ASN1_UNDEF -1
|
|
/* ASN.1 tag values */
|
|
# define V_ASN1_EOC 0
|
|
-# define V_ASN1_BOOLEAN 1 /**/
|
|
+# define V_ASN1_BOOLEAN 1
|
|
# define V_ASN1_INTEGER 2
|
|
# define V_ASN1_BIT_STRING 3
|
|
# define V_ASN1_OCTET_STRING 4
|
|
@@ -73,19 +73,19 @@ extern "C" {
|
|
# define V_ASN1_UTF8STRING 12
|
|
# define V_ASN1_SEQUENCE 16
|
|
# define V_ASN1_SET 17
|
|
-# define V_ASN1_NUMERICSTRING 18 /**/
|
|
+# define V_ASN1_NUMERICSTRING 18
|
|
# define V_ASN1_PRINTABLESTRING 19
|
|
# define V_ASN1_T61STRING 20
|
|
-# define V_ASN1_TELETEXSTRING 20/* alias */
|
|
-# define V_ASN1_VIDEOTEXSTRING 21 /**/
|
|
+# define V_ASN1_TELETEXSTRING 20 /* alias */
|
|
+# define V_ASN1_VIDEOTEXSTRING 21
|
|
# define V_ASN1_IA5STRING 22
|
|
# define V_ASN1_UTCTIME 23
|
|
-# define V_ASN1_GENERALIZEDTIME 24 /**/
|
|
-# define V_ASN1_GRAPHICSTRING 25 /**/
|
|
-# define V_ASN1_ISO64STRING 26 /**/
|
|
-# define V_ASN1_VISIBLESTRING 26/* alias */
|
|
-# define V_ASN1_GENERALSTRING 27 /**/
|
|
-# define V_ASN1_UNIVERSALSTRING 28 /**/
|
|
+# define V_ASN1_GENERALIZEDTIME 24
|
|
+# define V_ASN1_GRAPHICSTRING 25
|
|
+# define V_ASN1_ISO64STRING 26
|
|
+# define V_ASN1_VISIBLESTRING 26 /* alias */
|
|
+# define V_ASN1_GENERALSTRING 27
|
|
+# define V_ASN1_UNIVERSALSTRING 28
|
|
# define V_ASN1_BMPSTRING 30
|
|
|
|
/*
|
|
@@ -278,7 +278,7 @@ typedef struct ASN1_TLC_st ASN1_TLC;
|
|
/* This is just an opaque pointer */
|
|
typedef struct ASN1_VALUE_st ASN1_VALUE;
|
|
|
|
-/* Declare ASN1 functions: the implement macro in in asn1t.h */
|
|
+/* Declare ASN1 functions: the implement macro is in asn1t.h */
|
|
|
|
/*
|
|
* The mysterious 'extern' that's passed to some macros is innocuous,
|
|
@@ -371,6 +371,7 @@ typedef struct ASN1_VALUE_st ASN1_VALUE;
|
|
|
|
typedef void *d2i_of_void(void **, const unsigned char **, long);
|
|
typedef int i2d_of_void(const void *, unsigned char **);
|
|
+typedef int OSSL_i2d_of_void_ctx(const void *, unsigned char **, void *vctx);
|
|
|
|
/*-
|
|
* The following macros and typedefs allow an ASN1_ITEM
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/bio.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/bio.h
|
|
index 89ed6c060d..8a1f9f039b 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/bio.h
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/bio.h
|
|
@@ -2,7 +2,7 @@
|
|
* WARNING: do not edit!
|
|
* Generated by Makefile from include/openssl/bio.h.in
|
|
*
|
|
- * Copyright 1995-2024 The OpenSSL Project Authors. All Rights Reserved.
|
|
+ * Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved.
|
|
*
|
|
* Licensed under the Apache License 2.0 (the "License"). You may not use
|
|
* this file except in compliance with the License. You can obtain a copy
|
|
@@ -193,6 +193,7 @@ extern "C" {
|
|
# define BIO_CTRL_GET_RPOLL_DESCRIPTOR 91
|
|
# define BIO_CTRL_GET_WPOLL_DESCRIPTOR 92
|
|
# define BIO_CTRL_DGRAM_DETECT_PEER_ADDR 93
|
|
+# define BIO_CTRL_DGRAM_SET0_LOCAL_ADDR 94
|
|
|
|
# define BIO_DGRAM_CAP_NONE 0U
|
|
# define BIO_DGRAM_CAP_HANDLES_SRC_ADDR (1U << 0)
|
|
@@ -693,6 +694,8 @@ int BIO_ctrl_reset_read_request(BIO *b);
|
|
(unsigned int)BIO_ctrl((b), BIO_CTRL_DGRAM_GET_MTU, 0, NULL)
|
|
# define BIO_dgram_set_mtu(b, mtu) \
|
|
(int)BIO_ctrl((b), BIO_CTRL_DGRAM_SET_MTU, (mtu), NULL)
|
|
+# define BIO_dgram_set0_local_addr(b, addr) \
|
|
+ (int)BIO_ctrl((b), BIO_CTRL_DGRAM_SET0_LOCAL_ADDR, 0, (addr))
|
|
|
|
/* ctrl macros for BIO_f_prefix */
|
|
# define BIO_set_prefix(b,p) BIO_ctrl((b), BIO_CTRL_SET_PREFIX, 0, (void *)(p))
|
|
@@ -965,9 +968,6 @@ ossl_bio__attr__((__format__(ossl_bio__printf__, 3, 0)));
|
|
|
|
BIO_METHOD *BIO_meth_new(int type, const char *name);
|
|
void BIO_meth_free(BIO_METHOD *biom);
|
|
-int (*BIO_meth_get_write(const BIO_METHOD *biom)) (BIO *, const char *, int);
|
|
-int (*BIO_meth_get_write_ex(const BIO_METHOD *biom)) (BIO *, const char *, size_t,
|
|
- size_t *);
|
|
int BIO_meth_set_write(BIO_METHOD *biom,
|
|
int (*write) (BIO *, const char *, int));
|
|
int BIO_meth_set_write_ex(BIO_METHOD *biom,
|
|
@@ -975,11 +975,6 @@ int BIO_meth_set_write_ex(BIO_METHOD *biom,
|
|
int BIO_meth_set_sendmmsg(BIO_METHOD *biom,
|
|
int (*f) (BIO *, BIO_MSG *, size_t, size_t,
|
|
uint64_t, size_t *));
|
|
-int (*BIO_meth_get_sendmmsg(const BIO_METHOD *biom))(BIO *, BIO_MSG *,
|
|
- size_t, size_t,
|
|
- uint64_t, size_t *);
|
|
-int (*BIO_meth_get_read(const BIO_METHOD *biom)) (BIO *, char *, int);
|
|
-int (*BIO_meth_get_read_ex(const BIO_METHOD *biom)) (BIO *, char *, size_t, size_t *);
|
|
int BIO_meth_set_read(BIO_METHOD *biom,
|
|
int (*read) (BIO *, char *, int));
|
|
int BIO_meth_set_read_ex(BIO_METHOD *biom,
|
|
@@ -987,28 +982,40 @@ int BIO_meth_set_read_ex(BIO_METHOD *biom,
|
|
int BIO_meth_set_recvmmsg(BIO_METHOD *biom,
|
|
int (*f) (BIO *, BIO_MSG *, size_t, size_t,
|
|
uint64_t, size_t *));
|
|
-int (*BIO_meth_get_recvmmsg(const BIO_METHOD *biom))(BIO *, BIO_MSG *,
|
|
- size_t, size_t,
|
|
- uint64_t, size_t *);
|
|
-int (*BIO_meth_get_puts(const BIO_METHOD *biom)) (BIO *, const char *);
|
|
int BIO_meth_set_puts(BIO_METHOD *biom,
|
|
int (*puts) (BIO *, const char *));
|
|
-int (*BIO_meth_get_gets(const BIO_METHOD *biom)) (BIO *, char *, int);
|
|
int BIO_meth_set_gets(BIO_METHOD *biom,
|
|
int (*ossl_gets) (BIO *, char *, int));
|
|
-long (*BIO_meth_get_ctrl(const BIO_METHOD *biom)) (BIO *, int, long, void *);
|
|
int BIO_meth_set_ctrl(BIO_METHOD *biom,
|
|
long (*ctrl) (BIO *, int, long, void *));
|
|
-int (*BIO_meth_get_create(const BIO_METHOD *bion)) (BIO *);
|
|
int BIO_meth_set_create(BIO_METHOD *biom, int (*create) (BIO *));
|
|
-int (*BIO_meth_get_destroy(const BIO_METHOD *biom)) (BIO *);
|
|
int BIO_meth_set_destroy(BIO_METHOD *biom, int (*destroy) (BIO *));
|
|
-long (*BIO_meth_get_callback_ctrl(const BIO_METHOD *biom))
|
|
- (BIO *, int, BIO_info_cb *);
|
|
int BIO_meth_set_callback_ctrl(BIO_METHOD *biom,
|
|
long (*callback_ctrl) (BIO *, int,
|
|
BIO_info_cb *));
|
|
-
|
|
+# ifndef OPENSSL_NO_DEPRECATED_3_5
|
|
+OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_write(const BIO_METHOD *biom)) (BIO *, const char *,
|
|
+ int);
|
|
+OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_write_ex(const BIO_METHOD *biom)) (BIO *, const char *,
|
|
+ size_t, size_t *);
|
|
+OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_sendmmsg(const BIO_METHOD *biom))(BIO *, BIO_MSG *,
|
|
+ size_t, size_t,
|
|
+ uint64_t, size_t *);
|
|
+OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_read(const BIO_METHOD *biom)) (BIO *, char *, int);
|
|
+OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_read_ex(const BIO_METHOD *biom)) (BIO *, char *,
|
|
+ size_t, size_t *);
|
|
+OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_recvmmsg(const BIO_METHOD *biom))(BIO *, BIO_MSG *,
|
|
+ size_t, size_t,
|
|
+ uint64_t, size_t *);
|
|
+OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_puts(const BIO_METHOD *biom)) (BIO *, const char *);
|
|
+OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_gets(const BIO_METHOD *biom)) (BIO *, char *, int);
|
|
+OSSL_DEPRECATEDIN_3_5 long (*BIO_meth_get_ctrl(const BIO_METHOD *biom)) (BIO *, int,
|
|
+ long, void *);
|
|
+OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_create(const BIO_METHOD *bion)) (BIO *);
|
|
+OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_destroy(const BIO_METHOD *biom)) (BIO *);
|
|
+OSSL_DEPRECATEDIN_3_5 long (*BIO_meth_get_callback_ctrl(const BIO_METHOD *biom)) (BIO *, int,
|
|
+ BIO_info_cb *);
|
|
+# endif
|
|
# ifdef __cplusplus
|
|
}
|
|
# endif
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/cms.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/cms.h
|
|
index 0f21a51930..6713419cfc 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/cms.h
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/cms.h
|
|
@@ -2,7 +2,7 @@
|
|
* WARNING: do not edit!
|
|
* Generated by Makefile from include/openssl/cms.h.in
|
|
*
|
|
- * Copyright 2008-2022 The OpenSSL Project Authors. All Rights Reserved.
|
|
+ * Copyright 2008-2025 The OpenSSL Project Authors. All Rights Reserved.
|
|
*
|
|
* Licensed under the Apache License 2.0 (the "License"). You may not use
|
|
* this file except in compliance with the License. You can obtain a copy
|
|
@@ -155,6 +155,8 @@ DECLARE_ASN1_FUNCTIONS(CMS_ContentInfo)
|
|
DECLARE_ASN1_FUNCTIONS(CMS_ReceiptRequest)
|
|
DECLARE_ASN1_PRINT_FUNCTION(CMS_ContentInfo)
|
|
|
|
+DECLARE_ASN1_DUP_FUNCTION(CMS_EnvelopedData)
|
|
+
|
|
CMS_ContentInfo *CMS_ContentInfo_new_ex(OSSL_LIB_CTX *libctx, const char *propq);
|
|
|
|
# define CMS_SIGNERINFO_ISSUER_SERIAL 0
|
|
@@ -194,6 +196,7 @@ CMS_ContentInfo *CMS_ContentInfo_new_ex(OSSL_LIB_CTX *libctx, const char *propq)
|
|
# define CMS_ASCIICRLF 0x80000
|
|
# define CMS_CADES 0x100000
|
|
# define CMS_USE_ORIGINATOR_KEYID 0x200000
|
|
+# define CMS_NO_SIGNING_TIME 0x400000
|
|
|
|
const ASN1_OBJECT *CMS_get0_type(const CMS_ContentInfo *cms);
|
|
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-ec.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-ec.h
|
|
index a292da4b5b..809941506e 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-ec.h
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-ec.h
|
|
@@ -166,6 +166,9 @@ extern "C" {
|
|
# ifndef OPENSSL_NO_FILENAMES
|
|
# define OPENSSL_NO_FILENAMES
|
|
# endif
|
|
+# ifndef OPENSSL_NO_FIPS_JITTER
|
|
+# define OPENSSL_NO_FIPS_JITTER
|
|
+# endif
|
|
# ifndef OPENSSL_NO_FIPS_POST
|
|
# define OPENSSL_NO_FIPS_POST
|
|
# endif
|
|
@@ -184,6 +187,9 @@ extern "C" {
|
|
# ifndef OPENSSL_NO_H3DEMO
|
|
# define OPENSSL_NO_H3DEMO
|
|
# endif
|
|
+# ifndef OPENSSL_NO_HQINTEROP
|
|
+# define OPENSSL_NO_HQINTEROP
|
|
+# endif
|
|
# ifndef OPENSSL_NO_IDEA
|
|
# define OPENSSL_NO_IDEA
|
|
# endif
|
|
@@ -295,6 +301,9 @@ extern "C" {
|
|
# ifndef OPENSSL_NO_SSL3_METHOD
|
|
# define OPENSSL_NO_SSL3_METHOD
|
|
# endif
|
|
+# ifndef OPENSSL_NO_SSLKEYLOG
|
|
+# define OPENSSL_NO_SSLKEYLOG
|
|
+# endif
|
|
# ifndef OPENSSL_NO_STDIO
|
|
# define OPENSSL_NO_STDIO
|
|
# endif
|
|
@@ -307,6 +316,9 @@ extern "C" {
|
|
# ifndef OPENSSL_NO_THREAD_POOL
|
|
# define OPENSSL_NO_THREAD_POOL
|
|
# endif
|
|
+# ifndef OPENSSL_NO_TLS_DEPRECATED_EC
|
|
+# define OPENSSL_NO_TLS_DEPRECATED_EC
|
|
+# endif
|
|
# ifndef OPENSSL_NO_TLS1_3
|
|
# define OPENSSL_NO_TLS1_3
|
|
# endif
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-noec.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-noec.h
|
|
index 14bb6dca8b..7bf4eb30b4 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-noec.h
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-noec.h
|
|
@@ -178,6 +178,9 @@ extern "C" {
|
|
# ifndef OPENSSL_NO_FILENAMES
|
|
# define OPENSSL_NO_FILENAMES
|
|
# endif
|
|
+# ifndef OPENSSL_NO_FIPS_JITTER
|
|
+# define OPENSSL_NO_FIPS_JITTER
|
|
+# endif
|
|
# ifndef OPENSSL_NO_FIPS_POST
|
|
# define OPENSSL_NO_FIPS_POST
|
|
# endif
|
|
@@ -196,6 +199,9 @@ extern "C" {
|
|
# ifndef OPENSSL_NO_H3DEMO
|
|
# define OPENSSL_NO_H3DEMO
|
|
# endif
|
|
+# ifndef OPENSSL_NO_HQINTEROP
|
|
+# define OPENSSL_NO_HQINTEROP
|
|
+# endif
|
|
# ifndef OPENSSL_NO_IDEA
|
|
# define OPENSSL_NO_IDEA
|
|
# endif
|
|
@@ -307,6 +313,9 @@ extern "C" {
|
|
# ifndef OPENSSL_NO_SSL3_METHOD
|
|
# define OPENSSL_NO_SSL3_METHOD
|
|
# endif
|
|
+# ifndef OPENSSL_NO_SSLKEYLOG
|
|
+# define OPENSSL_NO_SSLKEYLOG
|
|
+# endif
|
|
# ifndef OPENSSL_NO_STDIO
|
|
# define OPENSSL_NO_STDIO
|
|
# endif
|
|
@@ -319,6 +328,9 @@ extern "C" {
|
|
# ifndef OPENSSL_NO_THREAD_POOL
|
|
# define OPENSSL_NO_THREAD_POOL
|
|
# endif
|
|
+# ifndef OPENSSL_NO_TLS_DEPRECATED_EC
|
|
+# define OPENSSL_NO_TLS_DEPRECATED_EC
|
|
+# endif
|
|
# ifndef OPENSSL_NO_TLS1_3
|
|
# define OPENSSL_NO_TLS1_3
|
|
# endif
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/core_names.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/core_names.h
|
|
index 072a6b8a8c..3ed524600b 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/core_names.h
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/core_names.h
|
|
@@ -2,7 +2,7 @@
|
|
* WARNING: do not edit!
|
|
* Generated by Makefile from include/openssl/core_names.h.in
|
|
*
|
|
- * Copyright 2019-2023 The OpenSSL Project Authors. All Rights Reserved.
|
|
+ * Copyright 2019-2025 The OpenSSL Project Authors. All Rights Reserved.
|
|
*
|
|
* Licensed under the Apache License 2.0 (the "License"). You may not use
|
|
* this file except in compliance with the License. You can obtain a copy
|
|
@@ -104,10 +104,17 @@ extern "C" {
|
|
# define OSSL_PKEY_EC_GROUP_CHECK_NAMED "named"
|
|
# define OSSL_PKEY_EC_GROUP_CHECK_NAMED_NIST "named-nist"
|
|
|
|
+/* PROV_SKEY well known key types */
|
|
+# define OSSL_SKEY_TYPE_GENERIC "GENERIC-SECRET"
|
|
+# define OSSL_SKEY_TYPE_AES "AES"
|
|
+
|
|
/* OSSL_KEM_PARAM_OPERATION values */
|
|
#define OSSL_KEM_PARAM_OPERATION_RSASVE "RSASVE"
|
|
#define OSSL_KEM_PARAM_OPERATION_DHKEM "DHKEM"
|
|
|
|
+/* Provider configuration variables */
|
|
+#define OSSL_PKEY_RETAIN_SEED "pkey_retain_seed"
|
|
+
|
|
/* Parameter name definitions - generated by util/perl/OpenSSL/paramnames.pm */
|
|
# define OSSL_ALG_PARAM_ALGORITHM_ID "algorithm-id"
|
|
# define OSSL_ALG_PARAM_ALGORITHM_ID_PARAMS "algorithm-id-params"
|
|
@@ -148,7 +155,9 @@ extern "C" {
|
|
# define OSSL_CAPABILITY_TLS_SIGALG_IANA_NAME "tls-sigalg-iana-name"
|
|
# define OSSL_CAPABILITY_TLS_SIGALG_KEYTYPE "tls-sigalg-keytype"
|
|
# define OSSL_CAPABILITY_TLS_SIGALG_KEYTYPE_OID "tls-sigalg-keytype-oid"
|
|
+# define OSSL_CAPABILITY_TLS_SIGALG_MAX_DTLS "tls-max-dtls"
|
|
# define OSSL_CAPABILITY_TLS_SIGALG_MAX_TLS "tls-max-tls"
|
|
+# define OSSL_CAPABILITY_TLS_SIGALG_MIN_DTLS "tls-min-dtls"
|
|
# define OSSL_CAPABILITY_TLS_SIGALG_MIN_TLS "tls-min-tls"
|
|
# define OSSL_CAPABILITY_TLS_SIGALG_NAME "tls-sigalg-name"
|
|
# define OSSL_CAPABILITY_TLS_SIGALG_OID "tls-sigalg-oid"
|
|
@@ -183,6 +192,7 @@ extern "C" {
|
|
# define OSSL_CIPHER_PARAM_MODE "mode"
|
|
# define OSSL_CIPHER_PARAM_NUM "num"
|
|
# define OSSL_CIPHER_PARAM_PADDING "padding"
|
|
+# define OSSL_CIPHER_PARAM_PIPELINE_AEAD_TAG "pipeline-tag"
|
|
# define OSSL_CIPHER_PARAM_RANDOM_KEY "randkey"
|
|
# define OSSL_CIPHER_PARAM_RC2_KEYBITS "keybits"
|
|
# define OSSL_CIPHER_PARAM_ROUNDS "rounds"
|
|
@@ -338,6 +348,7 @@ extern "C" {
|
|
# define OSSL_OBJECT_PARAM_DATA_STRUCTURE "data-structure"
|
|
# define OSSL_OBJECT_PARAM_DATA_TYPE "data-type"
|
|
# define OSSL_OBJECT_PARAM_DESC "desc"
|
|
+# define OSSL_OBJECT_PARAM_INPUT_TYPE "input-type"
|
|
# define OSSL_OBJECT_PARAM_REFERENCE "reference"
|
|
# define OSSL_OBJECT_PARAM_TYPE "type"
|
|
# define OSSL_PASSPHRASE_PARAM_INFO "info"
|
|
@@ -402,6 +413,17 @@ extern "C" {
|
|
# define OSSL_PKEY_PARAM_MAX_SIZE "max-size"
|
|
# define OSSL_PKEY_PARAM_MGF1_DIGEST "mgf1-digest"
|
|
# define OSSL_PKEY_PARAM_MGF1_PROPERTIES "mgf1-properties"
|
|
+# define OSSL_PKEY_PARAM_ML_DSA_INPUT_FORMATS "ml-dsa.input_formats"
|
|
+# define OSSL_PKEY_PARAM_ML_DSA_OUTPUT_FORMATS "ml-dsa.output_formats"
|
|
+# define OSSL_PKEY_PARAM_ML_DSA_PREFER_SEED "ml-dsa.prefer_seed"
|
|
+# define OSSL_PKEY_PARAM_ML_DSA_RETAIN_SEED "ml-dsa.retain_seed"
|
|
+# define OSSL_PKEY_PARAM_ML_DSA_SEED "seed"
|
|
+# define OSSL_PKEY_PARAM_ML_KEM_IMPORT_PCT_TYPE "ml-kem.import_pct_type"
|
|
+# define OSSL_PKEY_PARAM_ML_KEM_INPUT_FORMATS "ml-kem.input_formats"
|
|
+# define OSSL_PKEY_PARAM_ML_KEM_OUTPUT_FORMATS "ml-kem.output_formats"
|
|
+# define OSSL_PKEY_PARAM_ML_KEM_PREFER_SEED "ml-kem.prefer_seed"
|
|
+# define OSSL_PKEY_PARAM_ML_KEM_RETAIN_SEED "ml-kem.retain_seed"
|
|
+# define OSSL_PKEY_PARAM_ML_KEM_SEED "seed"
|
|
# define OSSL_PKEY_PARAM_PAD_MODE "pad-mode"
|
|
# define OSSL_PKEY_PARAM_PRIV_KEY "priv"
|
|
# define OSSL_PKEY_PARAM_PROPERTIES OSSL_ALG_PARAM_PROPERTIES
|
|
@@ -460,6 +482,7 @@ extern "C" {
|
|
# define OSSL_PKEY_PARAM_RSA_TEST_XQ1 "xq1"
|
|
# define OSSL_PKEY_PARAM_RSA_TEST_XQ2 "xq2"
|
|
# define OSSL_PKEY_PARAM_SECURITY_BITS "security-bits"
|
|
+# define OSSL_PKEY_PARAM_SLH_DSA_SEED "seed"
|
|
# define OSSL_PKEY_PARAM_USE_COFACTOR_ECDH OSSL_PKEY_PARAM_USE_COFACTOR_FLAG
|
|
# define OSSL_PKEY_PARAM_USE_COFACTOR_FLAG "use-cofactor-flag"
|
|
# define OSSL_PROV_PARAM_BUILDINFO "buildinfo"
|
|
@@ -507,9 +530,11 @@ extern "C" {
|
|
# define OSSL_RAND_PARAM_STRENGTH "strength"
|
|
# define OSSL_RAND_PARAM_TEST_ENTROPY "test_entropy"
|
|
# define OSSL_RAND_PARAM_TEST_NONCE "test_nonce"
|
|
+# define OSSL_SIGNATURE_PARAM_ADD_RANDOM "additional-random"
|
|
# define OSSL_SIGNATURE_PARAM_ALGORITHM_ID OSSL_PKEY_PARAM_ALGORITHM_ID
|
|
# define OSSL_SIGNATURE_PARAM_ALGORITHM_ID_PARAMS OSSL_PKEY_PARAM_ALGORITHM_ID_PARAMS
|
|
# define OSSL_SIGNATURE_PARAM_CONTEXT_STRING "context-string"
|
|
+# define OSSL_SIGNATURE_PARAM_DETERMINISTIC "deterministic"
|
|
# define OSSL_SIGNATURE_PARAM_DIGEST OSSL_PKEY_PARAM_DIGEST
|
|
# define OSSL_SIGNATURE_PARAM_DIGEST_SIZE OSSL_PKEY_PARAM_DIGEST_SIZE
|
|
# define OSSL_SIGNATURE_PARAM_FIPS_APPROVED_INDICATOR OSSL_ALG_PARAM_FIPS_APPROVED_INDICATOR
|
|
@@ -521,13 +546,18 @@ extern "C" {
|
|
# define OSSL_SIGNATURE_PARAM_FIPS_VERIFY_MESSAGE "verify-message"
|
|
# define OSSL_SIGNATURE_PARAM_INSTANCE "instance"
|
|
# define OSSL_SIGNATURE_PARAM_KAT "kat"
|
|
+# define OSSL_SIGNATURE_PARAM_MESSAGE_ENCODING "message-encoding"
|
|
# define OSSL_SIGNATURE_PARAM_MGF1_DIGEST OSSL_PKEY_PARAM_MGF1_DIGEST
|
|
# define OSSL_SIGNATURE_PARAM_MGF1_PROPERTIES OSSL_PKEY_PARAM_MGF1_PROPERTIES
|
|
+# define OSSL_SIGNATURE_PARAM_MU "mu"
|
|
# define OSSL_SIGNATURE_PARAM_NONCE_TYPE "nonce-type"
|
|
# define OSSL_SIGNATURE_PARAM_PAD_MODE OSSL_PKEY_PARAM_PAD_MODE
|
|
# define OSSL_SIGNATURE_PARAM_PROPERTIES OSSL_PKEY_PARAM_PROPERTIES
|
|
# define OSSL_SIGNATURE_PARAM_PSS_SALTLEN "saltlen"
|
|
# define OSSL_SIGNATURE_PARAM_SIGNATURE "signature"
|
|
+# define OSSL_SIGNATURE_PARAM_TEST_ENTROPY "test-entropy"
|
|
+# define OSSL_SKEY_PARAM_KEY_LENGTH "key-length"
|
|
+# define OSSL_SKEY_PARAM_RAW_BYTES "raw-bytes"
|
|
# define OSSL_STORE_PARAM_ALIAS "alias"
|
|
# define OSSL_STORE_PARAM_DIGEST "digest"
|
|
# define OSSL_STORE_PARAM_EXPECT "expect"
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crmf.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crmf.h
|
|
index 9900edfdde..551394d314 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crmf.h
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crmf.h
|
|
@@ -2,7 +2,7 @@
|
|
* WARNING: do not edit!
|
|
* Generated by Makefile from include/openssl/crmf.h.in
|
|
*
|
|
- * Copyright 2007-2024 The OpenSSL Project Authors. All Rights Reserved.
|
|
+ * Copyright 2007-2025 The OpenSSL Project Authors. All Rights Reserved.
|
|
* Copyright Nokia 2007-2019
|
|
* Copyright Siemens AG 2015-2019
|
|
*
|
|
@@ -26,6 +26,7 @@
|
|
# include <openssl/safestack.h>
|
|
# include <openssl/crmferr.h>
|
|
# include <openssl/x509v3.h> /* for GENERAL_NAME etc. */
|
|
+# include <openssl/cms.h>
|
|
|
|
/* explicit #includes not strictly needed since implied by the above: */
|
|
# include <openssl/types.h>
|
|
@@ -44,8 +45,11 @@ extern "C" {
|
|
# define OSSL_CRMF_SUBSEQUENTMESSAGE_ENCRCERT 0
|
|
# define OSSL_CRMF_SUBSEQUENTMESSAGE_CHALLENGERESP 1
|
|
typedef struct ossl_crmf_encryptedvalue_st OSSL_CRMF_ENCRYPTEDVALUE;
|
|
-
|
|
DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_ENCRYPTEDVALUE)
|
|
+
|
|
+typedef struct ossl_crmf_encryptedkey_st OSSL_CRMF_ENCRYPTEDKEY;
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_ENCRYPTEDKEY)
|
|
+
|
|
typedef struct ossl_crmf_msg_st OSSL_CRMF_MSG;
|
|
DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_MSG)
|
|
DECLARE_ASN1_DUP_FUNCTION(OSSL_CRMF_MSG)
|
|
@@ -248,10 +252,24 @@ int OSSL_CRMF_CERTTEMPLATE_fill(OSSL_CRMF_CERTTEMPLATE *tmpl,
|
|
const X509_NAME *subject,
|
|
const X509_NAME *issuer,
|
|
const ASN1_INTEGER *serial);
|
|
-X509
|
|
-*OSSL_CRMF_ENCRYPTEDVALUE_get1_encCert(const OSSL_CRMF_ENCRYPTEDVALUE *ecert,
|
|
- OSSL_LIB_CTX *libctx, const char *propq,
|
|
- EVP_PKEY *pkey);
|
|
+X509 *OSSL_CRMF_ENCRYPTEDVALUE_get1_encCert(const OSSL_CRMF_ENCRYPTEDVALUE *ecert,
|
|
+ OSSL_LIB_CTX *libctx, const char *propq,
|
|
+ EVP_PKEY *pkey);
|
|
+X509 *OSSL_CRMF_ENCRYPTEDKEY_get1_encCert(const OSSL_CRMF_ENCRYPTEDKEY *ecert,
|
|
+ OSSL_LIB_CTX *libctx, const char *propq,
|
|
+ EVP_PKEY *pkey, unsigned int flags);
|
|
+unsigned char
|
|
+*OSSL_CRMF_ENCRYPTEDVALUE_decrypt(const OSSL_CRMF_ENCRYPTEDVALUE *enc,
|
|
+ OSSL_LIB_CTX *libctx, const char *propq,
|
|
+ EVP_PKEY *pkey, int *outlen);
|
|
+EVP_PKEY *OSSL_CRMF_ENCRYPTEDKEY_get1_pkey(const OSSL_CRMF_ENCRYPTEDKEY *encryptedKey,
|
|
+ X509_STORE *ts, STACK_OF(X509) *extra, EVP_PKEY *pkey,
|
|
+ X509 *cert, ASN1_OCTET_STRING *secret,
|
|
+ OSSL_LIB_CTX *libctx, const char *propq);
|
|
+int OSSL_CRMF_MSG_centralkeygen_requested(const OSSL_CRMF_MSG *crm, const X509_REQ *p10cr);
|
|
+# ifndef OPENSSL_NO_CMS
|
|
+OSSL_CRMF_ENCRYPTEDKEY *OSSL_CRMF_ENCRYPTEDKEY_init_envdata(CMS_EnvelopedData *envdata);
|
|
+# endif
|
|
|
|
# ifdef __cplusplus
|
|
}
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crypto.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crypto.h
|
|
index ae7e30a26e..bba69ec2e1 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crypto.h
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crypto.h
|
|
@@ -391,6 +391,9 @@ void OPENSSL_cleanse(void *ptr, size_t len);
|
|
# define CRYPTO_MEM_CHECK_ENABLE 0x2 /* Control and mode bit */
|
|
# define CRYPTO_MEM_CHECK_DISABLE 0x3 /* Control only */
|
|
|
|
+/* max allowed length for value of OPENSSL_MALLOC_FAILURES env var. */
|
|
+# define CRYPTO_MEM_CHECK_MAX_FS 256
|
|
+
|
|
void CRYPTO_get_alloc_counts(int *mcount, int *rcount, int *fcount);
|
|
# ifndef OPENSSL_NO_DEPRECATED_3_0
|
|
# define OPENSSL_mem_debug_push(info) \
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/opensslv.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/opensslv.h
|
|
index cf4bdbcea4..03ed187898 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/opensslv.h
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/opensslv.h
|
|
@@ -28,7 +28,7 @@ extern "C" {
|
|
* These macros express version number MAJOR.MINOR.PATCH exactly
|
|
*/
|
|
# define OPENSSL_VERSION_MAJOR 3
|
|
-# define OPENSSL_VERSION_MINOR 4
|
|
+# define OPENSSL_VERSION_MINOR 5
|
|
# define OPENSSL_VERSION_PATCH 1
|
|
|
|
/*
|
|
@@ -39,7 +39,7 @@ extern "C" {
|
|
*/
|
|
|
|
/* Could be: #define OPENSSL_VERSION_PRE_RELEASE "-alpha.1" */
|
|
-# define OPENSSL_VERSION_PRE_RELEASE ""
|
|
+# define OPENSSL_VERSION_PRE_RELEASE "-dev"
|
|
/* Could be: #define OPENSSL_VERSION_BUILD_METADATA "+fips" */
|
|
/* Could be: #define OPENSSL_VERSION_BUILD_METADATA "+vendor.1" */
|
|
# define OPENSSL_VERSION_BUILD_METADATA ""
|
|
@@ -74,21 +74,21 @@ extern "C" {
|
|
* longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and
|
|
* OPENSSL_VERSION_BUILD_METADATA_STR appended.
|
|
*/
|
|
-# define OPENSSL_VERSION_STR "3.4.1"
|
|
-# define OPENSSL_FULL_VERSION_STR "3.4.1"
|
|
+# define OPENSSL_VERSION_STR "3.5.1"
|
|
+# define OPENSSL_FULL_VERSION_STR "3.5.1-dev"
|
|
|
|
/*
|
|
* SECTION 3: ADDITIONAL METADATA
|
|
*
|
|
* These strings are defined separately to allow them to be parsable.
|
|
*/
|
|
-# define OPENSSL_RELEASE_DATE "11 Feb 2025"
|
|
+# define OPENSSL_RELEASE_DATE ""
|
|
|
|
/*
|
|
* SECTION 4: BACKWARD COMPATIBILITY
|
|
*/
|
|
|
|
-# define OPENSSL_VERSION_TEXT "OpenSSL 3.4.1 11 Feb 2025"
|
|
+# define OPENSSL_VERSION_TEXT "OpenSSL 3.5.1-dev "
|
|
|
|
/* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */
|
|
# ifdef OPENSSL_VERSION_PRE_RELEASE
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/ssl.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/ssl.h
|
|
index 9741f3a18a..4701ff69d5 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/ssl.h
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/ssl.h
|
|
@@ -2,7 +2,7 @@
|
|
* WARNING: do not edit!
|
|
* Generated by Makefile from include/openssl/ssl.h.in
|
|
*
|
|
- * Copyright 1995-2024 The OpenSSL Project Authors. All Rights Reserved.
|
|
+ * Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved.
|
|
* Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved
|
|
* Copyright 2005 Nokia. All rights reserved.
|
|
*
|
|
@@ -409,7 +409,7 @@ typedef int (*SSL_async_callback_fn)(SSL *s, void *arg);
|
|
*/
|
|
# define SSL_OP_CIPHER_SERVER_PREFERENCE SSL_OP_BIT(22)
|
|
/*
|
|
- * If set, a server will allow a client to issue a SSLv3.0 version
|
|
+ * If set, a server will allow a client to issue an SSLv3.0 version
|
|
* number as latest version supported in the premaster secret, even when
|
|
* TLSv1.0 (version 3.1) was announced in the client hello. Normally
|
|
* this is forbidden to prevent version rollback attacks.
|
|
@@ -1383,6 +1383,9 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION)
|
|
# define SSL_CTRL_SET_RETRY_VERIFY 136
|
|
# define SSL_CTRL_GET_VERIFY_CERT_STORE 137
|
|
# define SSL_CTRL_GET_CHAIN_CERT_STORE 138
|
|
+# define SSL_CTRL_GET0_IMPLEMENTED_GROUPS 139
|
|
+# define SSL_CTRL_GET_SIGNATURE_NAME 140
|
|
+# define SSL_CTRL_GET_PEER_SIGNATURE_NAME 141
|
|
# define SSL_CERT_SET_FIRST 1
|
|
# define SSL_CERT_SET_NEXT 2
|
|
# define SSL_CERT_SET_SERVER 3
|
|
@@ -1491,6 +1494,9 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION)
|
|
SSL_CTX_ctrl(ctx,SSL_CTRL_SET_GROUPS,glistlen,(int *)(glist))
|
|
# define SSL_CTX_set1_groups_list(ctx, s) \
|
|
SSL_CTX_ctrl(ctx,SSL_CTRL_SET_GROUPS_LIST,0,(char *)(s))
|
|
+# define SSL_CTX_get0_implemented_groups(ctx, all, out) \
|
|
+ SSL_CTX_ctrl(ctx,SSL_CTRL_GET0_IMPLEMENTED_GROUPS, all, \
|
|
+ (STACK_OF(OPENSSL_CSTRING) *)(out))
|
|
# define SSL_set1_groups(s, glist, glistlen) \
|
|
SSL_ctrl(s,SSL_CTRL_SET_GROUPS,glistlen,(char *)(glist))
|
|
# define SSL_set1_groups_list(s, str) \
|
|
@@ -1522,8 +1528,12 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION)
|
|
(char *)(clist))
|
|
# define SSL_set1_client_certificate_types(s, clist, clistlen) \
|
|
SSL_ctrl(s,SSL_CTRL_SET_CLIENT_CERT_TYPES,clistlen,(char *)(clist))
|
|
+# define SSL_get0_signature_name(s, str) \
|
|
+ SSL_ctrl(s,SSL_CTRL_GET_SIGNATURE_NAME,0,(1?(str):(const char **)NULL))
|
|
# define SSL_get_signature_nid(s, pn) \
|
|
SSL_ctrl(s,SSL_CTRL_GET_SIGNATURE_NID,0,pn)
|
|
+# define SSL_get0_peer_signature_name(s, str) \
|
|
+ SSL_ctrl(s,SSL_CTRL_GET_PEER_SIGNATURE_NAME,0,(1?(str):(const char **)NULL))
|
|
# define SSL_get_peer_signature_nid(s, pn) \
|
|
SSL_ctrl(s,SSL_CTRL_GET_PEER_SIGNATURE_NID,0,pn)
|
|
# define SSL_get_peer_tmp_key(s, pk) \
|
|
@@ -1944,6 +1954,11 @@ OSSL_DEPRECATEDIN_3_0 __owur char *SSL_get_srp_userinfo(SSL *s);
|
|
typedef int (*SSL_client_hello_cb_fn) (SSL *s, int *al, void *arg);
|
|
void SSL_CTX_set_client_hello_cb(SSL_CTX *c, SSL_client_hello_cb_fn cb,
|
|
void *arg);
|
|
+typedef int (*SSL_new_pending_conn_cb_fn) (SSL_CTX *ctx, SSL *new_ssl,
|
|
+ void *arg);
|
|
+void SSL_CTX_set_new_pending_conn_cb(SSL_CTX *c, SSL_new_pending_conn_cb_fn cb,
|
|
+ void *arg);
|
|
+
|
|
int SSL_client_hello_isv2(SSL *s);
|
|
unsigned int SSL_client_hello_get0_legacy_version(SSL *s);
|
|
size_t SSL_client_hello_get0_random(SSL *s, const unsigned char **out);
|
|
@@ -2339,6 +2354,31 @@ __owur int SSL_set1_initial_peer_addr(SSL *s, const BIO_ADDR *peer_addr);
|
|
__owur SSL *SSL_get0_connection(SSL *s);
|
|
__owur int SSL_is_connection(SSL *s);
|
|
|
|
+__owur int SSL_is_listener(SSL *ssl);
|
|
+__owur SSL *SSL_get0_listener(SSL *s);
|
|
+#define SSL_LISTENER_FLAG_NO_VALIDATE (1UL << 1)
|
|
+__owur SSL *SSL_new_listener(SSL_CTX *ctx, uint64_t flags);
|
|
+__owur SSL *SSL_new_listener_from(SSL *ssl, uint64_t flags);
|
|
+__owur SSL *SSL_new_from_listener(SSL *ssl, uint64_t flags);
|
|
+#define SSL_ACCEPT_CONNECTION_NO_BLOCK (1UL << 0)
|
|
+__owur SSL *SSL_accept_connection(SSL *ssl, uint64_t flags);
|
|
+__owur size_t SSL_get_accept_connection_queue_len(SSL *ssl);
|
|
+__owur int SSL_listen(SSL *ssl);
|
|
+
|
|
+__owur int SSL_is_domain(SSL *s);
|
|
+__owur SSL *SSL_get0_domain(SSL *s);
|
|
+__owur SSL *SSL_new_domain(SSL_CTX *ctx, uint64_t flags);
|
|
+
|
|
+#define SSL_DOMAIN_FLAG_SINGLE_THREAD (1U << 0)
|
|
+#define SSL_DOMAIN_FLAG_MULTI_THREAD (1U << 1)
|
|
+#define SSL_DOMAIN_FLAG_THREAD_ASSISTED (1U << 2)
|
|
+#define SSL_DOMAIN_FLAG_BLOCKING (1U << 3)
|
|
+#define SSL_DOMAIN_FLAG_LEGACY_BLOCKING (1U << 4)
|
|
+
|
|
+__owur int SSL_CTX_set_domain_flags(SSL_CTX *ctx, uint64_t domain_flags);
|
|
+__owur int SSL_CTX_get_domain_flags(const SSL_CTX *ctx, uint64_t *domain_flags);
|
|
+__owur int SSL_get_domain_flags(const SSL *ssl, uint64_t *domain_flags);
|
|
+
|
|
#define SSL_STREAM_TYPE_NONE 0
|
|
#define SSL_STREAM_TYPE_READ (1U << 0)
|
|
#define SSL_STREAM_TYPE_WRITE (1U << 1)
|
|
@@ -2872,6 +2912,21 @@ __owur int SSL_get0_server_cert_type(const SSL *s, unsigned char **t, size_t *le
|
|
__owur int SSL_CTX_get0_client_cert_type(const SSL_CTX *ctx, unsigned char **t, size_t *len);
|
|
__owur int SSL_CTX_get0_server_cert_type(const SSL_CTX *s, unsigned char **t, size_t *len);
|
|
|
|
+/*
|
|
+ * Protection level. For <= TLSv1.2 only "NONE" and "APPLICATION" are used.
|
|
+ */
|
|
+# define OSSL_RECORD_PROTECTION_LEVEL_NONE 0
|
|
+# define OSSL_RECORD_PROTECTION_LEVEL_EARLY 1
|
|
+# define OSSL_RECORD_PROTECTION_LEVEL_HANDSHAKE 2
|
|
+# define OSSL_RECORD_PROTECTION_LEVEL_APPLICATION 3
|
|
+
|
|
+int SSL_set_quic_tls_cbs(SSL *s, const OSSL_DISPATCH *qtdis, void *arg);
|
|
+int SSL_set_quic_tls_transport_params(SSL *s,
|
|
+ const unsigned char *params,
|
|
+ size_t params_len);
|
|
+
|
|
+int SSL_set_quic_tls_early_data_enabled(SSL *s, int enabled);
|
|
+
|
|
# ifdef __cplusplus
|
|
}
|
|
# endif
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_acert.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_acert.h
|
|
index 86babde0d8..4eaac6f955 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_acert.h
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_acert.h
|
|
@@ -260,4 +260,35 @@ DECLARE_ASN1_FUNCTIONS(OSSL_TARGET)
|
|
DECLARE_ASN1_FUNCTIONS(OSSL_TARGETS)
|
|
DECLARE_ASN1_FUNCTIONS(OSSL_TARGETING_INFORMATION)
|
|
|
|
+typedef STACK_OF(OSSL_ISSUER_SERIAL) OSSL_AUTHORITY_ATTRIBUTE_ID_SYNTAX;
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_AUTHORITY_ATTRIBUTE_ID_SYNTAX)
|
|
+
|
|
+SKM_DEFINE_STACK_OF_INTERNAL(OSSL_ISSUER_SERIAL, OSSL_ISSUER_SERIAL, OSSL_ISSUER_SERIAL)
|
|
+#define sk_OSSL_ISSUER_SERIAL_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_ISSUER_SERIAL_sk_type(sk))
|
|
+#define sk_OSSL_ISSUER_SERIAL_value(sk, idx) ((OSSL_ISSUER_SERIAL *)OPENSSL_sk_value(ossl_check_const_OSSL_ISSUER_SERIAL_sk_type(sk), (idx)))
|
|
+#define sk_OSSL_ISSUER_SERIAL_new(cmp) ((STACK_OF(OSSL_ISSUER_SERIAL) *)OPENSSL_sk_new(ossl_check_OSSL_ISSUER_SERIAL_compfunc_type(cmp)))
|
|
+#define sk_OSSL_ISSUER_SERIAL_new_null() ((STACK_OF(OSSL_ISSUER_SERIAL) *)OPENSSL_sk_new_null())
|
|
+#define sk_OSSL_ISSUER_SERIAL_new_reserve(cmp, n) ((STACK_OF(OSSL_ISSUER_SERIAL) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_ISSUER_SERIAL_compfunc_type(cmp), (n)))
|
|
+#define sk_OSSL_ISSUER_SERIAL_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), (n))
|
|
+#define sk_OSSL_ISSUER_SERIAL_free(sk) OPENSSL_sk_free(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk))
|
|
+#define sk_OSSL_ISSUER_SERIAL_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk))
|
|
+#define sk_OSSL_ISSUER_SERIAL_delete(sk, i) ((OSSL_ISSUER_SERIAL *)OPENSSL_sk_delete(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), (i)))
|
|
+#define sk_OSSL_ISSUER_SERIAL_delete_ptr(sk, ptr) ((OSSL_ISSUER_SERIAL *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr)))
|
|
+#define sk_OSSL_ISSUER_SERIAL_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr))
|
|
+#define sk_OSSL_ISSUER_SERIAL_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr))
|
|
+#define sk_OSSL_ISSUER_SERIAL_pop(sk) ((OSSL_ISSUER_SERIAL *)OPENSSL_sk_pop(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk)))
|
|
+#define sk_OSSL_ISSUER_SERIAL_shift(sk) ((OSSL_ISSUER_SERIAL *)OPENSSL_sk_shift(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk)))
|
|
+#define sk_OSSL_ISSUER_SERIAL_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk),ossl_check_OSSL_ISSUER_SERIAL_freefunc_type(freefunc))
|
|
+#define sk_OSSL_ISSUER_SERIAL_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr), (idx))
|
|
+#define sk_OSSL_ISSUER_SERIAL_set(sk, idx, ptr) ((OSSL_ISSUER_SERIAL *)OPENSSL_sk_set(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), (idx), ossl_check_OSSL_ISSUER_SERIAL_type(ptr)))
|
|
+#define sk_OSSL_ISSUER_SERIAL_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr))
|
|
+#define sk_OSSL_ISSUER_SERIAL_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr))
|
|
+#define sk_OSSL_ISSUER_SERIAL_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr), pnum)
|
|
+#define sk_OSSL_ISSUER_SERIAL_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk))
|
|
+#define sk_OSSL_ISSUER_SERIAL_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_ISSUER_SERIAL_sk_type(sk))
|
|
+#define sk_OSSL_ISSUER_SERIAL_dup(sk) ((STACK_OF(OSSL_ISSUER_SERIAL) *)OPENSSL_sk_dup(ossl_check_const_OSSL_ISSUER_SERIAL_sk_type(sk)))
|
|
+#define sk_OSSL_ISSUER_SERIAL_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_ISSUER_SERIAL) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_copyfunc_type(copyfunc), ossl_check_OSSL_ISSUER_SERIAL_freefunc_type(freefunc)))
|
|
+#define sk_OSSL_ISSUER_SERIAL_set_cmp_func(sk, cmp) ((sk_OSSL_ISSUER_SERIAL_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_compfunc_type(cmp)))
|
|
+
|
|
+
|
|
#endif
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_vfy.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_vfy.h
|
|
index 68b20ee5f2..de63bf0184 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_vfy.h
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_vfy.h
|
|
@@ -2,7 +2,7 @@
|
|
* WARNING: do not edit!
|
|
* Generated by Makefile from include/openssl/x509_vfy.h.in
|
|
*
|
|
- * Copyright 1995-2024 The OpenSSL Project Authors. All Rights Reserved.
|
|
+ * Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved.
|
|
*
|
|
* Licensed under the Apache License 2.0 (the "License"). You may not use
|
|
* this file except in compliance with the License. You can obtain a copy
|
|
@@ -812,6 +812,7 @@ int X509_VERIFY_PARAM_clear_flags(X509_VERIFY_PARAM *param,
|
|
unsigned long flags);
|
|
unsigned long X509_VERIFY_PARAM_get_flags(const X509_VERIFY_PARAM *param);
|
|
int X509_VERIFY_PARAM_set_purpose(X509_VERIFY_PARAM *param, int purpose);
|
|
+int X509_VERIFY_PARAM_get_purpose(const X509_VERIFY_PARAM *param);
|
|
int X509_VERIFY_PARAM_set_trust(X509_VERIFY_PARAM *param, int trust);
|
|
void X509_VERIFY_PARAM_set_depth(X509_VERIFY_PARAM *param, int depth);
|
|
void X509_VERIFY_PARAM_set_auth_level(X509_VERIFY_PARAM *param, int auth_level);
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509v3.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509v3.h
|
|
index 5fd66fbda3..718157ebfa 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509v3.h
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509v3.h
|
|
@@ -2,7 +2,7 @@
|
|
* WARNING: do not edit!
|
|
* Generated by Makefile from include/openssl/x509v3.h.in
|
|
*
|
|
- * Copyright 1999-2024 The OpenSSL Project Authors. All Rights Reserved.
|
|
+ * Copyright 1999-2025 The OpenSSL Project Authors. All Rights Reserved.
|
|
*
|
|
* Licensed under the Apache License 2.0 (the "License"). You may not use
|
|
* this file except in compliance with the License. You can obtain a copy
|
|
@@ -741,7 +741,7 @@ SKM_DEFINE_STACK_OF_INTERNAL(X509_PURPOSE, X509_PURPOSE, X509_PURPOSE)
|
|
#define sk_X509_PURPOSE_set_cmp_func(sk, cmp) ((sk_X509_PURPOSE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_compfunc_type(cmp)))
|
|
|
|
|
|
-
|
|
+# define X509_PURPOSE_DEFAULT_ANY 0
|
|
# define X509_PURPOSE_SSL_CLIENT 1
|
|
# define X509_PURPOSE_SSL_SERVER 2
|
|
# define X509_PURPOSE_NS_SSL_SERVER 3
|
|
@@ -990,7 +990,6 @@ int X509V3_extensions_print(BIO *out, const char *title,
|
|
int X509_check_ca(X509 *x);
|
|
int X509_check_purpose(X509 *x, int id, int ca);
|
|
int X509_supported_extension(X509_EXTENSION *ex);
|
|
-int X509_PURPOSE_set(int *p, int purpose);
|
|
int X509_check_issued(X509 *issuer, X509 *subject);
|
|
int X509_check_akid(const X509 *issuer, const AUTHORITY_KEYID *akid);
|
|
void X509_set_proxy_flag(X509 *x);
|
|
@@ -1006,22 +1005,26 @@ const GENERAL_NAMES *X509_get0_authority_issuer(X509 *x);
|
|
const ASN1_INTEGER *X509_get0_authority_serial(X509 *x);
|
|
|
|
int X509_PURPOSE_get_count(void);
|
|
-X509_PURPOSE *X509_PURPOSE_get0(int idx);
|
|
+int X509_PURPOSE_get_unused_id(OSSL_LIB_CTX *libctx);
|
|
int X509_PURPOSE_get_by_sname(const char *sname);
|
|
int X509_PURPOSE_get_by_id(int id);
|
|
int X509_PURPOSE_add(int id, int trust, int flags,
|
|
int (*ck) (const X509_PURPOSE *, const X509 *, int),
|
|
const char *name, const char *sname, void *arg);
|
|
+void X509_PURPOSE_cleanup(void);
|
|
+
|
|
+X509_PURPOSE *X509_PURPOSE_get0(int idx);
|
|
+int X509_PURPOSE_get_id(const X509_PURPOSE *);
|
|
char *X509_PURPOSE_get0_name(const X509_PURPOSE *xp);
|
|
char *X509_PURPOSE_get0_sname(const X509_PURPOSE *xp);
|
|
int X509_PURPOSE_get_trust(const X509_PURPOSE *xp);
|
|
-void X509_PURPOSE_cleanup(void);
|
|
-int X509_PURPOSE_get_id(const X509_PURPOSE *);
|
|
+int X509_PURPOSE_set(int *p, int purpose);
|
|
|
|
STACK_OF(OPENSSL_STRING) *X509_get1_email(X509 *x);
|
|
STACK_OF(OPENSSL_STRING) *X509_REQ_get1_email(X509_REQ *x);
|
|
void X509_email_free(STACK_OF(OPENSSL_STRING) *sk);
|
|
STACK_OF(OPENSSL_STRING) *X509_get1_ocsp(X509 *x);
|
|
+
|
|
/* Flags for X509_check_* functions */
|
|
|
|
/*
|
|
@@ -1494,6 +1497,471 @@ SKM_DEFINE_STACK_OF_INTERNAL(USERNOTICE, USERNOTICE, USERNOTICE)
|
|
#define sk_USERNOTICE_set_cmp_func(sk, cmp) ((sk_USERNOTICE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_USERNOTICE_sk_type(sk), ossl_check_USERNOTICE_compfunc_type(cmp)))
|
|
|
|
|
|
+typedef struct OSSL_ROLE_SPEC_CERT_ID_st {
|
|
+ GENERAL_NAME *roleName;
|
|
+ GENERAL_NAME *roleCertIssuer;
|
|
+ ASN1_INTEGER *roleCertSerialNumber;
|
|
+ GENERAL_NAMES *roleCertLocator;
|
|
+} OSSL_ROLE_SPEC_CERT_ID;
|
|
+
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_ROLE_SPEC_CERT_ID)
|
|
+
|
|
+SKM_DEFINE_STACK_OF_INTERNAL(OSSL_ROLE_SPEC_CERT_ID, OSSL_ROLE_SPEC_CERT_ID, OSSL_ROLE_SPEC_CERT_ID)
|
|
+#define sk_OSSL_ROLE_SPEC_CERT_ID_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk))
|
|
+#define sk_OSSL_ROLE_SPEC_CERT_ID_value(sk, idx) ((OSSL_ROLE_SPEC_CERT_ID *)OPENSSL_sk_value(ossl_check_const_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), (idx)))
|
|
+#define sk_OSSL_ROLE_SPEC_CERT_ID_new(cmp) ((STACK_OF(OSSL_ROLE_SPEC_CERT_ID) *)OPENSSL_sk_new(ossl_check_OSSL_ROLE_SPEC_CERT_ID_compfunc_type(cmp)))
|
|
+#define sk_OSSL_ROLE_SPEC_CERT_ID_new_null() ((STACK_OF(OSSL_ROLE_SPEC_CERT_ID) *)OPENSSL_sk_new_null())
|
|
+#define sk_OSSL_ROLE_SPEC_CERT_ID_new_reserve(cmp, n) ((STACK_OF(OSSL_ROLE_SPEC_CERT_ID) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_ROLE_SPEC_CERT_ID_compfunc_type(cmp), (n)))
|
|
+#define sk_OSSL_ROLE_SPEC_CERT_ID_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), (n))
|
|
+#define sk_OSSL_ROLE_SPEC_CERT_ID_free(sk) OPENSSL_sk_free(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk))
|
|
+#define sk_OSSL_ROLE_SPEC_CERT_ID_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk))
|
|
+#define sk_OSSL_ROLE_SPEC_CERT_ID_delete(sk, i) ((OSSL_ROLE_SPEC_CERT_ID *)OPENSSL_sk_delete(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), (i)))
|
|
+#define sk_OSSL_ROLE_SPEC_CERT_ID_delete_ptr(sk, ptr) ((OSSL_ROLE_SPEC_CERT_ID *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr)))
|
|
+#define sk_OSSL_ROLE_SPEC_CERT_ID_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr))
|
|
+#define sk_OSSL_ROLE_SPEC_CERT_ID_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr))
|
|
+#define sk_OSSL_ROLE_SPEC_CERT_ID_pop(sk) ((OSSL_ROLE_SPEC_CERT_ID *)OPENSSL_sk_pop(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk)))
|
|
+#define sk_OSSL_ROLE_SPEC_CERT_ID_shift(sk) ((OSSL_ROLE_SPEC_CERT_ID *)OPENSSL_sk_shift(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk)))
|
|
+#define sk_OSSL_ROLE_SPEC_CERT_ID_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk),ossl_check_OSSL_ROLE_SPEC_CERT_ID_freefunc_type(freefunc))
|
|
+#define sk_OSSL_ROLE_SPEC_CERT_ID_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr), (idx))
|
|
+#define sk_OSSL_ROLE_SPEC_CERT_ID_set(sk, idx, ptr) ((OSSL_ROLE_SPEC_CERT_ID *)OPENSSL_sk_set(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), (idx), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr)))
|
|
+#define sk_OSSL_ROLE_SPEC_CERT_ID_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr))
|
|
+#define sk_OSSL_ROLE_SPEC_CERT_ID_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr))
|
|
+#define sk_OSSL_ROLE_SPEC_CERT_ID_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr), pnum)
|
|
+#define sk_OSSL_ROLE_SPEC_CERT_ID_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk))
|
|
+#define sk_OSSL_ROLE_SPEC_CERT_ID_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk))
|
|
+#define sk_OSSL_ROLE_SPEC_CERT_ID_dup(sk) ((STACK_OF(OSSL_ROLE_SPEC_CERT_ID) *)OPENSSL_sk_dup(ossl_check_const_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk)))
|
|
+#define sk_OSSL_ROLE_SPEC_CERT_ID_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_ROLE_SPEC_CERT_ID) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_copyfunc_type(copyfunc), ossl_check_OSSL_ROLE_SPEC_CERT_ID_freefunc_type(freefunc)))
|
|
+#define sk_OSSL_ROLE_SPEC_CERT_ID_set_cmp_func(sk, cmp) ((sk_OSSL_ROLE_SPEC_CERT_ID_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_compfunc_type(cmp)))
|
|
+
|
|
+
|
|
+typedef STACK_OF(OSSL_ROLE_SPEC_CERT_ID) OSSL_ROLE_SPEC_CERT_ID_SYNTAX;
|
|
+
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_ROLE_SPEC_CERT_ID_SYNTAX)
|
|
+typedef struct OSSL_HASH_st {
|
|
+ X509_ALGOR *algorithmIdentifier;
|
|
+ ASN1_BIT_STRING *hashValue;
|
|
+} OSSL_HASH;
|
|
+
|
|
+typedef struct OSSL_INFO_SYNTAX_POINTER_st {
|
|
+ GENERAL_NAMES *name;
|
|
+ OSSL_HASH *hash;
|
|
+} OSSL_INFO_SYNTAX_POINTER;
|
|
+
|
|
+# define OSSL_INFO_SYNTAX_TYPE_CONTENT 0
|
|
+# define OSSL_INFO_SYNTAX_TYPE_POINTER 1
|
|
+
|
|
+typedef struct OSSL_INFO_SYNTAX_st {
|
|
+ int type;
|
|
+ union {
|
|
+ ASN1_STRING *content;
|
|
+ OSSL_INFO_SYNTAX_POINTER *pointer;
|
|
+ } choice;
|
|
+} OSSL_INFO_SYNTAX;
|
|
+
|
|
+typedef struct OSSL_PRIVILEGE_POLICY_ID_st {
|
|
+ ASN1_OBJECT *privilegePolicy;
|
|
+ OSSL_INFO_SYNTAX *privPolSyntax;
|
|
+} OSSL_PRIVILEGE_POLICY_ID;
|
|
+
|
|
+typedef struct OSSL_ATTRIBUTE_DESCRIPTOR_st {
|
|
+ ASN1_OBJECT *identifier;
|
|
+ ASN1_STRING *attributeSyntax;
|
|
+ ASN1_UTF8STRING *name;
|
|
+ ASN1_UTF8STRING *description;
|
|
+ OSSL_PRIVILEGE_POLICY_ID *dominationRule;
|
|
+} OSSL_ATTRIBUTE_DESCRIPTOR;
|
|
+
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_HASH)
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_INFO_SYNTAX)
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_INFO_SYNTAX_POINTER)
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_PRIVILEGE_POLICY_ID)
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_ATTRIBUTE_DESCRIPTOR)
|
|
+
|
|
+typedef struct OSSL_TIME_SPEC_ABSOLUTE_st {
|
|
+ ASN1_GENERALIZEDTIME *startTime;
|
|
+ ASN1_GENERALIZEDTIME *endTime;
|
|
+} OSSL_TIME_SPEC_ABSOLUTE;
|
|
+
|
|
+typedef struct OSSL_DAY_TIME_st {
|
|
+ ASN1_INTEGER *hour;
|
|
+ ASN1_INTEGER *minute;
|
|
+ ASN1_INTEGER *second;
|
|
+} OSSL_DAY_TIME;
|
|
+
|
|
+typedef struct OSSL_DAY_TIME_BAND_st {
|
|
+ OSSL_DAY_TIME *startDayTime;
|
|
+ OSSL_DAY_TIME *endDayTime;
|
|
+} OSSL_DAY_TIME_BAND;
|
|
+
|
|
+# define OSSL_NAMED_DAY_TYPE_INT 0
|
|
+# define OSSL_NAMED_DAY_TYPE_BIT 1
|
|
+# define OSSL_NAMED_DAY_INT_SUN 1
|
|
+# define OSSL_NAMED_DAY_INT_MON 2
|
|
+# define OSSL_NAMED_DAY_INT_TUE 3
|
|
+# define OSSL_NAMED_DAY_INT_WED 4
|
|
+# define OSSL_NAMED_DAY_INT_THU 5
|
|
+# define OSSL_NAMED_DAY_INT_FRI 6
|
|
+# define OSSL_NAMED_DAY_INT_SAT 7
|
|
+# define OSSL_NAMED_DAY_BIT_SUN 0
|
|
+# define OSSL_NAMED_DAY_BIT_MON 1
|
|
+# define OSSL_NAMED_DAY_BIT_TUE 2
|
|
+# define OSSL_NAMED_DAY_BIT_WED 3
|
|
+# define OSSL_NAMED_DAY_BIT_THU 4
|
|
+# define OSSL_NAMED_DAY_BIT_FRI 5
|
|
+# define OSSL_NAMED_DAY_BIT_SAT 6
|
|
+
|
|
+typedef struct OSSL_NAMED_DAY_st {
|
|
+ int type;
|
|
+ union {
|
|
+ ASN1_INTEGER *intNamedDays;
|
|
+ ASN1_BIT_STRING *bitNamedDays;
|
|
+ } choice;
|
|
+} OSSL_NAMED_DAY;
|
|
+
|
|
+# define OSSL_TIME_SPEC_X_DAY_OF_FIRST 0
|
|
+# define OSSL_TIME_SPEC_X_DAY_OF_SECOND 1
|
|
+# define OSSL_TIME_SPEC_X_DAY_OF_THIRD 2
|
|
+# define OSSL_TIME_SPEC_X_DAY_OF_FOURTH 3
|
|
+# define OSSL_TIME_SPEC_X_DAY_OF_FIFTH 4
|
|
+
|
|
+typedef struct OSSL_TIME_SPEC_X_DAY_OF_st {
|
|
+ int type;
|
|
+ union {
|
|
+ OSSL_NAMED_DAY *first;
|
|
+ OSSL_NAMED_DAY *second;
|
|
+ OSSL_NAMED_DAY *third;
|
|
+ OSSL_NAMED_DAY *fourth;
|
|
+ OSSL_NAMED_DAY *fifth;
|
|
+ } choice;
|
|
+} OSSL_TIME_SPEC_X_DAY_OF;
|
|
+
|
|
+# define OSSL_TIME_SPEC_DAY_TYPE_INT 0
|
|
+# define OSSL_TIME_SPEC_DAY_TYPE_BIT 1
|
|
+# define OSSL_TIME_SPEC_DAY_TYPE_DAY_OF 2
|
|
+# define OSSL_TIME_SPEC_DAY_BIT_SUN 0
|
|
+# define OSSL_TIME_SPEC_DAY_BIT_MON 1
|
|
+# define OSSL_TIME_SPEC_DAY_BIT_TUE 2
|
|
+# define OSSL_TIME_SPEC_DAY_BIT_WED 3
|
|
+# define OSSL_TIME_SPEC_DAY_BIT_THU 4
|
|
+# define OSSL_TIME_SPEC_DAY_BIT_FRI 5
|
|
+# define OSSL_TIME_SPEC_DAY_BIT_SAT 6
|
|
+# define OSSL_TIME_SPEC_DAY_INT_SUN 1
|
|
+# define OSSL_TIME_SPEC_DAY_INT_MON 2
|
|
+# define OSSL_TIME_SPEC_DAY_INT_TUE 3
|
|
+# define OSSL_TIME_SPEC_DAY_INT_WED 4
|
|
+# define OSSL_TIME_SPEC_DAY_INT_THU 5
|
|
+# define OSSL_TIME_SPEC_DAY_INT_FRI 6
|
|
+# define OSSL_TIME_SPEC_DAY_INT_SAT 7
|
|
+
|
|
+typedef struct OSSL_TIME_SPEC_DAY_st {
|
|
+ int type;
|
|
+ union {
|
|
+ STACK_OF(ASN1_INTEGER) *intDay;
|
|
+ ASN1_BIT_STRING *bitDay;
|
|
+ OSSL_TIME_SPEC_X_DAY_OF *dayOf;
|
|
+ } choice;
|
|
+} OSSL_TIME_SPEC_DAY;
|
|
+
|
|
+# define OSSL_TIME_SPEC_WEEKS_TYPE_ALL 0
|
|
+# define OSSL_TIME_SPEC_WEEKS_TYPE_INT 1
|
|
+# define OSSL_TIME_SPEC_WEEKS_TYPE_BIT 2
|
|
+# define OSSL_TIME_SPEC_BIT_WEEKS_1 0
|
|
+# define OSSL_TIME_SPEC_BIT_WEEKS_2 1
|
|
+# define OSSL_TIME_SPEC_BIT_WEEKS_3 2
|
|
+# define OSSL_TIME_SPEC_BIT_WEEKS_4 3
|
|
+# define OSSL_TIME_SPEC_BIT_WEEKS_5 4
|
|
+
|
|
+typedef struct OSSL_TIME_SPEC_WEEKS_st {
|
|
+ int type;
|
|
+ union {
|
|
+ ASN1_NULL *allWeeks;
|
|
+ STACK_OF(ASN1_INTEGER) *intWeek;
|
|
+ ASN1_BIT_STRING *bitWeek;
|
|
+ } choice;
|
|
+} OSSL_TIME_SPEC_WEEKS;
|
|
+
|
|
+# define OSSL_TIME_SPEC_MONTH_TYPE_ALL 0
|
|
+# define OSSL_TIME_SPEC_MONTH_TYPE_INT 1
|
|
+# define OSSL_TIME_SPEC_MONTH_TYPE_BIT 2
|
|
+# define OSSL_TIME_SPEC_INT_MONTH_JAN 1
|
|
+# define OSSL_TIME_SPEC_INT_MONTH_FEB 2
|
|
+# define OSSL_TIME_SPEC_INT_MONTH_MAR 3
|
|
+# define OSSL_TIME_SPEC_INT_MONTH_APR 4
|
|
+# define OSSL_TIME_SPEC_INT_MONTH_MAY 5
|
|
+# define OSSL_TIME_SPEC_INT_MONTH_JUN 6
|
|
+# define OSSL_TIME_SPEC_INT_MONTH_JUL 7
|
|
+# define OSSL_TIME_SPEC_INT_MONTH_AUG 8
|
|
+# define OSSL_TIME_SPEC_INT_MONTH_SEP 9
|
|
+# define OSSL_TIME_SPEC_INT_MONTH_OCT 10
|
|
+# define OSSL_TIME_SPEC_INT_MONTH_NOV 11
|
|
+# define OSSL_TIME_SPEC_INT_MONTH_DEC 12
|
|
+# define OSSL_TIME_SPEC_BIT_MONTH_JAN 0
|
|
+# define OSSL_TIME_SPEC_BIT_MONTH_FEB 1
|
|
+# define OSSL_TIME_SPEC_BIT_MONTH_MAR 2
|
|
+# define OSSL_TIME_SPEC_BIT_MONTH_APR 3
|
|
+# define OSSL_TIME_SPEC_BIT_MONTH_MAY 4
|
|
+# define OSSL_TIME_SPEC_BIT_MONTH_JUN 5
|
|
+# define OSSL_TIME_SPEC_BIT_MONTH_JUL 6
|
|
+# define OSSL_TIME_SPEC_BIT_MONTH_AUG 7
|
|
+# define OSSL_TIME_SPEC_BIT_MONTH_SEP 8
|
|
+# define OSSL_TIME_SPEC_BIT_MONTH_OCT 9
|
|
+# define OSSL_TIME_SPEC_BIT_MONTH_NOV 10
|
|
+# define OSSL_TIME_SPEC_BIT_MONTH_DEC 11
|
|
+
|
|
+typedef struct OSSL_TIME_SPEC_MONTH_st {
|
|
+ int type;
|
|
+ union {
|
|
+ ASN1_NULL *allMonths;
|
|
+ STACK_OF(ASN1_INTEGER) *intMonth;
|
|
+ ASN1_BIT_STRING *bitMonth;
|
|
+ } choice;
|
|
+} OSSL_TIME_SPEC_MONTH;
|
|
+
|
|
+typedef struct OSSL_TIME_PERIOD_st {
|
|
+ STACK_OF(OSSL_DAY_TIME_BAND) *timesOfDay;
|
|
+ OSSL_TIME_SPEC_DAY *days;
|
|
+ OSSL_TIME_SPEC_WEEKS *weeks;
|
|
+ OSSL_TIME_SPEC_MONTH *months;
|
|
+ STACK_OF(ASN1_INTEGER) *years;
|
|
+} OSSL_TIME_PERIOD;
|
|
+
|
|
+# define OSSL_TIME_SPEC_TIME_TYPE_ABSOLUTE 0
|
|
+# define OSSL_TIME_SPEC_TIME_TYPE_PERIODIC 1
|
|
+
|
|
+typedef struct OSSL_TIME_SPEC_TIME_st {
|
|
+ int type;
|
|
+ union {
|
|
+ OSSL_TIME_SPEC_ABSOLUTE *absolute;
|
|
+ STACK_OF(OSSL_TIME_PERIOD) *periodic;
|
|
+ } choice;
|
|
+} OSSL_TIME_SPEC_TIME;
|
|
+
|
|
+typedef struct OSSL_TIME_SPEC_st {
|
|
+ OSSL_TIME_SPEC_TIME *time;
|
|
+ ASN1_BOOLEAN notThisTime;
|
|
+ ASN1_INTEGER *timeZone;
|
|
+} OSSL_TIME_SPEC;
|
|
+
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_DAY_TIME)
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_DAY_TIME_BAND)
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC_DAY)
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC_WEEKS)
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC_MONTH)
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_NAMED_DAY)
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC_X_DAY_OF)
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC_ABSOLUTE)
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC_TIME)
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC)
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_TIME_PERIOD)
|
|
+
|
|
+SKM_DEFINE_STACK_OF_INTERNAL(OSSL_TIME_PERIOD, OSSL_TIME_PERIOD, OSSL_TIME_PERIOD)
|
|
+#define sk_OSSL_TIME_PERIOD_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_TIME_PERIOD_sk_type(sk))
|
|
+#define sk_OSSL_TIME_PERIOD_value(sk, idx) ((OSSL_TIME_PERIOD *)OPENSSL_sk_value(ossl_check_const_OSSL_TIME_PERIOD_sk_type(sk), (idx)))
|
|
+#define sk_OSSL_TIME_PERIOD_new(cmp) ((STACK_OF(OSSL_TIME_PERIOD) *)OPENSSL_sk_new(ossl_check_OSSL_TIME_PERIOD_compfunc_type(cmp)))
|
|
+#define sk_OSSL_TIME_PERIOD_new_null() ((STACK_OF(OSSL_TIME_PERIOD) *)OPENSSL_sk_new_null())
|
|
+#define sk_OSSL_TIME_PERIOD_new_reserve(cmp, n) ((STACK_OF(OSSL_TIME_PERIOD) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_TIME_PERIOD_compfunc_type(cmp), (n)))
|
|
+#define sk_OSSL_TIME_PERIOD_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), (n))
|
|
+#define sk_OSSL_TIME_PERIOD_free(sk) OPENSSL_sk_free(ossl_check_OSSL_TIME_PERIOD_sk_type(sk))
|
|
+#define sk_OSSL_TIME_PERIOD_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_TIME_PERIOD_sk_type(sk))
|
|
+#define sk_OSSL_TIME_PERIOD_delete(sk, i) ((OSSL_TIME_PERIOD *)OPENSSL_sk_delete(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), (i)))
|
|
+#define sk_OSSL_TIME_PERIOD_delete_ptr(sk, ptr) ((OSSL_TIME_PERIOD *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr)))
|
|
+#define sk_OSSL_TIME_PERIOD_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr))
|
|
+#define sk_OSSL_TIME_PERIOD_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr))
|
|
+#define sk_OSSL_TIME_PERIOD_pop(sk) ((OSSL_TIME_PERIOD *)OPENSSL_sk_pop(ossl_check_OSSL_TIME_PERIOD_sk_type(sk)))
|
|
+#define sk_OSSL_TIME_PERIOD_shift(sk) ((OSSL_TIME_PERIOD *)OPENSSL_sk_shift(ossl_check_OSSL_TIME_PERIOD_sk_type(sk)))
|
|
+#define sk_OSSL_TIME_PERIOD_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_TIME_PERIOD_sk_type(sk),ossl_check_OSSL_TIME_PERIOD_freefunc_type(freefunc))
|
|
+#define sk_OSSL_TIME_PERIOD_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr), (idx))
|
|
+#define sk_OSSL_TIME_PERIOD_set(sk, idx, ptr) ((OSSL_TIME_PERIOD *)OPENSSL_sk_set(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), (idx), ossl_check_OSSL_TIME_PERIOD_type(ptr)))
|
|
+#define sk_OSSL_TIME_PERIOD_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr))
|
|
+#define sk_OSSL_TIME_PERIOD_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr))
|
|
+#define sk_OSSL_TIME_PERIOD_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr), pnum)
|
|
+#define sk_OSSL_TIME_PERIOD_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_TIME_PERIOD_sk_type(sk))
|
|
+#define sk_OSSL_TIME_PERIOD_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_TIME_PERIOD_sk_type(sk))
|
|
+#define sk_OSSL_TIME_PERIOD_dup(sk) ((STACK_OF(OSSL_TIME_PERIOD) *)OPENSSL_sk_dup(ossl_check_const_OSSL_TIME_PERIOD_sk_type(sk)))
|
|
+#define sk_OSSL_TIME_PERIOD_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_TIME_PERIOD) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_copyfunc_type(copyfunc), ossl_check_OSSL_TIME_PERIOD_freefunc_type(freefunc)))
|
|
+#define sk_OSSL_TIME_PERIOD_set_cmp_func(sk, cmp) ((sk_OSSL_TIME_PERIOD_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_compfunc_type(cmp)))
|
|
+
|
|
+
|
|
+SKM_DEFINE_STACK_OF_INTERNAL(OSSL_DAY_TIME_BAND, OSSL_DAY_TIME_BAND, OSSL_DAY_TIME_BAND)
|
|
+#define sk_OSSL_DAY_TIME_BAND_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_DAY_TIME_BAND_sk_type(sk))
|
|
+#define sk_OSSL_DAY_TIME_BAND_value(sk, idx) ((OSSL_DAY_TIME_BAND *)OPENSSL_sk_value(ossl_check_const_OSSL_DAY_TIME_BAND_sk_type(sk), (idx)))
|
|
+#define sk_OSSL_DAY_TIME_BAND_new(cmp) ((STACK_OF(OSSL_DAY_TIME_BAND) *)OPENSSL_sk_new(ossl_check_OSSL_DAY_TIME_BAND_compfunc_type(cmp)))
|
|
+#define sk_OSSL_DAY_TIME_BAND_new_null() ((STACK_OF(OSSL_DAY_TIME_BAND) *)OPENSSL_sk_new_null())
|
|
+#define sk_OSSL_DAY_TIME_BAND_new_reserve(cmp, n) ((STACK_OF(OSSL_DAY_TIME_BAND) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_DAY_TIME_BAND_compfunc_type(cmp), (n)))
|
|
+#define sk_OSSL_DAY_TIME_BAND_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), (n))
|
|
+#define sk_OSSL_DAY_TIME_BAND_free(sk) OPENSSL_sk_free(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk))
|
|
+#define sk_OSSL_DAY_TIME_BAND_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk))
|
|
+#define sk_OSSL_DAY_TIME_BAND_delete(sk, i) ((OSSL_DAY_TIME_BAND *)OPENSSL_sk_delete(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), (i)))
|
|
+#define sk_OSSL_DAY_TIME_BAND_delete_ptr(sk, ptr) ((OSSL_DAY_TIME_BAND *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr)))
|
|
+#define sk_OSSL_DAY_TIME_BAND_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr))
|
|
+#define sk_OSSL_DAY_TIME_BAND_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr))
|
|
+#define sk_OSSL_DAY_TIME_BAND_pop(sk) ((OSSL_DAY_TIME_BAND *)OPENSSL_sk_pop(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk)))
|
|
+#define sk_OSSL_DAY_TIME_BAND_shift(sk) ((OSSL_DAY_TIME_BAND *)OPENSSL_sk_shift(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk)))
|
|
+#define sk_OSSL_DAY_TIME_BAND_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk),ossl_check_OSSL_DAY_TIME_BAND_freefunc_type(freefunc))
|
|
+#define sk_OSSL_DAY_TIME_BAND_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr), (idx))
|
|
+#define sk_OSSL_DAY_TIME_BAND_set(sk, idx, ptr) ((OSSL_DAY_TIME_BAND *)OPENSSL_sk_set(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), (idx), ossl_check_OSSL_DAY_TIME_BAND_type(ptr)))
|
|
+#define sk_OSSL_DAY_TIME_BAND_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr))
|
|
+#define sk_OSSL_DAY_TIME_BAND_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr))
|
|
+#define sk_OSSL_DAY_TIME_BAND_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr), pnum)
|
|
+#define sk_OSSL_DAY_TIME_BAND_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk))
|
|
+#define sk_OSSL_DAY_TIME_BAND_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_DAY_TIME_BAND_sk_type(sk))
|
|
+#define sk_OSSL_DAY_TIME_BAND_dup(sk) ((STACK_OF(OSSL_DAY_TIME_BAND) *)OPENSSL_sk_dup(ossl_check_const_OSSL_DAY_TIME_BAND_sk_type(sk)))
|
|
+#define sk_OSSL_DAY_TIME_BAND_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_DAY_TIME_BAND) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_copyfunc_type(copyfunc), ossl_check_OSSL_DAY_TIME_BAND_freefunc_type(freefunc)))
|
|
+#define sk_OSSL_DAY_TIME_BAND_set_cmp_func(sk, cmp) ((sk_OSSL_DAY_TIME_BAND_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_compfunc_type(cmp)))
|
|
+
|
|
+
|
|
+/* Attribute Type and Value */
|
|
+typedef struct atav_st {
|
|
+ ASN1_OBJECT *type;
|
|
+ ASN1_TYPE *value;
|
|
+} OSSL_ATAV;
|
|
+
|
|
+typedef struct ATTRIBUTE_TYPE_MAPPING_st {
|
|
+ ASN1_OBJECT *local;
|
|
+ ASN1_OBJECT *remote;
|
|
+} OSSL_ATTRIBUTE_TYPE_MAPPING;
|
|
+
|
|
+typedef struct ATTRIBUTE_VALUE_MAPPING_st {
|
|
+ OSSL_ATAV *local;
|
|
+ OSSL_ATAV *remote;
|
|
+} OSSL_ATTRIBUTE_VALUE_MAPPING;
|
|
+
|
|
+# define OSSL_ATTR_MAP_TYPE 0
|
|
+# define OSSL_ATTR_MAP_VALUE 1
|
|
+
|
|
+typedef struct ATTRIBUTE_MAPPING_st {
|
|
+ int type;
|
|
+ union {
|
|
+ OSSL_ATTRIBUTE_TYPE_MAPPING *typeMappings;
|
|
+ OSSL_ATTRIBUTE_VALUE_MAPPING *typeValueMappings;
|
|
+ } choice;
|
|
+} OSSL_ATTRIBUTE_MAPPING;
|
|
+
|
|
+typedef STACK_OF(OSSL_ATTRIBUTE_MAPPING) OSSL_ATTRIBUTE_MAPPINGS;
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_ATAV)
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_ATTRIBUTE_TYPE_MAPPING)
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_ATTRIBUTE_VALUE_MAPPING)
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_ATTRIBUTE_MAPPING)
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_ATTRIBUTE_MAPPINGS)
|
|
+
|
|
+SKM_DEFINE_STACK_OF_INTERNAL(OSSL_ATTRIBUTE_MAPPING, OSSL_ATTRIBUTE_MAPPING, OSSL_ATTRIBUTE_MAPPING)
|
|
+#define sk_OSSL_ATTRIBUTE_MAPPING_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_ATTRIBUTE_MAPPING_sk_type(sk))
|
|
+#define sk_OSSL_ATTRIBUTE_MAPPING_value(sk, idx) ((OSSL_ATTRIBUTE_MAPPING *)OPENSSL_sk_value(ossl_check_const_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), (idx)))
|
|
+#define sk_OSSL_ATTRIBUTE_MAPPING_new(cmp) ((STACK_OF(OSSL_ATTRIBUTE_MAPPING) *)OPENSSL_sk_new(ossl_check_OSSL_ATTRIBUTE_MAPPING_compfunc_type(cmp)))
|
|
+#define sk_OSSL_ATTRIBUTE_MAPPING_new_null() ((STACK_OF(OSSL_ATTRIBUTE_MAPPING) *)OPENSSL_sk_new_null())
|
|
+#define sk_OSSL_ATTRIBUTE_MAPPING_new_reserve(cmp, n) ((STACK_OF(OSSL_ATTRIBUTE_MAPPING) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_ATTRIBUTE_MAPPING_compfunc_type(cmp), (n)))
|
|
+#define sk_OSSL_ATTRIBUTE_MAPPING_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), (n))
|
|
+#define sk_OSSL_ATTRIBUTE_MAPPING_free(sk) OPENSSL_sk_free(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk))
|
|
+#define sk_OSSL_ATTRIBUTE_MAPPING_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk))
|
|
+#define sk_OSSL_ATTRIBUTE_MAPPING_delete(sk, i) ((OSSL_ATTRIBUTE_MAPPING *)OPENSSL_sk_delete(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), (i)))
|
|
+#define sk_OSSL_ATTRIBUTE_MAPPING_delete_ptr(sk, ptr) ((OSSL_ATTRIBUTE_MAPPING *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr)))
|
|
+#define sk_OSSL_ATTRIBUTE_MAPPING_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr))
|
|
+#define sk_OSSL_ATTRIBUTE_MAPPING_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr))
|
|
+#define sk_OSSL_ATTRIBUTE_MAPPING_pop(sk) ((OSSL_ATTRIBUTE_MAPPING *)OPENSSL_sk_pop(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk)))
|
|
+#define sk_OSSL_ATTRIBUTE_MAPPING_shift(sk) ((OSSL_ATTRIBUTE_MAPPING *)OPENSSL_sk_shift(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk)))
|
|
+#define sk_OSSL_ATTRIBUTE_MAPPING_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk),ossl_check_OSSL_ATTRIBUTE_MAPPING_freefunc_type(freefunc))
|
|
+#define sk_OSSL_ATTRIBUTE_MAPPING_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr), (idx))
|
|
+#define sk_OSSL_ATTRIBUTE_MAPPING_set(sk, idx, ptr) ((OSSL_ATTRIBUTE_MAPPING *)OPENSSL_sk_set(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), (idx), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr)))
|
|
+#define sk_OSSL_ATTRIBUTE_MAPPING_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr))
|
|
+#define sk_OSSL_ATTRIBUTE_MAPPING_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr))
|
|
+#define sk_OSSL_ATTRIBUTE_MAPPING_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr), pnum)
|
|
+#define sk_OSSL_ATTRIBUTE_MAPPING_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk))
|
|
+#define sk_OSSL_ATTRIBUTE_MAPPING_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_ATTRIBUTE_MAPPING_sk_type(sk))
|
|
+#define sk_OSSL_ATTRIBUTE_MAPPING_dup(sk) ((STACK_OF(OSSL_ATTRIBUTE_MAPPING) *)OPENSSL_sk_dup(ossl_check_const_OSSL_ATTRIBUTE_MAPPING_sk_type(sk)))
|
|
+#define sk_OSSL_ATTRIBUTE_MAPPING_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_ATTRIBUTE_MAPPING) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_copyfunc_type(copyfunc), ossl_check_OSSL_ATTRIBUTE_MAPPING_freefunc_type(freefunc)))
|
|
+#define sk_OSSL_ATTRIBUTE_MAPPING_set_cmp_func(sk, cmp) ((sk_OSSL_ATTRIBUTE_MAPPING_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_compfunc_type(cmp)))
|
|
+
|
|
+
|
|
+# define OSSL_AAA_ATTRIBUTE_TYPE 0
|
|
+# define OSSL_AAA_ATTRIBUTE_VALUES 1
|
|
+
|
|
+typedef struct ALLOWED_ATTRIBUTES_CHOICE_st {
|
|
+ int type;
|
|
+ union {
|
|
+ ASN1_OBJECT *attributeType;
|
|
+ X509_ATTRIBUTE *attributeTypeandValues;
|
|
+ } choice;
|
|
+} OSSL_ALLOWED_ATTRIBUTES_CHOICE;
|
|
+
|
|
+typedef struct ALLOWED_ATTRIBUTES_ITEM_st {
|
|
+ STACK_OF(OSSL_ALLOWED_ATTRIBUTES_CHOICE) *attributes;
|
|
+ GENERAL_NAME *holderDomain;
|
|
+} OSSL_ALLOWED_ATTRIBUTES_ITEM;
|
|
+
|
|
+typedef STACK_OF(OSSL_ALLOWED_ATTRIBUTES_ITEM) OSSL_ALLOWED_ATTRIBUTES_SYNTAX;
|
|
+
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_ALLOWED_ATTRIBUTES_CHOICE)
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_ALLOWED_ATTRIBUTES_ITEM)
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_ALLOWED_ATTRIBUTES_SYNTAX)
|
|
+
|
|
+SKM_DEFINE_STACK_OF_INTERNAL(OSSL_ALLOWED_ATTRIBUTES_CHOICE, OSSL_ALLOWED_ATTRIBUTES_CHOICE, OSSL_ALLOWED_ATTRIBUTES_CHOICE)
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_value(sk, idx) ((OSSL_ALLOWED_ATTRIBUTES_CHOICE *)OPENSSL_sk_value(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), (idx)))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_new(cmp) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_CHOICE) *)OPENSSL_sk_new(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_compfunc_type(cmp)))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_new_null() ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_CHOICE) *)OPENSSL_sk_new_null())
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_new_reserve(cmp, n) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_CHOICE) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_compfunc_type(cmp), (n)))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), (n))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_free(sk) OPENSSL_sk_free(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_delete(sk, i) ((OSSL_ALLOWED_ATTRIBUTES_CHOICE *)OPENSSL_sk_delete(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), (i)))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_delete_ptr(sk, ptr) ((OSSL_ALLOWED_ATTRIBUTES_CHOICE *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr)))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_pop(sk) ((OSSL_ALLOWED_ATTRIBUTES_CHOICE *)OPENSSL_sk_pop(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk)))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_shift(sk) ((OSSL_ALLOWED_ATTRIBUTES_CHOICE *)OPENSSL_sk_shift(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk)))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk),ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_freefunc_type(freefunc))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr), (idx))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_set(sk, idx, ptr) ((OSSL_ALLOWED_ATTRIBUTES_CHOICE *)OPENSSL_sk_set(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), (idx), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr)))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr), pnum)
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_dup(sk) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_CHOICE) *)OPENSSL_sk_dup(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk)))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_CHOICE) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_copyfunc_type(copyfunc), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_freefunc_type(freefunc)))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_set_cmp_func(sk, cmp) ((sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_compfunc_type(cmp)))
|
|
+
|
|
+
|
|
+SKM_DEFINE_STACK_OF_INTERNAL(OSSL_ALLOWED_ATTRIBUTES_ITEM, OSSL_ALLOWED_ATTRIBUTES_ITEM, OSSL_ALLOWED_ATTRIBUTES_ITEM)
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_value(sk, idx) ((OSSL_ALLOWED_ATTRIBUTES_ITEM *)OPENSSL_sk_value(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), (idx)))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_new(cmp) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_ITEM) *)OPENSSL_sk_new(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_compfunc_type(cmp)))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_new_null() ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_ITEM) *)OPENSSL_sk_new_null())
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_new_reserve(cmp, n) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_ITEM) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_compfunc_type(cmp), (n)))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), (n))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_free(sk) OPENSSL_sk_free(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_delete(sk, i) ((OSSL_ALLOWED_ATTRIBUTES_ITEM *)OPENSSL_sk_delete(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), (i)))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_delete_ptr(sk, ptr) ((OSSL_ALLOWED_ATTRIBUTES_ITEM *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr)))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_pop(sk) ((OSSL_ALLOWED_ATTRIBUTES_ITEM *)OPENSSL_sk_pop(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk)))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_shift(sk) ((OSSL_ALLOWED_ATTRIBUTES_ITEM *)OPENSSL_sk_shift(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk)))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk),ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_freefunc_type(freefunc))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr), (idx))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_set(sk, idx, ptr) ((OSSL_ALLOWED_ATTRIBUTES_ITEM *)OPENSSL_sk_set(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), (idx), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr)))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr), pnum)
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_dup(sk) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_ITEM) *)OPENSSL_sk_dup(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk)))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_ITEM) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_copyfunc_type(copyfunc), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_freefunc_type(freefunc)))
|
|
+#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_set_cmp_func(sk, cmp) ((sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_compfunc_type(cmp)))
|
|
+
|
|
+
|
|
+typedef struct AA_DIST_POINT_st {
|
|
+ DIST_POINT_NAME *distpoint;
|
|
+ ASN1_BIT_STRING *reasons;
|
|
+ int dp_reasons;
|
|
+ ASN1_BOOLEAN indirectCRL;
|
|
+ ASN1_BOOLEAN containsUserAttributeCerts;
|
|
+ ASN1_BOOLEAN containsAACerts;
|
|
+ ASN1_BOOLEAN containsSOAPublicKeyCerts;
|
|
+} OSSL_AA_DIST_POINT;
|
|
+
|
|
+DECLARE_ASN1_FUNCTIONS(OSSL_AA_DIST_POINT)
|
|
+
|
|
# ifdef __cplusplus
|
|
}
|
|
# endif
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/der/der_ml_dsa_gen.c b/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/der/der_ml_dsa_gen.c
|
|
new file mode 100644
|
|
index 0000000000..d4c6cfc61e
|
|
--- /dev/null
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/der/der_ml_dsa_gen.c
|
|
@@ -0,0 +1,37 @@
|
|
+/*
|
|
+ * WARNING: do not edit!
|
|
+ * Generated by Makefile from providers/common/der/der_ml_dsa_gen.c.in
|
|
+ *
|
|
+ * Copyright 2025 The OpenSSL Project Authors. All Rights Reserved.
|
|
+ *
|
|
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
|
|
+ * this file except in compliance with the License. You can obtain a copy
|
|
+ * in the file LICENSE in the source distribution or at
|
|
+ * https://www.openssl.org/source/license.html
|
|
+ */
|
|
+
|
|
+#include "prov/der_ml_dsa.h"
|
|
+
|
|
+/* Well known OIDs precompiled */
|
|
+
|
|
+/*
|
|
+ * id-ml-dsa-44 OBJECT IDENTIFIER ::= { sigAlgs 17 }
|
|
+ */
|
|
+const unsigned char ossl_der_oid_id_ml_dsa_44[DER_OID_SZ_id_ml_dsa_44] = {
|
|
+ DER_OID_V_id_ml_dsa_44
|
|
+};
|
|
+
|
|
+/*
|
|
+ * id-ml-dsa-65 OBJECT IDENTIFIER ::= { sigAlgs 18 }
|
|
+ */
|
|
+const unsigned char ossl_der_oid_id_ml_dsa_65[DER_OID_SZ_id_ml_dsa_65] = {
|
|
+ DER_OID_V_id_ml_dsa_65
|
|
+};
|
|
+
|
|
+/*
|
|
+ * id-ml-dsa-87 OBJECT IDENTIFIER ::= { sigAlgs 19 }
|
|
+ */
|
|
+const unsigned char ossl_der_oid_id_ml_dsa_87[DER_OID_SZ_id_ml_dsa_87] = {
|
|
+ DER_OID_V_id_ml_dsa_87
|
|
+};
|
|
+
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/der/der_slh_dsa_gen.c b/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/der/der_slh_dsa_gen.c
|
|
new file mode 100644
|
|
index 0000000000..f9fb0bdc51
|
|
--- /dev/null
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/der/der_slh_dsa_gen.c
|
|
@@ -0,0 +1,100 @@
|
|
+/*
|
|
+ * WARNING: do not edit!
|
|
+ * Generated by Makefile from providers/common/der/der_slh_dsa_gen.c.in
|
|
+ *
|
|
+ * Copyright 2025 The OpenSSL Project Authors. All Rights Reserved.
|
|
+ *
|
|
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
|
|
+ * this file except in compliance with the License. You can obtain a copy
|
|
+ * in the file LICENSE in the source distribution or at
|
|
+ * https://www.openssl.org/source/license.html
|
|
+ */
|
|
+
|
|
+#include "prov/der_slh_dsa.h"
|
|
+
|
|
+/* Well known OIDs precompiled */
|
|
+
|
|
+/*
|
|
+ * id-slh-dsa-sha2-128s OBJECT IDENTIFIER ::= { sigAlgs 20 }
|
|
+ */
|
|
+const unsigned char ossl_der_oid_id_slh_dsa_sha2_128s[DER_OID_SZ_id_slh_dsa_sha2_128s] = {
|
|
+ DER_OID_V_id_slh_dsa_sha2_128s
|
|
+};
|
|
+
|
|
+/*
|
|
+ * id-slh-dsa-sha2-128f OBJECT IDENTIFIER ::= { sigAlgs 21 }
|
|
+ */
|
|
+const unsigned char ossl_der_oid_id_slh_dsa_sha2_128f[DER_OID_SZ_id_slh_dsa_sha2_128f] = {
|
|
+ DER_OID_V_id_slh_dsa_sha2_128f
|
|
+};
|
|
+
|
|
+/*
|
|
+ * id-slh-dsa-sha2-192s OBJECT IDENTIFIER ::= { sigAlgs 22 }
|
|
+ */
|
|
+const unsigned char ossl_der_oid_id_slh_dsa_sha2_192s[DER_OID_SZ_id_slh_dsa_sha2_192s] = {
|
|
+ DER_OID_V_id_slh_dsa_sha2_192s
|
|
+};
|
|
+
|
|
+/*
|
|
+ * id-slh-dsa-sha2-192f OBJECT IDENTIFIER ::= { sigAlgs 23 }
|
|
+ */
|
|
+const unsigned char ossl_der_oid_id_slh_dsa_sha2_192f[DER_OID_SZ_id_slh_dsa_sha2_192f] = {
|
|
+ DER_OID_V_id_slh_dsa_sha2_192f
|
|
+};
|
|
+
|
|
+/*
|
|
+ * id-slh-dsa-sha2-256s OBJECT IDENTIFIER ::= { sigAlgs 24 }
|
|
+ */
|
|
+const unsigned char ossl_der_oid_id_slh_dsa_sha2_256s[DER_OID_SZ_id_slh_dsa_sha2_256s] = {
|
|
+ DER_OID_V_id_slh_dsa_sha2_256s
|
|
+};
|
|
+
|
|
+/*
|
|
+ * id-slh-dsa-sha2-256f OBJECT IDENTIFIER ::= { sigAlgs 25 }
|
|
+ */
|
|
+const unsigned char ossl_der_oid_id_slh_dsa_sha2_256f[DER_OID_SZ_id_slh_dsa_sha2_256f] = {
|
|
+ DER_OID_V_id_slh_dsa_sha2_256f
|
|
+};
|
|
+
|
|
+/*
|
|
+ * id-slh-dsa-shake-128s OBJECT IDENTIFIER ::= { sigAlgs 26 }
|
|
+ */
|
|
+const unsigned char ossl_der_oid_id_slh_dsa_shake_128s[DER_OID_SZ_id_slh_dsa_shake_128s] = {
|
|
+ DER_OID_V_id_slh_dsa_shake_128s
|
|
+};
|
|
+
|
|
+/*
|
|
+ * id-slh-dsa-shake-128f OBJECT IDENTIFIER ::= { sigAlgs 27 }
|
|
+ */
|
|
+const unsigned char ossl_der_oid_id_slh_dsa_shake_128f[DER_OID_SZ_id_slh_dsa_shake_128f] = {
|
|
+ DER_OID_V_id_slh_dsa_shake_128f
|
|
+};
|
|
+
|
|
+/*
|
|
+ * id-slh-dsa-shake-192s OBJECT IDENTIFIER ::= { sigAlgs 28 }
|
|
+ */
|
|
+const unsigned char ossl_der_oid_id_slh_dsa_shake_192s[DER_OID_SZ_id_slh_dsa_shake_192s] = {
|
|
+ DER_OID_V_id_slh_dsa_shake_192s
|
|
+};
|
|
+
|
|
+/*
|
|
+ * id-slh-dsa-shake-192f OBJECT IDENTIFIER ::= { sigAlgs 29 }
|
|
+ */
|
|
+const unsigned char ossl_der_oid_id_slh_dsa_shake_192f[DER_OID_SZ_id_slh_dsa_shake_192f] = {
|
|
+ DER_OID_V_id_slh_dsa_shake_192f
|
|
+};
|
|
+
|
|
+/*
|
|
+ * id-slh-dsa-shake-256s OBJECT IDENTIFIER ::= { sigAlgs 30 }
|
|
+ */
|
|
+const unsigned char ossl_der_oid_id_slh_dsa_shake_256s[DER_OID_SZ_id_slh_dsa_shake_256s] = {
|
|
+ DER_OID_V_id_slh_dsa_shake_256s
|
|
+};
|
|
+
|
|
+/*
|
|
+ * id-slh-dsa-shake-256f OBJECT IDENTIFIER ::= { sigAlgs 31 }
|
|
+ */
|
|
+const unsigned char ossl_der_oid_id_slh_dsa_shake_256f[DER_OID_SZ_id_slh_dsa_shake_256f] = {
|
|
+ DER_OID_V_id_slh_dsa_shake_256f
|
|
+};
|
|
+
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/include/prov/der_ml_dsa.h b/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/include/prov/der_ml_dsa.h
|
|
new file mode 100644
|
|
index 0000000000..636054f781
|
|
--- /dev/null
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/include/prov/der_ml_dsa.h
|
|
@@ -0,0 +1,40 @@
|
|
+/*
|
|
+ * WARNING: do not edit!
|
|
+ * Generated by Makefile from providers/common/include/prov/der_ml_dsa.h.in
|
|
+ *
|
|
+ * Copyright 2025 The OpenSSL Project Authors. All Rights Reserved.
|
|
+ *
|
|
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
|
|
+ * this file except in compliance with the License. You can obtain a copy
|
|
+ * in the file LICENSE in the source distribution or at
|
|
+ * https://www.openssl.org/source/license.html
|
|
+ */
|
|
+
|
|
+#include "internal/der.h"
|
|
+#include "crypto/ml_dsa.h"
|
|
+
|
|
+/* Well known OIDs precompiled */
|
|
+
|
|
+/*
|
|
+ * id-ml-dsa-44 OBJECT IDENTIFIER ::= { sigAlgs 17 }
|
|
+ */
|
|
+#define DER_OID_V_id_ml_dsa_44 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x11
|
|
+#define DER_OID_SZ_id_ml_dsa_44 11
|
|
+extern const unsigned char ossl_der_oid_id_ml_dsa_44[DER_OID_SZ_id_ml_dsa_44];
|
|
+
|
|
+/*
|
|
+ * id-ml-dsa-65 OBJECT IDENTIFIER ::= { sigAlgs 18 }
|
|
+ */
|
|
+#define DER_OID_V_id_ml_dsa_65 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x12
|
|
+#define DER_OID_SZ_id_ml_dsa_65 11
|
|
+extern const unsigned char ossl_der_oid_id_ml_dsa_65[DER_OID_SZ_id_ml_dsa_65];
|
|
+
|
|
+/*
|
|
+ * id-ml-dsa-87 OBJECT IDENTIFIER ::= { sigAlgs 19 }
|
|
+ */
|
|
+#define DER_OID_V_id_ml_dsa_87 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x13
|
|
+#define DER_OID_SZ_id_ml_dsa_87 11
|
|
+extern const unsigned char ossl_der_oid_id_ml_dsa_87[DER_OID_SZ_id_ml_dsa_87];
|
|
+
|
|
+
|
|
+int ossl_DER_w_algorithmIdentifier_ML_DSA(WPACKET *pkt, int tag, ML_DSA_KEY *key);
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/include/prov/der_slh_dsa.h b/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/include/prov/der_slh_dsa.h
|
|
new file mode 100644
|
|
index 0000000000..0da6cdd7b1
|
|
--- /dev/null
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/include/prov/der_slh_dsa.h
|
|
@@ -0,0 +1,103 @@
|
|
+/*
|
|
+ * WARNING: do not edit!
|
|
+ * Generated by Makefile from providers/common/include/prov/der_slh_dsa.h.in
|
|
+ *
|
|
+ * Copyright 2025 The OpenSSL Project Authors. All Rights Reserved.
|
|
+ *
|
|
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
|
|
+ * this file except in compliance with the License. You can obtain a copy
|
|
+ * in the file LICENSE in the source distribution or at
|
|
+ * https://www.openssl.org/source/license.html
|
|
+ */
|
|
+
|
|
+#include "internal/der.h"
|
|
+#include "crypto/slh_dsa.h"
|
|
+
|
|
+/* Well known OIDs precompiled */
|
|
+
|
|
+/*
|
|
+ * id-slh-dsa-sha2-128s OBJECT IDENTIFIER ::= { sigAlgs 20 }
|
|
+ */
|
|
+#define DER_OID_V_id_slh_dsa_sha2_128s DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x14
|
|
+#define DER_OID_SZ_id_slh_dsa_sha2_128s 11
|
|
+extern const unsigned char ossl_der_oid_id_slh_dsa_sha2_128s[DER_OID_SZ_id_slh_dsa_sha2_128s];
|
|
+
|
|
+/*
|
|
+ * id-slh-dsa-sha2-128f OBJECT IDENTIFIER ::= { sigAlgs 21 }
|
|
+ */
|
|
+#define DER_OID_V_id_slh_dsa_sha2_128f DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x15
|
|
+#define DER_OID_SZ_id_slh_dsa_sha2_128f 11
|
|
+extern const unsigned char ossl_der_oid_id_slh_dsa_sha2_128f[DER_OID_SZ_id_slh_dsa_sha2_128f];
|
|
+
|
|
+/*
|
|
+ * id-slh-dsa-sha2-192s OBJECT IDENTIFIER ::= { sigAlgs 22 }
|
|
+ */
|
|
+#define DER_OID_V_id_slh_dsa_sha2_192s DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x16
|
|
+#define DER_OID_SZ_id_slh_dsa_sha2_192s 11
|
|
+extern const unsigned char ossl_der_oid_id_slh_dsa_sha2_192s[DER_OID_SZ_id_slh_dsa_sha2_192s];
|
|
+
|
|
+/*
|
|
+ * id-slh-dsa-sha2-192f OBJECT IDENTIFIER ::= { sigAlgs 23 }
|
|
+ */
|
|
+#define DER_OID_V_id_slh_dsa_sha2_192f DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x17
|
|
+#define DER_OID_SZ_id_slh_dsa_sha2_192f 11
|
|
+extern const unsigned char ossl_der_oid_id_slh_dsa_sha2_192f[DER_OID_SZ_id_slh_dsa_sha2_192f];
|
|
+
|
|
+/*
|
|
+ * id-slh-dsa-sha2-256s OBJECT IDENTIFIER ::= { sigAlgs 24 }
|
|
+ */
|
|
+#define DER_OID_V_id_slh_dsa_sha2_256s DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x18
|
|
+#define DER_OID_SZ_id_slh_dsa_sha2_256s 11
|
|
+extern const unsigned char ossl_der_oid_id_slh_dsa_sha2_256s[DER_OID_SZ_id_slh_dsa_sha2_256s];
|
|
+
|
|
+/*
|
|
+ * id-slh-dsa-sha2-256f OBJECT IDENTIFIER ::= { sigAlgs 25 }
|
|
+ */
|
|
+#define DER_OID_V_id_slh_dsa_sha2_256f DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x19
|
|
+#define DER_OID_SZ_id_slh_dsa_sha2_256f 11
|
|
+extern const unsigned char ossl_der_oid_id_slh_dsa_sha2_256f[DER_OID_SZ_id_slh_dsa_sha2_256f];
|
|
+
|
|
+/*
|
|
+ * id-slh-dsa-shake-128s OBJECT IDENTIFIER ::= { sigAlgs 26 }
|
|
+ */
|
|
+#define DER_OID_V_id_slh_dsa_shake_128s DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x1A
|
|
+#define DER_OID_SZ_id_slh_dsa_shake_128s 11
|
|
+extern const unsigned char ossl_der_oid_id_slh_dsa_shake_128s[DER_OID_SZ_id_slh_dsa_shake_128s];
|
|
+
|
|
+/*
|
|
+ * id-slh-dsa-shake-128f OBJECT IDENTIFIER ::= { sigAlgs 27 }
|
|
+ */
|
|
+#define DER_OID_V_id_slh_dsa_shake_128f DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x1B
|
|
+#define DER_OID_SZ_id_slh_dsa_shake_128f 11
|
|
+extern const unsigned char ossl_der_oid_id_slh_dsa_shake_128f[DER_OID_SZ_id_slh_dsa_shake_128f];
|
|
+
|
|
+/*
|
|
+ * id-slh-dsa-shake-192s OBJECT IDENTIFIER ::= { sigAlgs 28 }
|
|
+ */
|
|
+#define DER_OID_V_id_slh_dsa_shake_192s DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x1C
|
|
+#define DER_OID_SZ_id_slh_dsa_shake_192s 11
|
|
+extern const unsigned char ossl_der_oid_id_slh_dsa_shake_192s[DER_OID_SZ_id_slh_dsa_shake_192s];
|
|
+
|
|
+/*
|
|
+ * id-slh-dsa-shake-192f OBJECT IDENTIFIER ::= { sigAlgs 29 }
|
|
+ */
|
|
+#define DER_OID_V_id_slh_dsa_shake_192f DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x1D
|
|
+#define DER_OID_SZ_id_slh_dsa_shake_192f 11
|
|
+extern const unsigned char ossl_der_oid_id_slh_dsa_shake_192f[DER_OID_SZ_id_slh_dsa_shake_192f];
|
|
+
|
|
+/*
|
|
+ * id-slh-dsa-shake-256s OBJECT IDENTIFIER ::= { sigAlgs 30 }
|
|
+ */
|
|
+#define DER_OID_V_id_slh_dsa_shake_256s DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x1E
|
|
+#define DER_OID_SZ_id_slh_dsa_shake_256s 11
|
|
+extern const unsigned char ossl_der_oid_id_slh_dsa_shake_256s[DER_OID_SZ_id_slh_dsa_shake_256s];
|
|
+
|
|
+/*
|
|
+ * id-slh-dsa-shake-256f OBJECT IDENTIFIER ::= { sigAlgs 31 }
|
|
+ */
|
|
+#define DER_OID_V_id_slh_dsa_shake_256f DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x1F
|
|
+#define DER_OID_SZ_id_slh_dsa_shake_256f 11
|
|
+extern const unsigned char ossl_der_oid_id_slh_dsa_shake_256f[DER_OID_SZ_id_slh_dsa_shake_256f];
|
|
+
|
|
+
|
|
+int ossl_DER_w_algorithmIdentifier_SLH_DSA(WPACKET *pkt, int tag, SLH_DSA_KEY *key);
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslLib.inf b/CryptoPkg/Library/OpensslLib/OpensslLib.inf
|
|
index 1aa22f9905..1e1b732b3e 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslLib.inf
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslLib.inf
|
|
@@ -289,7 +289,9 @@
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_check.c
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_gn.c
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_lib.c
|
|
+ $(OPENSSL_PATH)/crypto/evp/s_lib.c
|
|
$(OPENSSL_PATH)/crypto/evp/signature.c
|
|
+ $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_backend.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_dh.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c
|
|
@@ -297,6 +299,7 @@
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c
|
|
+ $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c
|
|
$(OPENSSL_PATH)/crypto/hashtable/hashtable.c
|
|
$(OPENSSL_PATH)/crypto/hmac/hmac.c
|
|
$(OPENSSL_PATH)/crypto/hpke/hpke.c
|
|
@@ -353,6 +356,7 @@
|
|
$(OPENSSL_PATH)/crypto/self_test_core.c
|
|
$(OPENSSL_PATH)/crypto/sleep.c
|
|
$(OPENSSL_PATH)/crypto/sparse_array.c
|
|
+ $(OPENSSL_PATH)/crypto/ssl_err.c
|
|
$(OPENSSL_PATH)/crypto/threads_lib.c
|
|
$(OPENSSL_PATH)/crypto/threads_none.c
|
|
$(OPENSSL_PATH)/crypto/threads_pthread.c
|
|
@@ -363,6 +367,15 @@
|
|
$(OPENSSL_PATH)/crypto/md5/md5_dgst.c
|
|
$(OPENSSL_PATH)/crypto/md5/md5_one.c
|
|
$(OPENSSL_PATH)/crypto/md5/md5_sha1.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_encoders.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key_compress.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_matrix.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_ntt.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_params.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sample.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sign.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_kem/ml_kem.c
|
|
$(OPENSSL_PATH)/crypto/modes/cbc128.c
|
|
$(OPENSSL_PATH)/crypto/modes/ccm128.c
|
|
$(OPENSSL_PATH)/crypto/modes/cfb128.c
|
|
@@ -443,6 +456,16 @@
|
|
$(OPENSSL_PATH)/crypto/sha/sha256.c
|
|
$(OPENSSL_PATH)/crypto/sha/sha3.c
|
|
$(OPENSSL_PATH)/crypto/sha/sha512.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_adrs.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_hash_ctx.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_key.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_fors.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_hash.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_hypertree.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_params.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_wots.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_xmss.c
|
|
$(OPENSSL_PATH)/crypto/sm3/legacy_sm3.c
|
|
$(OPENSSL_PATH)/crypto/sm3/sm3.c
|
|
$(OPENSSL_PATH)/crypto/stack/stack.c
|
|
@@ -467,13 +490,17 @@
|
|
$(OPENSSL_PATH)/crypto/x509/t_crl.c
|
|
$(OPENSSL_PATH)/crypto/x509/t_req.c
|
|
$(OPENSSL_PATH)/crypto/x509/t_x509.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_aaa.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_addr.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_admis.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_akeya.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_akid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_asid.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_audit_id.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_authattid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_battcons.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_bcons.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_bitst.c
|
|
@@ -501,12 +528,14 @@
|
|
$(OPENSSL_PATH)/crypto/x509/v3_pmaps.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_prn.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_purp.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_san.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_sda.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_single_use.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_skid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_soa_id.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_sxnet.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_timespec.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_tlsf.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_usernotice.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_utf8.c
|
|
@@ -585,6 +614,9 @@
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/decode_pvk2key.c
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/decode_spki2typespki.c
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/endecoder_common.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_common_codecs.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_dsa_codecs.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_kem_codecs.c
|
|
$(OPENSSL_PATH)/providers/implementations/exchange/dh_exch.c
|
|
$(OPENSSL_PATH)/providers/implementations/exchange/kdf_exch.c
|
|
$(OPENSSL_PATH)/providers/implementations/kdfs/argon2.c
|
|
@@ -600,11 +632,15 @@
|
|
$(OPENSSL_PATH)/providers/implementations/kdfs/sskdf.c
|
|
$(OPENSSL_PATH)/providers/implementations/kdfs/tls1_prf.c
|
|
$(OPENSSL_PATH)/providers/implementations/kdfs/x942kdf.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/kem/ml_kem_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/kem/rsa_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/dh_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/kdf_legacy_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/mac_legacy_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_dsa_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_kem_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/rsa_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/slh_dsa_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/gmac_prov.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/hmac_prov.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/kmac_prov.c
|
|
@@ -620,10 +656,16 @@
|
|
$(OPENSSL_PATH)/providers/implementations/rands/seeding/rand_unix.c
|
|
$(OPENSSL_PATH)/providers/implementations/rands/seeding/rand_win.c
|
|
$(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/signature/ml_dsa_sig.c
|
|
$(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/signature/slh_dsa_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c
|
|
$(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c
|
|
$(OPENSSL_GEN_PATH)/crypto/params_idx.c
|
|
+ $(OPENSSL_PATH)/providers/common/der/der_ml_dsa_key.c
|
|
$(OPENSSL_PATH)/providers/common/der/der_rsa_key.c
|
|
+ $(OPENSSL_PATH)/providers/common/der/der_slh_dsa_key.c
|
|
$(OPENSSL_PATH)/providers/common/provider_ctx.c
|
|
$(OPENSSL_PATH)/providers/common/provider_err.c
|
|
$(OPENSSL_PATH)/providers/implementations/ciphers/ciphercommon.c
|
|
@@ -636,7 +678,9 @@
|
|
$(OPENSSL_PATH)/providers/implementations/digests/digestcommon.c
|
|
$(OPENSSL_PATH)/ssl/record/methods/tls_pad.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_digests_gen.c
|
|
+ $(OPENSSL_GEN_PATH)/providers/common/der/der_ml_dsa_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_rsa_gen.c
|
|
+ $(OPENSSL_GEN_PATH)/providers/common/der/der_slh_dsa_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_wrap_gen.c
|
|
$(OPENSSL_PATH)/ssl/bio_ssl.c
|
|
$(OPENSSL_PATH)/ssl/d1_lib.c
|
|
@@ -652,7 +696,6 @@
|
|
$(OPENSSL_PATH)/ssl/ssl_cert_comp.c
|
|
$(OPENSSL_PATH)/ssl/ssl_ciph.c
|
|
$(OPENSSL_PATH)/ssl/ssl_conf.c
|
|
- $(OPENSSL_PATH)/ssl/ssl_err.c
|
|
$(OPENSSL_PATH)/ssl/ssl_err_legacy.c
|
|
$(OPENSSL_PATH)/ssl/ssl_init.c
|
|
$(OPENSSL_PATH)/ssl/ssl_lib.c
|
|
@@ -669,6 +712,8 @@
|
|
$(OPENSSL_PATH)/ssl/tls13_enc.c
|
|
$(OPENSSL_PATH)/ssl/tls_depr.c
|
|
$(OPENSSL_PATH)/ssl/tls_srp.c
|
|
+ $(OPENSSL_PATH)/ssl/quic/quic_tls.c
|
|
+ $(OPENSSL_PATH)/ssl/quic/quic_tls_api.c
|
|
$(OPENSSL_PATH)/ssl/record/rec_layer_d1.c
|
|
$(OPENSSL_PATH)/ssl/record/rec_layer_s3.c
|
|
$(OPENSSL_PATH)/ssl/record/methods/dtls_meth.c
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslLibAccel.inf b/CryptoPkg/Library/OpensslLib/OpensslLibAccel.inf
|
|
index 1138211ff4..94eebf8dfd 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslLibAccel.inf
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslLibAccel.inf
|
|
@@ -306,7 +306,9 @@
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_check.c
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_gn.c
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_lib.c
|
|
+ $(OPENSSL_PATH)/crypto/evp/s_lib.c
|
|
$(OPENSSL_PATH)/crypto/evp/signature.c
|
|
+ $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_backend.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_dh.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c
|
|
@@ -314,6 +316,7 @@
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c
|
|
+ $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c
|
|
$(OPENSSL_PATH)/crypto/hashtable/hashtable.c
|
|
$(OPENSSL_PATH)/crypto/hmac/hmac.c
|
|
$(OPENSSL_PATH)/crypto/hpke/hpke.c
|
|
@@ -369,6 +372,7 @@
|
|
$(OPENSSL_PATH)/crypto/self_test_core.c
|
|
$(OPENSSL_PATH)/crypto/sleep.c
|
|
$(OPENSSL_PATH)/crypto/sparse_array.c
|
|
+ $(OPENSSL_PATH)/crypto/ssl_err.c
|
|
$(OPENSSL_PATH)/crypto/threads_lib.c
|
|
$(OPENSSL_PATH)/crypto/threads_none.c
|
|
$(OPENSSL_PATH)/crypto/threads_pthread.c
|
|
@@ -379,6 +383,15 @@
|
|
$(OPENSSL_PATH)/crypto/md5/md5_dgst.c
|
|
$(OPENSSL_PATH)/crypto/md5/md5_one.c
|
|
$(OPENSSL_PATH)/crypto/md5/md5_sha1.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_encoders.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key_compress.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_matrix.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_ntt.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_params.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sample.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sign.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_kem/ml_kem.c
|
|
$(OPENSSL_PATH)/crypto/modes/cbc128.c
|
|
$(OPENSSL_PATH)/crypto/modes/ccm128.c
|
|
$(OPENSSL_PATH)/crypto/modes/cfb128.c
|
|
@@ -459,6 +472,16 @@
|
|
$(OPENSSL_PATH)/crypto/sha/sha256.c
|
|
$(OPENSSL_PATH)/crypto/sha/sha3.c
|
|
$(OPENSSL_PATH)/crypto/sha/sha512.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_adrs.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_hash_ctx.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_key.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_fors.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_hash.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_hypertree.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_params.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_wots.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_xmss.c
|
|
$(OPENSSL_PATH)/crypto/sm3/legacy_sm3.c
|
|
$(OPENSSL_PATH)/crypto/sm3/sm3.c
|
|
$(OPENSSL_PATH)/crypto/stack/stack.c
|
|
@@ -483,13 +506,17 @@
|
|
$(OPENSSL_PATH)/crypto/x509/t_crl.c
|
|
$(OPENSSL_PATH)/crypto/x509/t_req.c
|
|
$(OPENSSL_PATH)/crypto/x509/t_x509.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_aaa.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_addr.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_admis.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_akeya.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_akid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_asid.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_audit_id.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_authattid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_battcons.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_bcons.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_bitst.c
|
|
@@ -517,12 +544,14 @@
|
|
$(OPENSSL_PATH)/crypto/x509/v3_pmaps.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_prn.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_purp.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_san.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_sda.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_single_use.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_skid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_soa_id.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_sxnet.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_timespec.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_tlsf.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_usernotice.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_utf8.c
|
|
@@ -601,6 +630,9 @@
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/decode_pvk2key.c
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/decode_spki2typespki.c
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/endecoder_common.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_common_codecs.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_dsa_codecs.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_kem_codecs.c
|
|
$(OPENSSL_PATH)/providers/implementations/exchange/dh_exch.c
|
|
$(OPENSSL_PATH)/providers/implementations/exchange/kdf_exch.c
|
|
$(OPENSSL_PATH)/providers/implementations/kdfs/argon2.c
|
|
@@ -616,11 +648,15 @@
|
|
$(OPENSSL_PATH)/providers/implementations/kdfs/sskdf.c
|
|
$(OPENSSL_PATH)/providers/implementations/kdfs/tls1_prf.c
|
|
$(OPENSSL_PATH)/providers/implementations/kdfs/x942kdf.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/kem/ml_kem_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/kem/rsa_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/dh_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/kdf_legacy_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/mac_legacy_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_dsa_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_kem_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/rsa_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/slh_dsa_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/gmac_prov.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/hmac_prov.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/kmac_prov.c
|
|
@@ -636,10 +672,16 @@
|
|
$(OPENSSL_PATH)/providers/implementations/rands/seeding/rand_unix.c
|
|
$(OPENSSL_PATH)/providers/implementations/rands/seeding/rand_win.c
|
|
$(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/signature/ml_dsa_sig.c
|
|
$(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/signature/slh_dsa_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c
|
|
$(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c
|
|
$(OPENSSL_GEN_PATH)/crypto/params_idx.c
|
|
+ $(OPENSSL_PATH)/providers/common/der/der_ml_dsa_key.c
|
|
$(OPENSSL_PATH)/providers/common/der/der_rsa_key.c
|
|
+ $(OPENSSL_PATH)/providers/common/der/der_slh_dsa_key.c
|
|
$(OPENSSL_PATH)/providers/common/provider_ctx.c
|
|
$(OPENSSL_PATH)/providers/common/provider_err.c
|
|
$(OPENSSL_PATH)/providers/implementations/ciphers/ciphercommon.c
|
|
@@ -652,7 +694,9 @@
|
|
$(OPENSSL_PATH)/providers/implementations/digests/digestcommon.c
|
|
$(OPENSSL_PATH)/ssl/record/methods/tls_pad.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_digests_gen.c
|
|
+ $(OPENSSL_GEN_PATH)/providers/common/der/der_ml_dsa_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_rsa_gen.c
|
|
+ $(OPENSSL_GEN_PATH)/providers/common/der/der_slh_dsa_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_wrap_gen.c
|
|
$(OPENSSL_PATH)/ssl/bio_ssl.c
|
|
$(OPENSSL_PATH)/ssl/d1_lib.c
|
|
@@ -668,7 +712,6 @@
|
|
$(OPENSSL_PATH)/ssl/ssl_cert_comp.c
|
|
$(OPENSSL_PATH)/ssl/ssl_ciph.c
|
|
$(OPENSSL_PATH)/ssl/ssl_conf.c
|
|
- $(OPENSSL_PATH)/ssl/ssl_err.c
|
|
$(OPENSSL_PATH)/ssl/ssl_err_legacy.c
|
|
$(OPENSSL_PATH)/ssl/ssl_init.c
|
|
$(OPENSSL_PATH)/ssl/ssl_lib.c
|
|
@@ -685,6 +728,8 @@
|
|
$(OPENSSL_PATH)/ssl/tls13_enc.c
|
|
$(OPENSSL_PATH)/ssl/tls_depr.c
|
|
$(OPENSSL_PATH)/ssl/tls_srp.c
|
|
+ $(OPENSSL_PATH)/ssl/quic/quic_tls.c
|
|
+ $(OPENSSL_PATH)/ssl/quic/quic_tls_api.c
|
|
$(OPENSSL_PATH)/ssl/record/rec_layer_d1.c
|
|
$(OPENSSL_PATH)/ssl/record/rec_layer_s3.c
|
|
$(OPENSSL_PATH)/ssl/record/methods/dtls_meth.c
|
|
@@ -982,7 +1027,9 @@
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_check.c
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_gn.c
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_lib.c
|
|
+ $(OPENSSL_PATH)/crypto/evp/s_lib.c
|
|
$(OPENSSL_PATH)/crypto/evp/signature.c
|
|
+ $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_backend.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_dh.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c
|
|
@@ -990,6 +1037,7 @@
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c
|
|
+ $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c
|
|
$(OPENSSL_PATH)/crypto/hashtable/hashtable.c
|
|
$(OPENSSL_PATH)/crypto/hmac/hmac.c
|
|
$(OPENSSL_PATH)/crypto/hpke/hpke.c
|
|
@@ -1045,6 +1093,7 @@
|
|
$(OPENSSL_PATH)/crypto/self_test_core.c
|
|
$(OPENSSL_PATH)/crypto/sleep.c
|
|
$(OPENSSL_PATH)/crypto/sparse_array.c
|
|
+ $(OPENSSL_PATH)/crypto/ssl_err.c
|
|
$(OPENSSL_PATH)/crypto/threads_lib.c
|
|
$(OPENSSL_PATH)/crypto/threads_none.c
|
|
$(OPENSSL_PATH)/crypto/threads_pthread.c
|
|
@@ -1055,6 +1104,15 @@
|
|
$(OPENSSL_PATH)/crypto/md5/md5_dgst.c
|
|
$(OPENSSL_PATH)/crypto/md5/md5_one.c
|
|
$(OPENSSL_PATH)/crypto/md5/md5_sha1.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_encoders.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key_compress.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_matrix.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_ntt.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_params.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sample.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sign.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_kem/ml_kem.c
|
|
$(OPENSSL_PATH)/crypto/modes/cbc128.c
|
|
$(OPENSSL_PATH)/crypto/modes/ccm128.c
|
|
$(OPENSSL_PATH)/crypto/modes/cfb128.c
|
|
@@ -1134,6 +1192,16 @@
|
|
$(OPENSSL_PATH)/crypto/sha/sha256.c
|
|
$(OPENSSL_PATH)/crypto/sha/sha3.c
|
|
$(OPENSSL_PATH)/crypto/sha/sha512.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_adrs.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_hash_ctx.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_key.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_fors.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_hash.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_hypertree.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_params.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_wots.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_xmss.c
|
|
$(OPENSSL_PATH)/crypto/sm3/legacy_sm3.c
|
|
$(OPENSSL_PATH)/crypto/sm3/sm3.c
|
|
$(OPENSSL_PATH)/crypto/stack/stack.c
|
|
@@ -1158,13 +1226,17 @@
|
|
$(OPENSSL_PATH)/crypto/x509/t_crl.c
|
|
$(OPENSSL_PATH)/crypto/x509/t_req.c
|
|
$(OPENSSL_PATH)/crypto/x509/t_x509.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_aaa.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_addr.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_admis.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_akeya.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_akid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_asid.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_audit_id.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_authattid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_battcons.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_bcons.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_bitst.c
|
|
@@ -1192,12 +1264,14 @@
|
|
$(OPENSSL_PATH)/crypto/x509/v3_pmaps.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_prn.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_purp.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_san.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_sda.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_single_use.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_skid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_soa_id.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_sxnet.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_timespec.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_tlsf.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_usernotice.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_utf8.c
|
|
@@ -1276,6 +1350,9 @@
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/decode_pvk2key.c
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/decode_spki2typespki.c
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/endecoder_common.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_common_codecs.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_dsa_codecs.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_kem_codecs.c
|
|
$(OPENSSL_PATH)/providers/implementations/exchange/dh_exch.c
|
|
$(OPENSSL_PATH)/providers/implementations/exchange/kdf_exch.c
|
|
$(OPENSSL_PATH)/providers/implementations/kdfs/argon2.c
|
|
@@ -1291,11 +1368,15 @@
|
|
$(OPENSSL_PATH)/providers/implementations/kdfs/sskdf.c
|
|
$(OPENSSL_PATH)/providers/implementations/kdfs/tls1_prf.c
|
|
$(OPENSSL_PATH)/providers/implementations/kdfs/x942kdf.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/kem/ml_kem_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/kem/rsa_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/dh_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/kdf_legacy_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/mac_legacy_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_dsa_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_kem_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/rsa_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/slh_dsa_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/gmac_prov.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/hmac_prov.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/kmac_prov.c
|
|
@@ -1311,10 +1392,16 @@
|
|
$(OPENSSL_PATH)/providers/implementations/rands/seeding/rand_unix.c
|
|
$(OPENSSL_PATH)/providers/implementations/rands/seeding/rand_win.c
|
|
$(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/signature/ml_dsa_sig.c
|
|
$(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/signature/slh_dsa_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c
|
|
$(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c
|
|
$(OPENSSL_GEN_PATH)/crypto/params_idx.c
|
|
+ $(OPENSSL_PATH)/providers/common/der/der_ml_dsa_key.c
|
|
$(OPENSSL_PATH)/providers/common/der/der_rsa_key.c
|
|
+ $(OPENSSL_PATH)/providers/common/der/der_slh_dsa_key.c
|
|
$(OPENSSL_PATH)/providers/common/provider_ctx.c
|
|
$(OPENSSL_PATH)/providers/common/provider_err.c
|
|
$(OPENSSL_PATH)/providers/implementations/ciphers/ciphercommon.c
|
|
@@ -1327,7 +1414,9 @@
|
|
$(OPENSSL_PATH)/providers/implementations/digests/digestcommon.c
|
|
$(OPENSSL_PATH)/ssl/record/methods/tls_pad.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_digests_gen.c
|
|
+ $(OPENSSL_GEN_PATH)/providers/common/der/der_ml_dsa_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_rsa_gen.c
|
|
+ $(OPENSSL_GEN_PATH)/providers/common/der/der_slh_dsa_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_wrap_gen.c
|
|
$(OPENSSL_PATH)/ssl/bio_ssl.c
|
|
$(OPENSSL_PATH)/ssl/d1_lib.c
|
|
@@ -1343,7 +1432,6 @@
|
|
$(OPENSSL_PATH)/ssl/ssl_cert_comp.c
|
|
$(OPENSSL_PATH)/ssl/ssl_ciph.c
|
|
$(OPENSSL_PATH)/ssl/ssl_conf.c
|
|
- $(OPENSSL_PATH)/ssl/ssl_err.c
|
|
$(OPENSSL_PATH)/ssl/ssl_err_legacy.c
|
|
$(OPENSSL_PATH)/ssl/ssl_init.c
|
|
$(OPENSSL_PATH)/ssl/ssl_lib.c
|
|
@@ -1360,6 +1448,8 @@
|
|
$(OPENSSL_PATH)/ssl/tls13_enc.c
|
|
$(OPENSSL_PATH)/ssl/tls_depr.c
|
|
$(OPENSSL_PATH)/ssl/tls_srp.c
|
|
+ $(OPENSSL_PATH)/ssl/quic/quic_tls.c
|
|
+ $(OPENSSL_PATH)/ssl/quic/quic_tls_api.c
|
|
$(OPENSSL_PATH)/ssl/record/rec_layer_d1.c
|
|
$(OPENSSL_PATH)/ssl/record/rec_layer_s3.c
|
|
$(OPENSSL_PATH)/ssl/record/methods/dtls_meth.c
|
|
@@ -1382,6 +1472,7 @@
|
|
$(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/aesni-sha1-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
$(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/aesni-sha256-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
$(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/aesni-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
+ $(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/aesni-xts-avx512.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
$(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/bsaes-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
$(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/vpaes-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
$(OPENSSL_GEN_PATH)/X64-MSFT/crypto/x86_64cpuid.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
@@ -1400,6 +1491,7 @@
|
|
$(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/aesni-sha1-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
$(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/aesni-sha256-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
$(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/aesni-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
+ $(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/aesni-xts-avx512.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
$(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/bsaes-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
$(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/vpaes-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
$(OPENSSL_GEN_PATH)/X64-GCC/crypto/x86_64cpuid.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
@@ -1676,7 +1768,9 @@
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_check.c
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_gn.c
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_lib.c
|
|
+ $(OPENSSL_PATH)/crypto/evp/s_lib.c
|
|
$(OPENSSL_PATH)/crypto/evp/signature.c
|
|
+ $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_backend.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_dh.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c
|
|
@@ -1684,6 +1778,7 @@
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c
|
|
+ $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c
|
|
$(OPENSSL_PATH)/crypto/hashtable/hashtable.c
|
|
$(OPENSSL_PATH)/crypto/hmac/hmac.c
|
|
$(OPENSSL_PATH)/crypto/hpke/hpke.c
|
|
@@ -1739,6 +1834,7 @@
|
|
$(OPENSSL_PATH)/crypto/self_test_core.c
|
|
$(OPENSSL_PATH)/crypto/sleep.c
|
|
$(OPENSSL_PATH)/crypto/sparse_array.c
|
|
+ $(OPENSSL_PATH)/crypto/ssl_err.c
|
|
$(OPENSSL_PATH)/crypto/threads_lib.c
|
|
$(OPENSSL_PATH)/crypto/threads_none.c
|
|
$(OPENSSL_PATH)/crypto/threads_pthread.c
|
|
@@ -1749,6 +1845,15 @@
|
|
$(OPENSSL_PATH)/crypto/md5/md5_dgst.c
|
|
$(OPENSSL_PATH)/crypto/md5/md5_one.c
|
|
$(OPENSSL_PATH)/crypto/md5/md5_sha1.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_encoders.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key_compress.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_matrix.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_ntt.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_params.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sample.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sign.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_kem/ml_kem.c
|
|
$(OPENSSL_PATH)/crypto/modes/cbc128.c
|
|
$(OPENSSL_PATH)/crypto/modes/ccm128.c
|
|
$(OPENSSL_PATH)/crypto/modes/cfb128.c
|
|
@@ -1828,6 +1933,16 @@
|
|
$(OPENSSL_PATH)/crypto/sha/sha256.c
|
|
$(OPENSSL_PATH)/crypto/sha/sha3.c
|
|
$(OPENSSL_PATH)/crypto/sha/sha512.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_adrs.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_hash_ctx.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_key.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_fors.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_hash.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_hypertree.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_params.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_wots.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_xmss.c
|
|
$(OPENSSL_PATH)/crypto/sm3/legacy_sm3.c
|
|
$(OPENSSL_PATH)/crypto/sm3/sm3.c
|
|
$(OPENSSL_PATH)/crypto/stack/stack.c
|
|
@@ -1852,13 +1967,17 @@
|
|
$(OPENSSL_PATH)/crypto/x509/t_crl.c
|
|
$(OPENSSL_PATH)/crypto/x509/t_req.c
|
|
$(OPENSSL_PATH)/crypto/x509/t_x509.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_aaa.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_addr.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_admis.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_akeya.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_akid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_asid.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_audit_id.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_authattid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_battcons.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_bcons.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_bitst.c
|
|
@@ -1886,12 +2005,14 @@
|
|
$(OPENSSL_PATH)/crypto/x509/v3_pmaps.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_prn.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_purp.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_san.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_sda.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_single_use.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_skid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_soa_id.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_sxnet.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_timespec.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_tlsf.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_usernotice.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_utf8.c
|
|
@@ -1970,6 +2091,9 @@
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/decode_pvk2key.c
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/decode_spki2typespki.c
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/endecoder_common.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_common_codecs.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_dsa_codecs.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_kem_codecs.c
|
|
$(OPENSSL_PATH)/providers/implementations/exchange/dh_exch.c
|
|
$(OPENSSL_PATH)/providers/implementations/exchange/kdf_exch.c
|
|
$(OPENSSL_PATH)/providers/implementations/kdfs/argon2.c
|
|
@@ -1985,11 +2109,15 @@
|
|
$(OPENSSL_PATH)/providers/implementations/kdfs/sskdf.c
|
|
$(OPENSSL_PATH)/providers/implementations/kdfs/tls1_prf.c
|
|
$(OPENSSL_PATH)/providers/implementations/kdfs/x942kdf.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/kem/ml_kem_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/kem/rsa_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/dh_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/kdf_legacy_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/mac_legacy_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_dsa_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_kem_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/rsa_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/slh_dsa_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/gmac_prov.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/hmac_prov.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/kmac_prov.c
|
|
@@ -2005,10 +2133,16 @@
|
|
$(OPENSSL_PATH)/providers/implementations/rands/seeding/rand_unix.c
|
|
$(OPENSSL_PATH)/providers/implementations/rands/seeding/rand_win.c
|
|
$(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/signature/ml_dsa_sig.c
|
|
$(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/signature/slh_dsa_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c
|
|
$(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c
|
|
$(OPENSSL_GEN_PATH)/crypto/params_idx.c
|
|
+ $(OPENSSL_PATH)/providers/common/der/der_ml_dsa_key.c
|
|
$(OPENSSL_PATH)/providers/common/der/der_rsa_key.c
|
|
+ $(OPENSSL_PATH)/providers/common/der/der_slh_dsa_key.c
|
|
$(OPENSSL_PATH)/providers/common/provider_ctx.c
|
|
$(OPENSSL_PATH)/providers/common/provider_err.c
|
|
$(OPENSSL_PATH)/providers/implementations/ciphers/ciphercommon.c
|
|
@@ -2021,7 +2155,9 @@
|
|
$(OPENSSL_PATH)/providers/implementations/digests/digestcommon.c
|
|
$(OPENSSL_PATH)/ssl/record/methods/tls_pad.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_digests_gen.c
|
|
+ $(OPENSSL_GEN_PATH)/providers/common/der/der_ml_dsa_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_rsa_gen.c
|
|
+ $(OPENSSL_GEN_PATH)/providers/common/der/der_slh_dsa_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_wrap_gen.c
|
|
$(OPENSSL_PATH)/ssl/bio_ssl.c
|
|
$(OPENSSL_PATH)/ssl/d1_lib.c
|
|
@@ -2037,7 +2173,6 @@
|
|
$(OPENSSL_PATH)/ssl/ssl_cert_comp.c
|
|
$(OPENSSL_PATH)/ssl/ssl_ciph.c
|
|
$(OPENSSL_PATH)/ssl/ssl_conf.c
|
|
- $(OPENSSL_PATH)/ssl/ssl_err.c
|
|
$(OPENSSL_PATH)/ssl/ssl_err_legacy.c
|
|
$(OPENSSL_PATH)/ssl/ssl_init.c
|
|
$(OPENSSL_PATH)/ssl/ssl_lib.c
|
|
@@ -2054,6 +2189,8 @@
|
|
$(OPENSSL_PATH)/ssl/tls13_enc.c
|
|
$(OPENSSL_PATH)/ssl/tls_depr.c
|
|
$(OPENSSL_PATH)/ssl/tls_srp.c
|
|
+ $(OPENSSL_PATH)/ssl/quic/quic_tls.c
|
|
+ $(OPENSSL_PATH)/ssl/quic/quic_tls_api.c
|
|
$(OPENSSL_PATH)/ssl/record/rec_layer_d1.c
|
|
$(OPENSSL_PATH)/ssl/record/rec_layer_s3.c
|
|
$(OPENSSL_PATH)/ssl/record/methods/dtls_meth.c
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslLibCrypto.inf b/CryptoPkg/Library/OpensslLib/OpensslLibCrypto.inf
|
|
index a5dd36432d..122b359c02 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslLibCrypto.inf
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslLibCrypto.inf
|
|
@@ -290,7 +290,9 @@
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_check.c
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_gn.c
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_lib.c
|
|
+ $(OPENSSL_PATH)/crypto/evp/s_lib.c
|
|
$(OPENSSL_PATH)/crypto/evp/signature.c
|
|
+ $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_backend.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_dh.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c
|
|
@@ -298,6 +300,7 @@
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c
|
|
+ $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c
|
|
$(OPENSSL_PATH)/crypto/hashtable/hashtable.c
|
|
$(OPENSSL_PATH)/crypto/hmac/hmac.c
|
|
$(OPENSSL_PATH)/crypto/hpke/hpke.c
|
|
@@ -354,6 +357,7 @@
|
|
$(OPENSSL_PATH)/crypto/self_test_core.c
|
|
$(OPENSSL_PATH)/crypto/sleep.c
|
|
$(OPENSSL_PATH)/crypto/sparse_array.c
|
|
+ $(OPENSSL_PATH)/crypto/ssl_err.c
|
|
$(OPENSSL_PATH)/crypto/threads_lib.c
|
|
$(OPENSSL_PATH)/crypto/threads_none.c
|
|
$(OPENSSL_PATH)/crypto/threads_pthread.c
|
|
@@ -364,6 +368,15 @@
|
|
$(OPENSSL_PATH)/crypto/md5/md5_dgst.c
|
|
$(OPENSSL_PATH)/crypto/md5/md5_one.c
|
|
$(OPENSSL_PATH)/crypto/md5/md5_sha1.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_encoders.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key_compress.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_matrix.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_ntt.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_params.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sample.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sign.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_kem/ml_kem.c
|
|
$(OPENSSL_PATH)/crypto/modes/cbc128.c
|
|
$(OPENSSL_PATH)/crypto/modes/ccm128.c
|
|
$(OPENSSL_PATH)/crypto/modes/cfb128.c
|
|
@@ -444,6 +457,16 @@
|
|
$(OPENSSL_PATH)/crypto/sha/sha256.c
|
|
$(OPENSSL_PATH)/crypto/sha/sha3.c
|
|
$(OPENSSL_PATH)/crypto/sha/sha512.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_adrs.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_hash_ctx.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_key.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_fors.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_hash.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_hypertree.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_params.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_wots.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_xmss.c
|
|
$(OPENSSL_PATH)/crypto/sm3/legacy_sm3.c
|
|
$(OPENSSL_PATH)/crypto/sm3/sm3.c
|
|
$(OPENSSL_PATH)/crypto/stack/stack.c
|
|
@@ -468,13 +491,17 @@
|
|
$(OPENSSL_PATH)/crypto/x509/t_crl.c
|
|
$(OPENSSL_PATH)/crypto/x509/t_req.c
|
|
$(OPENSSL_PATH)/crypto/x509/t_x509.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_aaa.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_addr.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_admis.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_akeya.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_akid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_asid.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_audit_id.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_authattid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_battcons.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_bcons.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_bitst.c
|
|
@@ -502,12 +529,14 @@
|
|
$(OPENSSL_PATH)/crypto/x509/v3_pmaps.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_prn.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_purp.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_san.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_sda.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_single_use.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_skid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_soa_id.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_sxnet.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_timespec.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_tlsf.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_usernotice.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_utf8.c
|
|
@@ -586,6 +615,9 @@
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/decode_pvk2key.c
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/decode_spki2typespki.c
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/endecoder_common.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_common_codecs.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_dsa_codecs.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_kem_codecs.c
|
|
$(OPENSSL_PATH)/providers/implementations/exchange/dh_exch.c
|
|
$(OPENSSL_PATH)/providers/implementations/exchange/kdf_exch.c
|
|
$(OPENSSL_PATH)/providers/implementations/kdfs/argon2.c
|
|
@@ -601,11 +633,15 @@
|
|
$(OPENSSL_PATH)/providers/implementations/kdfs/sskdf.c
|
|
$(OPENSSL_PATH)/providers/implementations/kdfs/tls1_prf.c
|
|
$(OPENSSL_PATH)/providers/implementations/kdfs/x942kdf.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/kem/ml_kem_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/kem/rsa_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/dh_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/kdf_legacy_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/mac_legacy_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_dsa_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_kem_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/rsa_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/slh_dsa_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/gmac_prov.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/hmac_prov.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/kmac_prov.c
|
|
@@ -621,10 +657,16 @@
|
|
$(OPENSSL_PATH)/providers/implementations/rands/seeding/rand_unix.c
|
|
$(OPENSSL_PATH)/providers/implementations/rands/seeding/rand_win.c
|
|
$(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/signature/ml_dsa_sig.c
|
|
$(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/signature/slh_dsa_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c
|
|
$(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c
|
|
$(OPENSSL_GEN_PATH)/crypto/params_idx.c
|
|
+ $(OPENSSL_PATH)/providers/common/der/der_ml_dsa_key.c
|
|
$(OPENSSL_PATH)/providers/common/der/der_rsa_key.c
|
|
+ $(OPENSSL_PATH)/providers/common/der/der_slh_dsa_key.c
|
|
$(OPENSSL_PATH)/providers/common/provider_ctx.c
|
|
$(OPENSSL_PATH)/providers/common/provider_err.c
|
|
$(OPENSSL_PATH)/providers/implementations/ciphers/ciphercommon.c
|
|
@@ -637,7 +679,9 @@
|
|
$(OPENSSL_PATH)/providers/implementations/digests/digestcommon.c
|
|
$(OPENSSL_PATH)/ssl/record/methods/tls_pad.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_digests_gen.c
|
|
+ $(OPENSSL_GEN_PATH)/providers/common/der/der_ml_dsa_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_rsa_gen.c
|
|
+ $(OPENSSL_GEN_PATH)/providers/common/der/der_slh_dsa_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_wrap_gen.c
|
|
# Autogenerated files list ends here
|
|
buildinf.h
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslLibFull.inf b/CryptoPkg/Library/OpensslLib/OpensslLibFull.inf
|
|
index 75f40b8abf..c1823fe406 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslLibFull.inf
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslLibFull.inf
|
|
@@ -332,7 +332,9 @@
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_check.c
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_gn.c
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_lib.c
|
|
+ $(OPENSSL_PATH)/crypto/evp/s_lib.c
|
|
$(OPENSSL_PATH)/crypto/evp/signature.c
|
|
+ $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_backend.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_dh.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c
|
|
@@ -340,6 +342,7 @@
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c
|
|
+ $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c
|
|
$(OPENSSL_PATH)/crypto/hashtable/hashtable.c
|
|
$(OPENSSL_PATH)/crypto/hmac/hmac.c
|
|
$(OPENSSL_PATH)/crypto/hpke/hpke.c
|
|
@@ -396,6 +399,7 @@
|
|
$(OPENSSL_PATH)/crypto/self_test_core.c
|
|
$(OPENSSL_PATH)/crypto/sleep.c
|
|
$(OPENSSL_PATH)/crypto/sparse_array.c
|
|
+ $(OPENSSL_PATH)/crypto/ssl_err.c
|
|
$(OPENSSL_PATH)/crypto/threads_lib.c
|
|
$(OPENSSL_PATH)/crypto/threads_none.c
|
|
$(OPENSSL_PATH)/crypto/threads_pthread.c
|
|
@@ -406,6 +410,15 @@
|
|
$(OPENSSL_PATH)/crypto/md5/md5_dgst.c
|
|
$(OPENSSL_PATH)/crypto/md5/md5_one.c
|
|
$(OPENSSL_PATH)/crypto/md5/md5_sha1.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_encoders.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key_compress.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_matrix.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_ntt.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_params.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sample.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sign.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_kem/ml_kem.c
|
|
$(OPENSSL_PATH)/crypto/modes/cbc128.c
|
|
$(OPENSSL_PATH)/crypto/modes/ccm128.c
|
|
$(OPENSSL_PATH)/crypto/modes/cfb128.c
|
|
@@ -486,6 +499,16 @@
|
|
$(OPENSSL_PATH)/crypto/sha/sha256.c
|
|
$(OPENSSL_PATH)/crypto/sha/sha3.c
|
|
$(OPENSSL_PATH)/crypto/sha/sha512.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_adrs.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_hash_ctx.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_key.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_fors.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_hash.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_hypertree.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_params.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_wots.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_xmss.c
|
|
$(OPENSSL_PATH)/crypto/sm3/legacy_sm3.c
|
|
$(OPENSSL_PATH)/crypto/sm3/sm3.c
|
|
$(OPENSSL_PATH)/crypto/stack/stack.c
|
|
@@ -510,13 +533,17 @@
|
|
$(OPENSSL_PATH)/crypto/x509/t_crl.c
|
|
$(OPENSSL_PATH)/crypto/x509/t_req.c
|
|
$(OPENSSL_PATH)/crypto/x509/t_x509.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_aaa.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_addr.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_admis.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_akeya.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_akid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_asid.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_audit_id.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_authattid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_battcons.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_bcons.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_bitst.c
|
|
@@ -544,12 +571,14 @@
|
|
$(OPENSSL_PATH)/crypto/x509/v3_pmaps.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_prn.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_purp.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_san.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_sda.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_single_use.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_skid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_soa_id.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_sxnet.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_timespec.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_tlsf.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_usernotice.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_utf8.c
|
|
@@ -628,6 +657,9 @@
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/decode_pvk2key.c
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/decode_spki2typespki.c
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/endecoder_common.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_common_codecs.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_dsa_codecs.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_kem_codecs.c
|
|
$(OPENSSL_PATH)/providers/implementations/exchange/dh_exch.c
|
|
$(OPENSSL_PATH)/providers/implementations/exchange/ecdh_exch.c
|
|
$(OPENSSL_PATH)/providers/implementations/exchange/ecx_exch.c
|
|
@@ -648,13 +680,19 @@
|
|
$(OPENSSL_PATH)/providers/implementations/kem/ec_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/kem/ecx_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/kem/kem_util.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/kem/ml_kem_kem.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/kem/mlx_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/kem/rsa_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/dh_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/ec_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/ecx_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/kdf_legacy_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/mac_legacy_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_dsa_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_kem_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/mlx_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/rsa_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/slh_dsa_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/gmac_prov.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/hmac_prov.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/kmac_prov.c
|
|
@@ -672,13 +710,19 @@
|
|
$(OPENSSL_PATH)/providers/implementations/signature/ecdsa_sig.c
|
|
$(OPENSSL_PATH)/providers/implementations/signature/eddsa_sig.c
|
|
$(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/signature/ml_dsa_sig.c
|
|
$(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/signature/slh_dsa_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c
|
|
$(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c
|
|
$(OPENSSL_GEN_PATH)/crypto/params_idx.c
|
|
$(OPENSSL_PATH)/providers/common/der/der_ec_key.c
|
|
$(OPENSSL_PATH)/providers/common/der/der_ec_sig.c
|
|
$(OPENSSL_PATH)/providers/common/der/der_ecx_key.c
|
|
+ $(OPENSSL_PATH)/providers/common/der/der_ml_dsa_key.c
|
|
$(OPENSSL_PATH)/providers/common/der/der_rsa_key.c
|
|
+ $(OPENSSL_PATH)/providers/common/der/der_slh_dsa_key.c
|
|
$(OPENSSL_PATH)/providers/common/provider_ctx.c
|
|
$(OPENSSL_PATH)/providers/common/provider_err.c
|
|
$(OPENSSL_PATH)/providers/implementations/ciphers/ciphercommon.c
|
|
@@ -693,7 +737,9 @@
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_digests_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_ec_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_ecx_gen.c
|
|
+ $(OPENSSL_GEN_PATH)/providers/common/der/der_ml_dsa_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_rsa_gen.c
|
|
+ $(OPENSSL_GEN_PATH)/providers/common/der/der_slh_dsa_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_wrap_gen.c
|
|
$(OPENSSL_PATH)/ssl/bio_ssl.c
|
|
$(OPENSSL_PATH)/ssl/d1_lib.c
|
|
@@ -709,7 +755,6 @@
|
|
$(OPENSSL_PATH)/ssl/ssl_cert_comp.c
|
|
$(OPENSSL_PATH)/ssl/ssl_ciph.c
|
|
$(OPENSSL_PATH)/ssl/ssl_conf.c
|
|
- $(OPENSSL_PATH)/ssl/ssl_err.c
|
|
$(OPENSSL_PATH)/ssl/ssl_err_legacy.c
|
|
$(OPENSSL_PATH)/ssl/ssl_init.c
|
|
$(OPENSSL_PATH)/ssl/ssl_lib.c
|
|
@@ -726,6 +771,8 @@
|
|
$(OPENSSL_PATH)/ssl/tls13_enc.c
|
|
$(OPENSSL_PATH)/ssl/tls_depr.c
|
|
$(OPENSSL_PATH)/ssl/tls_srp.c
|
|
+ $(OPENSSL_PATH)/ssl/quic/quic_tls.c
|
|
+ $(OPENSSL_PATH)/ssl/quic/quic_tls_api.c
|
|
$(OPENSSL_PATH)/ssl/record/rec_layer_d1.c
|
|
$(OPENSSL_PATH)/ssl/record/rec_layer_s3.c
|
|
$(OPENSSL_PATH)/ssl/record/methods/dtls_meth.c
|
|
diff --git a/CryptoPkg/Library/OpensslLib/OpensslLibFullAccel.inf b/CryptoPkg/Library/OpensslLib/OpensslLibFullAccel.inf
|
|
index f46659c0a0..64270b816e 100644
|
|
--- a/CryptoPkg/Library/OpensslLib/OpensslLibFullAccel.inf
|
|
+++ b/CryptoPkg/Library/OpensslLib/OpensslLibFullAccel.inf
|
|
@@ -349,7 +349,9 @@
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_check.c
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_gn.c
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_lib.c
|
|
+ $(OPENSSL_PATH)/crypto/evp/s_lib.c
|
|
$(OPENSSL_PATH)/crypto/evp/signature.c
|
|
+ $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_backend.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_dh.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c
|
|
@@ -357,6 +359,7 @@
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c
|
|
+ $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c
|
|
$(OPENSSL_PATH)/crypto/hashtable/hashtable.c
|
|
$(OPENSSL_PATH)/crypto/hmac/hmac.c
|
|
$(OPENSSL_PATH)/crypto/hpke/hpke.c
|
|
@@ -412,6 +415,7 @@
|
|
$(OPENSSL_PATH)/crypto/self_test_core.c
|
|
$(OPENSSL_PATH)/crypto/sleep.c
|
|
$(OPENSSL_PATH)/crypto/sparse_array.c
|
|
+ $(OPENSSL_PATH)/crypto/ssl_err.c
|
|
$(OPENSSL_PATH)/crypto/threads_lib.c
|
|
$(OPENSSL_PATH)/crypto/threads_none.c
|
|
$(OPENSSL_PATH)/crypto/threads_pthread.c
|
|
@@ -422,6 +426,15 @@
|
|
$(OPENSSL_PATH)/crypto/md5/md5_dgst.c
|
|
$(OPENSSL_PATH)/crypto/md5/md5_one.c
|
|
$(OPENSSL_PATH)/crypto/md5/md5_sha1.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_encoders.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key_compress.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_matrix.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_ntt.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_params.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sample.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sign.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_kem/ml_kem.c
|
|
$(OPENSSL_PATH)/crypto/modes/cbc128.c
|
|
$(OPENSSL_PATH)/crypto/modes/ccm128.c
|
|
$(OPENSSL_PATH)/crypto/modes/cfb128.c
|
|
@@ -502,6 +515,16 @@
|
|
$(OPENSSL_PATH)/crypto/sha/sha256.c
|
|
$(OPENSSL_PATH)/crypto/sha/sha3.c
|
|
$(OPENSSL_PATH)/crypto/sha/sha512.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_adrs.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_hash_ctx.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_key.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_fors.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_hash.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_hypertree.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_params.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_wots.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_xmss.c
|
|
$(OPENSSL_PATH)/crypto/sm3/legacy_sm3.c
|
|
$(OPENSSL_PATH)/crypto/sm3/sm3.c
|
|
$(OPENSSL_PATH)/crypto/stack/stack.c
|
|
@@ -526,13 +549,17 @@
|
|
$(OPENSSL_PATH)/crypto/x509/t_crl.c
|
|
$(OPENSSL_PATH)/crypto/x509/t_req.c
|
|
$(OPENSSL_PATH)/crypto/x509/t_x509.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_aaa.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_addr.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_admis.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_akeya.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_akid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_asid.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_audit_id.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_authattid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_battcons.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_bcons.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_bitst.c
|
|
@@ -560,12 +587,14 @@
|
|
$(OPENSSL_PATH)/crypto/x509/v3_pmaps.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_prn.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_purp.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_san.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_sda.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_single_use.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_skid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_soa_id.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_sxnet.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_timespec.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_tlsf.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_usernotice.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_utf8.c
|
|
@@ -644,6 +673,9 @@
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/decode_pvk2key.c
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/decode_spki2typespki.c
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/endecoder_common.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_common_codecs.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_dsa_codecs.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_kem_codecs.c
|
|
$(OPENSSL_PATH)/providers/implementations/exchange/dh_exch.c
|
|
$(OPENSSL_PATH)/providers/implementations/exchange/ecdh_exch.c
|
|
$(OPENSSL_PATH)/providers/implementations/exchange/ecx_exch.c
|
|
@@ -664,13 +696,19 @@
|
|
$(OPENSSL_PATH)/providers/implementations/kem/ec_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/kem/ecx_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/kem/kem_util.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/kem/ml_kem_kem.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/kem/mlx_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/kem/rsa_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/dh_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/ec_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/ecx_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/kdf_legacy_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/mac_legacy_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_dsa_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_kem_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/mlx_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/rsa_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/slh_dsa_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/gmac_prov.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/hmac_prov.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/kmac_prov.c
|
|
@@ -688,13 +726,19 @@
|
|
$(OPENSSL_PATH)/providers/implementations/signature/ecdsa_sig.c
|
|
$(OPENSSL_PATH)/providers/implementations/signature/eddsa_sig.c
|
|
$(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/signature/ml_dsa_sig.c
|
|
$(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/signature/slh_dsa_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c
|
|
$(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c
|
|
$(OPENSSL_GEN_PATH)/crypto/params_idx.c
|
|
$(OPENSSL_PATH)/providers/common/der/der_ec_key.c
|
|
$(OPENSSL_PATH)/providers/common/der/der_ec_sig.c
|
|
$(OPENSSL_PATH)/providers/common/der/der_ecx_key.c
|
|
+ $(OPENSSL_PATH)/providers/common/der/der_ml_dsa_key.c
|
|
$(OPENSSL_PATH)/providers/common/der/der_rsa_key.c
|
|
+ $(OPENSSL_PATH)/providers/common/der/der_slh_dsa_key.c
|
|
$(OPENSSL_PATH)/providers/common/provider_ctx.c
|
|
$(OPENSSL_PATH)/providers/common/provider_err.c
|
|
$(OPENSSL_PATH)/providers/implementations/ciphers/ciphercommon.c
|
|
@@ -709,7 +753,9 @@
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_digests_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_ec_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_ecx_gen.c
|
|
+ $(OPENSSL_GEN_PATH)/providers/common/der/der_ml_dsa_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_rsa_gen.c
|
|
+ $(OPENSSL_GEN_PATH)/providers/common/der/der_slh_dsa_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_wrap_gen.c
|
|
$(OPENSSL_PATH)/ssl/bio_ssl.c
|
|
$(OPENSSL_PATH)/ssl/d1_lib.c
|
|
@@ -725,7 +771,6 @@
|
|
$(OPENSSL_PATH)/ssl/ssl_cert_comp.c
|
|
$(OPENSSL_PATH)/ssl/ssl_ciph.c
|
|
$(OPENSSL_PATH)/ssl/ssl_conf.c
|
|
- $(OPENSSL_PATH)/ssl/ssl_err.c
|
|
$(OPENSSL_PATH)/ssl/ssl_err_legacy.c
|
|
$(OPENSSL_PATH)/ssl/ssl_init.c
|
|
$(OPENSSL_PATH)/ssl/ssl_lib.c
|
|
@@ -742,6 +787,8 @@
|
|
$(OPENSSL_PATH)/ssl/tls13_enc.c
|
|
$(OPENSSL_PATH)/ssl/tls_depr.c
|
|
$(OPENSSL_PATH)/ssl/tls_srp.c
|
|
+ $(OPENSSL_PATH)/ssl/quic/quic_tls.c
|
|
+ $(OPENSSL_PATH)/ssl/quic/quic_tls_api.c
|
|
$(OPENSSL_PATH)/ssl/record/rec_layer_d1.c
|
|
$(OPENSSL_PATH)/ssl/record/rec_layer_s3.c
|
|
$(OPENSSL_PATH)/ssl/record/methods/dtls_meth.c
|
|
@@ -1077,7 +1124,9 @@
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_check.c
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_gn.c
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_lib.c
|
|
+ $(OPENSSL_PATH)/crypto/evp/s_lib.c
|
|
$(OPENSSL_PATH)/crypto/evp/signature.c
|
|
+ $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_backend.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_dh.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c
|
|
@@ -1085,6 +1134,7 @@
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c
|
|
+ $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c
|
|
$(OPENSSL_PATH)/crypto/hashtable/hashtable.c
|
|
$(OPENSSL_PATH)/crypto/hmac/hmac.c
|
|
$(OPENSSL_PATH)/crypto/hpke/hpke.c
|
|
@@ -1140,6 +1190,7 @@
|
|
$(OPENSSL_PATH)/crypto/self_test_core.c
|
|
$(OPENSSL_PATH)/crypto/sleep.c
|
|
$(OPENSSL_PATH)/crypto/sparse_array.c
|
|
+ $(OPENSSL_PATH)/crypto/ssl_err.c
|
|
$(OPENSSL_PATH)/crypto/threads_lib.c
|
|
$(OPENSSL_PATH)/crypto/threads_none.c
|
|
$(OPENSSL_PATH)/crypto/threads_pthread.c
|
|
@@ -1150,6 +1201,15 @@
|
|
$(OPENSSL_PATH)/crypto/md5/md5_dgst.c
|
|
$(OPENSSL_PATH)/crypto/md5/md5_one.c
|
|
$(OPENSSL_PATH)/crypto/md5/md5_sha1.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_encoders.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key_compress.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_matrix.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_ntt.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_params.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sample.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sign.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_kem/ml_kem.c
|
|
$(OPENSSL_PATH)/crypto/modes/cbc128.c
|
|
$(OPENSSL_PATH)/crypto/modes/ccm128.c
|
|
$(OPENSSL_PATH)/crypto/modes/cfb128.c
|
|
@@ -1229,6 +1289,16 @@
|
|
$(OPENSSL_PATH)/crypto/sha/sha256.c
|
|
$(OPENSSL_PATH)/crypto/sha/sha3.c
|
|
$(OPENSSL_PATH)/crypto/sha/sha512.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_adrs.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_hash_ctx.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_key.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_fors.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_hash.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_hypertree.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_params.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_wots.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_xmss.c
|
|
$(OPENSSL_PATH)/crypto/sm3/legacy_sm3.c
|
|
$(OPENSSL_PATH)/crypto/sm3/sm3.c
|
|
$(OPENSSL_PATH)/crypto/stack/stack.c
|
|
@@ -1253,13 +1323,17 @@
|
|
$(OPENSSL_PATH)/crypto/x509/t_crl.c
|
|
$(OPENSSL_PATH)/crypto/x509/t_req.c
|
|
$(OPENSSL_PATH)/crypto/x509/t_x509.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_aaa.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_addr.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_admis.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_akeya.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_akid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_asid.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_audit_id.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_authattid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_battcons.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_bcons.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_bitst.c
|
|
@@ -1287,12 +1361,14 @@
|
|
$(OPENSSL_PATH)/crypto/x509/v3_pmaps.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_prn.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_purp.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_san.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_sda.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_single_use.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_skid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_soa_id.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_sxnet.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_timespec.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_tlsf.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_usernotice.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_utf8.c
|
|
@@ -1371,6 +1447,9 @@
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/decode_pvk2key.c
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/decode_spki2typespki.c
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/endecoder_common.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_common_codecs.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_dsa_codecs.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_kem_codecs.c
|
|
$(OPENSSL_PATH)/providers/implementations/exchange/dh_exch.c
|
|
$(OPENSSL_PATH)/providers/implementations/exchange/ecdh_exch.c
|
|
$(OPENSSL_PATH)/providers/implementations/exchange/ecx_exch.c
|
|
@@ -1391,13 +1470,19 @@
|
|
$(OPENSSL_PATH)/providers/implementations/kem/ec_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/kem/ecx_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/kem/kem_util.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/kem/ml_kem_kem.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/kem/mlx_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/kem/rsa_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/dh_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/ec_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/ecx_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/kdf_legacy_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/mac_legacy_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_dsa_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_kem_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/mlx_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/rsa_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/slh_dsa_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/gmac_prov.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/hmac_prov.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/kmac_prov.c
|
|
@@ -1415,13 +1500,19 @@
|
|
$(OPENSSL_PATH)/providers/implementations/signature/ecdsa_sig.c
|
|
$(OPENSSL_PATH)/providers/implementations/signature/eddsa_sig.c
|
|
$(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/signature/ml_dsa_sig.c
|
|
$(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/signature/slh_dsa_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c
|
|
$(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c
|
|
$(OPENSSL_GEN_PATH)/crypto/params_idx.c
|
|
$(OPENSSL_PATH)/providers/common/der/der_ec_key.c
|
|
$(OPENSSL_PATH)/providers/common/der/der_ec_sig.c
|
|
$(OPENSSL_PATH)/providers/common/der/der_ecx_key.c
|
|
+ $(OPENSSL_PATH)/providers/common/der/der_ml_dsa_key.c
|
|
$(OPENSSL_PATH)/providers/common/der/der_rsa_key.c
|
|
+ $(OPENSSL_PATH)/providers/common/der/der_slh_dsa_key.c
|
|
$(OPENSSL_PATH)/providers/common/provider_ctx.c
|
|
$(OPENSSL_PATH)/providers/common/provider_err.c
|
|
$(OPENSSL_PATH)/providers/implementations/ciphers/ciphercommon.c
|
|
@@ -1436,7 +1527,9 @@
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_digests_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_ec_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_ecx_gen.c
|
|
+ $(OPENSSL_GEN_PATH)/providers/common/der/der_ml_dsa_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_rsa_gen.c
|
|
+ $(OPENSSL_GEN_PATH)/providers/common/der/der_slh_dsa_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_wrap_gen.c
|
|
$(OPENSSL_PATH)/ssl/bio_ssl.c
|
|
$(OPENSSL_PATH)/ssl/d1_lib.c
|
|
@@ -1452,7 +1545,6 @@
|
|
$(OPENSSL_PATH)/ssl/ssl_cert_comp.c
|
|
$(OPENSSL_PATH)/ssl/ssl_ciph.c
|
|
$(OPENSSL_PATH)/ssl/ssl_conf.c
|
|
- $(OPENSSL_PATH)/ssl/ssl_err.c
|
|
$(OPENSSL_PATH)/ssl/ssl_err_legacy.c
|
|
$(OPENSSL_PATH)/ssl/ssl_init.c
|
|
$(OPENSSL_PATH)/ssl/ssl_lib.c
|
|
@@ -1469,6 +1561,8 @@
|
|
$(OPENSSL_PATH)/ssl/tls13_enc.c
|
|
$(OPENSSL_PATH)/ssl/tls_depr.c
|
|
$(OPENSSL_PATH)/ssl/tls_srp.c
|
|
+ $(OPENSSL_PATH)/ssl/quic/quic_tls.c
|
|
+ $(OPENSSL_PATH)/ssl/quic/quic_tls_api.c
|
|
$(OPENSSL_PATH)/ssl/record/rec_layer_d1.c
|
|
$(OPENSSL_PATH)/ssl/record/rec_layer_s3.c
|
|
$(OPENSSL_PATH)/ssl/record/methods/dtls_meth.c
|
|
@@ -1491,6 +1585,7 @@
|
|
$(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/aesni-sha1-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
$(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/aesni-sha256-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
$(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/aesni-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
+ $(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/aesni-xts-avx512.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
$(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/bsaes-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
$(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/vpaes-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
$(OPENSSL_GEN_PATH)/X64-MSFT/crypto/x86_64cpuid.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
@@ -1509,6 +1604,7 @@
|
|
$(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/aesni-sha1-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
$(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/aesni-sha256-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
$(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/aesni-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
+ $(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/aesni-xts-avx512.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
$(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/bsaes-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
$(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/vpaes-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
$(OPENSSL_GEN_PATH)/X64-GCC/crypto/x86_64cpuid.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
|
|
@@ -1823,7 +1919,9 @@
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_check.c
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_gn.c
|
|
$(OPENSSL_PATH)/crypto/evp/pmeth_lib.c
|
|
+ $(OPENSSL_PATH)/crypto/evp/s_lib.c
|
|
$(OPENSSL_PATH)/crypto/evp/signature.c
|
|
+ $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_backend.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_dh.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c
|
|
@@ -1831,6 +1929,7 @@
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c
|
|
$(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c
|
|
+ $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c
|
|
$(OPENSSL_PATH)/crypto/hashtable/hashtable.c
|
|
$(OPENSSL_PATH)/crypto/hmac/hmac.c
|
|
$(OPENSSL_PATH)/crypto/hpke/hpke.c
|
|
@@ -1886,6 +1985,7 @@
|
|
$(OPENSSL_PATH)/crypto/self_test_core.c
|
|
$(OPENSSL_PATH)/crypto/sleep.c
|
|
$(OPENSSL_PATH)/crypto/sparse_array.c
|
|
+ $(OPENSSL_PATH)/crypto/ssl_err.c
|
|
$(OPENSSL_PATH)/crypto/threads_lib.c
|
|
$(OPENSSL_PATH)/crypto/threads_none.c
|
|
$(OPENSSL_PATH)/crypto/threads_pthread.c
|
|
@@ -1896,6 +1996,15 @@
|
|
$(OPENSSL_PATH)/crypto/md5/md5_dgst.c
|
|
$(OPENSSL_PATH)/crypto/md5/md5_one.c
|
|
$(OPENSSL_PATH)/crypto/md5/md5_sha1.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_encoders.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key_compress.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_matrix.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_ntt.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_params.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sample.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sign.c
|
|
+ $(OPENSSL_PATH)/crypto/ml_kem/ml_kem.c
|
|
$(OPENSSL_PATH)/crypto/modes/cbc128.c
|
|
$(OPENSSL_PATH)/crypto/modes/ccm128.c
|
|
$(OPENSSL_PATH)/crypto/modes/cfb128.c
|
|
@@ -1975,6 +2084,16 @@
|
|
$(OPENSSL_PATH)/crypto/sha/sha256.c
|
|
$(OPENSSL_PATH)/crypto/sha/sha3.c
|
|
$(OPENSSL_PATH)/crypto/sha/sha512.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_adrs.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_hash_ctx.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_key.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_fors.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_hash.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_hypertree.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_params.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_wots.c
|
|
+ $(OPENSSL_PATH)/crypto/slh_dsa/slh_xmss.c
|
|
$(OPENSSL_PATH)/crypto/sm3/legacy_sm3.c
|
|
$(OPENSSL_PATH)/crypto/sm3/sm3.c
|
|
$(OPENSSL_PATH)/crypto/stack/stack.c
|
|
@@ -1999,13 +2118,17 @@
|
|
$(OPENSSL_PATH)/crypto/x509/t_crl.c
|
|
$(OPENSSL_PATH)/crypto/x509/t_req.c
|
|
$(OPENSSL_PATH)/crypto/x509/t_x509.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_aaa.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_addr.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_admis.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_akeya.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_akid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_asid.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_audit_id.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_authattid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_battcons.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_bcons.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_bitst.c
|
|
@@ -2033,12 +2156,14 @@
|
|
$(OPENSSL_PATH)/crypto/x509/v3_pmaps.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_prn.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_purp.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_san.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_sda.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_single_use.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_skid.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_soa_id.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_sxnet.c
|
|
+ $(OPENSSL_PATH)/crypto/x509/v3_timespec.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_tlsf.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_usernotice.c
|
|
$(OPENSSL_PATH)/crypto/x509/v3_utf8.c
|
|
@@ -2117,6 +2242,9 @@
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/decode_pvk2key.c
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/decode_spki2typespki.c
|
|
$(OPENSSL_PATH)/providers/implementations/encode_decode/endecoder_common.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_common_codecs.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_dsa_codecs.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_kem_codecs.c
|
|
$(OPENSSL_PATH)/providers/implementations/exchange/dh_exch.c
|
|
$(OPENSSL_PATH)/providers/implementations/exchange/ecdh_exch.c
|
|
$(OPENSSL_PATH)/providers/implementations/exchange/ecx_exch.c
|
|
@@ -2137,13 +2265,19 @@
|
|
$(OPENSSL_PATH)/providers/implementations/kem/ec_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/kem/ecx_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/kem/kem_util.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/kem/ml_kem_kem.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/kem/mlx_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/kem/rsa_kem.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/dh_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/ec_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/ecx_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/kdf_legacy_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/mac_legacy_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_dsa_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_kem_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/mlx_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/keymgmt/rsa_kmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/keymgmt/slh_dsa_kmgmt.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/gmac_prov.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/hmac_prov.c
|
|
$(OPENSSL_PATH)/providers/implementations/macs/kmac_prov.c
|
|
@@ -2161,13 +2295,19 @@
|
|
$(OPENSSL_PATH)/providers/implementations/signature/ecdsa_sig.c
|
|
$(OPENSSL_PATH)/providers/implementations/signature/eddsa_sig.c
|
|
$(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/signature/ml_dsa_sig.c
|
|
$(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/signature/slh_dsa_sig.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c
|
|
+ $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c
|
|
$(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c
|
|
$(OPENSSL_GEN_PATH)/crypto/params_idx.c
|
|
$(OPENSSL_PATH)/providers/common/der/der_ec_key.c
|
|
$(OPENSSL_PATH)/providers/common/der/der_ec_sig.c
|
|
$(OPENSSL_PATH)/providers/common/der/der_ecx_key.c
|
|
+ $(OPENSSL_PATH)/providers/common/der/der_ml_dsa_key.c
|
|
$(OPENSSL_PATH)/providers/common/der/der_rsa_key.c
|
|
+ $(OPENSSL_PATH)/providers/common/der/der_slh_dsa_key.c
|
|
$(OPENSSL_PATH)/providers/common/provider_ctx.c
|
|
$(OPENSSL_PATH)/providers/common/provider_err.c
|
|
$(OPENSSL_PATH)/providers/implementations/ciphers/ciphercommon.c
|
|
@@ -2182,7 +2322,9 @@
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_digests_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_ec_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_ecx_gen.c
|
|
+ $(OPENSSL_GEN_PATH)/providers/common/der/der_ml_dsa_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_rsa_gen.c
|
|
+ $(OPENSSL_GEN_PATH)/providers/common/der/der_slh_dsa_gen.c
|
|
$(OPENSSL_GEN_PATH)/providers/common/der/der_wrap_gen.c
|
|
$(OPENSSL_PATH)/ssl/bio_ssl.c
|
|
$(OPENSSL_PATH)/ssl/d1_lib.c
|
|
@@ -2198,7 +2340,6 @@
|
|
$(OPENSSL_PATH)/ssl/ssl_cert_comp.c
|
|
$(OPENSSL_PATH)/ssl/ssl_ciph.c
|
|
$(OPENSSL_PATH)/ssl/ssl_conf.c
|
|
- $(OPENSSL_PATH)/ssl/ssl_err.c
|
|
$(OPENSSL_PATH)/ssl/ssl_err_legacy.c
|
|
$(OPENSSL_PATH)/ssl/ssl_init.c
|
|
$(OPENSSL_PATH)/ssl/ssl_lib.c
|
|
@@ -2215,6 +2356,8 @@
|
|
$(OPENSSL_PATH)/ssl/tls13_enc.c
|
|
$(OPENSSL_PATH)/ssl/tls_depr.c
|
|
$(OPENSSL_PATH)/ssl/tls_srp.c
|
|
+ $(OPENSSL_PATH)/ssl/quic/quic_tls.c
|
|
+ $(OPENSSL_PATH)/ssl/quic/quic_tls_api.c
|
|
$(OPENSSL_PATH)/ssl/record/rec_layer_d1.c
|
|
$(OPENSSL_PATH)/ssl/record/rec_layer_s3.c
|
|
$(OPENSSL_PATH)/ssl/record/methods/dtls_meth.c
|