From 98550817e5afabb4b254cc838e1efe01ceea516e Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann
Date: Mon, 7 Apr 2025 12:06:45 +0200
Subject: [PATCH] CryptoPkg/openssl: update generated files

Regenerate the OpenSSL assembly files, headers, and library file lists.
Notable changes in the regenerated output:

- AArch64: constant data moves from .text into .section .rodata, and
  adr-based references to it are replaced with adrp + :lo12: pairs.
- IA32/X64: the OPENSSL_ia32cap_P capability vector grows from 16 to
  40 bytes; results of cpuid leaf 7 (subleaf 1) and leaf 0x24 are
  stored in the new slots.
- New X64 assembly: AVX512 AES-XTS (aesni-xts-avx512) and AVX-IFMA
  RSAZ (rsaz-{2k,3k,4k}-avxifma).
- New generated DER helpers for the ML-DSA and SLH-DSA providers.

Signed-off-by: Gerd Hoffmann
---
 .../AARCH64-GCC/crypto/aes/aesv8-armx.S | 6 +-
 .../AARCH64-GCC/crypto/aes/bsaes-armv8.S | 26 +-
 .../AARCH64-GCC/crypto/aes/vpaes-armv8.S | 53 +-
 .../AARCH64-GCC/crypto/bn/armv8-mont.S | 1 +
 .../crypto/ec/ecp_nistz256-armv8.S | 100 +-
 .../AARCH64-GCC/crypto/md5/md5-aarch64.S | 128 +-
 .../crypto/modes/aes-gcm-armv8_64.S | 1 +
 .../AARCH64-GCC/crypto/modes/ghashv8-armx.S | 1 +
 .../AARCH64-GCC/crypto/sha/keccak1600-armv8.S | 10 +-
 .../AARCH64-GCC/crypto/sha/sha1-armv8.S | 6 +-
 .../AARCH64-GCC/crypto/sha/sha256-armv8.S | 15 +-
 .../AARCH64-GCC/crypto/sha/sha512-armv8.S | 12 +-
 .../AARCH64-GCC/crypto/sm3/sm3-armv8.S | 15 +-
 .../OpensslGen/IA32-GCC/crypto/aes/aes-586.S | 2 +-
 .../IA32-GCC/crypto/aes/aesni-x86.S | 2 +-
 .../OpensslGen/IA32-GCC/crypto/bn/bn-586.S | 2 +-
 .../OpensslGen/IA32-GCC/crypto/bn/x86-gf2m.S | 2 +-
 .../OpensslGen/IA32-GCC/crypto/bn/x86-mont.S | 2 +-
 .../IA32-GCC/crypto/ec/ecp_nistz256-x86.S | 2 +-
 .../OpensslGen/IA32-GCC/crypto/sha/sha1-586.S | 2 +-
 .../IA32-GCC/crypto/sha/sha256-586.S | 2 +-
 .../IA32-GCC/crypto/sha/sha512-586.S | 2 +-
 .../OpensslGen/IA32-GCC/crypto/x86cpuid.S | 21 +-
 .../IA32-MSFT/crypto/aes/aes-586.nasm | 2 +-
 .../IA32-MSFT/crypto/aes/aesni-x86.nasm | 2 +-
 .../IA32-MSFT/crypto/bn/bn-586.nasm | 2 +-
 .../IA32-MSFT/crypto/bn/x86-gf2m.nasm | 2 +-
 .../IA32-MSFT/crypto/bn/x86-mont.nasm | 2 +-
 .../IA32-MSFT/crypto/ec/ecp_nistz256-x86.nasm | 2 +-
 .../IA32-MSFT/crypto/sha/sha1-586.nasm | 2 +-
 .../IA32-MSFT/crypto/sha/sha256-586.nasm | 2 +-
 .../IA32-MSFT/crypto/sha/sha512-586.nasm | 2 +-
 .../OpensslGen/IA32-MSFT/crypto/x86cpuid.nasm | 21 +-
 .../X64-GCC/crypto/aes/aesni-xts-avx512.s | 8145 ++++++++++++++++
 .../X64-GCC/crypto/bn/rsaz-2k-avxifma.s | 1167 +++
 .../X64-GCC/crypto/bn/rsaz-3k-avxifma.s | 1768 ++++
 .../X64-GCC/crypto/bn/rsaz-4k-avxifma.s | 1922 ++++
 .../X64-GCC/crypto/md5/md5-x86_64.s | 32 +-
 .../OpensslGen/X64-GCC/crypto/x86_64cpuid.s | 30 +-
 .../X64-MSFT/crypto/aes/aesni-xts-avx512.nasm | 8350 +++++++++++++++++
 .../X64-MSFT/crypto/bn/rsaz-2k-avxifma.nasm | 1276 +++
 .../X64-MSFT/crypto/bn/rsaz-3k-avxifma.nasm | 1927 ++++
 .../X64-MSFT/crypto/bn/rsaz-4k-avxifma.nasm | 2081 ++++
 .../X64-MSFT/crypto/md5/md5-x86_64.nasm | 32 +-
 .../X64-MSFT/crypto/x86_64cpuid.nasm | 30 +-
 .../OpensslLib/OpensslGen/crypto/params_idx.c | 151 +-
 .../OpensslGen/include/internal/param_names.h | 607 +-
 .../OpensslGen/include/openssl/asn1.h | 31 +-
 .../OpensslGen/include/openssl/bio.h | 47 +-
 .../OpensslGen/include/openssl/cms.h | 5 +-
 .../include/openssl/configuration-ec.h | 12 +
 .../include/openssl/configuration-noec.h | 12 +
 .../OpensslGen/include/openssl/core_names.h | 32 +-
 .../OpensslGen/include/openssl/crmf.h | 30 +-
 .../OpensslGen/include/openssl/crypto.h | 3 +
 .../OpensslGen/include/openssl/opensslv.h | 12 +-
 .../OpensslGen/include/openssl/ssl.h | 59 +-
 .../OpensslGen/include/openssl/x509_acert.h | 31 +
 .../OpensslGen/include/openssl/x509_vfy.h | 3 +-
 .../OpensslGen/include/openssl/x509v3.h | 480 +-
 .../providers/common/der/der_ml_dsa_gen.c | 37 +
 .../providers/common/der/der_slh_dsa_gen.c | 100 +
 .../common/include/prov/der_ml_dsa.h | 40 +
 .../common/include/prov/der_slh_dsa.h | 103 +
 CryptoPkg/Library/OpensslLib/OpensslLib.inf | 47 +-
 .../Library/OpensslLib/OpensslLibAccel.inf | 143 +-
 .../Library/OpensslLib/OpensslLibCrypto.inf | 44 +
.../Library/OpensslLib/OpensslLibFull.inf | 49 +- .../OpensslLib/OpensslLibFullAccel.inf | 149 +- 69 files changed, 28877 insertions(+), 562 deletions(-) create mode 100644 CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/aes/aesni-xts-avx512.s create mode 100644 CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-2k-avxifma.s create mode 100644 CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-3k-avxifma.s create mode 100644 CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-4k-avxifma.s create mode 100644 CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/aes/aesni-xts-avx512.nasm create mode 100644 CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-2k-avxifma.nasm create mode 100644 CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-3k-avxifma.nasm create mode 100644 CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-4k-avxifma.nasm create mode 100644 CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/der/der_ml_dsa_gen.c create mode 100644 CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/der/der_slh_dsa_gen.c create mode 100644 CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/include/prov/der_ml_dsa.h create mode 100644 CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/include/prov/der_slh_dsa.h diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/aesv8-armx.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/aesv8-armx.S index 96a66379e1..31c750f875 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/aesv8-armx.S +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/aesv8-armx.S @@ -3,12 +3,13 @@ #if __ARM_MAX_ARCH__>=7 .arch armv8-a+crypto .text +.section .rodata .align 5 .Lrcon: .long 0x01,0x01,0x01,0x01 .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat .long 0x1b,0x1b,0x1b,0x1b - +.previous .globl aes_v8_set_encrypt_key .type aes_v8_set_encrypt_key,%function .align 5 @@ -31,7 +32,8 @@ aes_v8_set_encrypt_key: tst w1,#0x3f b.ne .Lenc_key_abort - adr x3,.Lrcon + adrp x3,.Lrcon + add x3,x3,#:lo12:.Lrcon cmp w1,#192 eor v0.16b,v0.16b,v0.16b diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/bsaes-armv8.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/bsaes-armv8.S index 8a8668262a..bb05512c17 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/bsaes-armv8.S +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/bsaes-armv8.S @@ -1,4 +1,4 @@ -// Copyright 2021-2024 The OpenSSL Project Authors. All Rights Reserved. +// Copyright 2021-2025 The OpenSSL Project Authors. All Rights Reserved. // // Licensed under the OpenSSL license (the "License"). You may not use // this file except in compliance with the License. 
You can obtain a copy @@ -44,7 +44,8 @@ // other SIMD registers corrupted _bsaes_decrypt8: ldr q8, [x9], #16 - adr x11, .LM0ISR + adrp x11, .LM0ISR + add x11, x11, #:lo12:.LM0ISR movi v9.16b, #0x55 ldr q10, [x11], #16 movi v16.16b, #0x33 @@ -460,9 +461,10 @@ _bsaes_decrypt8: ret .size _bsaes_decrypt8,.-_bsaes_decrypt8 -.type _bsaes_const,%object +.section .rodata +.type _bsaes_consts,%object .align 6 -_bsaes_const: +_bsaes_consts: // InvShiftRows constants // Used in _bsaes_decrypt8, which assumes contiguity // .LM0ISR used with round 0 key @@ -498,7 +500,9 @@ _bsaes_const: .quad 0x090d01050c000408, 0x03070b0f060a0e02 .align 6 -.size _bsaes_const,.-_bsaes_const +.size _bsaes_consts,.-_bsaes_consts + +.previous .type _bsaes_encrypt8,%function .align 4 @@ -514,7 +518,8 @@ _bsaes_const: // other SIMD registers corrupted _bsaes_encrypt8: ldr q8, [x9], #16 - adr x11, .LM0SR + adrp x11, .LM0SR + add x11, x11, #:lo12:.LM0SR ldr q9, [x11], #16 _bsaes_encrypt8_alt: eor v0.16b, v0.16b, v8.16b @@ -918,9 +923,11 @@ _bsaes_encrypt8_alt: // other SIMD registers corrupted _bsaes_key_convert: #ifdef __AARCH64EL__ - adr x11, .LM0_littleendian + adrp x11, .LM0_littleendian + add x11, x11, #:lo12:.LM0_littleendian #else - adr x11, .LM0_bigendian + adrp x11, .LM0_bigendian + add x11, x11, #:lo12:.LM0_bigendian #endif ldr q0, [x9], #16 // load round 0 key ldr q1, [x11] // .LM0 @@ -964,7 +971,8 @@ _bsaes_key_convert: // don't save last round key #ifdef __AARCH64EL__ rev32 v15.16b, v15.16b - adr x11, .LM0_bigendian + adrp x11, .LM0_bigendian + add x11, x11, #:lo12:.LM0_bigendian #endif ret .size _bsaes_key_convert,.-_bsaes_key_convert diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/vpaes-armv8.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/vpaes-armv8.S index 9aef5acd86..e78961d334 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/vpaes-armv8.S +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/aes/vpaes-armv8.S @@ -1,6 +1,6 @@ #include "arm_arch.h" -.text +.section .rodata .type _vpaes_consts,%object .align 7 // totally strategic alignment @@ -92,6 +92,9 @@ _vpaes_consts: .align 2 .size _vpaes_consts,.-_vpaes_consts .align 6 + +.text + // // _aes_preheat // @@ -101,7 +104,8 @@ _vpaes_consts: .type _vpaes_encrypt_preheat,%function .align 4 _vpaes_encrypt_preheat: - adr x10, .Lk_inv + adrp x10, .Lk_inv + add x10, x10, #:lo12:.Lk_inv movi v17.16b, #0x0f ld1 {v18.2d,v19.2d}, [x10],#32 // .Lk_inv ld1 {v20.2d,v21.2d,v22.2d,v23.2d}, [x10],#64 // .Lk_ipt, .Lk_sbo @@ -129,7 +133,8 @@ _vpaes_encrypt_preheat: _vpaes_encrypt_core: mov x9, x2 ldr w8, [x2,#240] // pull rounds - adr x11, .Lk_mc_forward+16 + adrp x11, .Lk_mc_forward+16 + add x11, x11, #:lo12:.Lk_mc_forward+16 // vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 @@ -216,7 +221,8 @@ vpaes_encrypt: _vpaes_encrypt_2x: mov x9, x2 ldr w8, [x2,#240] // pull rounds - adr x11, .Lk_mc_forward+16 + adrp x11, .Lk_mc_forward+16 + add x11, x11, #:lo12:.Lk_mc_forward+16 // vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 @@ -319,9 +325,11 @@ _vpaes_encrypt_2x: .type _vpaes_decrypt_preheat,%function .align 4 _vpaes_decrypt_preheat: - adr x10, .Lk_inv + adrp x10, .Lk_inv + add x10, x10, #:lo12:.Lk_inv movi v17.16b, #0x0f - adr x11, .Lk_dipt + adrp x11, .Lk_dipt + add x11, x11, 
#:lo12:.Lk_dipt ld1 {v18.2d,v19.2d}, [x10],#32 // .Lk_inv ld1 {v20.2d,v21.2d,v22.2d,v23.2d}, [x11],#64 // .Lk_dipt, .Lk_dsbo ld1 {v24.2d,v25.2d,v26.2d,v27.2d}, [x11],#64 // .Lk_dsb9, .Lk_dsbd @@ -343,10 +351,12 @@ _vpaes_decrypt_core: // vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo lsl x11, x8, #4 // mov %rax, %r11; shl $4, %r11 eor x11, x11, #0x30 // xor $0x30, %r11 - adr x10, .Lk_sr + adrp x10, .Lk_sr + add x10, x10, #:lo12:.Lk_sr and x11, x11, #0x30 // and $0x30, %r11 add x11, x11, x10 - adr x10, .Lk_mc_forward+48 + adrp x10, .Lk_mc_forward+48 + add x10, x10, #:lo12:.Lk_mc_forward+48 ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 @@ -454,10 +464,12 @@ _vpaes_decrypt_2x: // vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo lsl x11, x8, #4 // mov %rax, %r11; shl $4, %r11 eor x11, x11, #0x30 // xor $0x30, %r11 - adr x10, .Lk_sr + adrp x10, .Lk_sr + add x10, x10, #:lo12:.Lk_sr and x11, x11, #0x30 // and $0x30, %r11 add x11, x11, x10 - adr x10, .Lk_mc_forward+48 + adrp x10, .Lk_mc_forward+48 + add x10, x10, #:lo12:.Lk_mc_forward+48 ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 @@ -586,14 +598,18 @@ _vpaes_decrypt_2x: .type _vpaes_key_preheat,%function .align 4 _vpaes_key_preheat: - adr x10, .Lk_inv + adrp x10, .Lk_inv + add x10, x10, #:lo12:.Lk_inv movi v16.16b, #0x5b // .Lk_s63 - adr x11, .Lk_sb1 + adrp x11, .Lk_sb1 + add x11, x11, #:lo12:.Lk_sb1 movi v17.16b, #0x0f // .Lk_s0F ld1 {v18.2d,v19.2d,v20.2d,v21.2d}, [x10] // .Lk_inv, .Lk_ipt - adr x10, .Lk_dksd + adrp x10, .Lk_dksd + add x10, x10, #:lo12:.Lk_dksd ld1 {v22.2d,v23.2d}, [x11] // .Lk_sb1 - adr x11, .Lk_mc_forward + adrp x11, .Lk_mc_forward + add x11, x11, #:lo12:.Lk_mc_forward ld1 {v24.2d,v25.2d,v26.2d,v27.2d}, [x10],#64 // .Lk_dksd, .Lk_dksb ld1 {v28.2d,v29.2d,v30.2d,v31.2d}, [x10],#64 // .Lk_dkse, .Lk_dks9 ld1 {v8.2d}, [x10] // .Lk_rcon @@ -617,7 +633,8 @@ _vpaes_schedule_core: bl _vpaes_schedule_transform mov v7.16b, v0.16b // vmovdqa %xmm0, %xmm7 - adr x10, .Lk_sr // lea .Lk_sr(%rip),%r10 + adrp x10, .Lk_sr + add x10, x10, #:lo12:.Lk_sr add x8, x8, x10 cbnz w3, .Lschedule_am_decrypting @@ -743,12 +760,14 @@ _vpaes_schedule_core: .align 4 .Lschedule_mangle_last: // schedule last round key from xmm0 - adr x11, .Lk_deskew // lea .Lk_deskew(%rip),%r11 # prepare to deskew + adrp x11, .Lk_deskew + add x11, x11, #:lo12:.Lk_deskew cbnz w3, .Lschedule_mangle_last_dec // encrypting ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10),%xmm1 - adr x11, .Lk_opt // lea .Lk_opt(%rip), %r11 # prepare to output transform + adrp x11, .Lk_opt + add x11, x11, #:lo12:.Lk_opt add x2, x2, #32 // add $32, %rdx tbl v0.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm0 # output permute diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/bn/armv8-mont.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/bn/armv8-mont.S index 111de65451..98c5b76576 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/bn/armv8-mont.S +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/bn/armv8-mont.S @@ -2130,6 +2130,7 @@ __bn_mul4x_mont: AARCH64_VALIDATE_LINK_REGISTER ret .size __bn_mul4x_mont,.-__bn_mul4x_mont +.section .rodata .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 2 .align 4 
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/ec/ecp_nistz256-armv8.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/ec/ecp_nistz256-armv8.S index 6fe86a4020..8c42109669 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/ec/ecp_nistz256-armv8.S +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/ec/ecp_nistz256-armv8.S @@ -1,6 +1,6 @@ #include "arm_arch.h" -.text +.section .rodata .globl ecp_nistz256_precomputed .type ecp_nistz256_precomputed,%object .align 12 @@ -2390,6 +2390,8 @@ ecp_nistz256_precomputed: .byte 69,67,80,95,78,73,83,84,90,50,53,54,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 2 +.text + // void ecp_nistz256_to_mont(BN_ULONG x0[4],const BN_ULONG x1[4]); .globl ecp_nistz256_to_mont .type ecp_nistz256_to_mont,%function @@ -2400,12 +2402,16 @@ ecp_nistz256_to_mont: add x29,sp,#0 stp x19,x20,[sp,#16] - ldr x3,.LRR // bp[0] + adrp x3,.LRR + ldr x3,[x3,#:lo12:.LRR] // bp[0] ldp x4,x5,[x1] ldp x6,x7,[x1,#16] - ldr x12,.Lpoly+8 - ldr x13,.Lpoly+24 - adr x2,.LRR // &bp[0] + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + adrp x2,.LRR + add x2,x2,#:lo12:.LRR bl __ecp_nistz256_mul_mont @@ -2428,9 +2434,12 @@ ecp_nistz256_from_mont: mov x3,#1 // bp[0] ldp x4,x5,[x1] ldp x6,x7,[x1,#16] - ldr x12,.Lpoly+8 - ldr x13,.Lpoly+24 - adr x2,.Lone // &bp[0] + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + adrp x2,.Lone + add x2,x2,#:lo12:.Lone bl __ecp_nistz256_mul_mont @@ -2454,8 +2463,10 @@ ecp_nistz256_mul_mont: ldr x3,[x2] // bp[0] ldp x4,x5,[x1] ldp x6,x7,[x1,#16] - ldr x12,.Lpoly+8 - ldr x13,.Lpoly+24 + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] bl __ecp_nistz256_mul_mont @@ -2477,8 +2488,10 @@ ecp_nistz256_sqr_mont: ldp x4,x5,[x1] ldp x6,x7,[x1,#16] - ldr x12,.Lpoly+8 - ldr x13,.Lpoly+24 + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] bl __ecp_nistz256_sqr_mont @@ -2502,8 +2515,10 @@ ecp_nistz256_add: ldp x8,x9,[x2] ldp x16,x17,[x1,#16] ldp x10,x11,[x2,#16] - ldr x12,.Lpoly+8 - ldr x13,.Lpoly+24 + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] bl __ecp_nistz256_add @@ -2523,8 +2538,10 @@ ecp_nistz256_div_by_2: ldp x14,x15,[x1] ldp x16,x17,[x1,#16] - ldr x12,.Lpoly+8 - ldr x13,.Lpoly+24 + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] bl __ecp_nistz256_div_by_2 @@ -2544,8 +2561,10 @@ ecp_nistz256_mul_by_2: ldp x14,x15,[x1] ldp x16,x17,[x1,#16] - ldr x12,.Lpoly+8 - ldr x13,.Lpoly+24 + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] mov x8,x14 mov x9,x15 mov x10,x16 @@ -2569,8 +2588,10 @@ ecp_nistz256_mul_by_3: ldp x14,x15,[x1] ldp x16,x17,[x1,#16] - ldr x12,.Lpoly+8 - ldr x13,.Lpoly+24 + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] mov x8,x14 mov x9,x15 mov x10,x16 @@ -2606,8 +2627,10 @@ ecp_nistz256_sub: ldp x14,x15,[x1] ldp x16,x17,[x1,#16] - ldr x12,.Lpoly+8 - ldr x13,.Lpoly+24 + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] bl __ecp_nistz256_sub_from @@ -2630,8 +2653,10 @@ ecp_nistz256_neg: mov x15,xzr mov x16,xzr mov x17,xzr - ldr x12,.Lpoly+8 - ldr x13,.Lpoly+24 + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] bl 
__ecp_nistz256_sub_from @@ -3026,9 +3051,11 @@ ecp_nistz256_point_double: mov x21,x0 ldp x16,x17,[x1,#48] mov x22,x1 - ldr x12,.Lpoly+8 + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] mov x8,x14 - ldr x13,.Lpoly+24 + ldr x13,[x13,#24] mov x9,x15 ldp x4,x5,[x22,#64] // forward load for p256_sqr_mont mov x10,x16 @@ -3171,8 +3198,10 @@ ecp_nistz256_point_add: mov x21,x0 mov x22,x1 mov x23,x2 - ldr x12,.Lpoly+8 - ldr x13,.Lpoly+24 + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] orr x8,x4,x5 orr x10,x6,x7 orr x25,x8,x10 @@ -3422,8 +3451,10 @@ ecp_nistz256_point_add_affine: mov x21,x0 mov x22,x1 mov x23,x2 - ldr x12,.Lpoly+8 - ldr x13,.Lpoly+24 + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] ldp x4,x5,[x1,#64] // in1_z ldp x6,x7,[x1,#64+16] @@ -3569,7 +3600,8 @@ ecp_nistz256_point_add_affine: ldp x10,x11,[x23,#0+48] stp x14,x15,[x21,#0] stp x16,x17,[x21,#0+16] - adr x23,.Lone_mont-64 + adrp x23,.Lone_mont-64 + add x23,x23,#:lo12:.Lone_mont-64 ldp x14,x15,[x22,#32] // in1 cmp x24,#0 // ~, remember? ldp x16,x17,[x22,#32+16] @@ -3627,7 +3659,8 @@ ecp_nistz256_ord_mul_mont: stp x21,x22,[sp,#32] stp x23,x24,[sp,#48] - adr x23,.Lord + adrp x23,.Lord + add x23,x23,#:lo12:.Lord ldr x3,[x2] // bp[0] ldp x4,x5,[x1] ldp x6,x7,[x1,#16] @@ -3837,7 +3870,8 @@ ecp_nistz256_ord_sqr_mont: stp x21,x22,[sp,#32] stp x23,x24,[sp,#48] - adr x23,.Lord + adrp x23,.Lord + add x23,x23,#:lo12:.Lord ldp x4,x5,[x1] ldp x6,x7,[x1,#16] diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/md5/md5-aarch64.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/md5/md5-aarch64.S index 7045e31f18..76e4de346a 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/md5/md5-aarch64.S +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/md5/md5-aarch64.S @@ -207,165 +207,165 @@ ossl_md5_blocks_loop: add w9, w9, w13 // Add constant 0x49b40821 add w9, w9, w6 // Add aux function result ror w9, w9, #10 // Rotate left s=22 bits - bic x6, x8, x17 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) + bic x6, x8, x17 // Aux function round 2 (~z & y) add w9, w8, w9 // Add X parameter round 1 B=FF(B, C, D, A, 0x49b40821, s=22, M[15]) - and x13, x9, x17 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) - orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y)) movz x13, #0x2562 // .Load lower half of constant 0xf61e2562 movk x13, #0xf61e, lsl #16 // .Load upper half of constant 0xf61e2562 add w4, w4, w20 // Add dest value add w4, w4, w13 // Add constant 0xf61e2562 - add w4, w4, w6 // Add aux function result + and x13, x9, x17 // Aux function round 2 (x & z) + add w4, w4, w6 // Add (~z & y) + add w4, w4, w13 // Add (x & z) ror w4, w4, #27 // Rotate left s=5 bits - bic x6, x9, x8 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) + bic x6, x9, x8 // Aux function round 2 (~z & y) add w4, w9, w4 // Add X parameter round 2 A=GG(A, B, C, D, 0xf61e2562, s=5, M[1]) - and x13, x4, x8 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) - orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y)) movz x13, #0xb340 // .Load lower half of constant 0xc040b340 movk x13, #0xc040, lsl #16 // .Load upper half of constant 0xc040b340 add w17, w17, w7 // Add dest value add w17, w17, w13 // Add constant 0xc040b340 - add w17, w17, w6 // Add aux function result + and x13, x4, x8 // Aux function round 2 (x & z) + add w17, w17, w6 // Add (~z & y) + add w17, w17, w13 // Add (x & z) ror w17, w17, #23 // Rotate left s=9 
bits - bic x6, x4, x9 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) + bic x6, x4, x9 // Aux function round 2 (~z & y) add w17, w4, w17 // Add X parameter round 2 D=GG(D, A, B, C, 0xc040b340, s=9, M[6]) - and x13, x17, x9 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) - orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y)) movz x13, #0x5a51 // .Load lower half of constant 0x265e5a51 movk x13, #0x265e, lsl #16 // .Load upper half of constant 0x265e5a51 add w8, w8, w25 // Add dest value add w8, w8, w13 // Add constant 0x265e5a51 - add w8, w8, w6 // Add aux function result + and x13, x17, x9 // Aux function round 2 (x & z) + add w8, w8, w6 // Add (~z & y) + add w8, w8, w13 // Add (x & z) ror w8, w8, #18 // Rotate left s=14 bits - bic x6, x17, x4 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) + bic x6, x17, x4 // Aux function round 2 (~z & y) add w8, w17, w8 // Add X parameter round 2 C=GG(C, D, A, B, 0x265e5a51, s=14, M[11]) - and x13, x8, x4 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) - orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y)) movz x13, #0xc7aa // .Load lower half of constant 0xe9b6c7aa movk x13, #0xe9b6, lsl #16 // .Load upper half of constant 0xe9b6c7aa add w9, w9, w15 // Add dest value add w9, w9, w13 // Add constant 0xe9b6c7aa - add w9, w9, w6 // Add aux function result + and x13, x8, x4 // Aux function round 2 (x & z) + add w9, w9, w6 // Add (~z & y) + add w9, w9, w13 // Add (x & z) ror w9, w9, #12 // Rotate left s=20 bits - bic x6, x8, x17 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) + bic x6, x8, x17 // Aux function round 2 (~z & y) add w9, w8, w9 // Add X parameter round 2 B=GG(B, C, D, A, 0xe9b6c7aa, s=20, M[0]) - and x13, x9, x17 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) - orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y)) movz x13, #0x105d // .Load lower half of constant 0xd62f105d movk x13, #0xd62f, lsl #16 // .Load upper half of constant 0xd62f105d add w4, w4, w22 // Add dest value add w4, w4, w13 // Add constant 0xd62f105d - add w4, w4, w6 // Add aux function result + and x13, x9, x17 // Aux function round 2 (x & z) + add w4, w4, w6 // Add (~z & y) + add w4, w4, w13 // Add (x & z) ror w4, w4, #27 // Rotate left s=5 bits - bic x6, x9, x8 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) + bic x6, x9, x8 // Aux function round 2 (~z & y) add w4, w9, w4 // Add X parameter round 2 A=GG(A, B, C, D, 0xd62f105d, s=5, M[5]) - and x13, x4, x8 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) - orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y)) movz x13, #0x1453 // .Load lower half of constant 0x2441453 movk x13, #0x244, lsl #16 // .Load upper half of constant 0x2441453 add w17, w17, w16 // Add dest value add w17, w17, w13 // Add constant 0x2441453 - add w17, w17, w6 // Add aux function result + and x13, x4, x8 // Aux function round 2 (x & z) + add w17, w17, w6 // Add (~z & y) + add w17, w17, w13 // Add (x & z) ror w17, w17, #23 // Rotate left s=9 bits - bic x6, x4, x9 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) + bic x6, x4, x9 // Aux function round 2 (~z & y) add w17, w4, w17 // Add X parameter round 2 D=GG(D, A, B, C, 0x2441453, s=9, M[10]) - and x13, x17, x9 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) - orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y)) movz x13, #0xe681 // .Load lower half of constant 0xd8a1e681 movk x13, #0xd8a1, lsl #16 // .Load upper half of constant 0xd8a1e681 add w8, w8, w27 // Add dest value add w8, w8, w13 // Add constant 0xd8a1e681 - add w8, w8, w6 
// Add aux function result + and x13, x17, x9 // Aux function round 2 (x & z) + add w8, w8, w6 // Add (~z & y) + add w8, w8, w13 // Add (x & z) ror w8, w8, #18 // Rotate left s=14 bits - bic x6, x17, x4 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) + bic x6, x17, x4 // Aux function round 2 (~z & y) add w8, w17, w8 // Add X parameter round 2 C=GG(C, D, A, B, 0xd8a1e681, s=14, M[15]) - and x13, x8, x4 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) - orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y)) movz x13, #0xfbc8 // .Load lower half of constant 0xe7d3fbc8 movk x13, #0xe7d3, lsl #16 // .Load upper half of constant 0xe7d3fbc8 add w9, w9, w14 // Add dest value add w9, w9, w13 // Add constant 0xe7d3fbc8 - add w9, w9, w6 // Add aux function result + and x13, x8, x4 // Aux function round 2 (x & z) + add w9, w9, w6 // Add (~z & y) + add w9, w9, w13 // Add (x & z) ror w9, w9, #12 // Rotate left s=20 bits - bic x6, x8, x17 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) + bic x6, x8, x17 // Aux function round 2 (~z & y) add w9, w8, w9 // Add X parameter round 2 B=GG(B, C, D, A, 0xe7d3fbc8, s=20, M[4]) - and x13, x9, x17 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) - orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y)) movz x13, #0xcde6 // .Load lower half of constant 0x21e1cde6 movk x13, #0x21e1, lsl #16 // .Load upper half of constant 0x21e1cde6 add w4, w4, w24 // Add dest value add w4, w4, w13 // Add constant 0x21e1cde6 - add w4, w4, w6 // Add aux function result + and x13, x9, x17 // Aux function round 2 (x & z) + add w4, w4, w6 // Add (~z & y) + add w4, w4, w13 // Add (x & z) ror w4, w4, #27 // Rotate left s=5 bits - bic x6, x9, x8 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) + bic x6, x9, x8 // Aux function round 2 (~z & y) add w4, w9, w4 // Add X parameter round 2 A=GG(A, B, C, D, 0x21e1cde6, s=5, M[9]) - and x13, x4, x8 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) - orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y)) movz x13, #0x7d6 // .Load lower half of constant 0xc33707d6 movk x13, #0xc337, lsl #16 // .Load upper half of constant 0xc33707d6 add w17, w17, w12 // Add dest value add w17, w17, w13 // Add constant 0xc33707d6 - add w17, w17, w6 // Add aux function result + and x13, x4, x8 // Aux function round 2 (x & z) + add w17, w17, w6 // Add (~z & y) + add w17, w17, w13 // Add (x & z) ror w17, w17, #23 // Rotate left s=9 bits - bic x6, x4, x9 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) + bic x6, x4, x9 // Aux function round 2 (~z & y) add w17, w4, w17 // Add X parameter round 2 D=GG(D, A, B, C, 0xc33707d6, s=9, M[14]) - and x13, x17, x9 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) - orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y)) movz x13, #0xd87 // .Load lower half of constant 0xf4d50d87 movk x13, #0xf4d5, lsl #16 // .Load upper half of constant 0xf4d50d87 add w8, w8, w21 // Add dest value add w8, w8, w13 // Add constant 0xf4d50d87 - add w8, w8, w6 // Add aux function result + and x13, x17, x9 // Aux function round 2 (x & z) + add w8, w8, w6 // Add (~z & y) + add w8, w8, w13 // Add (x & z) ror w8, w8, #18 // Rotate left s=14 bits - bic x6, x17, x4 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) + bic x6, x17, x4 // Aux function round 2 (~z & y) add w8, w17, w8 // Add X parameter round 2 C=GG(C, D, A, B, 0xf4d50d87, s=14, M[3]) - and x13, x8, x4 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) - orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y)) movz x13, #0x14ed // .Load lower half of 
constant 0x455a14ed movk x13, #0x455a, lsl #16 // .Load upper half of constant 0x455a14ed add w9, w9, w5 // Add dest value add w9, w9, w13 // Add constant 0x455a14ed - add w9, w9, w6 // Add aux function result + and x13, x8, x4 // Aux function round 2 (x & z) + add w9, w9, w6 // Add (~z & y) + add w9, w9, w13 // Add (x & z) ror w9, w9, #12 // Rotate left s=20 bits - bic x6, x8, x17 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) + bic x6, x8, x17 // Aux function round 2 (~z & y) add w9, w8, w9 // Add X parameter round 2 B=GG(B, C, D, A, 0x455a14ed, s=20, M[8]) - and x13, x9, x17 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) - orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y)) movz x13, #0xe905 // .Load lower half of constant 0xa9e3e905 movk x13, #0xa9e3, lsl #16 // .Load upper half of constant 0xa9e3e905 add w4, w4, w26 // Add dest value add w4, w4, w13 // Add constant 0xa9e3e905 - add w4, w4, w6 // Add aux function result + and x13, x9, x17 // Aux function round 2 (x & z) + add w4, w4, w6 // Add (~z & y) + add w4, w4, w13 // Add (x & z) ror w4, w4, #27 // Rotate left s=5 bits - bic x6, x9, x8 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) + bic x6, x9, x8 // Aux function round 2 (~z & y) add w4, w9, w4 // Add X parameter round 2 A=GG(A, B, C, D, 0xa9e3e905, s=5, M[13]) - and x13, x4, x8 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) - orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y)) movz x13, #0xa3f8 // .Load lower half of constant 0xfcefa3f8 movk x13, #0xfcef, lsl #16 // .Load upper half of constant 0xfcefa3f8 add w17, w17, w3 // Add dest value add w17, w17, w13 // Add constant 0xfcefa3f8 - add w17, w17, w6 // Add aux function result + and x13, x4, x8 // Aux function round 2 (x & z) + add w17, w17, w6 // Add (~z & y) + add w17, w17, w13 // Add (x & z) ror w17, w17, #23 // Rotate left s=9 bits - bic x6, x4, x9 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) + bic x6, x4, x9 // Aux function round 2 (~z & y) add w17, w4, w17 // Add X parameter round 2 D=GG(D, A, B, C, 0xfcefa3f8, s=9, M[2]) - and x13, x17, x9 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) - orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y)) movz x13, #0x2d9 // .Load lower half of constant 0x676f02d9 movk x13, #0x676f, lsl #16 // .Load upper half of constant 0x676f02d9 add w8, w8, w23 // Add dest value add w8, w8, w13 // Add constant 0x676f02d9 - add w8, w8, w6 // Add aux function result + and x13, x17, x9 // Aux function round 2 (x & z) + add w8, w8, w6 // Add (~z & y) + add w8, w8, w13 // Add (x & z) ror w8, w8, #18 // Rotate left s=14 bits - bic x6, x17, x4 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) + bic x6, x17, x4 // Aux function round 2 (~z & y) add w8, w17, w8 // Add X parameter round 2 C=GG(C, D, A, B, 0x676f02d9, s=14, M[7]) - and x13, x8, x4 // Aux function round 2 G(x,y,z)=((x&z)|(~z&y)) - orr x6, x6, x13 // End aux function round 2 G(x,y,z)=((x&z)|(~z&y)) movz x13, #0x4c8a // .Load lower half of constant 0x8d2a4c8a movk x13, #0x8d2a, lsl #16 // .Load upper half of constant 0x8d2a4c8a add w9, w9, w11 // Add dest value add w9, w9, w13 // Add constant 0x8d2a4c8a - add w9, w9, w6 // Add aux function result + and x13, x8, x4 // Aux function round 2 (x & z) + add w9, w9, w6 // Add (~z & y) + add w9, w9, w13 // Add (x & z) eor x6, x8, x17 // Begin aux function round 3 H(x,y,z)=(x^y^z) ror w9, w9, #12 // Rotate left s=20 bits movz x10, #0x3942 // .Load lower half of constant 0xfffa3942 diff --git 
a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/aes-gcm-armv8_64.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/aes-gcm-armv8_64.S index 75e29532bc..117c52ebc0 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/aes-gcm-armv8_64.S +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/aes-gcm-armv8_64.S @@ -6389,6 +6389,7 @@ aes_gcm_dec_256_kernel: mov w0, #0x0 ret .size aes_gcm_dec_256_kernel,.-aes_gcm_dec_256_kernel +.section .rodata .byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 2 .align 2 diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/ghashv8-armx.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/ghashv8-armx.S index 9553790361..0b50e2af31 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/ghashv8-armx.S +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/modes/ghashv8-armx.S @@ -637,6 +637,7 @@ gcm_ghash_v8_4x: ret .size gcm_ghash_v8_4x,.-gcm_ghash_v8_4x +.section .rodata .byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 2 .align 2 diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/keccak1600-armv8.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/keccak1600-armv8.S index a6f0f603a0..a4f0f0f7d0 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/keccak1600-armv8.S +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/keccak1600-armv8.S @@ -1,6 +1,6 @@ #include "arm_arch.h" -.text +.section .rodata .align 8 // strategic alignment and padding that allows to use // address value as loop termination condition... @@ -32,11 +32,14 @@ iotas: .quad 0x0000000080000001 .quad 0x8000000080008008 .size iotas,.-iotas +.text + .type KeccakF1600_int,%function .align 5 KeccakF1600_int: AARCH64_SIGN_LINK_REGISTER - adr x28,iotas + adrp x28,iotas + add x28,x28,#:lo12:iotas stp x28,x30,[sp,#16] // 32 bytes on top are mine b .Loop .align 4 @@ -578,7 +581,8 @@ SHA3_squeeze: .align 5 KeccakF1600_ce: mov x9,#24 - adr x10,iotas + adrp x10,iotas + add x10,x10,#:lo12:iotas b .Loop_ce .align 4 .Loop_ce: diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha1-armv8.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha1-armv8.S index 42fc0a74c1..507262b368 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha1-armv8.S +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha1-armv8.S @@ -1080,7 +1080,8 @@ sha1_block_armv8: stp x29,x30,[sp,#-16]! 
add x29,sp,#0 - adr x4,.Lconst + adrp x4,.Lconst + add x4,x4,#:lo12:.Lconst eor v1.16b,v1.16b,v1.16b ld1 {v0.4s},[x0],#16 ld1 {v1.s}[0],[x0] @@ -1203,6 +1204,9 @@ sha1_block_armv8: ldr x29,[sp],#16 ret .size sha1_block_armv8,.-sha1_block_armv8 + +.section .rodata + .align 6 .Lconst: .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19 diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha256-armv8.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha256-armv8.S index 9d397fae8f..b0644b849d 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha256-armv8.S +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha256-armv8.S @@ -1,4 +1,4 @@ -// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved. +// Copyright 2014-2025 The OpenSSL Project Authors. All Rights Reserved. // // Licensed under the Apache License 2.0 (the "License"). You may not use // this file except in compliance with the License. You can obtain a copy @@ -92,7 +92,8 @@ sha256_block_data_order: ldp w24,w25,[x0,#4*4] add x2,x1,x2,lsl#6 // end of input ldp w26,w27,[x0,#6*4] - adr x30,.LK256 + adrp x30,.LK256 + add x30,x30,#:lo12:.LK256 stp x0,x2,[x29,#96] .Loop: @@ -1040,6 +1041,8 @@ sha256_block_data_order: ret .size sha256_block_data_order,.-sha256_block_data_order +.section .rodata + .align 6 .type .LK256,%object .LK256: @@ -1064,6 +1067,8 @@ sha256_block_data_order: .byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 2 .align 2 + +.text #ifndef __KERNEL__ .type sha256_block_armv8,%function .align 6 @@ -1074,7 +1079,8 @@ sha256_block_armv8: add x29,sp,#0 ld1 {v0.4s,v1.4s},[x0] - adr x3,.LK256 + adrp x3,.LK256 + add x3,x3,#:lo12:.LK256 .Loop_hw: ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 @@ -1218,7 +1224,8 @@ sha256_block_neon: mov x29, sp sub sp,sp,#16*4 - adr x16,.LK256 + adrp x16,.LK256 + add x16,x16,#:lo12:.LK256 add x2,x1,x2,lsl#6 // len to point at the end of inp ld1 {v0.16b},[x1], #16 diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha512-armv8.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha512-armv8.S index dbc688df71..0a45eb06ed 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha512-armv8.S +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sha/sha512-armv8.S @@ -1,4 +1,4 @@ -// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved. +// Copyright 2014-2025 The OpenSSL Project Authors. All Rights Reserved. // // Licensed under the Apache License 2.0 (the "License"). You may not use // this file except in compliance with the License. 
You can obtain a copy @@ -90,7 +90,8 @@ sha512_block_data_order: ldp x24,x25,[x0,#4*8] add x2,x1,x2,lsl#7 // end of input ldp x26,x27,[x0,#6*8] - adr x30,.LK512 + adrp x30,.LK512 + add x30,x30,#:lo12:.LK512 stp x0,x2,[x29,#96] .Loop: @@ -1038,6 +1039,8 @@ sha512_block_data_order: ret .size sha512_block_data_order,.-sha512_block_data_order +.section .rodata + .align 6 .type .LK512,%object .LK512: @@ -1086,6 +1089,8 @@ sha512_block_data_order: .byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 2 .align 2 + +.text #ifndef __KERNEL__ .type sha512_block_armv8,%function .align 6 @@ -1099,7 +1104,8 @@ sha512_block_armv8: ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 ld1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // load context - adr x3,.LK512 + adrp x3,.LK512 + add x3,x3,#:lo12:.LK512 rev64 v16.16b,v16.16b rev64 v17.16b,v17.16b diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sm3/sm3-armv8.S b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sm3/sm3-armv8.S index f1678ff143..39ffc201d7 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sm3/sm3-armv8.S +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/AARCH64-GCC/crypto/sm3/sm3-armv8.S @@ -1,4 +1,4 @@ -// Copyright 2021-2023 The OpenSSL Project Authors. All Rights Reserved. +// Copyright 2021-2025 The OpenSSL Project Authors. All Rights Reserved. // // Licensed under the Apache License 2.0 (the "License"). You may not use // this file except in compliance with the License. You can obtain a copy @@ -22,19 +22,19 @@ ossl_hwsm3_block_data_order: rev64 v6.4s, v6.4s ext v5.16b, v5.16b, v5.16b, #8 ext v6.16b, v6.16b, v6.16b, #8 - - adr x8, .Tj + adrp x8, .Tj + add x8, x8, #:lo12:.Tj ldp s16, s17, [x8] .Loop: // load input - ld1 {v0.16b,v1.16b,v2.16b,v3.16b}, [x1], #64 + ld1 {v0.4s,v1.4s,v2.4s,v3.4s}, [x1], #64 sub w2, w2, #1 mov v18.16b, v5.16b mov v19.16b, v6.16b -#ifndef __ARMEB__ +#ifndef __AARCH64EB__ rev32 v0.16b, v0.16b rev32 v1.16b, v1.16b rev32 v2.16b, v2.16b @@ -497,7 +497,12 @@ ossl_hwsm3_block_data_order: st1 {v5.4s,v6.4s}, [x0] ret .size ossl_hwsm3_block_data_order,.-ossl_hwsm3_block_data_order +.section .rodata +.type _sm3_consts,%object .align 3 +_sm3_consts: .Tj: .word 0x79cc4519, 0x9d8a7a87 +.size _sm3_consts,.-_sm3_consts +.previous diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aes-586.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aes-586.S index 9792a50a8c..c55c427c35 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aes-586.S +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aes-586.S @@ -3300,7 +3300,7 @@ AES_set_decrypt_key: .byte 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89 .byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 .byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -.comm OPENSSL_ia32cap_P,16,4 +.comm OPENSSL_ia32cap_P,40,4 .section ".note.gnu.property", "a" .p2align 2 diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aesni-x86.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aesni-x86.S index 28e4f1a50a..2f9563bdca 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aesni-x86.S +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/aes/aesni-x86.S @@ -3344,7 +3344,7 @@ aesni_set_decrypt_key: .byte 
83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 .byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 .byte 115,108,46,111,114,103,62,0 -.comm OPENSSL_ia32cap_P,16,4 +.comm OPENSSL_ia32cap_P,40,4 .section ".note.gnu.property", "a" .p2align 2 diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/bn-586.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/bn-586.S index b26aa4b799..73b87147f0 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/bn-586.S +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/bn-586.S @@ -1552,7 +1552,7 @@ bn_sub_part_words: popl %ebp ret .size bn_sub_part_words,.-.L_bn_sub_part_words_begin -.comm OPENSSL_ia32cap_P,16,4 +.comm OPENSSL_ia32cap_P,40,4 .section ".note.gnu.property", "a" .p2align 2 diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-gf2m.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-gf2m.S index 9ec7e16258..74f0986953 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-gf2m.S +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-gf2m.S @@ -354,7 +354,7 @@ bn_GF2m_mul_2x2: .byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 62,0 -.comm OPENSSL_ia32cap_P,16,4 +.comm OPENSSL_ia32cap_P,40,4 .section ".note.gnu.property", "a" .p2align 2 diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-mont.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-mont.S index 73fad9f75d..eedb0f4b97 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-mont.S +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-mont.S @@ -474,7 +474,7 @@ bn_mul_mont: .byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 .byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 .byte 111,114,103,62,0 -.comm OPENSSL_ia32cap_P,16,4 +.comm OPENSSL_ia32cap_P,40,4 .section ".note.gnu.property", "a" .p2align 2 diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/ec/ecp_nistz256-x86.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/ec/ecp_nistz256-x86.S index ccd96e4167..63f333ed91 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/ec/ecp_nistz256-x86.S +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/ec/ecp_nistz256-x86.S @@ -5270,7 +5270,7 @@ ecp_nistz256_point_add_affine: popl %ebp ret .size ecp_nistz256_point_add_affine,.-.L_ecp_nistz256_point_add_affine_begin -.comm OPENSSL_ia32cap_P,16,4 +.comm OPENSSL_ia32cap_P,40,4 .section ".note.gnu.property", "a" .p2align 2 diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha1-586.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha1-586.S index 9cfe5a4660..a3f81ed903 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha1-586.S +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha1-586.S @@ -3986,7 +3986,7 @@ _sha1_block_data_order_avx: .byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 .byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 .byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -.comm OPENSSL_ia32cap_P,16,4 +.comm OPENSSL_ia32cap_P,40,4 .section ".note.gnu.property", "a" .p2align 2 diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha256-586.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha256-586.S index 9253ab18d0..8497be1bb2 100644 
--- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha256-586.S +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha256-586.S @@ -6784,7 +6784,7 @@ sha256_block_data_order: popl %ebp ret .size sha256_block_data_order,.-.L_sha256_block_data_order_begin -.comm OPENSSL_ia32cap_P,16,4 +.comm OPENSSL_ia32cap_P,40,4 .section ".note.gnu.property", "a" .p2align 2 diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha512-586.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha512-586.S index 5e849365a6..74624d5a50 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha512-586.S +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/sha/sha512-586.S @@ -2830,7 +2830,7 @@ sha512_block_data_order: .byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 62,0 -.comm OPENSSL_ia32cap_P,16,4 +.comm OPENSSL_ia32cap_P,40,4 .section ".note.gnu.property", "a" .p2align 2 diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/x86cpuid.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/x86cpuid.S index 3a3533b0f1..76d15e57db 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/x86cpuid.S +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/x86cpuid.S @@ -122,6 +122,24 @@ OPENSSL_ia32_cpuid: xorl %ecx,%ecx .byte 0x0f,0xa2 movl %ebx,8(%edi) + movl %ecx,12(%edi) + movl %edx,16(%edi) + cmpl $1,%eax + jb .L005no_extended_info + movl $7,%eax + movl $1,%ecx + .byte 0x0f,0xa2 + movl %eax,20(%edi) + movl %edx,24(%edi) + movl %ebx,28(%edi) + movl %ecx,32(%edi) + andl $524288,%edx + cmpl $0,%edx + je .L005no_extended_info + movl $36,%eax + movl $0,%ecx + .byte 0x0f,0xa2 + movl %ebx,36(%edi) .L005no_extended_info: btl $27,%ebp jnc .L006clear_avx @@ -137,6 +155,7 @@ OPENSSL_ia32_cpuid: andl $4278190079,%esi .L006clear_avx: andl $4026525695,%ebp + andl $4286578687,20(%edi) andl $4294967263,8(%edi) .L007done: movl %esi,%eax @@ -577,7 +596,7 @@ OPENSSL_ia32_rdseed_bytes: .size OPENSSL_ia32_rdseed_bytes,.-.L_OPENSSL_ia32_rdseed_bytes_begin .hidden OPENSSL_cpuid_setup .hidden OPENSSL_ia32cap_P -.comm OPENSSL_ia32cap_P,16,4 +.comm OPENSSL_ia32cap_P,40,4 .section .init call OPENSSL_cpuid_setup diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aes-586.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aes-586.nasm index 6c21227279..71c812ab39 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aes-586.nasm +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aes-586.nasm @@ -3208,4 +3208,4 @@ db 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89 db 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 db 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 segment .bss -common _OPENSSL_ia32cap_P 16 +common _OPENSSL_ia32cap_P 40 diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aesni-x86.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aesni-x86.nasm index ca5544aebe..37198a2e5f 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aesni-x86.nasm +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/aes/aesni-x86.nasm @@ -3199,4 +3199,4 @@ db 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 db 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 db 115,108,46,111,114,103,62,0 segment .bss -common _OPENSSL_ia32cap_P 16 +common _OPENSSL_ia32cap_P 40 diff --git 
a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/bn-586.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/bn-586.nasm index e46d8457cf..d79fd8012a 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/bn-586.nasm +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/bn-586.nasm @@ -1512,4 +1512,4 @@ L$029pw_end: pop ebp ret segment .bss -common _OPENSSL_ia32cap_P 16 +common _OPENSSL_ia32cap_P 40 diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-gf2m.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-gf2m.nasm index 7750777de2..52be719fc0 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-gf2m.nasm +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-gf2m.nasm @@ -342,4 +342,4 @@ db 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 db 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 db 62,0 segment .bss -common _OPENSSL_ia32cap_P 16 +common _OPENSSL_ia32cap_P 40 diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-mont.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-mont.nasm index 6547b0d7fa..545085381b 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-mont.nasm +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-mont.nasm @@ -476,4 +476,4 @@ db 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 db 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 db 111,114,103,62,0 segment .bss -common _OPENSSL_ia32cap_P 16 +common _OPENSSL_ia32cap_P 40 diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/ec/ecp_nistz256-x86.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/ec/ecp_nistz256-x86.nasm index 1e487970e3..cd5381520d 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/ec/ecp_nistz256-x86.nasm +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/ec/ecp_nistz256-x86.nasm @@ -5125,4 +5125,4 @@ L$013pic: pop ebp ret segment .bss -common _OPENSSL_ia32cap_P 16 +common _OPENSSL_ia32cap_P 40 diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha1-586.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha1-586.nasm index 0d644acce0..41cc58af83 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha1-586.nasm +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha1-586.nasm @@ -3967,4 +3967,4 @@ db 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 db 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 db 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 segment .bss -common _OPENSSL_ia32cap_P 16 +common _OPENSSL_ia32cap_P 40 diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha256-586.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha256-586.nasm index 7d8398c7d3..ec4ac3e7d4 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha256-586.nasm +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha256-586.nasm @@ -6786,4 +6786,4 @@ L$018avx_bmi_00_47: pop ebp ret segment .bss -common _OPENSSL_ia32cap_P 16 +common _OPENSSL_ia32cap_P 40 diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha512-586.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha512-586.nasm index 9410d5c35d..d04b03ec2a 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha512-586.nasm +++ 
b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/sha/sha512-586.nasm @@ -2832,4 +2832,4 @@ db 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 db 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 db 62,0 segment .bss -common _OPENSSL_ia32cap_P 16 +common _OPENSSL_ia32cap_P 40 diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/x86cpuid.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/x86cpuid.nasm index 5f3599ce1b..a0ca9aeadc 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/x86cpuid.nasm +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/x86cpuid.nasm @@ -123,6 +123,24 @@ L$002generic: xor ecx,ecx cpuid mov DWORD [8+edi],ebx + mov DWORD [12+edi],ecx + mov DWORD [16+edi],edx + cmp eax,1 + jb NEAR L$005no_extended_info + mov eax,7 + mov ecx,1 + cpuid + mov DWORD [20+edi],eax + mov DWORD [24+edi],edx + mov DWORD [28+edi],ebx + mov DWORD [32+edi],ecx + and edx,524288 + cmp edx,0 + je NEAR L$005no_extended_info + mov eax,36 + mov ecx,0 + cpuid + mov DWORD [36+edi],ebx L$005no_extended_info: bt ebp,27 jnc NEAR L$006clear_avx @@ -138,6 +156,7 @@ L$008clear_xmm: and esi,4278190079 L$006clear_avx: and ebp,4026525695 + and DWORD [20+edi],4286578687 and DWORD [8+edi],4294967263 L$007done: mov eax,esi @@ -500,7 +519,7 @@ L$031done: pop edi ret segment .bss -common _OPENSSL_ia32cap_P 16 +common _OPENSSL_ia32cap_P 40 segment .CRT$XCU data align=4 extern _OPENSSL_cpuid_setup dd _OPENSSL_cpuid_setup diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/aes/aesni-xts-avx512.s b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/aes/aesni-xts-avx512.s new file mode 100644 index 0000000000..a07316de2d --- /dev/null +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/aes/aesni-xts-avx512.s @@ -0,0 +1,8145 @@ +.text + +.globl aesni_xts_avx512_eligible +.type aesni_xts_avx512_eligible,@function +.align 32 +aesni_xts_avx512_eligible: + movl OPENSSL_ia32cap_P+8(%rip),%ecx + xorl %eax,%eax + + andl $0xc0030000,%ecx + cmpl $0xc0030000,%ecx + jne .L_done + movl OPENSSL_ia32cap_P+12(%rip),%ecx + + andl $0x640,%ecx + cmpl $0x640,%ecx + cmovel %ecx,%eax +.L_done: + .byte 0xf3,0xc3 +.size aesni_xts_avx512_eligible, .-aesni_xts_avx512_eligible +.globl aesni_xts_128_encrypt_avx512 +.hidden aesni_xts_128_encrypt_avx512 +.type aesni_xts_128_encrypt_avx512,@function +.align 32 +aesni_xts_128_encrypt_avx512: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbp + movq %rsp,%rbp + subq $136,%rsp + andq $0xffffffffffffffc0,%rsp + movq %rbx,128(%rsp) + movq $0x87,%r10 + vmovdqu (%r9),%xmm1 + vpxor (%r8),%xmm1,%xmm1 + vaesenc 16(%r8),%xmm1,%xmm1 + vaesenc 32(%r8),%xmm1,%xmm1 + vaesenc 48(%r8),%xmm1,%xmm1 + vaesenc 64(%r8),%xmm1,%xmm1 + vaesenc 80(%r8),%xmm1,%xmm1 + vaesenc 96(%r8),%xmm1,%xmm1 + vaesenc 112(%r8),%xmm1,%xmm1 + vaesenc 128(%r8),%xmm1,%xmm1 + vaesenc 144(%r8),%xmm1,%xmm1 + vaesenclast 160(%r8),%xmm1,%xmm1 + vmovdqa %xmm1,(%rsp) + + cmpq $0x80,%rdx + jl .L_less_than_128_bytes_hEgxyDlCngwrfFe + vpbroadcastq %r10,%zmm25 + cmpq $0x100,%rdx + jge .L_start_by16_hEgxyDlCngwrfFe + cmpq $0x80,%rdx + jge .L_start_by8_hEgxyDlCngwrfFe + +.L_do_n_blocks_hEgxyDlCngwrfFe: + cmpq $0x0,%rdx + je .L_ret_hEgxyDlCngwrfFe + cmpq $0x70,%rdx + jge .L_remaining_num_blocks_is_7_hEgxyDlCngwrfFe + cmpq $0x60,%rdx + jge .L_remaining_num_blocks_is_6_hEgxyDlCngwrfFe + cmpq $0x50,%rdx + jge .L_remaining_num_blocks_is_5_hEgxyDlCngwrfFe + cmpq $0x40,%rdx + jge .L_remaining_num_blocks_is_4_hEgxyDlCngwrfFe + cmpq $0x30,%rdx + jge 
.L_remaining_num_blocks_is_3_hEgxyDlCngwrfFe + cmpq $0x20,%rdx + jge .L_remaining_num_blocks_is_2_hEgxyDlCngwrfFe + cmpq $0x10,%rdx + jge .L_remaining_num_blocks_is_1_hEgxyDlCngwrfFe + vmovdqa %xmm0,%xmm8 + vmovdqa %xmm9,%xmm0 + jmp .L_steal_cipher_hEgxyDlCngwrfFe + +.L_remaining_num_blocks_is_7_hEgxyDlCngwrfFe: + movq $0x0000ffffffffffff,%r8 + kmovq %r8,%k1 + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2{%k1} + addq $0x70,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi){%k1} + addq $0x70,%rsi + vextracti32x4 $0x2,%zmm2,%xmm8 + vextracti32x4 $0x3,%zmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe + +.L_remaining_num_blocks_is_6_hEgxyDlCngwrfFe: + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%ymm2 + addq $0x60,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %ymm2,64(%rsi) + addq $0x60,%rsi + vextracti32x4 $0x1,%zmm2,%xmm8 + vextracti32x4 $0x2,%zmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe + +.L_remaining_num_blocks_is_5_hEgxyDlCngwrfFe: + vmovdqu8 (%rdi),%zmm1 + vmovdqu 64(%rdi),%xmm2 + addq $0x50,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc 
%zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqu8 %zmm1,(%rsi) + vmovdqu %xmm2,64(%rsi) + addq $0x50,%rsi + vmovdqa %xmm2,%xmm8 + vextracti32x4 $0x1,%zmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe + +.L_remaining_num_blocks_is_4_hEgxyDlCngwrfFe: + vmovdqu8 (%rdi),%zmm1 + addq $0x40,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vpxorq %zmm9,%zmm1,%zmm1 + vmovdqu8 %zmm1,(%rsi) + addq $0x40,%rsi + vextracti32x4 $0x3,%zmm1,%xmm8 + vmovdqa64 %xmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe +.L_remaining_num_blocks_is_3_hEgxyDlCngwrfFe: + movq $-1,%r8 + shrq $0x10,%r8 + kmovq %r8,%k1 + vmovdqu8 (%rdi),%zmm1{%k1} + addq $0x30,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vpxorq %zmm9,%zmm1,%zmm1 + vmovdqu8 %zmm1,(%rsi){%k1} + addq $0x30,%rsi + vextracti32x4 $0x2,%zmm1,%xmm8 + vextracti32x4 $0x3,%zmm9,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe +.L_remaining_num_blocks_is_2_hEgxyDlCngwrfFe: + vmovdqu8 (%rdi),%ymm1 + addq $0x20,%rdi + vbroadcasti32x4 (%rcx),%ymm0 + vpternlogq $0x96,%ymm0,%ymm9,%ymm1 + vbroadcasti32x4 16(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 32(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 48(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 64(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 80(%rcx),%ymm0 + vaesenc 
%ymm0,%ymm1,%ymm1 + vbroadcasti32x4 96(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 112(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 128(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 144(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 160(%rcx),%ymm0 + vaesenclast %ymm0,%ymm1,%ymm1 + vpxorq %ymm9,%ymm1,%ymm1 + vmovdqu %ymm1,(%rsi) + addq $0x20,%rsi + vextracti32x4 $0x1,%zmm1,%xmm8 + vextracti32x4 $0x2,%zmm9,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe +.L_remaining_num_blocks_is_1_hEgxyDlCngwrfFe: + vmovdqu (%rdi),%xmm1 + addq $0x10,%rdi + vpxor %xmm9,%xmm1,%xmm1 + vpxor (%rcx),%xmm1,%xmm1 + vaesenc 16(%rcx),%xmm1,%xmm1 + vaesenc 32(%rcx),%xmm1,%xmm1 + vaesenc 48(%rcx),%xmm1,%xmm1 + vaesenc 64(%rcx),%xmm1,%xmm1 + vaesenc 80(%rcx),%xmm1,%xmm1 + vaesenc 96(%rcx),%xmm1,%xmm1 + vaesenc 112(%rcx),%xmm1,%xmm1 + vaesenc 128(%rcx),%xmm1,%xmm1 + vaesenc 144(%rcx),%xmm1,%xmm1 + vaesenclast 160(%rcx),%xmm1,%xmm1 + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu %xmm1,(%rsi) + addq $0x10,%rsi + vmovdqa %xmm1,%xmm8 + vextracti32x4 $0x1,%zmm9,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe + + +.L_start_by16_hEgxyDlCngwrfFe: + vbroadcasti32x4 (%rsp),%zmm0 + vbroadcasti32x4 shufb_15_7(%rip),%zmm8 + movq $0xaa,%r8 + kmovq %r8,%k2 + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + vpsrldq $0xf,%zmm9,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm9,%zmm11 + vpxord %zmm14,%zmm11,%zmm11 + vpsrldq $0xf,%zmm10,%zmm15 + vpclmulqdq $0x0,%zmm25,%zmm15,%zmm16 + vpslldq $0x1,%zmm10,%zmm12 + vpxord %zmm16,%zmm12,%zmm12 + +.L_main_loop_run_16_hEgxyDlCngwrfFe: + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2 + vmovdqu8 128(%rdi),%zmm3 + vmovdqu8 192(%rdi),%zmm4 + addq $0x100,%rdi + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vpxorq %zmm11,%zmm3,%zmm3 + vpxorq %zmm12,%zmm4,%zmm4 + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vpxorq %zmm0,%zmm3,%zmm3 + vpxorq %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm11,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm11,%zmm15 + vpxord %zmm14,%zmm15,%zmm15 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm12,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm12,%zmm16 + vpxord %zmm14,%zmm16,%zmm16 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vpsrldq 
$0xf,%zmm15,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm15,%zmm17 + vpxord %zmm14,%zmm17,%zmm17 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm16,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm16,%zmm18 + vpxord %zmm14,%zmm18,%zmm18 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vaesenclast %zmm0,%zmm3,%zmm3 + vaesenclast %zmm0,%zmm4,%zmm4 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vpxorq %zmm11,%zmm3,%zmm3 + vpxorq %zmm12,%zmm4,%zmm4 + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqa32 %zmm17,%zmm11 + vmovdqa32 %zmm18,%zmm12 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi) + vmovdqu8 %zmm3,128(%rsi) + vmovdqu8 %zmm4,192(%rsi) + addq $0x100,%rsi + subq $0x100,%rdx + cmpq $0x100,%rdx + jae .L_main_loop_run_16_hEgxyDlCngwrfFe + cmpq $0x80,%rdx + jae .L_main_loop_run_8_hEgxyDlCngwrfFe + vextracti32x4 $0x3,%zmm4,%xmm0 + jmp .L_do_n_blocks_hEgxyDlCngwrfFe + +.L_start_by8_hEgxyDlCngwrfFe: + vbroadcasti32x4 (%rsp),%zmm0 + vbroadcasti32x4 shufb_15_7(%rip),%zmm8 + movq $0xaa,%r8 + kmovq %r8,%k2 + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + +.L_main_loop_run_8_hEgxyDlCngwrfFe: + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2 + addq $0x80,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vpsrldq $0xf,%zmm9,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm9,%zmm15 + vpxord %zmm14,%zmm15,%zmm15 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vpsrldq $0xf,%zmm10,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm10,%zmm16 + vpxord %zmm14,%zmm16,%zmm16 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi) + addq $0x80,%rsi + subq $0x80,%rdx + cmpq $0x80,%rdx + jae 
.L_main_loop_run_8_hEgxyDlCngwrfFe + vextracti32x4 $0x3,%zmm2,%xmm0 + jmp .L_do_n_blocks_hEgxyDlCngwrfFe + +.L_steal_cipher_hEgxyDlCngwrfFe: + vmovdqa %xmm8,%xmm2 + leaq vpshufb_shf_table(%rip),%rax + vmovdqu (%rax,%rdx,1),%xmm10 + vpshufb %xmm10,%xmm8,%xmm8 + vmovdqu -16(%rdi,%rdx,1),%xmm3 + vmovdqu %xmm8,-16(%rsi,%rdx,1) + leaq vpshufb_shf_table(%rip),%rax + addq $16,%rax + subq %rdx,%rax + vmovdqu (%rax),%xmm10 + vpxor mask1(%rip),%xmm10,%xmm10 + vpshufb %xmm10,%xmm3,%xmm3 + vpblendvb %xmm10,%xmm2,%xmm3,%xmm3 + vpxor %xmm0,%xmm3,%xmm8 + vpxor (%rcx),%xmm8,%xmm8 + vaesenc 16(%rcx),%xmm8,%xmm8 + vaesenc 32(%rcx),%xmm8,%xmm8 + vaesenc 48(%rcx),%xmm8,%xmm8 + vaesenc 64(%rcx),%xmm8,%xmm8 + vaesenc 80(%rcx),%xmm8,%xmm8 + vaesenc 96(%rcx),%xmm8,%xmm8 + vaesenc 112(%rcx),%xmm8,%xmm8 + vaesenc 128(%rcx),%xmm8,%xmm8 + vaesenc 144(%rcx),%xmm8,%xmm8 + vaesenclast 160(%rcx),%xmm8,%xmm8 + vpxor %xmm0,%xmm8,%xmm8 + vmovdqu %xmm8,-16(%rsi) +.L_ret_hEgxyDlCngwrfFe: + movq 128(%rsp),%rbx + xorq %r8,%r8 + movq %r8,128(%rsp) + + vpxorq %zmm0,%zmm0,%zmm0 + movq %rbp,%rsp + popq %rbp + vzeroupper + .byte 0xf3,0xc3 + +.L_less_than_128_bytes_hEgxyDlCngwrfFe: + vpbroadcastq %r10,%zmm25 + cmpq $0x10,%rdx + jb .L_ret_hEgxyDlCngwrfFe + vbroadcasti32x4 (%rsp),%zmm0 + vbroadcasti32x4 shufb_15_7(%rip),%zmm8 + movl $0xaa,%r8d + kmovq %r8,%k2 + movq %rdx,%r8 + andq $0x70,%r8 + cmpq $0x60,%r8 + je .L_num_blocks_is_6_hEgxyDlCngwrfFe + cmpq $0x50,%r8 + je .L_num_blocks_is_5_hEgxyDlCngwrfFe + cmpq $0x40,%r8 + je .L_num_blocks_is_4_hEgxyDlCngwrfFe + cmpq $0x30,%r8 + je .L_num_blocks_is_3_hEgxyDlCngwrfFe + cmpq $0x20,%r8 + je .L_num_blocks_is_2_hEgxyDlCngwrfFe + cmpq $0x10,%r8 + je .L_num_blocks_is_1_hEgxyDlCngwrfFe + +.L_num_blocks_is_7_hEgxyDlCngwrfFe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + movq $0x0000ffffffffffff,%r8 + kmovq %r8,%k1 + vmovdqu8 0(%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2{%k1} + + addq $0x70,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqu8 %zmm1,0(%rsi) + vmovdqu8 %zmm2,64(%rsi){%k1} + addq $0x70,%rsi + vextracti32x4 $0x2,%zmm2,%xmm8 + vextracti32x4 $0x3,%zmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp 
.L_steal_cipher_hEgxyDlCngwrfFe +.L_num_blocks_is_6_hEgxyDlCngwrfFe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + vmovdqu8 0(%rdi),%zmm1 + vmovdqu8 64(%rdi),%ymm2 + addq $96,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqu8 %zmm1,0(%rsi) + vmovdqu8 %ymm2,64(%rsi) + addq $96,%rsi + + vextracti32x4 $0x1,%ymm2,%xmm8 + vextracti32x4 $0x2,%zmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe +.L_num_blocks_is_5_hEgxyDlCngwrfFe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + vmovdqu8 0(%rdi),%zmm1 + vmovdqu8 64(%rdi),%xmm2 + addq $80,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqu8 %zmm1,0(%rsi) + vmovdqu8 %xmm2,64(%rsi) + addq $80,%rsi + + vmovdqa %xmm2,%xmm8 + vextracti32x4 $0x1,%zmm10,%xmm0 + andq 
$0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe +.L_num_blocks_is_4_hEgxyDlCngwrfFe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + vmovdqu8 0(%rdi),%zmm1 + addq $64,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vpxorq %zmm9,%zmm1,%zmm1 + vmovdqu8 %zmm1,0(%rsi) + addq $64,%rsi + vextracti32x4 $0x3,%zmm1,%xmm8 + vmovdqa %xmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe +.L_num_blocks_is_3_hEgxyDlCngwrfFe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + movq $0x0000ffffffffffff,%r8 + kmovq %r8,%k1 + vmovdqu8 0(%rdi),%zmm1{%k1} + addq $48,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vpxorq %zmm9,%zmm1,%zmm1 + vmovdqu8 %zmm1,0(%rsi){%k1} + addq $48,%rsi + vextracti32x4 $2,%zmm1,%xmm8 + vextracti32x4 $3,%zmm9,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe +.L_num_blocks_is_2_hEgxyDlCngwrfFe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + + vmovdqu8 0(%rdi),%ymm1 + addq $32,%rdi + vbroadcasti32x4 (%rcx),%ymm0 + vpternlogq $0x96,%ymm0,%ymm9,%ymm1 + vbroadcasti32x4 16(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 32(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 48(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 64(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 80(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 96(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 112(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 128(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 144(%rcx),%ymm0 
+ vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 160(%rcx),%ymm0 + vaesenclast %ymm0,%ymm1,%ymm1 + vpxorq %ymm9,%ymm1,%ymm1 + vmovdqu8 %ymm1,0(%rsi) + addq $32,%rsi + + vextracti32x4 $1,%ymm1,%xmm8 + vextracti32x4 $2,%zmm9,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe +.L_num_blocks_is_1_hEgxyDlCngwrfFe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + + vmovdqu8 0(%rdi),%xmm1 + addq $16,%rdi + vbroadcasti32x4 (%rcx),%ymm0 + vpternlogq $0x96,%ymm0,%ymm9,%ymm1 + vbroadcasti32x4 16(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 32(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 48(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 64(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 80(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 96(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 112(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 128(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 144(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 160(%rcx),%ymm0 + vaesenclast %ymm0,%ymm1,%ymm1 + vpxorq %ymm9,%ymm1,%ymm1 + vmovdqu8 %xmm1,0(%rsi) + addq $16,%rsi + + vmovdqa %xmm1,%xmm8 + vextracti32x4 $1,%zmm9,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe +.cfi_endproc +.globl aesni_xts_128_decrypt_avx512 +.hidden aesni_xts_128_decrypt_avx512 +.type aesni_xts_128_decrypt_avx512,@function +.align 32 +aesni_xts_128_decrypt_avx512: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbp + movq %rsp,%rbp + subq $136,%rsp + andq $0xffffffffffffffc0,%rsp + movq %rbx,128(%rsp) + movq $0x87,%r10 + vmovdqu (%r9),%xmm1 + vpxor (%r8),%xmm1,%xmm1 + vaesenc 16(%r8),%xmm1,%xmm1 + vaesenc 32(%r8),%xmm1,%xmm1 + vaesenc 48(%r8),%xmm1,%xmm1 + vaesenc 64(%r8),%xmm1,%xmm1 + vaesenc 80(%r8),%xmm1,%xmm1 + vaesenc 96(%r8),%xmm1,%xmm1 + vaesenc 112(%r8),%xmm1,%xmm1 + vaesenc 128(%r8),%xmm1,%xmm1 + vaesenc 144(%r8),%xmm1,%xmm1 + vaesenclast 160(%r8),%xmm1,%xmm1 + vmovdqa %xmm1,(%rsp) + + cmpq $0x80,%rdx + jb .L_less_than_128_bytes_amivrujEyduiFoi + vpbroadcastq %r10,%zmm25 + cmpq $0x100,%rdx + jge .L_start_by16_amivrujEyduiFoi + jmp .L_start_by8_amivrujEyduiFoi + +.L_do_n_blocks_amivrujEyduiFoi: + cmpq $0x0,%rdx + je .L_ret_amivrujEyduiFoi + cmpq $0x70,%rdx + jge .L_remaining_num_blocks_is_7_amivrujEyduiFoi + cmpq $0x60,%rdx + jge .L_remaining_num_blocks_is_6_amivrujEyduiFoi + cmpq $0x50,%rdx + jge .L_remaining_num_blocks_is_5_amivrujEyduiFoi + cmpq $0x40,%rdx + jge .L_remaining_num_blocks_is_4_amivrujEyduiFoi + cmpq $0x30,%rdx + jge .L_remaining_num_blocks_is_3_amivrujEyduiFoi + cmpq $0x20,%rdx + jge .L_remaining_num_blocks_is_2_amivrujEyduiFoi + cmpq $0x10,%rdx + jge .L_remaining_num_blocks_is_1_amivrujEyduiFoi + + + vmovdqu %xmm5,%xmm1 + + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + 
vaesdeclast %xmm0,%xmm1,%xmm1 + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu %xmm1,-16(%rsi) + vmovdqa %xmm1,%xmm8 + + + movq $0x1,%r8 + kmovq %r8,%k1 + vpsllq $0x3f,%xmm9,%xmm13 + vpsraq $0x3f,%xmm13,%xmm14 + vpandq %xmm25,%xmm14,%xmm5 + vpxorq %xmm5,%xmm9,%xmm9{%k1} + vpsrldq $0x8,%xmm9,%xmm10 +.byte 98, 211, 181, 8, 115, 194, 1 + vpslldq $0x8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm0,%xmm0 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_remaining_num_blocks_is_7_amivrujEyduiFoi: + movq $0xffffffffffffffff,%r8 + shrq $0x10,%r8 + kmovq %r8,%k1 + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2{%k1} + addq $0x70,%rdi + andq $0xf,%rdx + je .L_done_7_remain_amivrujEyduiFoi + vextracti32x4 $0x2,%zmm10,%xmm12 + vextracti32x4 $0x3,%zmm10,%xmm13 + vinserti32x4 $0x2,%xmm13,%zmm10,%zmm10 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi){%k1} + addq $0x70,%rsi + vextracti32x4 $0x2,%zmm2,%xmm8 + vmovdqa %xmm12,%xmm0 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_7_remain_amivrujEyduiFoi: + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi){%k1} + jmp .L_ret_amivrujEyduiFoi + +.L_remaining_num_blocks_is_6_amivrujEyduiFoi: + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%ymm2 + addq 
$0x60,%rdi + andq $0xf,%rdx + je .L_done_6_remain_amivrujEyduiFoi + vextracti32x4 $0x1,%zmm10,%xmm12 + vextracti32x4 $0x2,%zmm10,%xmm13 + vinserti32x4 $0x1,%xmm13,%zmm10,%zmm10 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %ymm2,64(%rsi) + addq $0x60,%rsi + vextracti32x4 $0x1,%zmm2,%xmm8 + vmovdqa %xmm12,%xmm0 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_6_remain_amivrujEyduiFoi: + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %ymm2,64(%rsi) + jmp .L_ret_amivrujEyduiFoi + +.L_remaining_num_blocks_is_5_amivrujEyduiFoi: + vmovdqu8 (%rdi),%zmm1 + vmovdqu 64(%rdi),%xmm2 + addq $0x50,%rdi + andq $0xf,%rdx + je .L_done_5_remain_amivrujEyduiFoi + vmovdqa %xmm10,%xmm12 + vextracti32x4 $0x1,%zmm10,%xmm10 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec 
%zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu %xmm2,64(%rsi) + addq $0x50,%rsi + vmovdqa %xmm2,%xmm8 + vmovdqa %xmm12,%xmm0 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_5_remain_amivrujEyduiFoi: + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %xmm2,64(%rsi) + jmp .L_ret_amivrujEyduiFoi + +.L_remaining_num_blocks_is_4_amivrujEyduiFoi: + vmovdqu8 (%rdi),%zmm1 + addq $0x40,%rdi + andq $0xf,%rdx + je .L_done_4_remain_amivrujEyduiFoi + vextracti32x4 $0x3,%zmm9,%xmm12 + vinserti32x4 $0x3,%xmm10,%zmm9,%zmm9 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + 
vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + addq $0x40,%rsi + vextracti32x4 $0x3,%zmm1,%xmm8 + vmovdqa %xmm12,%xmm0 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_4_remain_amivrujEyduiFoi: + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + jmp .L_ret_amivrujEyduiFoi + +.L_remaining_num_blocks_is_3_amivrujEyduiFoi: + vmovdqu (%rdi),%xmm1 + vmovdqu 16(%rdi),%xmm2 + vmovdqu 32(%rdi),%xmm3 + addq $0x30,%rdi + andq $0xf,%rdx + je .L_done_3_remain_amivrujEyduiFoi + vextracti32x4 $0x2,%zmm9,%xmm13 + vextracti32x4 $0x1,%zmm9,%xmm10 + vextracti32x4 $0x3,%zmm9,%xmm11 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + addq $0x30,%rsi + vmovdqa %xmm3,%xmm8 + vmovdqa %xmm13,%xmm0 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_3_remain_amivrujEyduiFoi: + vextracti32x4 $0x1,%zmm9,%xmm10 + vextracti32x4 $0x2,%zmm9,%xmm11 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor 
%xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + jmp .L_ret_amivrujEyduiFoi + +.L_remaining_num_blocks_is_2_amivrujEyduiFoi: + vmovdqu (%rdi),%xmm1 + vmovdqu 16(%rdi),%xmm2 + addq $0x20,%rdi + andq $0xf,%rdx + je .L_done_2_remain_amivrujEyduiFoi + vextracti32x4 $0x2,%zmm9,%xmm10 + vextracti32x4 $0x1,%zmm9,%xmm12 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + addq $0x20,%rsi + vmovdqa %xmm2,%xmm8 + vmovdqa %xmm12,%xmm0 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_2_remain_amivrujEyduiFoi: + vextracti32x4 $0x1,%zmm9,%xmm10 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec 
%xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + jmp .L_ret_amivrujEyduiFoi + +.L_remaining_num_blocks_is_1_amivrujEyduiFoi: + vmovdqu (%rdi),%xmm1 + addq $0x10,%rdi + andq $0xf,%rdx + je .L_done_1_remain_amivrujEyduiFoi + vextracti32x4 $0x1,%zmm9,%xmm11 + vpxor %xmm11,%xmm1,%xmm1 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vpxor %xmm11,%xmm1,%xmm1 + vmovdqu %xmm1,(%rsi) + addq $0x10,%rsi + vmovdqa %xmm1,%xmm8 + vmovdqa %xmm9,%xmm0 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_1_remain_amivrujEyduiFoi: + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu %xmm1,(%rsi) + jmp .L_ret_amivrujEyduiFoi + +.L_start_by16_amivrujEyduiFoi: + vbroadcasti32x4 (%rsp),%zmm0 + vbroadcasti32x4 shufb_15_7(%rip),%zmm8 + movq $0xaa,%r8 + kmovq %r8,%k2 + + + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + + + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + + + vpsrldq $0xf,%zmm9,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm9,%zmm11 + vpxord %zmm14,%zmm11,%zmm11 + + vpsrldq $0xf,%zmm10,%zmm15 + vpclmulqdq $0x0,%zmm25,%zmm15,%zmm16 + vpslldq $0x1,%zmm10,%zmm12 + vpxord %zmm16,%zmm12,%zmm12 + +.L_main_loop_run_16_amivrujEyduiFoi: + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2 + vmovdqu8 128(%rdi),%zmm3 + vmovdqu8 192(%rdi),%zmm4 + vmovdqu8 240(%rdi),%xmm5 + addq $0x100,%rdi + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vpxorq %zmm11,%zmm3,%zmm3 + vpxorq %zmm12,%zmm4,%zmm4 + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vpxorq %zmm0,%zmm3,%zmm3 + vpxorq %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm11,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm11,%zmm15 + vpxord %zmm14,%zmm15,%zmm15 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 
+ vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm12,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm12,%zmm16 + vpxord %zmm14,%zmm16,%zmm16 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm15,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm15,%zmm17 + vpxord %zmm14,%zmm17,%zmm17 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm16,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm16,%zmm18 + vpxord %zmm14,%zmm18,%zmm18 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + vaesdeclast %zmm0,%zmm3,%zmm3 + vaesdeclast %zmm0,%zmm4,%zmm4 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vpxorq %zmm11,%zmm3,%zmm3 + vpxorq %zmm12,%zmm4,%zmm4 + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqa32 %zmm17,%zmm11 + vmovdqa32 %zmm18,%zmm12 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi) + vmovdqu8 %zmm3,128(%rsi) + vmovdqu8 %zmm4,192(%rsi) + addq $0x100,%rsi + subq $0x100,%rdx + cmpq $0x100,%rdx + jge .L_main_loop_run_16_amivrujEyduiFoi + + cmpq $0x80,%rdx + jge .L_main_loop_run_8_amivrujEyduiFoi + jmp .L_do_n_blocks_amivrujEyduiFoi + +.L_start_by8_amivrujEyduiFoi: + + vbroadcasti32x4 (%rsp),%zmm0 + vbroadcasti32x4 shufb_15_7(%rip),%zmm8 + movq $0xaa,%r8 + kmovq %r8,%k2 + + + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + + + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + +.L_main_loop_run_8_amivrujEyduiFoi: + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2 + vmovdqu8 112(%rdi),%xmm5 + addq $0x80,%rdi + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vpsrldq $0xf,%zmm9,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm9,%zmm15 + vpxord %zmm14,%zmm15,%zmm15 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vpsrldq $0xf,%zmm10,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm10,%zmm16 + vpxord %zmm14,%zmm16,%zmm16 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec 
%zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi) + addq $0x80,%rsi + subq $0x80,%rdx + cmpq $0x80,%rdx + jge .L_main_loop_run_8_amivrujEyduiFoi + jmp .L_do_n_blocks_amivrujEyduiFoi + +.L_steal_cipher_amivrujEyduiFoi: + + vmovdqa %xmm8,%xmm2 + + + leaq vpshufb_shf_table(%rip),%rax + vmovdqu (%rax,%rdx,1),%xmm10 + vpshufb %xmm10,%xmm8,%xmm8 + + + vmovdqu -16(%rdi,%rdx,1),%xmm3 + vmovdqu %xmm8,-16(%rsi,%rdx,1) + + + leaq vpshufb_shf_table(%rip),%rax + addq $16,%rax + subq %rdx,%rax + vmovdqu (%rax),%xmm10 + vpxor mask1(%rip),%xmm10,%xmm10 + vpshufb %xmm10,%xmm3,%xmm3 + + vpblendvb %xmm10,%xmm2,%xmm3,%xmm3 + + + vpxor %xmm0,%xmm3,%xmm8 + + + vpxor (%rcx),%xmm8,%xmm8 + vaesdec 16(%rcx),%xmm8,%xmm8 + vaesdec 32(%rcx),%xmm8,%xmm8 + vaesdec 48(%rcx),%xmm8,%xmm8 + vaesdec 64(%rcx),%xmm8,%xmm8 + vaesdec 80(%rcx),%xmm8,%xmm8 + vaesdec 96(%rcx),%xmm8,%xmm8 + vaesdec 112(%rcx),%xmm8,%xmm8 + vaesdec 128(%rcx),%xmm8,%xmm8 + vaesdec 144(%rcx),%xmm8,%xmm8 + vaesdeclast 160(%rcx),%xmm8,%xmm8 + + vpxor %xmm0,%xmm8,%xmm8 + +.L_done_amivrujEyduiFoi: + + vmovdqu %xmm8,-16(%rsi) +.L_ret_amivrujEyduiFoi: + movq 128(%rsp),%rbx + xorq %r8,%r8 + movq %r8,128(%rsp) + + vpxorq %zmm0,%zmm0,%zmm0 + movq %rbp,%rsp + popq %rbp + vzeroupper + .byte 0xf3,0xc3 + +.L_less_than_128_bytes_amivrujEyduiFoi: + cmpq $0x10,%rdx + jb .L_ret_amivrujEyduiFoi + + movq %rdx,%r8 + andq $0x70,%r8 + cmpq $0x60,%r8 + je .L_num_blocks_is_6_amivrujEyduiFoi + cmpq $0x50,%r8 + je .L_num_blocks_is_5_amivrujEyduiFoi + cmpq $0x40,%r8 + je .L_num_blocks_is_4_amivrujEyduiFoi + cmpq $0x30,%r8 + je .L_num_blocks_is_3_amivrujEyduiFoi + cmpq $0x20,%r8 + je .L_num_blocks_is_2_amivrujEyduiFoi + cmpq $0x10,%r8 + je .L_num_blocks_is_1_amivrujEyduiFoi + +.L_num_blocks_is_7_amivrujEyduiFoi: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,16 + 8(%rsp) + vmovdqa 16(%rsp),%xmm10 + vmovdqu 16(%rdi),%xmm2 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,32(%rsp) + movq %rbx,32 + 8(%rsp) + vmovdqa 32(%rsp),%xmm11 + vmovdqu 32(%rdi),%xmm3 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,48(%rsp) + movq %rbx,48 + 8(%rsp) + vmovdqa 48(%rsp),%xmm12 + vmovdqu 48(%rdi),%xmm4 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,64(%rsp) + movq %rbx,64 + 8(%rsp) + vmovdqa 64(%rsp),%xmm13 + vmovdqu 64(%rdi),%xmm5 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,80(%rsp) + movq %rbx,80 + 8(%rsp) + vmovdqa 80(%rsp),%xmm14 + vmovdqu 80(%rdi),%xmm6 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq 
%r11,%rax + movq %rax,96(%rsp) + movq %rbx,96 + 8(%rsp) + vmovdqa 96(%rsp),%xmm15 + vmovdqu 96(%rdi),%xmm7 + addq $0x70,%rdi + andq $0xf,%rdx + je .L_done_7_amivrujEyduiFoi + +.L_steal_cipher_7_amivrujEyduiFoi: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm15,%xmm16 + vmovdqa 16(%rsp),%xmm15 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm15,%xmm7,%xmm7 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vpxor %xmm0,%xmm5,%xmm5 + vpxor %xmm0,%xmm6,%xmm6 + vpxor %xmm0,%xmm7,%xmm7 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vaesdeclast %xmm0,%xmm6,%xmm6 + vaesdeclast %xmm0,%xmm7,%xmm7 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm15,%xmm7,%xmm7 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + vmovdqu %xmm5,64(%rsi) + vmovdqu %xmm6,80(%rsi) + addq $0x70,%rsi + vmovdqa64 %xmm16,%xmm0 + vmovdqa %xmm7,%xmm8 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_7_amivrujEyduiFoi: + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor 
%xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm15,%xmm7,%xmm7 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vpxor %xmm0,%xmm5,%xmm5 + vpxor %xmm0,%xmm6,%xmm6 + vpxor %xmm0,%xmm7,%xmm7 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vaesdeclast %xmm0,%xmm6,%xmm6 + vaesdeclast %xmm0,%xmm7,%xmm7 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm15,%xmm7,%xmm7 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + vmovdqu %xmm5,64(%rsi) + vmovdqu %xmm6,80(%rsi) + addq $0x70,%rsi + vmovdqa %xmm7,%xmm8 + jmp .L_done_amivrujEyduiFoi + +.L_num_blocks_is_6_amivrujEyduiFoi: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,16 + 8(%rsp) + vmovdqa 16(%rsp),%xmm10 + vmovdqu 16(%rdi),%xmm2 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,32(%rsp) + movq %rbx,32 + 8(%rsp) + vmovdqa 32(%rsp),%xmm11 + vmovdqu 32(%rdi),%xmm3 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq 
%r11,%rax + movq %rax,48(%rsp) + movq %rbx,48 + 8(%rsp) + vmovdqa 48(%rsp),%xmm12 + vmovdqu 48(%rdi),%xmm4 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,64(%rsp) + movq %rbx,64 + 8(%rsp) + vmovdqa 64(%rsp),%xmm13 + vmovdqu 64(%rdi),%xmm5 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,80(%rsp) + movq %rbx,80 + 8(%rsp) + vmovdqa 80(%rsp),%xmm14 + vmovdqu 80(%rdi),%xmm6 + addq $0x60,%rdi + andq $0xf,%rdx + je .L_done_6_amivrujEyduiFoi + +.L_steal_cipher_6_amivrujEyduiFoi: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm14,%xmm15 + vmovdqa 16(%rsp),%xmm14 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vpxor %xmm0,%xmm5,%xmm5 + vpxor %xmm0,%xmm6,%xmm6 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vaesdeclast %xmm0,%xmm6,%xmm6 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + vmovdqu %xmm5,64(%rsi) + addq $0x60,%rsi + vmovdqa %xmm15,%xmm0 + vmovdqa %xmm6,%xmm8 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_6_amivrujEyduiFoi: + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor 
%xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vpxor %xmm0,%xmm5,%xmm5 + vpxor %xmm0,%xmm6,%xmm6 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vaesdeclast %xmm0,%xmm6,%xmm6 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + vmovdqu %xmm5,64(%rsi) + addq $0x60,%rsi + vmovdqa %xmm6,%xmm8 + jmp .L_done_amivrujEyduiFoi + +.L_num_blocks_is_5_amivrujEyduiFoi: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,16 + 8(%rsp) + vmovdqa 16(%rsp),%xmm10 + vmovdqu 16(%rdi),%xmm2 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,32(%rsp) + movq %rbx,32 + 8(%rsp) + vmovdqa 32(%rsp),%xmm11 + vmovdqu 32(%rdi),%xmm3 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,48(%rsp) + movq %rbx,48 + 8(%rsp) + vmovdqa 48(%rsp),%xmm12 + vmovdqu 48(%rdi),%xmm4 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,64(%rsp) + movq %rbx,64 + 8(%rsp) + vmovdqa 64(%rsp),%xmm13 + vmovdqu 64(%rdi),%xmm5 + addq $0x50,%rdi + andq $0xf,%rdx + je .L_done_5_amivrujEyduiFoi + +.L_steal_cipher_5_amivrujEyduiFoi: + xorq %r11,%r11 + shlq $1,%rax 
+ adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm13,%xmm14 + vmovdqa 16(%rsp),%xmm13 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vpxor %xmm0,%xmm5,%xmm5 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + addq $0x50,%rsi + vmovdqa %xmm14,%xmm0 + vmovdqa %xmm5,%xmm8 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_5_amivrujEyduiFoi: + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vpxor %xmm0,%xmm5,%xmm5 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 
+ vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + addq $0x50,%rsi + vmovdqa %xmm5,%xmm8 + jmp .L_done_amivrujEyduiFoi + +.L_num_blocks_is_4_amivrujEyduiFoi: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,16 + 8(%rsp) + vmovdqa 16(%rsp),%xmm10 + vmovdqu 16(%rdi),%xmm2 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,32(%rsp) + movq %rbx,32 + 8(%rsp) + vmovdqa 32(%rsp),%xmm11 + vmovdqu 32(%rdi),%xmm3 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,48(%rsp) + movq %rbx,48 + 8(%rsp) + vmovdqa 48(%rsp),%xmm12 + vmovdqu 48(%rdi),%xmm4 + addq $0x40,%rdi + andq $0xf,%rdx + je .L_done_4_amivrujEyduiFoi + +.L_steal_cipher_4_amivrujEyduiFoi: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm12,%xmm13 + vmovdqa 16(%rsp),%xmm12 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + 
vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + addq $0x40,%rsi + vmovdqa %xmm13,%xmm0 + vmovdqa %xmm4,%xmm8 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_4_amivrujEyduiFoi: + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + addq $0x40,%rsi + vmovdqa %xmm4,%xmm8 + jmp .L_done_amivrujEyduiFoi + +.L_num_blocks_is_3_amivrujEyduiFoi: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,16 + 8(%rsp) + vmovdqa 16(%rsp),%xmm10 + vmovdqu 16(%rdi),%xmm2 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,32(%rsp) + movq %rbx,32 + 8(%rsp) + vmovdqa 32(%rsp),%xmm11 + vmovdqu 32(%rdi),%xmm3 + addq $0x30,%rdi + andq $0xf,%rdx + je .L_done_3_amivrujEyduiFoi + +.L_steal_cipher_3_amivrujEyduiFoi: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm11,%xmm12 + vmovdqa 16(%rsp),%xmm11 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec 
%xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + addq $0x30,%rsi + vmovdqa %xmm12,%xmm0 + vmovdqa %xmm3,%xmm8 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_3_amivrujEyduiFoi: + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + addq $0x30,%rsi + vmovdqa %xmm3,%xmm8 + jmp .L_done_amivrujEyduiFoi + +.L_num_blocks_is_2_amivrujEyduiFoi: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,16 + 8(%rsp) + vmovdqa 16(%rsp),%xmm10 + vmovdqu 16(%rdi),%xmm2 + addq $0x20,%rdi + andq $0xf,%rdx + je .L_done_2_amivrujEyduiFoi + +.L_steal_cipher_2_amivrujEyduiFoi: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm10,%xmm11 + vmovdqa 16(%rsp),%xmm10 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 
48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu %xmm1,(%rsi) + addq $0x20,%rsi + vmovdqa %xmm11,%xmm0 + vmovdqa %xmm2,%xmm8 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_2_amivrujEyduiFoi: + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu %xmm1,(%rsi) + addq $0x20,%rsi + vmovdqa %xmm2,%xmm8 + jmp .L_done_amivrujEyduiFoi + +.L_num_blocks_is_1_amivrujEyduiFoi: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + addq $0x10,%rdi + andq $0xf,%rdx + je .L_done_1_amivrujEyduiFoi + +.L_steal_cipher_1_amivrujEyduiFoi: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm9,%xmm10 + vmovdqa 16(%rsp),%xmm9 + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vpxor %xmm9,%xmm1,%xmm1 + addq $0x10,%rsi + vmovdqa %xmm10,%xmm0 + vmovdqa %xmm1,%xmm8 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_1_amivrujEyduiFoi: + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + 
vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vpxor %xmm9,%xmm1,%xmm1 + addq $0x10,%rsi + vmovdqa %xmm1,%xmm8 + jmp .L_done_amivrujEyduiFoi +.cfi_endproc +.globl aesni_xts_256_encrypt_avx512 +.hidden aesni_xts_256_encrypt_avx512 +.type aesni_xts_256_encrypt_avx512,@function +.align 32 +aesni_xts_256_encrypt_avx512: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbp + movq %rsp,%rbp + subq $136,%rsp + andq $0xffffffffffffffc0,%rsp + movq %rbx,128(%rsp) + movq $0x87,%r10 + vmovdqu (%r9),%xmm1 + vpxor (%r8),%xmm1,%xmm1 + vaesenc 16(%r8),%xmm1,%xmm1 + vaesenc 32(%r8),%xmm1,%xmm1 + vaesenc 48(%r8),%xmm1,%xmm1 + vaesenc 64(%r8),%xmm1,%xmm1 + vaesenc 80(%r8),%xmm1,%xmm1 + vaesenc 96(%r8),%xmm1,%xmm1 + vaesenc 112(%r8),%xmm1,%xmm1 + vaesenc 128(%r8),%xmm1,%xmm1 + vaesenc 144(%r8),%xmm1,%xmm1 + vaesenc 160(%r8),%xmm1,%xmm1 + vaesenc 176(%r8),%xmm1,%xmm1 + vaesenc 192(%r8),%xmm1,%xmm1 + vaesenc 208(%r8),%xmm1,%xmm1 + vaesenclast 224(%r8),%xmm1,%xmm1 + vmovdqa %xmm1,(%rsp) + + cmpq $0x80,%rdx + jl .L_less_than_128_bytes_wcpqaDvsGlbjGoe + vpbroadcastq %r10,%zmm25 + cmpq $0x100,%rdx + jge .L_start_by16_wcpqaDvsGlbjGoe + cmpq $0x80,%rdx + jge .L_start_by8_wcpqaDvsGlbjGoe + +.L_do_n_blocks_wcpqaDvsGlbjGoe: + cmpq $0x0,%rdx + je .L_ret_wcpqaDvsGlbjGoe + cmpq $0x70,%rdx + jge .L_remaining_num_blocks_is_7_wcpqaDvsGlbjGoe + cmpq $0x60,%rdx + jge .L_remaining_num_blocks_is_6_wcpqaDvsGlbjGoe + cmpq $0x50,%rdx + jge .L_remaining_num_blocks_is_5_wcpqaDvsGlbjGoe + cmpq $0x40,%rdx + jge .L_remaining_num_blocks_is_4_wcpqaDvsGlbjGoe + cmpq $0x30,%rdx + jge .L_remaining_num_blocks_is_3_wcpqaDvsGlbjGoe + cmpq $0x20,%rdx + jge .L_remaining_num_blocks_is_2_wcpqaDvsGlbjGoe + cmpq $0x10,%rdx + jge .L_remaining_num_blocks_is_1_wcpqaDvsGlbjGoe + vmovdqa %xmm0,%xmm8 + vmovdqa %xmm9,%xmm0 + jmp .L_steal_cipher_wcpqaDvsGlbjGoe + +.L_remaining_num_blocks_is_7_wcpqaDvsGlbjGoe: + movq $0x0000ffffffffffff,%r8 + kmovq %r8,%k1 + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2{%k1} + addq $0x70,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + 
vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi){%k1} + addq $0x70,%rsi + vextracti32x4 $0x2,%zmm2,%xmm8 + vextracti32x4 $0x3,%zmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe + +.L_remaining_num_blocks_is_6_wcpqaDvsGlbjGoe: + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%ymm2 + addq $0x60,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %ymm2,64(%rsi) + addq $0x60,%rsi + vextracti32x4 $0x1,%zmm2,%xmm8 + vextracti32x4 $0x2,%zmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe + +.L_remaining_num_blocks_is_5_wcpqaDvsGlbjGoe: + vmovdqu8 (%rdi),%zmm1 + vmovdqu 64(%rdi),%xmm2 + addq $0x50,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + 
vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqu8 %zmm1,(%rsi) + vmovdqu %xmm2,64(%rsi) + addq $0x50,%rsi + vmovdqa %xmm2,%xmm8 + vextracti32x4 $0x1,%zmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe + +.L_remaining_num_blocks_is_4_wcpqaDvsGlbjGoe: + vmovdqu8 (%rdi),%zmm1 + addq $0x40,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vpxorq %zmm9,%zmm1,%zmm1 + vmovdqu8 %zmm1,(%rsi) + addq $0x40,%rsi + vextracti32x4 $0x3,%zmm1,%xmm8 + vmovdqa64 %xmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe +.L_remaining_num_blocks_is_3_wcpqaDvsGlbjGoe: + movq $-1,%r8 + shrq $0x10,%r8 + kmovq %r8,%k1 + vmovdqu8 (%rdi),%zmm1{%k1} + addq $0x30,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vpxorq %zmm9,%zmm1,%zmm1 + vmovdqu8 %zmm1,(%rsi){%k1} + addq $0x30,%rsi + vextracti32x4 $0x2,%zmm1,%xmm8 + vextracti32x4 $0x3,%zmm9,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe +.L_remaining_num_blocks_is_2_wcpqaDvsGlbjGoe: + vmovdqu8 (%rdi),%ymm1 + addq $0x20,%rdi + vbroadcasti32x4 (%rcx),%ymm0 + vpternlogq $0x96,%ymm0,%ymm9,%ymm1 + vbroadcasti32x4 16(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 32(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 48(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 64(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 80(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 96(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 112(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 128(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 144(%rcx),%ymm0 + vaesenc 
%ymm0,%ymm1,%ymm1 + vbroadcasti32x4 160(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 176(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 192(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 208(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 224(%rcx),%ymm0 + vaesenclast %ymm0,%ymm1,%ymm1 + vpxorq %ymm9,%ymm1,%ymm1 + vmovdqu %ymm1,(%rsi) + addq $0x20,%rsi + vextracti32x4 $0x1,%zmm1,%xmm8 + vextracti32x4 $0x2,%zmm9,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe +.L_remaining_num_blocks_is_1_wcpqaDvsGlbjGoe: + vmovdqu (%rdi),%xmm1 + addq $0x10,%rdi + vpxor %xmm9,%xmm1,%xmm1 + vpxor (%rcx),%xmm1,%xmm1 + vaesenc 16(%rcx),%xmm1,%xmm1 + vaesenc 32(%rcx),%xmm1,%xmm1 + vaesenc 48(%rcx),%xmm1,%xmm1 + vaesenc 64(%rcx),%xmm1,%xmm1 + vaesenc 80(%rcx),%xmm1,%xmm1 + vaesenc 96(%rcx),%xmm1,%xmm1 + vaesenc 112(%rcx),%xmm1,%xmm1 + vaesenc 128(%rcx),%xmm1,%xmm1 + vaesenc 144(%rcx),%xmm1,%xmm1 + vaesenc 160(%rcx),%xmm1,%xmm1 + vaesenc 176(%rcx),%xmm1,%xmm1 + vaesenc 192(%rcx),%xmm1,%xmm1 + vaesenc 208(%rcx),%xmm1,%xmm1 + vaesenclast 224(%rcx),%xmm1,%xmm1 + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu %xmm1,(%rsi) + addq $0x10,%rsi + vmovdqa %xmm1,%xmm8 + vextracti32x4 $0x1,%zmm9,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe + + +.L_start_by16_wcpqaDvsGlbjGoe: + vbroadcasti32x4 (%rsp),%zmm0 + vbroadcasti32x4 shufb_15_7(%rip),%zmm8 + movq $0xaa,%r8 + kmovq %r8,%k2 + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + vpsrldq $0xf,%zmm9,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm9,%zmm11 + vpxord %zmm14,%zmm11,%zmm11 + vpsrldq $0xf,%zmm10,%zmm15 + vpclmulqdq $0x0,%zmm25,%zmm15,%zmm16 + vpslldq $0x1,%zmm10,%zmm12 + vpxord %zmm16,%zmm12,%zmm12 + +.L_main_loop_run_16_wcpqaDvsGlbjGoe: + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2 + vmovdqu8 128(%rdi),%zmm3 + vmovdqu8 192(%rdi),%zmm4 + addq $0x100,%rdi + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vpxorq %zmm11,%zmm3,%zmm3 + vpxorq %zmm12,%zmm4,%zmm4 + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vpxorq %zmm0,%zmm3,%zmm3 + vpxorq %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm11,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm11,%zmm15 + vpxord %zmm14,%zmm15,%zmm15 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm12,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm12,%zmm16 + vpxord %zmm14,%zmm16,%zmm16 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc 
%zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm15,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm15,%zmm17 + vpxord %zmm14,%zmm17,%zmm17 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm16,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm16,%zmm18 + vpxord %zmm14,%zmm18,%zmm18 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vaesenclast %zmm0,%zmm3,%zmm3 + vaesenclast %zmm0,%zmm4,%zmm4 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vpxorq %zmm11,%zmm3,%zmm3 + vpxorq %zmm12,%zmm4,%zmm4 + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqa32 %zmm17,%zmm11 + vmovdqa32 %zmm18,%zmm12 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi) + vmovdqu8 %zmm3,128(%rsi) + vmovdqu8 %zmm4,192(%rsi) + addq $0x100,%rsi + subq $0x100,%rdx + cmpq $0x100,%rdx + jae .L_main_loop_run_16_wcpqaDvsGlbjGoe + cmpq $0x80,%rdx + jae .L_main_loop_run_8_wcpqaDvsGlbjGoe + vextracti32x4 $0x3,%zmm4,%xmm0 + jmp .L_do_n_blocks_wcpqaDvsGlbjGoe + +.L_start_by8_wcpqaDvsGlbjGoe: + vbroadcasti32x4 (%rsp),%zmm0 + vbroadcasti32x4 shufb_15_7(%rip),%zmm8 + movq $0xaa,%r8 + kmovq %r8,%k2 + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + +.L_main_loop_run_8_wcpqaDvsGlbjGoe: + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2 + addq $0x80,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vpsrldq $0xf,%zmm9,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm9,%zmm15 + vpxord %zmm14,%zmm15,%zmm15 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vpsrldq $0xf,%zmm10,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm10,%zmm16 + vpxord %zmm14,%zmm16,%zmm16 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + 
vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi) + addq $0x80,%rsi + subq $0x80,%rdx + cmpq $0x80,%rdx + jae .L_main_loop_run_8_wcpqaDvsGlbjGoe + vextracti32x4 $0x3,%zmm2,%xmm0 + jmp .L_do_n_blocks_wcpqaDvsGlbjGoe + +.L_steal_cipher_wcpqaDvsGlbjGoe: + vmovdqa %xmm8,%xmm2 + leaq vpshufb_shf_table(%rip),%rax + vmovdqu (%rax,%rdx,1),%xmm10 + vpshufb %xmm10,%xmm8,%xmm8 + vmovdqu -16(%rdi,%rdx,1),%xmm3 + vmovdqu %xmm8,-16(%rsi,%rdx,1) + leaq vpshufb_shf_table(%rip),%rax + addq $16,%rax + subq %rdx,%rax + vmovdqu (%rax),%xmm10 + vpxor mask1(%rip),%xmm10,%xmm10 + vpshufb %xmm10,%xmm3,%xmm3 + vpblendvb %xmm10,%xmm2,%xmm3,%xmm3 + vpxor %xmm0,%xmm3,%xmm8 + vpxor (%rcx),%xmm8,%xmm8 + vaesenc 16(%rcx),%xmm8,%xmm8 + vaesenc 32(%rcx),%xmm8,%xmm8 + vaesenc 48(%rcx),%xmm8,%xmm8 + vaesenc 64(%rcx),%xmm8,%xmm8 + vaesenc 80(%rcx),%xmm8,%xmm8 + vaesenc 96(%rcx),%xmm8,%xmm8 + vaesenc 112(%rcx),%xmm8,%xmm8 + vaesenc 128(%rcx),%xmm8,%xmm8 + vaesenc 144(%rcx),%xmm8,%xmm8 + vaesenc 160(%rcx),%xmm8,%xmm8 + vaesenc 176(%rcx),%xmm8,%xmm8 + vaesenc 192(%rcx),%xmm8,%xmm8 + vaesenc 208(%rcx),%xmm8,%xmm8 + vaesenclast 224(%rcx),%xmm8,%xmm8 + vpxor %xmm0,%xmm8,%xmm8 + vmovdqu %xmm8,-16(%rsi) +.L_ret_wcpqaDvsGlbjGoe: + movq 128(%rsp),%rbx + xorq %r8,%r8 + movq %r8,128(%rsp) + + vpxorq %zmm0,%zmm0,%zmm0 + movq %rbp,%rsp + popq %rbp + vzeroupper + .byte 0xf3,0xc3 + +.L_less_than_128_bytes_wcpqaDvsGlbjGoe: + vpbroadcastq %r10,%zmm25 + cmpq $0x10,%rdx + jb .L_ret_wcpqaDvsGlbjGoe + vbroadcasti32x4 (%rsp),%zmm0 + vbroadcasti32x4 shufb_15_7(%rip),%zmm8 + movl $0xaa,%r8d + kmovq %r8,%k2 + movq %rdx,%r8 + andq $0x70,%r8 + cmpq $0x60,%r8 + je .L_num_blocks_is_6_wcpqaDvsGlbjGoe + cmpq $0x50,%r8 + je .L_num_blocks_is_5_wcpqaDvsGlbjGoe + cmpq $0x40,%r8 + je .L_num_blocks_is_4_wcpqaDvsGlbjGoe + cmpq $0x30,%r8 + je .L_num_blocks_is_3_wcpqaDvsGlbjGoe + cmpq $0x20,%r8 + je .L_num_blocks_is_2_wcpqaDvsGlbjGoe + cmpq $0x10,%r8 + je .L_num_blocks_is_1_wcpqaDvsGlbjGoe + +.L_num_blocks_is_7_wcpqaDvsGlbjGoe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + movq $0x0000ffffffffffff,%r8 + kmovq %r8,%k1 + vmovdqu8 0(%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2{%k1} + + addq $0x70,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 
+ vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqu8 %zmm1,0(%rsi) + vmovdqu8 %zmm2,64(%rsi){%k1} + addq $0x70,%rsi + vextracti32x4 $0x2,%zmm2,%xmm8 + vextracti32x4 $0x3,%zmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe +.L_num_blocks_is_6_wcpqaDvsGlbjGoe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + vmovdqu8 0(%rdi),%zmm1 + vmovdqu8 64(%rdi),%ymm2 + addq $96,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + 
vmovdqu8 %zmm1,0(%rsi) + vmovdqu8 %ymm2,64(%rsi) + addq $96,%rsi + + vextracti32x4 $0x1,%ymm2,%xmm8 + vextracti32x4 $0x2,%zmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe +.L_num_blocks_is_5_wcpqaDvsGlbjGoe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + vmovdqu8 0(%rdi),%zmm1 + vmovdqu8 64(%rdi),%xmm2 + addq $80,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqu8 %zmm1,0(%rsi) + vmovdqu8 %xmm2,64(%rsi) + addq $80,%rsi + + vmovdqa %xmm2,%xmm8 + vextracti32x4 $0x1,%zmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe +.L_num_blocks_is_4_wcpqaDvsGlbjGoe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + vmovdqu8 0(%rdi),%zmm1 + addq $64,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 
176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vpxorq %zmm9,%zmm1,%zmm1 + vmovdqu8 %zmm1,0(%rsi) + addq $64,%rsi + vextracti32x4 $0x3,%zmm1,%xmm8 + vmovdqa %xmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe +.L_num_blocks_is_3_wcpqaDvsGlbjGoe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + movq $0x0000ffffffffffff,%r8 + kmovq %r8,%k1 + vmovdqu8 0(%rdi),%zmm1{%k1} + addq $48,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vpxorq %zmm9,%zmm1,%zmm1 + vmovdqu8 %zmm1,0(%rsi){%k1} + addq $48,%rsi + vextracti32x4 $2,%zmm1,%xmm8 + vextracti32x4 $3,%zmm9,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe +.L_num_blocks_is_2_wcpqaDvsGlbjGoe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + + vmovdqu8 0(%rdi),%ymm1 + addq $32,%rdi + vbroadcasti32x4 (%rcx),%ymm0 + vpternlogq $0x96,%ymm0,%ymm9,%ymm1 + vbroadcasti32x4 16(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 32(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 48(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 64(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 80(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 96(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 112(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 128(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 144(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 160(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 176(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 192(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 208(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 224(%rcx),%ymm0 + vaesenclast %ymm0,%ymm1,%ymm1 + vpxorq %ymm9,%ymm1,%ymm1 + vmovdqu8 %ymm1,0(%rsi) + addq $32,%rsi + + vextracti32x4 $1,%ymm1,%xmm8 + vextracti32x4 $2,%zmm9,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe +.L_num_blocks_is_1_wcpqaDvsGlbjGoe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + 
vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + + vmovdqu8 0(%rdi),%xmm1 + addq $16,%rdi + vbroadcasti32x4 (%rcx),%ymm0 + vpternlogq $0x96,%ymm0,%ymm9,%ymm1 + vbroadcasti32x4 16(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 32(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 48(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 64(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 80(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 96(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 112(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 128(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 144(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 160(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 176(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 192(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 208(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 224(%rcx),%ymm0 + vaesenclast %ymm0,%ymm1,%ymm1 + vpxorq %ymm9,%ymm1,%ymm1 + vmovdqu8 %xmm1,0(%rsi) + addq $16,%rsi + + vmovdqa %xmm1,%xmm8 + vextracti32x4 $1,%zmm9,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe +.cfi_endproc +.globl aesni_xts_256_decrypt_avx512 +.hidden aesni_xts_256_decrypt_avx512 +.type aesni_xts_256_decrypt_avx512,@function +.align 32 +aesni_xts_256_decrypt_avx512: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbp + movq %rsp,%rbp + subq $136,%rsp + andq $0xffffffffffffffc0,%rsp + movq %rbx,128(%rsp) + movq $0x87,%r10 + vmovdqu (%r9),%xmm1 + vpxor (%r8),%xmm1,%xmm1 + vaesenc 16(%r8),%xmm1,%xmm1 + vaesenc 32(%r8),%xmm1,%xmm1 + vaesenc 48(%r8),%xmm1,%xmm1 + vaesenc 64(%r8),%xmm1,%xmm1 + vaesenc 80(%r8),%xmm1,%xmm1 + vaesenc 96(%r8),%xmm1,%xmm1 + vaesenc 112(%r8),%xmm1,%xmm1 + vaesenc 128(%r8),%xmm1,%xmm1 + vaesenc 144(%r8),%xmm1,%xmm1 + vaesenc 160(%r8),%xmm1,%xmm1 + vaesenc 176(%r8),%xmm1,%xmm1 + vaesenc 192(%r8),%xmm1,%xmm1 + vaesenc 208(%r8),%xmm1,%xmm1 + vaesenclast 224(%r8),%xmm1,%xmm1 + vmovdqa %xmm1,(%rsp) + + cmpq $0x80,%rdx + jb .L_less_than_128_bytes_EmbgEptodyewbFa + vpbroadcastq %r10,%zmm25 + cmpq $0x100,%rdx + jge .L_start_by16_EmbgEptodyewbFa + jmp .L_start_by8_EmbgEptodyewbFa + +.L_do_n_blocks_EmbgEptodyewbFa: + cmpq $0x0,%rdx + je .L_ret_EmbgEptodyewbFa + cmpq $0x70,%rdx + jge .L_remaining_num_blocks_is_7_EmbgEptodyewbFa + cmpq $0x60,%rdx + jge .L_remaining_num_blocks_is_6_EmbgEptodyewbFa + cmpq $0x50,%rdx + jge .L_remaining_num_blocks_is_5_EmbgEptodyewbFa + cmpq $0x40,%rdx + jge .L_remaining_num_blocks_is_4_EmbgEptodyewbFa + cmpq $0x30,%rdx + jge .L_remaining_num_blocks_is_3_EmbgEptodyewbFa + cmpq $0x20,%rdx + jge .L_remaining_num_blocks_is_2_EmbgEptodyewbFa + cmpq $0x10,%rdx + jge .L_remaining_num_blocks_is_1_EmbgEptodyewbFa + + + vmovdqu %xmm5,%xmm1 + + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 192(%rcx),%xmm0 + vaesdec 
%xmm0,%xmm1,%xmm1 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu %xmm1,-16(%rsi) + vmovdqa %xmm1,%xmm8 + + + movq $0x1,%r8 + kmovq %r8,%k1 + vpsllq $0x3f,%xmm9,%xmm13 + vpsraq $0x3f,%xmm13,%xmm14 + vpandq %xmm25,%xmm14,%xmm5 + vpxorq %xmm5,%xmm9,%xmm9{%k1} + vpsrldq $0x8,%xmm9,%xmm10 +.byte 98, 211, 181, 8, 115, 194, 1 + vpslldq $0x8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm0,%xmm0 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_remaining_num_blocks_is_7_EmbgEptodyewbFa: + movq $0xffffffffffffffff,%r8 + shrq $0x10,%r8 + kmovq %r8,%k1 + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2{%k1} + addq $0x70,%rdi + andq $0xf,%rdx + je .L_done_7_remain_EmbgEptodyewbFa + vextracti32x4 $0x2,%zmm10,%xmm12 + vextracti32x4 $0x3,%zmm10,%xmm13 + vinserti32x4 $0x2,%xmm13,%zmm10,%zmm10 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi){%k1} + addq $0x70,%rsi + vextracti32x4 $0x2,%zmm2,%xmm8 + vmovdqa %xmm12,%xmm0 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_done_7_remain_EmbgEptodyewbFa: + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 
144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi){%k1} + jmp .L_ret_EmbgEptodyewbFa + +.L_remaining_num_blocks_is_6_EmbgEptodyewbFa: + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%ymm2 + addq $0x60,%rdi + andq $0xf,%rdx + je .L_done_6_remain_EmbgEptodyewbFa + vextracti32x4 $0x1,%zmm10,%xmm12 + vextracti32x4 $0x2,%zmm10,%xmm13 + vinserti32x4 $0x1,%xmm13,%zmm10,%zmm10 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %ymm2,64(%rsi) + addq $0x60,%rsi + vextracti32x4 $0x1,%zmm2,%xmm8 + vmovdqa %xmm12,%xmm0 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_done_6_remain_EmbgEptodyewbFa: + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec 
%zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %ymm2,64(%rsi) + jmp .L_ret_EmbgEptodyewbFa + +.L_remaining_num_blocks_is_5_EmbgEptodyewbFa: + vmovdqu8 (%rdi),%zmm1 + vmovdqu 64(%rdi),%xmm2 + addq $0x50,%rdi + andq $0xf,%rdx + je .L_done_5_remain_EmbgEptodyewbFa + vmovdqa %xmm10,%xmm12 + vextracti32x4 $0x1,%zmm10,%xmm10 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu %xmm2,64(%rsi) + addq $0x50,%rsi + vmovdqa %xmm2,%xmm8 + vmovdqa %xmm12,%xmm0 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_done_5_remain_EmbgEptodyewbFa: + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec 
%zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %xmm2,64(%rsi) + jmp .L_ret_EmbgEptodyewbFa + +.L_remaining_num_blocks_is_4_EmbgEptodyewbFa: + vmovdqu8 (%rdi),%zmm1 + addq $0x40,%rdi + andq $0xf,%rdx + je .L_done_4_remain_EmbgEptodyewbFa + vextracti32x4 $0x3,%zmm9,%xmm12 + vinserti32x4 $0x3,%xmm10,%zmm9,%zmm9 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + addq $0x40,%rsi + vextracti32x4 $0x3,%zmm1,%xmm8 + vmovdqa %xmm12,%xmm0 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_done_4_remain_EmbgEptodyewbFa: + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec 
%zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + jmp .L_ret_EmbgEptodyewbFa + +.L_remaining_num_blocks_is_3_EmbgEptodyewbFa: + vmovdqu (%rdi),%xmm1 + vmovdqu 16(%rdi),%xmm2 + vmovdqu 32(%rdi),%xmm3 + addq $0x30,%rdi + andq $0xf,%rdx + je .L_done_3_remain_EmbgEptodyewbFa + vextracti32x4 $0x2,%zmm9,%xmm13 + vextracti32x4 $0x1,%zmm9,%xmm10 + vextracti32x4 $0x3,%zmm9,%xmm11 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + addq $0x30,%rsi + vmovdqa %xmm3,%xmm8 + vmovdqa %xmm13,%xmm0 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_done_3_remain_EmbgEptodyewbFa: + vextracti32x4 $0x1,%zmm9,%xmm10 + vextracti32x4 $0x2,%zmm9,%xmm11 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + 
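+/*
+ * Unrolled AES-256 structure (descriptive, hedged): each decrypt path here
+ * is a whitening XOR with round key 0 at (%rcx), thirteen vaesdec rounds
+ * with keys at a 16-byte stride (16(%rcx)..208(%rcx)), and vaesdeclast with
+ * 224(%rcx) -- 14 rounds over 15 round keys.  %rcx is assumed to point at
+ * the inverse (decryption) key schedule that vaesdec expects.
+ */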
vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + jmp .L_ret_EmbgEptodyewbFa + +.L_remaining_num_blocks_is_2_EmbgEptodyewbFa: + vmovdqu (%rdi),%xmm1 + vmovdqu 16(%rdi),%xmm2 + addq $0x20,%rdi + andq $0xf,%rdx + je .L_done_2_remain_EmbgEptodyewbFa + vextracti32x4 $0x2,%zmm9,%xmm10 + vextracti32x4 $0x1,%zmm9,%xmm12 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + addq $0x20,%rsi + vmovdqa %xmm2,%xmm8 + vmovdqa %xmm12,%xmm0 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_done_2_remain_EmbgEptodyewbFa: + vextracti32x4 
$0x1,%zmm9,%xmm10 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + jmp .L_ret_EmbgEptodyewbFa + +.L_remaining_num_blocks_is_1_EmbgEptodyewbFa: + vmovdqu (%rdi),%xmm1 + addq $0x10,%rdi + andq $0xf,%rdx + je .L_done_1_remain_EmbgEptodyewbFa + vextracti32x4 $0x1,%zmm9,%xmm11 + vpxor %xmm11,%xmm1,%xmm1 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vpxor %xmm11,%xmm1,%xmm1 + vmovdqu %xmm1,(%rsi) + addq $0x10,%rsi + vmovdqa %xmm1,%xmm8 + vmovdqa %xmm9,%xmm0 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_done_1_remain_EmbgEptodyewbFa: + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu %xmm1,(%rsi) + jmp .L_ret_EmbgEptodyewbFa + 
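+/*
+ * Main by-16 path (descriptive, hedged): .L_start_by16 keeps 16 tweaks live
+ * in %zmm9..%zmm12, four 128-bit tweaks per register.  Inside the main loop
+ * a register is advanced eight block positions at a time, i.e. each 128-bit
+ * lane is multiplied by x^8 in GF(2^128): vpslldq $1 shifts the lane left
+ * by one byte and the byte that falls out (vpsrldq $0xf) is folded back in
+ * with vpclmulqdq against the 0x87 reduction constant.  Illustrative C for
+ * a single lane, names hypothetical and not from this file:
+ *
+ *   typedef unsigned __int128 u128;
+ *   static u128 xts_mul_x8(u128 t) {            // advance tweak 8 blocks
+ *       unsigned b = (unsigned)(t >> 120);      // byte shifted out on top
+ *       unsigned f = b ^ (b << 1) ^ (b << 2) ^ (b << 7); // clmul(b, 0x87)
+ *       return (t << 8) ^ f;                    // (t * x^8) mod XTS poly
+ *   }
+ */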
+.L_start_by16_EmbgEptodyewbFa: + vbroadcasti32x4 (%rsp),%zmm0 + vbroadcasti32x4 shufb_15_7(%rip),%zmm8 + movq $0xaa,%r8 + kmovq %r8,%k2 + + + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + + + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + + + vpsrldq $0xf,%zmm9,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm9,%zmm11 + vpxord %zmm14,%zmm11,%zmm11 + + vpsrldq $0xf,%zmm10,%zmm15 + vpclmulqdq $0x0,%zmm25,%zmm15,%zmm16 + vpslldq $0x1,%zmm10,%zmm12 + vpxord %zmm16,%zmm12,%zmm12 + +.L_main_loop_run_16_EmbgEptodyewbFa: + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2 + vmovdqu8 128(%rdi),%zmm3 + vmovdqu8 192(%rdi),%zmm4 + vmovdqu8 240(%rdi),%xmm5 + addq $0x100,%rdi + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vpxorq %zmm11,%zmm3,%zmm3 + vpxorq %zmm12,%zmm4,%zmm4 + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vpxorq %zmm0,%zmm3,%zmm3 + vpxorq %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm11,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm11,%zmm15 + vpxord %zmm14,%zmm15,%zmm15 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm12,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm12,%zmm16 + vpxord %zmm14,%zmm16,%zmm16 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm15,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm15,%zmm17 + vpxord %zmm14,%zmm17,%zmm17 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm16,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm16,%zmm18 + vpxord %zmm14,%zmm18,%zmm18 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec 
%zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + vaesdeclast %zmm0,%zmm3,%zmm3 + vaesdeclast %zmm0,%zmm4,%zmm4 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vpxorq %zmm11,%zmm3,%zmm3 + vpxorq %zmm12,%zmm4,%zmm4 + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqa32 %zmm17,%zmm11 + vmovdqa32 %zmm18,%zmm12 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi) + vmovdqu8 %zmm3,128(%rsi) + vmovdqu8 %zmm4,192(%rsi) + addq $0x100,%rsi + subq $0x100,%rdx + cmpq $0x100,%rdx + jge .L_main_loop_run_16_EmbgEptodyewbFa + + cmpq $0x80,%rdx + jge .L_main_loop_run_8_EmbgEptodyewbFa + jmp .L_do_n_blocks_EmbgEptodyewbFa + +.L_start_by8_EmbgEptodyewbFa: + + vbroadcasti32x4 (%rsp),%zmm0 + vbroadcasti32x4 shufb_15_7(%rip),%zmm8 + movq $0xaa,%r8 + kmovq %r8,%k2 + + + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + + + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + +.L_main_loop_run_8_EmbgEptodyewbFa: + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2 + vmovdqu8 112(%rdi),%xmm5 + addq $0x80,%rdi + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vpsrldq $0xf,%zmm9,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm9,%zmm15 + vpxord %zmm14,%zmm15,%zmm15 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vpsrldq $0xf,%zmm10,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm10,%zmm16 + vpxord %zmm14,%zmm16,%zmm16 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi) + addq $0x80,%rsi + subq $0x80,%rdx + cmpq $0x80,%rdx + jge .L_main_loop_run_8_EmbgEptodyewbFa + jmp .L_do_n_blocks_EmbgEptodyewbFa + +.L_steal_cipher_EmbgEptodyewbFa: + + vmovdqa %xmm8,%xmm2 + + + leaq vpshufb_shf_table(%rip),%rax + vmovdqu (%rax,%rdx,1),%xmm10 
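+/*
+ * Ciphertext-stealing tail (descriptive, hedged): %rdx holds the residual
+ * byte count (1..15) and vpshufb_shf_table yields a byte-rotate mask for
+ * that count.  The last full-block result in %xmm8 donates its leading
+ * %rdx bytes as the final partial output, the partial input block at
+ * -16(%rdi,%rdx,1) is topped up with the borrowed bytes via vpblendvb, and
+ * the stitched block is decrypted with the tweak saved in %xmm0 and written
+ * back at -16(%rsi) as the last full output block.
+ */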
+ vpshufb %xmm10,%xmm8,%xmm8 + + + vmovdqu -16(%rdi,%rdx,1),%xmm3 + vmovdqu %xmm8,-16(%rsi,%rdx,1) + + + leaq vpshufb_shf_table(%rip),%rax + addq $16,%rax + subq %rdx,%rax + vmovdqu (%rax),%xmm10 + vpxor mask1(%rip),%xmm10,%xmm10 + vpshufb %xmm10,%xmm3,%xmm3 + + vpblendvb %xmm10,%xmm2,%xmm3,%xmm3 + + + vpxor %xmm0,%xmm3,%xmm8 + + + vpxor (%rcx),%xmm8,%xmm8 + vaesdec 16(%rcx),%xmm8,%xmm8 + vaesdec 32(%rcx),%xmm8,%xmm8 + vaesdec 48(%rcx),%xmm8,%xmm8 + vaesdec 64(%rcx),%xmm8,%xmm8 + vaesdec 80(%rcx),%xmm8,%xmm8 + vaesdec 96(%rcx),%xmm8,%xmm8 + vaesdec 112(%rcx),%xmm8,%xmm8 + vaesdec 128(%rcx),%xmm8,%xmm8 + vaesdec 144(%rcx),%xmm8,%xmm8 + vaesdec 160(%rcx),%xmm8,%xmm8 + vaesdec 176(%rcx),%xmm8,%xmm8 + vaesdec 192(%rcx),%xmm8,%xmm8 + vaesdec 208(%rcx),%xmm8,%xmm8 + vaesdeclast 224(%rcx),%xmm8,%xmm8 + + vpxor %xmm0,%xmm8,%xmm8 + +.L_done_EmbgEptodyewbFa: + + vmovdqu %xmm8,-16(%rsi) +.L_ret_EmbgEptodyewbFa: + movq 128(%rsp),%rbx + xorq %r8,%r8 + movq %r8,128(%rsp) + + vpxorq %zmm0,%zmm0,%zmm0 + movq %rbp,%rsp + popq %rbp + vzeroupper + .byte 0xf3,0xc3 + +.L_less_than_128_bytes_EmbgEptodyewbFa: + cmpq $0x10,%rdx + jb .L_ret_EmbgEptodyewbFa + + movq %rdx,%r8 + andq $0x70,%r8 + cmpq $0x60,%r8 + je .L_num_blocks_is_6_EmbgEptodyewbFa + cmpq $0x50,%r8 + je .L_num_blocks_is_5_EmbgEptodyewbFa + cmpq $0x40,%r8 + je .L_num_blocks_is_4_EmbgEptodyewbFa + cmpq $0x30,%r8 + je .L_num_blocks_is_3_EmbgEptodyewbFa + cmpq $0x20,%r8 + je .L_num_blocks_is_2_EmbgEptodyewbFa + cmpq $0x10,%r8 + je .L_num_blocks_is_1_EmbgEptodyewbFa + +.L_num_blocks_is_7_EmbgEptodyewbFa: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,16 + 8(%rsp) + vmovdqa 16(%rsp),%xmm10 + vmovdqu 16(%rdi),%xmm2 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,32(%rsp) + movq %rbx,32 + 8(%rsp) + vmovdqa 32(%rsp),%xmm11 + vmovdqu 32(%rdi),%xmm3 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,48(%rsp) + movq %rbx,48 + 8(%rsp) + vmovdqa 48(%rsp),%xmm12 + vmovdqu 48(%rdi),%xmm4 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,64(%rsp) + movq %rbx,64 + 8(%rsp) + vmovdqa 64(%rsp),%xmm13 + vmovdqu 64(%rdi),%xmm5 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,80(%rsp) + movq %rbx,80 + 8(%rsp) + vmovdqa 80(%rsp),%xmm14 + vmovdqu 80(%rdi),%xmm6 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,96(%rsp) + movq %rbx,96 + 8(%rsp) + vmovdqa 96(%rsp),%xmm15 + vmovdqu 96(%rdi),%xmm7 + addq $0x70,%rdi + andq $0xf,%rdx + je .L_done_7_EmbgEptodyewbFa + +.L_steal_cipher_7_EmbgEptodyewbFa: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm15,%xmm16 + vmovdqa 16(%rsp),%xmm15 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm15,%xmm7,%xmm7 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vpxor %xmm0,%xmm5,%xmm5 + vpxor %xmm0,%xmm6,%xmm6 + vpxor %xmm0,%xmm7,%xmm7 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec 
%xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vaesdeclast %xmm0,%xmm6,%xmm6 + vaesdeclast %xmm0,%xmm7,%xmm7 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm15,%xmm7,%xmm7 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + vmovdqu %xmm5,64(%rsi) + vmovdqu %xmm6,80(%rsi) + addq $0x70,%rsi + vmovdqa64 %xmm16,%xmm0 + vmovdqa %xmm7,%xmm8 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_done_7_EmbgEptodyewbFa: + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor 
%xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm15,%xmm7,%xmm7 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vpxor %xmm0,%xmm5,%xmm5 + vpxor %xmm0,%xmm6,%xmm6 + vpxor %xmm0,%xmm7,%xmm7 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vaesdeclast %xmm0,%xmm6,%xmm6 + vaesdeclast %xmm0,%xmm7,%xmm7 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor 
%xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm15,%xmm7,%xmm7 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + vmovdqu %xmm5,64(%rsi) + vmovdqu %xmm6,80(%rsi) + addq $0x70,%rsi + vmovdqa %xmm7,%xmm8 + jmp .L_done_EmbgEptodyewbFa + +.L_num_blocks_is_6_EmbgEptodyewbFa: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,16 + 8(%rsp) + vmovdqa 16(%rsp),%xmm10 + vmovdqu 16(%rdi),%xmm2 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,32(%rsp) + movq %rbx,32 + 8(%rsp) + vmovdqa 32(%rsp),%xmm11 + vmovdqu 32(%rdi),%xmm3 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,48(%rsp) + movq %rbx,48 + 8(%rsp) + vmovdqa 48(%rsp),%xmm12 + vmovdqu 48(%rdi),%xmm4 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,64(%rsp) + movq %rbx,64 + 8(%rsp) + vmovdqa 64(%rsp),%xmm13 + vmovdqu 64(%rdi),%xmm5 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,80(%rsp) + movq %rbx,80 + 8(%rsp) + vmovdqa 80(%rsp),%xmm14 + vmovdqu 80(%rdi),%xmm6 + addq $0x60,%rdi + andq $0xf,%rdx + je .L_done_6_EmbgEptodyewbFa + +.L_steal_cipher_6_EmbgEptodyewbFa: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm14,%xmm15 + vmovdqa 16(%rsp),%xmm14 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vpxor %xmm0,%xmm5,%xmm5 + vpxor %xmm0,%xmm6,%xmm6 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 144(%rcx),%xmm0 + 
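+/*
+ * Scalar tweak chaining (descriptive, hedged): the GPR sequences in these
+ * .L_num_blocks_is_* paths (shlq $1,%rax / adcq %rbx,%rbx / cmovcq
+ * %r10,%r11 / xorq %r11,%rax with %r10 = 0x87) are one GF(2^128) doubling
+ * of the tweak in %rbx:%rax -- a 128-bit left shift whose carry-out selects
+ * the 0x87 feedback.  Equivalent C, names hypothetical:
+ *
+ *   lo2 = lo << 1;
+ *   hi2 = (hi << 1) | (lo >> 63);
+ *   lo2 ^= (hi >> 63) ? 0x87 : 0;   // reduce when bit 127 was set
+ *
+ * Each doubled tweak is spilled to the stack and reloaded as an XMM value
+ * for the per-block XORs.
+ */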
vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vaesdeclast %xmm0,%xmm6,%xmm6 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + vmovdqu %xmm5,64(%rsi) + addq $0x60,%rsi + vmovdqa %xmm15,%xmm0 + vmovdqa %xmm6,%xmm8 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_done_6_EmbgEptodyewbFa: + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vpxor %xmm0,%xmm5,%xmm5 + vpxor %xmm0,%xmm6,%xmm6 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec 
%xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vaesdeclast %xmm0,%xmm6,%xmm6 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + vmovdqu %xmm5,64(%rsi) + addq $0x60,%rsi + vmovdqa %xmm6,%xmm8 + jmp .L_done_EmbgEptodyewbFa + +.L_num_blocks_is_5_EmbgEptodyewbFa: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,16 + 8(%rsp) + vmovdqa 16(%rsp),%xmm10 + vmovdqu 16(%rdi),%xmm2 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,32(%rsp) + movq %rbx,32 + 8(%rsp) + vmovdqa 32(%rsp),%xmm11 + vmovdqu 32(%rdi),%xmm3 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,48(%rsp) + movq %rbx,48 + 8(%rsp) + vmovdqa 48(%rsp),%xmm12 + vmovdqu 48(%rdi),%xmm4 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,64(%rsp) + movq %rbx,64 + 8(%rsp) + vmovdqa 64(%rsp),%xmm13 + vmovdqu 64(%rdi),%xmm5 + addq $0x50,%rdi + andq $0xf,%rdx + je .L_done_5_EmbgEptodyewbFa + +.L_steal_cipher_5_EmbgEptodyewbFa: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm13,%xmm14 + vmovdqa 16(%rsp),%xmm13 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vpxor %xmm0,%xmm5,%xmm5 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 
80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + addq $0x50,%rsi + vmovdqa %xmm14,%xmm0 + vmovdqa %xmm5,%xmm8 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_done_5_EmbgEptodyewbFa: + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vpxor %xmm0,%xmm5,%xmm5 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec 
%xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + addq $0x50,%rsi + vmovdqa %xmm5,%xmm8 + jmp .L_done_EmbgEptodyewbFa + +.L_num_blocks_is_4_EmbgEptodyewbFa: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,16 + 8(%rsp) + vmovdqa 16(%rsp),%xmm10 + vmovdqu 16(%rdi),%xmm2 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,32(%rsp) + movq %rbx,32 + 8(%rsp) + vmovdqa 32(%rsp),%xmm11 + vmovdqu 32(%rdi),%xmm3 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,48(%rsp) + movq %rbx,48 + 8(%rsp) + vmovdqa 48(%rsp),%xmm12 + vmovdqu 48(%rdi),%xmm4 + addq $0x40,%rdi + andq $0xf,%rdx + je .L_done_4_EmbgEptodyewbFa + +.L_steal_cipher_4_EmbgEptodyewbFa: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm12,%xmm13 + vmovdqa 16(%rsp),%xmm12 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec 
%xmm0,%xmm4,%xmm4 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + addq $0x40,%rsi + vmovdqa %xmm13,%xmm0 + vmovdqa %xmm4,%xmm8 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_done_4_EmbgEptodyewbFa: + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor 
%xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + addq $0x40,%rsi + vmovdqa %xmm4,%xmm8 + jmp .L_done_EmbgEptodyewbFa + +.L_num_blocks_is_3_EmbgEptodyewbFa: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,16 + 8(%rsp) + vmovdqa 16(%rsp),%xmm10 + vmovdqu 16(%rdi),%xmm2 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,32(%rsp) + movq %rbx,32 + 8(%rsp) + vmovdqa 32(%rsp),%xmm11 + vmovdqu 32(%rdi),%xmm3 + addq $0x30,%rdi + andq $0xf,%rdx + je .L_done_3_EmbgEptodyewbFa + +.L_steal_cipher_3_EmbgEptodyewbFa: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm11,%xmm12 + vmovdqa 16(%rsp),%xmm11 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + addq $0x30,%rsi + vmovdqa %xmm12,%xmm0 + vmovdqa %xmm3,%xmm8 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_done_3_EmbgEptodyewbFa: + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec 
%xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + addq $0x30,%rsi + vmovdqa %xmm3,%xmm8 + jmp .L_done_EmbgEptodyewbFa + +.L_num_blocks_is_2_EmbgEptodyewbFa: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,16 + 8(%rsp) + vmovdqa 16(%rsp),%xmm10 + vmovdqu 16(%rdi),%xmm2 + addq $0x20,%rdi + andq $0xf,%rdx + je .L_done_2_EmbgEptodyewbFa + +.L_steal_cipher_2_EmbgEptodyewbFa: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm10,%xmm11 + vmovdqa 16(%rsp),%xmm10 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu %xmm1,(%rsi) + addq $0x20,%rsi + vmovdqa %xmm11,%xmm0 + vmovdqa %xmm2,%xmm8 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_done_2_EmbgEptodyewbFa: + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vmovdqu 
16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu %xmm1,(%rsi) + addq $0x20,%rsi + vmovdqa %xmm2,%xmm8 + jmp .L_done_EmbgEptodyewbFa + +.L_num_blocks_is_1_EmbgEptodyewbFa: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + addq $0x10,%rdi + andq $0xf,%rdx + je .L_done_1_EmbgEptodyewbFa + +.L_steal_cipher_1_EmbgEptodyewbFa: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm9,%xmm10 + vmovdqa 16(%rsp),%xmm9 + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vpxor %xmm9,%xmm1,%xmm1 + addq $0x10,%rsi + vmovdqa %xmm10,%xmm0 + vmovdqa %xmm1,%xmm8 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_done_1_EmbgEptodyewbFa: + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vpxor %xmm9,%xmm1,%xmm1 + addq $0x10,%rsi + 
vmovdqa %xmm1,%xmm8 + jmp .L_done_EmbgEptodyewbFa +.cfi_endproc +.section .rodata +.align 16 + +vpshufb_shf_table: +.quad 0x8786858483828100, 0x8f8e8d8c8b8a8988 +.quad 0x0706050403020100, 0x000e0d0c0b0a0908 + +mask1: +.quad 0x8080808080808080, 0x8080808080808080 + +const_dq3210: +.quad 0, 0, 1, 1, 2, 2, 3, 3 +const_dq5678: +.quad 8, 8, 7, 7, 6, 6, 5, 5 +const_dq7654: +.quad 4, 4, 5, 5, 6, 6, 7, 7 +const_dq1234: +.quad 4, 4, 3, 3, 2, 2, 1, 1 + +shufb_15_7: +.byte 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 7, 0xff, 0xff +.byte 0xff, 0xff, 0xff, 0xff, 0xff + +.text + .section ".note.gnu.property", "a" + .p2align 3 + .long 1f - 0f + .long 4f - 1f + .long 5 +0: + # "GNU" encoded with .byte, since .asciz isn't supported + # on Solaris. + .byte 0x47 + .byte 0x4e + .byte 0x55 + .byte 0 +1: + .p2align 3 + .long 0xc0000002 + .long 3f - 2f +2: + .long 3 +3: + .p2align 3 +4: diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-2k-avxifma.s b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-2k-avxifma.s new file mode 100644 index 0000000000..7e08b44dde --- /dev/null +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-2k-avxifma.s @@ -0,0 +1,1167 @@ +.text + +.globl ossl_rsaz_avxifma_eligible +.type ossl_rsaz_avxifma_eligible,@function +.align 32 +ossl_rsaz_avxifma_eligible: + movl OPENSSL_ia32cap_P+20(%rip),%ecx + xorl %eax,%eax + andl $8388608,%ecx + cmpl $8388608,%ecx + cmovel %ecx,%eax + .byte 0xf3,0xc3 +.size ossl_rsaz_avxifma_eligible, .-ossl_rsaz_avxifma_eligible +.text + +.globl ossl_rsaz_amm52x20_x1_avxifma256 +.type ossl_rsaz_amm52x20_x1_avxifma256,@function +.align 32 +ossl_rsaz_amm52x20_x1_avxifma256: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lossl_rsaz_amm52x20_x1_avxifma256_body: + + + vpxor %ymm0,%ymm0,%ymm0 + vmovapd %ymm0,%ymm3 + vmovapd %ymm0,%ymm5 + vmovapd %ymm0,%ymm6 + vmovapd %ymm0,%ymm7 + vmovapd %ymm0,%ymm8 + + xorl %r9d,%r9d + + movq %rdx,%r11 + movq $0xfffffffffffff,%rax + + + movl $5,%ebx + +.align 32 +.Lloop5: + movq 0(%r11),%r13 + + vpbroadcastq 0(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -168(%rsp),%rsp +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm8 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm8 + + + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm5,32(%rsp) + vmovdqu %ymm6,64(%rsp) + vmovdqu %ymm7,96(%rsp) + vmovdqu %ymm8,128(%rsp) + movq $0,160(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm5 + vmovdqu 72(%rsp),%ymm6 + vmovdqu 104(%rsp),%ymm7 + vmovdqu 136(%rsp),%ymm8 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 
32(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm8 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm8 + leaq 168(%rsp),%rsp + movq 8(%r11),%r13 + + vpbroadcastq 8(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -168(%rsp),%rsp +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm8 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm8 + + + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm5,32(%rsp) + vmovdqu %ymm6,64(%rsp) + vmovdqu %ymm7,96(%rsp) + vmovdqu %ymm8,128(%rsp) + movq $0,160(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm5 + vmovdqu 72(%rsp),%ymm6 + vmovdqu 104(%rsp),%ymm7 + vmovdqu 136(%rsp),%ymm8 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm8 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm8 + leaq 168(%rsp),%rsp + movq 16(%r11),%r13 + + vpbroadcastq 16(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -168(%rsp),%rsp +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm8 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm8 + + + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm5,32(%rsp) + vmovdqu %ymm6,64(%rsp) + vmovdqu %ymm7,96(%rsp) + vmovdqu %ymm8,128(%rsp) + movq $0,160(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm5 + vmovdqu 72(%rsp),%ymm6 + vmovdqu 104(%rsp),%ymm7 + vmovdqu 136(%rsp),%ymm8 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm8 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm8 + leaq 168(%rsp),%rsp + movq 24(%r11),%r13 + + vpbroadcastq 24(%r11),%ymm1 + 
movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -168(%rsp),%rsp +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm8 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm8 + + + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm5,32(%rsp) + vmovdqu %ymm6,64(%rsp) + vmovdqu %ymm7,96(%rsp) + vmovdqu %ymm8,128(%rsp) + movq $0,160(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm5 + vmovdqu 72(%rsp),%ymm6 + vmovdqu 104(%rsp),%ymm7 + vmovdqu 136(%rsp),%ymm8 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm8 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm8 + leaq 168(%rsp),%rsp + leaq 32(%r11),%r11 + decl %ebx + jne .Lloop5 + + vmovq %r9,%xmm0 + vpbroadcastq %xmm0,%ymm0 + vpblendd $3,%ymm0,%ymm3,%ymm3 + + + + vpsrlq $52,%ymm3,%ymm0 + vpsrlq $52,%ymm5,%ymm1 + vpsrlq $52,%ymm6,%ymm2 + vpsrlq $52,%ymm7,%ymm13 + vpsrlq $52,%ymm8,%ymm14 + + + vpermq $144,%ymm14,%ymm14 + vpermq $3,%ymm13,%ymm15 + vblendpd $1,%ymm15,%ymm14,%ymm14 + + vpermq $144,%ymm13,%ymm13 + vpermq $3,%ymm2,%ymm15 + vblendpd $1,%ymm15,%ymm13,%ymm13 + + vpermq $144,%ymm2,%ymm2 + vpermq $3,%ymm1,%ymm15 + vblendpd $1,%ymm15,%ymm2,%ymm2 + + vpermq $144,%ymm1,%ymm1 + vpermq $3,%ymm0,%ymm15 + vblendpd $1,%ymm15,%ymm1,%ymm1 + + vpermq $144,%ymm0,%ymm0 + vpand .Lhigh64x3(%rip),%ymm0,%ymm0 + + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + + + vpaddq %ymm0,%ymm3,%ymm3 + vpaddq %ymm1,%ymm5,%ymm5 + vpaddq %ymm2,%ymm6,%ymm6 + vpaddq %ymm13,%ymm7,%ymm7 + vpaddq %ymm14,%ymm8,%ymm8 + + + + vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm0 + vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm1 + vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm2 + vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm13 + vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm14 + vmovmskpd %ymm0,%r14d + vmovmskpd %ymm1,%r13d + vmovmskpd %ymm2,%r12d + vmovmskpd %ymm13,%r11d + vmovmskpd %ymm14,%r10d + + + vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm0 + vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm1 + vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm2 + vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm13 + vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm14 + vmovmskpd %ymm0,%r9d + vmovmskpd %ymm1,%r8d + vmovmskpd %ymm2,%ebx + vmovmskpd %ymm13,%ecx + vmovmskpd %ymm14,%edx + + + + shlb $4,%r13b + orb %r13b,%r14b + shlb $4,%r11b + orb %r11b,%r12b + + addb %r14b,%r14b + adcb %r12b,%r12b + adcb %r10b,%r10b + + shlb $4,%r8b + orb %r8b,%r9b + shlb $4,%cl + orb %cl,%bl + + addb %r9b,%r14b + adcb %bl,%r12b + adcb %dl,%r10b + + xorb %r9b,%r14b + xorb %bl,%r12b + xorb %dl,%r10b + + leaq .Lkmasklut(%rip),%rdx + + movb %r14b,%r13b + andq $0xf,%r14 + vpsubq 
.Lmask52x4(%rip),%ymm3,%ymm0 + shlq $5,%r14 + vmovapd (%rdx,%r14), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm3,%ymm3 + + shrb $4,%r13b + andq $0xf,%r13 + vpsubq .Lmask52x4(%rip),%ymm5,%ymm0 + shlq $5,%r13 + vmovapd (%rdx,%r13), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm5,%ymm5 + + movb %r12b,%r11b + andq $0xf,%r12 + vpsubq .Lmask52x4(%rip),%ymm6,%ymm0 + shlq $5,%r12 + vmovapd (%rdx,%r12), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm6,%ymm6 + + shrb $4,%r11b + andq $0xf,%r11 + vpsubq .Lmask52x4(%rip),%ymm7,%ymm0 + shlq $5,%r11 + vmovapd (%rdx,%r11), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm7,%ymm7 + + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm8,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm8,%ymm8 + + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + + vmovdqu %ymm3,0(%rdi) + vmovdqu %ymm5,32(%rdi) + vmovdqu %ymm6,64(%rdi) + vmovdqu %ymm7,96(%rdi) + vmovdqu %ymm8,128(%rdi) + + vzeroupper + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%r12 +.cfi_restore %r12 + movq 32(%rsp),%rbp +.cfi_restore %rbp + movq 40(%rsp),%rbx +.cfi_restore %rbx + leaq 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lossl_rsaz_amm52x20_x1_avxifma256_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_rsaz_amm52x20_x1_avxifma256, .-ossl_rsaz_amm52x20_x1_avxifma256 +.section .rodata +.align 32 +.Lmask52x4: +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.Lhigh64x3: +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.Lkmasklut: + +.quad 0x0 +.quad 0x0 +.quad 0x0 +.quad 0x0 + +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0x0 +.quad 0x0 + +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0x0 + +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0x0 + +.quad 0x0 +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0x0 + +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0x0 + +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0x0 + +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0x0 + +.quad 0x0 +.quad 0x0 +.quad 0x0 +.quad 0xffffffffffffffff + +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0x0 +.quad 0xffffffffffffffff + +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0xffffffffffffffff + +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0xffffffffffffffff + +.quad 0x0 +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff + +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff + +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff + +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.text + +.globl ossl_rsaz_amm52x20_x2_avxifma256 +.type ossl_rsaz_amm52x20_x2_avxifma256,@function +.align 32 +ossl_rsaz_amm52x20_x2_avxifma256: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lossl_rsaz_amm52x20_x2_avxifma256_body: + + + vpxor 
%ymm0,%ymm0,%ymm0 + vmovapd %ymm0,%ymm3 + vmovapd %ymm0,%ymm5 + vmovapd %ymm0,%ymm6 + vmovapd %ymm0,%ymm7 + vmovapd %ymm0,%ymm8 + vmovapd %ymm0,%ymm4 + vmovapd %ymm0,%ymm9 + vmovapd %ymm0,%ymm10 + vmovapd %ymm0,%ymm11 + vmovapd %ymm0,%ymm12 + + xorl %r9d,%r9d + xorl %r15d,%r15d + + movq %rdx,%r11 + movq $0xfffffffffffff,%rax + + movl $20,%ebx + +.align 32 +.Lloop20: + movq 0(%r11),%r13 + + vpbroadcastq 0(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq (%r8),%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -168(%rsp),%rsp +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm8 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm8 + + + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm5,32(%rsp) + vmovdqu %ymm6,64(%rsp) + vmovdqu %ymm7,96(%rsp) + vmovdqu %ymm8,128(%rsp) + movq $0,160(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm5 + vmovdqu 72(%rsp),%ymm6 + vmovdqu 104(%rsp),%ymm7 + vmovdqu 136(%rsp),%ymm8 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm8 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm8 + leaq 168(%rsp),%rsp + movq 160(%r11),%r13 + + vpbroadcastq 160(%r11),%ymm1 + movq 160(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r15 + movq %r12,%r10 + adcq $0,%r10 + + movq 8(%r8),%r13 + imulq %r15,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 160(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r15 + adcq %r12,%r10 + + shrq $52,%r15 + salq $12,%r10 + orq %r10,%r15 + + leaq -168(%rsp),%rsp +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10 +{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm11 +{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 +{vex} vpmadd52luq 256(%rcx), %ymm2, %ymm11 +{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm12 + + + vmovdqu %ymm4,0(%rsp) + vmovdqu %ymm9,32(%rsp) + vmovdqu %ymm10,64(%rsp) + vmovdqu %ymm11,96(%rsp) + vmovdqu %ymm12,128(%rsp) + movq $0,160(%rsp) + + vmovdqu 8(%rsp),%ymm4 + vmovdqu 40(%rsp),%ymm9 + vmovdqu 72(%rsp),%ymm10 + vmovdqu 104(%rsp),%ymm11 + vmovdqu 136(%rsp),%ymm12 + + addq 8(%rsp),%r15 + +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 +{vex} vpmadd52huq 256(%rsi), %ymm1, %ymm11 +{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 +{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm11 +{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm12 + leaq 168(%rsp),%rsp + leaq 
8(%r11),%r11 + decl %ebx + jne .Lloop20 + + vmovq %r9,%xmm0 + vpbroadcastq %xmm0,%ymm0 + vpblendd $3,%ymm0,%ymm3,%ymm3 + + + + vpsrlq $52,%ymm3,%ymm0 + vpsrlq $52,%ymm5,%ymm1 + vpsrlq $52,%ymm6,%ymm2 + vpsrlq $52,%ymm7,%ymm13 + vpsrlq $52,%ymm8,%ymm14 + + + vpermq $144,%ymm14,%ymm14 + vpermq $3,%ymm13,%ymm15 + vblendpd $1,%ymm15,%ymm14,%ymm14 + + vpermq $144,%ymm13,%ymm13 + vpermq $3,%ymm2,%ymm15 + vblendpd $1,%ymm15,%ymm13,%ymm13 + + vpermq $144,%ymm2,%ymm2 + vpermq $3,%ymm1,%ymm15 + vblendpd $1,%ymm15,%ymm2,%ymm2 + + vpermq $144,%ymm1,%ymm1 + vpermq $3,%ymm0,%ymm15 + vblendpd $1,%ymm15,%ymm1,%ymm1 + + vpermq $144,%ymm0,%ymm0 + vpand .Lhigh64x3(%rip),%ymm0,%ymm0 + + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + + + vpaddq %ymm0,%ymm3,%ymm3 + vpaddq %ymm1,%ymm5,%ymm5 + vpaddq %ymm2,%ymm6,%ymm6 + vpaddq %ymm13,%ymm7,%ymm7 + vpaddq %ymm14,%ymm8,%ymm8 + + + + vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm0 + vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm1 + vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm2 + vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm13 + vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm14 + vmovmskpd %ymm0,%r14d + vmovmskpd %ymm1,%r13d + vmovmskpd %ymm2,%r12d + vmovmskpd %ymm13,%r11d + vmovmskpd %ymm14,%r10d + + + vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm0 + vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm1 + vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm2 + vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm13 + vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm14 + vmovmskpd %ymm0,%r9d + vmovmskpd %ymm1,%r8d + vmovmskpd %ymm2,%ebx + vmovmskpd %ymm13,%ecx + vmovmskpd %ymm14,%edx + + + + shlb $4,%r13b + orb %r13b,%r14b + shlb $4,%r11b + orb %r11b,%r12b + + addb %r14b,%r14b + adcb %r12b,%r12b + adcb %r10b,%r10b + + shlb $4,%r8b + orb %r8b,%r9b + shlb $4,%cl + orb %cl,%bl + + addb %r9b,%r14b + adcb %bl,%r12b + adcb %dl,%r10b + + xorb %r9b,%r14b + xorb %bl,%r12b + xorb %dl,%r10b + + leaq .Lkmasklut(%rip),%rdx + + movb %r14b,%r13b + andq $0xf,%r14 + vpsubq .Lmask52x4(%rip),%ymm3,%ymm0 + shlq $5,%r14 + vmovapd (%rdx,%r14), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm3,%ymm3 + + shrb $4,%r13b + andq $0xf,%r13 + vpsubq .Lmask52x4(%rip),%ymm5,%ymm0 + shlq $5,%r13 + vmovapd (%rdx,%r13), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm5,%ymm5 + + movb %r12b,%r11b + andq $0xf,%r12 + vpsubq .Lmask52x4(%rip),%ymm6,%ymm0 + shlq $5,%r12 + vmovapd (%rdx,%r12), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm6,%ymm6 + + shrb $4,%r11b + andq $0xf,%r11 + vpsubq .Lmask52x4(%rip),%ymm7,%ymm0 + shlq $5,%r11 + vmovapd (%rdx,%r11), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm7,%ymm7 + + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm8,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm8,%ymm8 + + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + + vmovq %r15,%xmm0 + vpbroadcastq %xmm0,%ymm0 + vpblendd $3,%ymm0,%ymm4,%ymm4 + + + + vpsrlq $52,%ymm4,%ymm0 + vpsrlq $52,%ymm9,%ymm1 + vpsrlq $52,%ymm10,%ymm2 + vpsrlq $52,%ymm11,%ymm13 + vpsrlq $52,%ymm12,%ymm14 + + + vpermq $144,%ymm14,%ymm14 + vpermq $3,%ymm13,%ymm15 + vblendpd $1,%ymm15,%ymm14,%ymm14 + + vpermq $144,%ymm13,%ymm13 + vpermq $3,%ymm2,%ymm15 + vblendpd $1,%ymm15,%ymm13,%ymm13 + + vpermq $144,%ymm2,%ymm2 + vpermq $3,%ymm1,%ymm15 + vblendpd $1,%ymm15,%ymm2,%ymm2 + + vpermq $144,%ymm1,%ymm1 + vpermq $3,%ymm0,%ymm15 + vblendpd $1,%ymm15,%ymm1,%ymm1 + + vpermq $144,%ymm0,%ymm0 + vpand 
.Lhigh64x3(%rip),%ymm0,%ymm0 + + + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + vpand .Lmask52x4(%rip),%ymm11,%ymm11 + vpand .Lmask52x4(%rip),%ymm12,%ymm12 + + + vpaddq %ymm0,%ymm4,%ymm4 + vpaddq %ymm1,%ymm9,%ymm9 + vpaddq %ymm2,%ymm10,%ymm10 + vpaddq %ymm13,%ymm11,%ymm11 + vpaddq %ymm14,%ymm12,%ymm12 + + + + vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm0 + vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm1 + vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm2 + vpcmpgtq .Lmask52x4(%rip),%ymm11,%ymm13 + vpcmpgtq .Lmask52x4(%rip),%ymm12,%ymm14 + vmovmskpd %ymm0,%r14d + vmovmskpd %ymm1,%r13d + vmovmskpd %ymm2,%r12d + vmovmskpd %ymm13,%r11d + vmovmskpd %ymm14,%r10d + + + vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm0 + vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm1 + vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm2 + vpcmpeqq .Lmask52x4(%rip),%ymm11,%ymm13 + vpcmpeqq .Lmask52x4(%rip),%ymm12,%ymm14 + vmovmskpd %ymm0,%r9d + vmovmskpd %ymm1,%r8d + vmovmskpd %ymm2,%ebx + vmovmskpd %ymm13,%ecx + vmovmskpd %ymm14,%edx + + + + shlb $4,%r13b + orb %r13b,%r14b + shlb $4,%r11b + orb %r11b,%r12b + + addb %r14b,%r14b + adcb %r12b,%r12b + adcb %r10b,%r10b + + shlb $4,%r8b + orb %r8b,%r9b + shlb $4,%cl + orb %cl,%bl + + addb %r9b,%r14b + adcb %bl,%r12b + adcb %dl,%r10b + + xorb %r9b,%r14b + xorb %bl,%r12b + xorb %dl,%r10b + + leaq .Lkmasklut(%rip),%rdx + + movb %r14b,%r13b + andq $0xf,%r14 + vpsubq .Lmask52x4(%rip),%ymm4,%ymm0 + shlq $5,%r14 + vmovapd (%rdx,%r14), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm4,%ymm4 + + shrb $4,%r13b + andq $0xf,%r13 + vpsubq .Lmask52x4(%rip),%ymm9,%ymm0 + shlq $5,%r13 + vmovapd (%rdx,%r13), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm9,%ymm9 + + movb %r12b,%r11b + andq $0xf,%r12 + vpsubq .Lmask52x4(%rip),%ymm10,%ymm0 + shlq $5,%r12 + vmovapd (%rdx,%r12), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm10,%ymm10 + + shrb $4,%r11b + andq $0xf,%r11 + vpsubq .Lmask52x4(%rip),%ymm11,%ymm0 + shlq $5,%r11 + vmovapd (%rdx,%r11), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm11,%ymm11 + + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm12,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm12,%ymm12 + + + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + vpand .Lmask52x4(%rip),%ymm11,%ymm11 + vpand .Lmask52x4(%rip),%ymm12,%ymm12 + + vmovdqu %ymm3,0(%rdi) + vmovdqu %ymm5,32(%rdi) + vmovdqu %ymm6,64(%rdi) + vmovdqu %ymm7,96(%rdi) + vmovdqu %ymm8,128(%rdi) + + vmovdqu %ymm4,160(%rdi) + vmovdqu %ymm9,192(%rdi) + vmovdqu %ymm10,224(%rdi) + vmovdqu %ymm11,256(%rdi) + vmovdqu %ymm12,288(%rdi) + + vzeroupper + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%r12 +.cfi_restore %r12 + movq 32(%rsp),%rbp +.cfi_restore %rbp + movq 40(%rsp),%rbx +.cfi_restore %rbx + leaq 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lossl_rsaz_amm52x20_x2_avxifma256_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_rsaz_amm52x20_x2_avxifma256, .-ossl_rsaz_amm52x20_x2_avxifma256 +.text + +.align 32 +.globl ossl_extract_multiplier_2x20_win5_avx +.type ossl_extract_multiplier_2x20_win5_avx,@function +ossl_extract_multiplier_2x20_win5_avx: +.cfi_startproc +.byte 243,15,30,250 + vmovapd .Lones(%rip),%ymm14 + vmovq %rdx,%xmm10 + vpbroadcastq %xmm10,%ymm12 + vmovq %rcx,%xmm10 + vpbroadcastq %xmm10,%ymm13 + leaq 10240(%rsi),%rax + + + vpxor %xmm0,%xmm0,%xmm0 + vmovapd %ymm0,%ymm11 + vmovapd %ymm0,%ymm1 + vmovapd %ymm0,%ymm2 + vmovapd %ymm0,%ymm3 + vmovapd %ymm0,%ymm4 + vmovapd %ymm0,%ymm5 + 
vmovapd %ymm0,%ymm6 + vmovapd %ymm0,%ymm7 + vmovapd %ymm0,%ymm8 + vmovapd %ymm0,%ymm9 + +.align 32 +.Lloop: + vpcmpeqq %ymm11,%ymm12,%ymm15 + vmovdqu 0(%rsi),%ymm10 + vblendvpd %ymm15,%ymm10,%ymm0,%ymm0 + vmovdqu 32(%rsi),%ymm10 + vblendvpd %ymm15,%ymm10,%ymm1,%ymm1 + vmovdqu 64(%rsi),%ymm10 + vblendvpd %ymm15,%ymm10,%ymm2,%ymm2 + vmovdqu 96(%rsi),%ymm10 + vblendvpd %ymm15,%ymm10,%ymm3,%ymm3 + vmovdqu 128(%rsi),%ymm10 + vblendvpd %ymm15,%ymm10,%ymm4,%ymm4 + vpcmpeqq %ymm11,%ymm13,%ymm15 + vmovdqu 160(%rsi),%ymm10 + vblendvpd %ymm15,%ymm10,%ymm5,%ymm5 + vmovdqu 192(%rsi),%ymm10 + vblendvpd %ymm15,%ymm10,%ymm6,%ymm6 + vmovdqu 224(%rsi),%ymm10 + vblendvpd %ymm15,%ymm10,%ymm7,%ymm7 + vmovdqu 256(%rsi),%ymm10 + vblendvpd %ymm15,%ymm10,%ymm8,%ymm8 + vmovdqu 288(%rsi),%ymm10 + vblendvpd %ymm15,%ymm10,%ymm9,%ymm9 + vpaddq %ymm14,%ymm11,%ymm11 + addq $320,%rsi + cmpq %rsi,%rax + jne .Lloop + vmovdqu %ymm0,0(%rdi) + vmovdqu %ymm1,32(%rdi) + vmovdqu %ymm2,64(%rdi) + vmovdqu %ymm3,96(%rdi) + vmovdqu %ymm4,128(%rdi) + vmovdqu %ymm5,160(%rdi) + vmovdqu %ymm6,192(%rdi) + vmovdqu %ymm7,224(%rdi) + vmovdqu %ymm8,256(%rdi) + vmovdqu %ymm9,288(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_extract_multiplier_2x20_win5_avx, .-ossl_extract_multiplier_2x20_win5_avx +.section .rodata +.align 32 +.Lones: +.quad 1,1,1,1 +.Lzeros: +.quad 0,0,0,0 + .section ".note.gnu.property", "a" + .p2align 3 + .long 1f - 0f + .long 4f - 1f + .long 5 +0: + # "GNU" encoded with .byte, since .asciz isn't supported + # on Solaris. + .byte 0x47 + .byte 0x4e + .byte 0x55 + .byte 0 +1: + .p2align 3 + .long 0xc0000002 + .long 3f - 2f +2: + .long 3 +3: + .p2align 3 +4: diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-3k-avxifma.s b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-3k-avxifma.s new file mode 100644 index 0000000000..1e07eb24d6 --- /dev/null +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-3k-avxifma.s @@ -0,0 +1,1768 @@ +.text + +.globl ossl_rsaz_amm52x30_x1_avxifma256 +.type ossl_rsaz_amm52x30_x1_avxifma256,@function +.align 32 +ossl_rsaz_amm52x30_x1_avxifma256: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + vpxor %ymm0,%ymm0,%ymm0 + vmovapd %ymm0,%ymm3 + vmovapd %ymm0,%ymm4 + vmovapd %ymm0,%ymm5 + vmovapd %ymm0,%ymm6 + vmovapd %ymm0,%ymm7 + vmovapd %ymm0,%ymm8 + vmovapd %ymm0,%ymm9 + vmovapd %ymm0,%ymm10 + + xorl %r9d,%r9d + + movq %rdx,%r11 + movq $0xfffffffffffff,%rax + + + movl $7,%ebx + +.align 32 +.Lloop7: + movq 0(%r11),%r13 + + vpbroadcastq 0(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -264(%rsp),%rsp + +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 
224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 + + + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + movq $0,256(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 + + leaq 264(%rsp),%rsp + movq 8(%r11),%r13 + + vpbroadcastq 8(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -264(%rsp),%rsp + +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 + + + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + movq $0,256(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 
96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 + + leaq 264(%rsp),%rsp + movq 16(%r11),%r13 + + vpbroadcastq 16(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -264(%rsp),%rsp + +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 + + + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + movq $0,256(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 + + leaq 264(%rsp),%rsp + movq 24(%r11),%r13 + + vpbroadcastq 24(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -264(%rsp),%rsp + +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 + + + vmovdqu 
%ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + movq $0,256(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 + + leaq 264(%rsp),%rsp + leaq 32(%r11),%r11 + decl %ebx + jne .Lloop7 + movq 0(%r11),%r13 + + vpbroadcastq 0(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -264(%rsp),%rsp + +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 + + + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + movq $0,256(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 + + leaq 264(%rsp),%rsp + movq 8(%r11),%r13 + + vpbroadcastq 8(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq 
%r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -264(%rsp),%rsp + +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 + + + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + movq $0,256(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 + + leaq 264(%rsp),%rsp + + vmovq %r9,%xmm0 + vpbroadcastq %xmm0,%ymm0 + vpblendd $3,%ymm0,%ymm3,%ymm3 + + + + vpsrlq $52,%ymm3,%ymm0 + vpsrlq $52,%ymm4,%ymm1 + vpsrlq $52,%ymm5,%ymm2 + vpsrlq $52,%ymm6,%ymm11 + vpsrlq $52,%ymm7,%ymm12 + vpsrlq $52,%ymm8,%ymm13 + vpsrlq $52,%ymm9,%ymm14 + vpsrlq $52,%ymm10,%ymm15 + + leaq -32(%rsp),%rsp + vmovupd %ymm3,(%rsp) + + + vpermq $144,%ymm15,%ymm15 + vpermq $3,%ymm14,%ymm3 + vblendpd $1,%ymm3,%ymm15,%ymm15 + + vpermq $144,%ymm14,%ymm14 + vpermq $3,%ymm13,%ymm3 + vblendpd $1,%ymm3,%ymm14,%ymm14 + + vpermq $144,%ymm13,%ymm13 + vpermq $3,%ymm12,%ymm3 + vblendpd $1,%ymm3,%ymm13,%ymm13 + + vpermq $144,%ymm12,%ymm12 + vpermq $3,%ymm11,%ymm3 + vblendpd $1,%ymm3,%ymm12,%ymm12 + + vpermq $144,%ymm11,%ymm11 + vpermq $3,%ymm2,%ymm3 + vblendpd $1,%ymm3,%ymm11,%ymm11 + + vpermq $144,%ymm2,%ymm2 + vpermq $3,%ymm1,%ymm3 + vblendpd $1,%ymm3,%ymm2,%ymm2 + + vpermq $144,%ymm1,%ymm1 + vpermq $3,%ymm0,%ymm3 + vblendpd $1,%ymm3,%ymm1,%ymm1 + + vpermq $144,%ymm0,%ymm0 + vpand .Lhigh64x3(%rip),%ymm0,%ymm0 + + vmovupd (%rsp),%ymm3 + leaq 32(%rsp),%rsp + + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + + + vpaddq %ymm0,%ymm3,%ymm3 + vpaddq %ymm1,%ymm4,%ymm4 + vpaddq 
%ymm2,%ymm5,%ymm5 + vpaddq %ymm11,%ymm6,%ymm6 + vpaddq %ymm12,%ymm7,%ymm7 + vpaddq %ymm13,%ymm8,%ymm8 + vpaddq %ymm14,%ymm9,%ymm9 + vpaddq %ymm15,%ymm10,%ymm10 + + + + vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm0 + vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm1 + vmovmskpd %ymm0,%r14d + vmovmskpd %ymm1,%r13d + shlb $4,%r13b + orb %r13b,%r14b + + vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm2 + vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm11 + vmovmskpd %ymm2,%r13d + vmovmskpd %ymm11,%r12d + shlb $4,%r12b + orb %r12b,%r13b + + vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm12 + vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm13 + vmovmskpd %ymm12,%r12d + vmovmskpd %ymm13,%r11d + shlb $4,%r11b + orb %r11b,%r12b + + vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm14 + vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm15 + vmovmskpd %ymm14,%r11d + vmovmskpd %ymm15,%r10d + shlb $4,%r10b + orb %r10b,%r11b + + addb %r14b,%r14b + adcb %r13b,%r13b + adcb %r12b,%r12b + adcb %r11b,%r11b + + + vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm0 + vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm1 + vmovmskpd %ymm0,%r9d + vmovmskpd %ymm1,%r8d + shlb $4,%r8b + orb %r8b,%r9b + + vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm2 + vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm11 + vmovmskpd %ymm2,%r8d + vmovmskpd %ymm11,%edx + shlb $4,%dl + orb %dl,%r8b + + vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm12 + vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm13 + vmovmskpd %ymm12,%edx + vmovmskpd %ymm13,%ecx + shlb $4,%cl + orb %cl,%dl + + vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm14 + vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm15 + vmovmskpd %ymm14,%ecx + vmovmskpd %ymm15,%ebx + shlb $4,%bl + orb %bl,%cl + + addb %r9b,%r14b + adcb %r8b,%r13b + adcb %dl,%r12b + adcb %cl,%r11b + + xorb %r9b,%r14b + xorb %r8b,%r13b + xorb %dl,%r12b + xorb %cl,%r11b + + leaq .Lkmasklut(%rip),%rdx + + movb %r14b,%r10b + andq $0xf,%r14 + vpsubq .Lmask52x4(%rip),%ymm3,%ymm0 + shlq $5,%r14 + vmovapd (%rdx,%r14), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm3,%ymm3 + + shrb $4,%r10b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm4,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm4,%ymm4 + + movb %r13b,%r10b + andq $0xf,%r13 + vpsubq .Lmask52x4(%rip),%ymm5,%ymm0 + shlq $5,%r13 + vmovapd (%rdx,%r13), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm5,%ymm5 + + shrb $4,%r10b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm6,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm6,%ymm6 + + movb %r12b,%r10b + andq $0xf,%r12 + vpsubq .Lmask52x4(%rip),%ymm7,%ymm0 + shlq $5,%r12 + vmovapd (%rdx,%r12), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm7,%ymm7 + + shrb $4,%r10b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm8,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm8,%ymm8 + + movb %r11b,%r10b + andq $0xf,%r11 + vpsubq .Lmask52x4(%rip),%ymm9,%ymm0 + shlq $5,%r11 + vmovapd (%rdx,%r11), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm9,%ymm9 + + shrb $4,%r10b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm10,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm10,%ymm10 + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + + vmovdqu %ymm3,0(%rdi) + vmovdqu %ymm4,32(%rdi) + vmovdqu %ymm5,64(%rdi) + vmovdqu %ymm6,96(%rdi) + vmovdqu %ymm7,128(%rdi) + vmovdqu %ymm8,160(%rdi) + vmovdqu %ymm9,192(%rdi) + vmovdqu %ymm10,224(%rdi) + + vzeroupper + leaq (%rsp),%rax +.cfi_def_cfa_register %rax + movq 0(%rax),%r15 
+.cfi_restore %r15 + movq 8(%rax),%r14 +.cfi_restore %r14 + movq 16(%rax),%r13 +.cfi_restore %r13 + movq 24(%rax),%r12 +.cfi_restore %r12 + movq 32(%rax),%rbp +.cfi_restore %rbp + movq 40(%rax),%rbx +.cfi_restore %rbx + leaq 48(%rax),%rsp +.cfi_def_cfa %rsp,8 +.Lossl_rsaz_amm52x30_x1_avxifma256_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_rsaz_amm52x30_x1_avxifma256, .-ossl_rsaz_amm52x30_x1_avxifma256 +.section .rodata +.align 32 +.Lmask52x4: +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.Lhigh64x3: +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.Lkmasklut: + +.quad 0x0 +.quad 0x0 +.quad 0x0 +.quad 0x0 + +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0x0 +.quad 0x0 + +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0x0 + +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0x0 + +.quad 0x0 +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0x0 + +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0x0 + +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0x0 + +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0x0 + +.quad 0x0 +.quad 0x0 +.quad 0x0 +.quad 0xffffffffffffffff + +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0x0 +.quad 0xffffffffffffffff + +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0xffffffffffffffff + +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0xffffffffffffffff + +.quad 0x0 +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff + +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff + +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff + +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.text + +.globl ossl_rsaz_amm52x30_x2_avxifma256 +.type ossl_rsaz_amm52x30_x2_avxifma256,@function +.align 32 +ossl_rsaz_amm52x30_x2_avxifma256: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + vpxor %ymm0,%ymm0,%ymm0 + vmovapd %ymm0,%ymm3 + vmovapd %ymm0,%ymm4 + vmovapd %ymm0,%ymm5 + vmovapd %ymm0,%ymm6 + vmovapd %ymm0,%ymm7 + vmovapd %ymm0,%ymm8 + vmovapd %ymm0,%ymm9 + vmovapd %ymm0,%ymm10 + + xorl %r9d,%r9d + + movq %rdx,%r11 + movq $0xfffffffffffff,%rax + + movl $30,%ebx + +.align 32 +.Lloop30: + movq 0(%r11),%r13 + + vpbroadcastq 0(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq (%r8),%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -264(%rsp),%rsp + +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), 
%ymm2, %ymm4 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 + + + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + movq $0,256(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 + + leaq 264(%rsp),%rsp + leaq 8(%r11),%r11 + decl %ebx + jne .Lloop30 + + pushq %r11 + pushq %rsi + pushq %rcx + pushq %r8 + + vmovq %r9,%xmm0 + vpbroadcastq %xmm0,%ymm0 + vpblendd $3,%ymm0,%ymm3,%ymm3 + + + + vpsrlq $52,%ymm3,%ymm0 + vpsrlq $52,%ymm4,%ymm1 + vpsrlq $52,%ymm5,%ymm2 + vpsrlq $52,%ymm6,%ymm11 + vpsrlq $52,%ymm7,%ymm12 + vpsrlq $52,%ymm8,%ymm13 + vpsrlq $52,%ymm9,%ymm14 + vpsrlq $52,%ymm10,%ymm15 + + leaq -32(%rsp),%rsp + vmovupd %ymm3,(%rsp) + + + vpermq $144,%ymm15,%ymm15 + vpermq $3,%ymm14,%ymm3 + vblendpd $1,%ymm3,%ymm15,%ymm15 + + vpermq $144,%ymm14,%ymm14 + vpermq $3,%ymm13,%ymm3 + vblendpd $1,%ymm3,%ymm14,%ymm14 + + vpermq $144,%ymm13,%ymm13 + vpermq $3,%ymm12,%ymm3 + vblendpd $1,%ymm3,%ymm13,%ymm13 + + vpermq $144,%ymm12,%ymm12 + vpermq $3,%ymm11,%ymm3 + vblendpd $1,%ymm3,%ymm12,%ymm12 + + vpermq $144,%ymm11,%ymm11 + vpermq $3,%ymm2,%ymm3 + vblendpd $1,%ymm3,%ymm11,%ymm11 + + vpermq $144,%ymm2,%ymm2 + vpermq $3,%ymm1,%ymm3 + vblendpd $1,%ymm3,%ymm2,%ymm2 + + vpermq $144,%ymm1,%ymm1 + vpermq $3,%ymm0,%ymm3 + vblendpd $1,%ymm3,%ymm1,%ymm1 + + vpermq $144,%ymm0,%ymm0 + vpand .Lhigh64x3(%rip),%ymm0,%ymm0 + + vmovupd (%rsp),%ymm3 + leaq 32(%rsp),%rsp + + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + + + vpaddq %ymm0,%ymm3,%ymm3 + vpaddq %ymm1,%ymm4,%ymm4 + vpaddq %ymm2,%ymm5,%ymm5 + vpaddq %ymm11,%ymm6,%ymm6 + vpaddq %ymm12,%ymm7,%ymm7 + vpaddq %ymm13,%ymm8,%ymm8 + vpaddq %ymm14,%ymm9,%ymm9 + vpaddq %ymm15,%ymm10,%ymm10 + + + + vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm0 + vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm1 + vmovmskpd %ymm0,%r14d + vmovmskpd %ymm1,%r13d + shlb $4,%r13b + orb %r13b,%r14b + + vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm2 + vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm11 + vmovmskpd %ymm2,%r13d + vmovmskpd %ymm11,%r12d + shlb $4,%r12b + orb %r12b,%r13b + + vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm12 + vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm13 + 
vmovmskpd %ymm12,%r12d + vmovmskpd %ymm13,%r11d + shlb $4,%r11b + orb %r11b,%r12b + + vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm14 + vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm15 + vmovmskpd %ymm14,%r11d + vmovmskpd %ymm15,%r10d + shlb $4,%r10b + orb %r10b,%r11b + + addb %r14b,%r14b + adcb %r13b,%r13b + adcb %r12b,%r12b + adcb %r11b,%r11b + + + vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm0 + vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm1 + vmovmskpd %ymm0,%r9d + vmovmskpd %ymm1,%r8d + shlb $4,%r8b + orb %r8b,%r9b + + vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm2 + vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm11 + vmovmskpd %ymm2,%r8d + vmovmskpd %ymm11,%edx + shlb $4,%dl + orb %dl,%r8b + + vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm12 + vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm13 + vmovmskpd %ymm12,%edx + vmovmskpd %ymm13,%ecx + shlb $4,%cl + orb %cl,%dl + + vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm14 + vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm15 + vmovmskpd %ymm14,%ecx + vmovmskpd %ymm15,%ebx + shlb $4,%bl + orb %bl,%cl + + addb %r9b,%r14b + adcb %r8b,%r13b + adcb %dl,%r12b + adcb %cl,%r11b + + xorb %r9b,%r14b + xorb %r8b,%r13b + xorb %dl,%r12b + xorb %cl,%r11b + + leaq .Lkmasklut(%rip),%rdx + + movb %r14b,%r10b + andq $0xf,%r14 + vpsubq .Lmask52x4(%rip),%ymm3,%ymm0 + shlq $5,%r14 + vmovapd (%rdx,%r14), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm3,%ymm3 + + shrb $4,%r10b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm4,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm4,%ymm4 + + movb %r13b,%r10b + andq $0xf,%r13 + vpsubq .Lmask52x4(%rip),%ymm5,%ymm0 + shlq $5,%r13 + vmovapd (%rdx,%r13), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm5,%ymm5 + + shrb $4,%r10b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm6,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm6,%ymm6 + + movb %r12b,%r10b + andq $0xf,%r12 + vpsubq .Lmask52x4(%rip),%ymm7,%ymm0 + shlq $5,%r12 + vmovapd (%rdx,%r12), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm7,%ymm7 + + shrb $4,%r10b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm8,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm8,%ymm8 + + movb %r11b,%r10b + andq $0xf,%r11 + vpsubq .Lmask52x4(%rip),%ymm9,%ymm0 + shlq $5,%r11 + vmovapd (%rdx,%r11), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm9,%ymm9 + + shrb $4,%r10b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm10,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm10,%ymm10 + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + popq %r8 + popq %rcx + popq %rsi + popq %r11 + + vmovdqu %ymm3,0(%rdi) + vmovdqu %ymm4,32(%rdi) + vmovdqu %ymm5,64(%rdi) + vmovdqu %ymm6,96(%rdi) + vmovdqu %ymm7,128(%rdi) + vmovdqu %ymm8,160(%rdi) + vmovdqu %ymm9,192(%rdi) + vmovdqu %ymm10,224(%rdi) + + xorl %r15d,%r15d + + leaq 16(%r11),%r11 + movq $0xfffffffffffff,%rax + + movl $30,%ebx + + vpxor %ymm0,%ymm0,%ymm0 + vmovapd %ymm0,%ymm3 + vmovapd %ymm0,%ymm4 + vmovapd %ymm0,%ymm5 + vmovapd %ymm0,%ymm6 + vmovapd %ymm0,%ymm7 + vmovapd %ymm0,%ymm8 + vmovapd %ymm0,%ymm9 + vmovapd %ymm0,%ymm10 +.align 32 +.Lloop40: + movq 0(%r11),%r13 + + vpbroadcastq 0(%r11),%ymm1 + movq 256(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq 8(%r8),%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 256(%rcx),%rdx + mulxq %r13,%r13,%r12 + 
addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -264(%rsp),%rsp + +{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 320(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 352(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 384(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 416(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 448(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 480(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52luq 256(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 320(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 352(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 384(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 416(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 448(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 480(%rcx), %ymm2, %ymm10 + + + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + movq $0,256(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 256(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 320(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 352(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 384(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 416(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 448(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 480(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 320(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 352(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 384(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 416(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 448(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 480(%rcx), %ymm2, %ymm10 + + leaq 264(%rsp),%rsp + leaq 8(%r11),%r11 + decl %ebx + jne .Lloop40 + + vmovq %r9,%xmm0 + vpbroadcastq %xmm0,%ymm0 + vpblendd $3,%ymm0,%ymm3,%ymm3 + + + + vpsrlq $52,%ymm3,%ymm0 + vpsrlq $52,%ymm4,%ymm1 + vpsrlq $52,%ymm5,%ymm2 + vpsrlq $52,%ymm6,%ymm11 + vpsrlq $52,%ymm7,%ymm12 + vpsrlq $52,%ymm8,%ymm13 + vpsrlq $52,%ymm9,%ymm14 + vpsrlq $52,%ymm10,%ymm15 + + leaq -32(%rsp),%rsp + vmovupd %ymm3,(%rsp) + + + vpermq $144,%ymm15,%ymm15 + vpermq $3,%ymm14,%ymm3 + vblendpd $1,%ymm3,%ymm15,%ymm15 + + vpermq $144,%ymm14,%ymm14 + vpermq $3,%ymm13,%ymm3 + vblendpd $1,%ymm3,%ymm14,%ymm14 + + vpermq $144,%ymm13,%ymm13 + vpermq $3,%ymm12,%ymm3 + vblendpd $1,%ymm3,%ymm13,%ymm13 + + vpermq $144,%ymm12,%ymm12 + vpermq $3,%ymm11,%ymm3 + vblendpd $1,%ymm3,%ymm12,%ymm12 + + vpermq $144,%ymm11,%ymm11 + vpermq $3,%ymm2,%ymm3 + vblendpd $1,%ymm3,%ymm11,%ymm11 + + vpermq $144,%ymm2,%ymm2 + vpermq $3,%ymm1,%ymm3 + vblendpd $1,%ymm3,%ymm2,%ymm2 + + vpermq $144,%ymm1,%ymm1 + vpermq $3,%ymm0,%ymm3 + vblendpd $1,%ymm3,%ymm1,%ymm1 + + vpermq $144,%ymm0,%ymm0 + vpand .Lhigh64x3(%rip),%ymm0,%ymm0 + + vmovupd (%rsp),%ymm3 + leaq 32(%rsp),%rsp + + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + + + vpaddq %ymm0,%ymm3,%ymm3 + vpaddq %ymm1,%ymm4,%ymm4 + vpaddq %ymm2,%ymm5,%ymm5 + vpaddq %ymm11,%ymm6,%ymm6 + vpaddq %ymm12,%ymm7,%ymm7 + vpaddq %ymm13,%ymm8,%ymm8 + 
vpaddq %ymm14,%ymm9,%ymm9 + vpaddq %ymm15,%ymm10,%ymm10 + + + + vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm0 + vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm1 + vmovmskpd %ymm0,%r14d + vmovmskpd %ymm1,%r13d + shlb $4,%r13b + orb %r13b,%r14b + + vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm2 + vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm11 + vmovmskpd %ymm2,%r13d + vmovmskpd %ymm11,%r12d + shlb $4,%r12b + orb %r12b,%r13b + + vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm12 + vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm13 + vmovmskpd %ymm12,%r12d + vmovmskpd %ymm13,%r11d + shlb $4,%r11b + orb %r11b,%r12b + + vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm14 + vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm15 + vmovmskpd %ymm14,%r11d + vmovmskpd %ymm15,%r10d + shlb $4,%r10b + orb %r10b,%r11b + + addb %r14b,%r14b + adcb %r13b,%r13b + adcb %r12b,%r12b + adcb %r11b,%r11b + + + vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm0 + vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm1 + vmovmskpd %ymm0,%r9d + vmovmskpd %ymm1,%r8d + shlb $4,%r8b + orb %r8b,%r9b + + vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm2 + vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm11 + vmovmskpd %ymm2,%r8d + vmovmskpd %ymm11,%edx + shlb $4,%dl + orb %dl,%r8b + + vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm12 + vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm13 + vmovmskpd %ymm12,%edx + vmovmskpd %ymm13,%ecx + shlb $4,%cl + orb %cl,%dl + + vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm14 + vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm15 + vmovmskpd %ymm14,%ecx + vmovmskpd %ymm15,%ebx + shlb $4,%bl + orb %bl,%cl + + addb %r9b,%r14b + adcb %r8b,%r13b + adcb %dl,%r12b + adcb %cl,%r11b + + xorb %r9b,%r14b + xorb %r8b,%r13b + xorb %dl,%r12b + xorb %cl,%r11b + + leaq .Lkmasklut(%rip),%rdx + + movb %r14b,%r10b + andq $0xf,%r14 + vpsubq .Lmask52x4(%rip),%ymm3,%ymm0 + shlq $5,%r14 + vmovapd (%rdx,%r14), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm3,%ymm3 + + shrb $4,%r10b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm4,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm4,%ymm4 + + movb %r13b,%r10b + andq $0xf,%r13 + vpsubq .Lmask52x4(%rip),%ymm5,%ymm0 + shlq $5,%r13 + vmovapd (%rdx,%r13), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm5,%ymm5 + + shrb $4,%r10b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm6,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm6,%ymm6 + + movb %r12b,%r10b + andq $0xf,%r12 + vpsubq .Lmask52x4(%rip),%ymm7,%ymm0 + shlq $5,%r12 + vmovapd (%rdx,%r12), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm7,%ymm7 + + shrb $4,%r10b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm8,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm8,%ymm8 + + movb %r11b,%r10b + andq $0xf,%r11 + vpsubq .Lmask52x4(%rip),%ymm9,%ymm0 + shlq $5,%r11 + vmovapd (%rdx,%r11), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm9,%ymm9 + + shrb $4,%r10b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm10,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10), %ymm2 + vblendvpd %ymm2,%ymm0,%ymm10,%ymm10 + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + + vmovdqu %ymm3,256(%rdi) + vmovdqu %ymm4,288(%rdi) + vmovdqu %ymm5,320(%rdi) + vmovdqu %ymm6,352(%rdi) + vmovdqu %ymm7,384(%rdi) + vmovdqu %ymm8,416(%rdi) + vmovdqu %ymm9,448(%rdi) + vmovdqu %ymm10,480(%rdi) + + vzeroupper + leaq (%rsp),%rax +.cfi_def_cfa_register %rax + movq 0(%rax),%r15 +.cfi_restore %r15 + movq 8(%rax),%r14 +.cfi_restore %r14 + movq 16(%rax),%r13 +.cfi_restore %r13 + 
movq 24(%rax),%r12 +.cfi_restore %r12 + movq 32(%rax),%rbp +.cfi_restore %rbp + movq 40(%rax),%rbx +.cfi_restore %rbx + leaq 48(%rax),%rsp +.cfi_def_cfa %rsp,8 +.Lossl_rsaz_amm52x30_x2_avxifma256_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_rsaz_amm52x30_x2_avxifma256, .-ossl_rsaz_amm52x30_x2_avxifma256 +.text + +.align 32 +.globl ossl_extract_multiplier_2x30_win5_avx +.type ossl_extract_multiplier_2x30_win5_avx,@function +ossl_extract_multiplier_2x30_win5_avx: +.cfi_startproc +.byte 243,15,30,250 + vmovapd .Lones(%rip),%ymm12 + vmovq %rdx,%xmm8 + vpbroadcastq %xmm8,%ymm10 + vmovq %rcx,%xmm8 + vpbroadcastq %xmm8,%ymm11 + leaq 16384(%rsi),%rax + + + vpxor %xmm0,%xmm0,%xmm0 + vmovapd %ymm0,%ymm9 + vmovapd %ymm0,%ymm1 + vmovapd %ymm0,%ymm2 + vmovapd %ymm0,%ymm3 + vmovapd %ymm0,%ymm4 + vmovapd %ymm0,%ymm5 + vmovapd %ymm0,%ymm6 + vmovapd %ymm0,%ymm7 + +.align 32 +.Lloop: + vpcmpeqq %ymm9,%ymm10,%ymm13 + vmovdqu 0(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm0,%ymm0 + vmovdqu 32(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm1,%ymm1 + vmovdqu 64(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm2,%ymm2 + vmovdqu 96(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm3,%ymm3 + vmovdqu 128(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm4,%ymm4 + vmovdqu 160(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm5,%ymm5 + vmovdqu 192(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm6,%ymm6 + vmovdqu 224(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm7,%ymm7 + vpaddq %ymm12,%ymm9,%ymm9 + addq $512,%rsi + cmpq %rsi,%rax + jne .Lloop + vmovdqu %ymm0,0(%rdi) + vmovdqu %ymm1,32(%rdi) + vmovdqu %ymm2,64(%rdi) + vmovdqu %ymm3,96(%rdi) + vmovdqu %ymm4,128(%rdi) + vmovdqu %ymm5,160(%rdi) + vmovdqu %ymm6,192(%rdi) + vmovdqu %ymm7,224(%rdi) + leaq -16384(%rax),%rsi + + + vpxor %xmm0,%xmm0,%xmm0 + vmovapd %ymm0,%ymm9 + vmovapd %ymm0,%ymm0 + vmovapd %ymm0,%ymm1 + vmovapd %ymm0,%ymm2 + vmovapd %ymm0,%ymm3 + vmovapd %ymm0,%ymm4 + vmovapd %ymm0,%ymm5 + vmovapd %ymm0,%ymm6 + vmovapd %ymm0,%ymm7 + +.align 32 +.Lloop_8_15: + vpcmpeqq %ymm9,%ymm11,%ymm13 + vmovdqu 256(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm0,%ymm0 + vmovdqu 288(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm1,%ymm1 + vmovdqu 320(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm2,%ymm2 + vmovdqu 352(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm3,%ymm3 + vmovdqu 384(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm4,%ymm4 + vmovdqu 416(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm5,%ymm5 + vmovdqu 448(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm6,%ymm6 + vmovdqu 480(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm7,%ymm7 + vpaddq %ymm12,%ymm9,%ymm9 + addq $512,%rsi + cmpq %rsi,%rax + jne .Lloop_8_15 + vmovdqu %ymm0,256(%rdi) + vmovdqu %ymm1,288(%rdi) + vmovdqu %ymm2,320(%rdi) + vmovdqu %ymm3,352(%rdi) + vmovdqu %ymm4,384(%rdi) + vmovdqu %ymm5,416(%rdi) + vmovdqu %ymm6,448(%rdi) + vmovdqu %ymm7,480(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_extract_multiplier_2x30_win5_avx, .-ossl_extract_multiplier_2x30_win5_avx +.section .rodata +.align 32 +.Lones: +.quad 1,1,1,1 +.Lzeros: +.quad 0,0,0,0 + .section ".note.gnu.property", "a" + .p2align 3 + .long 1f - 0f + .long 4f - 1f + .long 5 +0: + # "GNU" encoded with .byte, since .asciz isn't supported + # on Solaris. 
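+ # The words below form a GNU property note: namesz/descsz, n_type 5
+ # (NT_GNU_PROPERTY_TYPE_0), the name "GNU", then a single property of
+ # type 0xc0000002 (GNU_PROPERTY_X86_FEATURE_1_AND) with value 3,
+ # marking the object as IBT- and SHSTK-compatible. This pairs with
+ # the endbr64 landing pads (.byte 243,15,30,250) at each entry point.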
+ .byte 0x47 + .byte 0x4e + .byte 0x55 + .byte 0 +1: + .p2align 3 + .long 0xc0000002 + .long 3f - 2f +2: + .long 3 +3: + .p2align 3 +4: diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-4k-avxifma.s b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-4k-avxifma.s new file mode 100644 index 0000000000..08a6243d77 --- /dev/null +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/bn/rsaz-4k-avxifma.s @@ -0,0 +1,1922 @@ +.text + +.globl ossl_rsaz_amm52x40_x1_avxifma256 +.type ossl_rsaz_amm52x40_x1_avxifma256,@function +.align 32 +ossl_rsaz_amm52x40_x1_avxifma256: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + vpxor %ymm0,%ymm0,%ymm0 + vmovapd %ymm0,%ymm3 + vmovapd %ymm0,%ymm4 + vmovapd %ymm0,%ymm5 + vmovapd %ymm0,%ymm6 + vmovapd %ymm0,%ymm7 + vmovapd %ymm0,%ymm8 + vmovapd %ymm0,%ymm9 + vmovapd %ymm0,%ymm10 + vmovapd %ymm0,%ymm11 + vmovapd %ymm0,%ymm12 + + xorl %r9d,%r9d + + movq %rdx,%r11 + movq $0xfffffffffffff,%rax + + + movl $10,%ebx + +.align 32 +.Lloop10: + movq 0(%r11),%r13 + + vpbroadcastq 0(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -328(%rsp),%rsp + +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10 +{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm11 +{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 +{vex} vpmadd52luq 256(%rcx), %ymm2, %ymm11 +{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm12 + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + vmovdqu %ymm11,256(%rsp) + vmovdqu %ymm12,288(%rsp) + movq $0,320(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + vmovdqu 264(%rsp),%ymm11 + vmovdqu 296(%rsp),%ymm12 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 +{vex} vpmadd52huq 
256(%rsi), %ymm1, %ymm11 +{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 +{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm11 +{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm12 + leaq 328(%rsp),%rsp + movq 8(%r11),%r13 + + vpbroadcastq 8(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -328(%rsp),%rsp + +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10 +{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm11 +{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 +{vex} vpmadd52luq 256(%rcx), %ymm2, %ymm11 +{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm12 + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + vmovdqu %ymm11,256(%rsp) + vmovdqu %ymm12,288(%rsp) + movq $0,320(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + vmovdqu 264(%rsp),%ymm11 + vmovdqu 296(%rsp),%ymm12 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 +{vex} vpmadd52huq 256(%rsi), %ymm1, %ymm11 +{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 +{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm11 +{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm12 + leaq 328(%rsp),%rsp + movq 16(%r11),%r13 + + vpbroadcastq 16(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + 
salq $12,%r10 + orq %r10,%r9 + + leaq -328(%rsp),%rsp + +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10 +{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm11 +{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 +{vex} vpmadd52luq 256(%rcx), %ymm2, %ymm11 +{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm12 + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + vmovdqu %ymm11,256(%rsp) + vmovdqu %ymm12,288(%rsp) + movq $0,320(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + vmovdqu 264(%rsp),%ymm11 + vmovdqu 296(%rsp),%ymm12 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 +{vex} vpmadd52huq 256(%rsi), %ymm1, %ymm11 +{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 +{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm11 +{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm12 + leaq 328(%rsp),%rsp + movq 24(%r11),%r13 + + vpbroadcastq 24(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -328(%rsp),%rsp + +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10 +{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm11 +{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 +{vex} vpmadd52luq 256(%rcx), 
%ymm2, %ymm11 +{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm12 + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + vmovdqu %ymm11,256(%rsp) + vmovdqu %ymm12,288(%rsp) + movq $0,320(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + vmovdqu 264(%rsp),%ymm11 + vmovdqu 296(%rsp),%ymm12 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 +{vex} vpmadd52huq 256(%rsi), %ymm1, %ymm11 +{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 +{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm11 +{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm12 + leaq 328(%rsp),%rsp + leaq 32(%r11),%r11 + decl %ebx + jne .Lloop10 + + vmovq %r9,%xmm0 + vpbroadcastq %xmm0,%ymm0 + vpblendd $3,%ymm0,%ymm3,%ymm3 + + leaq -640(%rsp),%rsp + vmovupd %ymm3,0(%rsp) + vmovupd %ymm4,32(%rsp) + vmovupd %ymm5,64(%rsp) + vmovupd %ymm6,96(%rsp) + vmovupd %ymm7,128(%rsp) + vmovupd %ymm8,160(%rsp) + vmovupd %ymm9,192(%rsp) + vmovupd %ymm10,224(%rsp) + vmovupd %ymm11,256(%rsp) + vmovupd %ymm12,288(%rsp) + + + + vpsrlq $52,%ymm3,%ymm3 + vpsrlq $52,%ymm4,%ymm4 + vpsrlq $52,%ymm5,%ymm5 + vpsrlq $52,%ymm6,%ymm6 + vpsrlq $52,%ymm7,%ymm7 + vpsrlq $52,%ymm8,%ymm8 + vpsrlq $52,%ymm9,%ymm9 + vpsrlq $52,%ymm10,%ymm10 + vpsrlq $52,%ymm11,%ymm11 + vpsrlq $52,%ymm12,%ymm12 + + + vpermq $144,%ymm12,%ymm12 + vpermq $3,%ymm11,%ymm13 + vblendpd $1,%ymm13,%ymm12,%ymm12 + + vpermq $144,%ymm11,%ymm11 + vpermq $3,%ymm10,%ymm13 + vblendpd $1,%ymm13,%ymm11,%ymm11 + + vpermq $144,%ymm10,%ymm10 + vpermq $3,%ymm9,%ymm13 + vblendpd $1,%ymm13,%ymm10,%ymm10 + + vpermq $144,%ymm9,%ymm9 + vpermq $3,%ymm8,%ymm13 + vblendpd $1,%ymm13,%ymm9,%ymm9 + + vpermq $144,%ymm8,%ymm8 + vpermq $3,%ymm7,%ymm13 + vblendpd $1,%ymm13,%ymm8,%ymm8 + + vpermq $144,%ymm7,%ymm7 + vpermq $3,%ymm6,%ymm13 + vblendpd $1,%ymm13,%ymm7,%ymm7 + + vpermq $144,%ymm6,%ymm6 + vpermq $3,%ymm5,%ymm13 + vblendpd $1,%ymm13,%ymm6,%ymm6 + + vpermq $144,%ymm5,%ymm5 + vpermq $3,%ymm4,%ymm13 + vblendpd $1,%ymm13,%ymm5,%ymm5 + + vpermq $144,%ymm4,%ymm4 + vpermq $3,%ymm3,%ymm13 + vblendpd $1,%ymm13,%ymm4,%ymm4 + + vpermq $144,%ymm3,%ymm3 + vpand .Lhigh64x3(%rip),%ymm3,%ymm3 + + vmovupd %ymm3,320(%rsp) + vmovupd %ymm4,352(%rsp) + vmovupd %ymm5,384(%rsp) + vmovupd %ymm6,416(%rsp) + vmovupd %ymm7,448(%rsp) + vmovupd %ymm8,480(%rsp) + vmovupd %ymm9,512(%rsp) + vmovupd %ymm10,544(%rsp) + vmovupd %ymm11,576(%rsp) + vmovupd %ymm12,608(%rsp) + + vmovupd 0(%rsp),%ymm3 + vmovupd 32(%rsp),%ymm4 + vmovupd 64(%rsp),%ymm5 + vmovupd 96(%rsp),%ymm6 + vmovupd 128(%rsp),%ymm7 + vmovupd 160(%rsp),%ymm8 + vmovupd 192(%rsp),%ymm9 + vmovupd 224(%rsp),%ymm10 + vmovupd 256(%rsp),%ymm11 + vmovupd 288(%rsp),%ymm12 + 
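+# Carry normalization: the limbs reloaded above are still unnormalized
+# (the IFMA accumulation can leave bits at or above 2^52 in each
+# 64-bit lane). The vpand block below masks every lane back to 52
+# bits, and the vpaddq block adds the carries saved at 320..608(%rsp),
+# which the earlier vpsrlq $52 / vpermq / vblendpd sequence shifted
+# one limb position to the left.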
+ + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + vpand .Lmask52x4(%rip),%ymm11,%ymm11 + vpand .Lmask52x4(%rip),%ymm12,%ymm12 + + + vpaddq 320(%rsp),%ymm3,%ymm3 + vpaddq 352(%rsp),%ymm4,%ymm4 + vpaddq 384(%rsp),%ymm5,%ymm5 + vpaddq 416(%rsp),%ymm6,%ymm6 + vpaddq 448(%rsp),%ymm7,%ymm7 + vpaddq 480(%rsp),%ymm8,%ymm8 + vpaddq 512(%rsp),%ymm9,%ymm9 + vpaddq 544(%rsp),%ymm10,%ymm10 + vpaddq 576(%rsp),%ymm11,%ymm11 + vpaddq 608(%rsp),%ymm12,%ymm12 + + leaq 640(%rsp),%rsp + + + + vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm13 + vmovmskpd %ymm13,%r14d + vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm13 + vmovmskpd %ymm13,%r13d + shlb $4,%r13b + orb %r13b,%r14b + + vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm13 + vmovmskpd %ymm13,%r13d + vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm13 + vmovmskpd %ymm13,%r12d + shlb $4,%r12b + orb %r12b,%r13b + + vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm13 + vmovmskpd %ymm13,%r12d + vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm13 + vmovmskpd %ymm13,%r11d + shlb $4,%r11b + orb %r11b,%r12b + + vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm13 + vmovmskpd %ymm13,%r11d + vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm13 + vmovmskpd %ymm13,%r10d + shlb $4,%r10b + orb %r10b,%r11b + + vpcmpgtq .Lmask52x4(%rip),%ymm11,%ymm13 + vmovmskpd %ymm13,%r10d + vpcmpgtq .Lmask52x4(%rip),%ymm12,%ymm13 + vmovmskpd %ymm13,%r9d + shlb $4,%r9b + orb %r9b,%r10b + + addb %r14b,%r14b + adcb %r13b,%r13b + adcb %r12b,%r12b + adcb %r11b,%r11b + adcb %r10b,%r10b + + + vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm13 + vmovmskpd %ymm13,%r9d + vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm13 + vmovmskpd %ymm13,%r8d + shlb $4,%r8b + orb %r8b,%r9b + + vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm13 + vmovmskpd %ymm13,%r8d + vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm13 + vmovmskpd %ymm13,%edx + shlb $4,%dl + orb %dl,%r8b + + vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm13 + vmovmskpd %ymm13,%edx + vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm13 + vmovmskpd %ymm13,%ecx + shlb $4,%cl + orb %cl,%dl + + vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm13 + vmovmskpd %ymm13,%ecx + vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm13 + vmovmskpd %ymm13,%ebx + shlb $4,%bl + orb %bl,%cl + + vpcmpeqq .Lmask52x4(%rip),%ymm11,%ymm13 + vmovmskpd %ymm13,%ebx + vpcmpeqq .Lmask52x4(%rip),%ymm12,%ymm13 + vmovmskpd %ymm13,%eax + shlb $4,%al + orb %al,%bl + + addb %r9b,%r14b + adcb %r8b,%r13b + adcb %dl,%r12b + adcb %cl,%r11b + adcb %bl,%r10b + + xorb %r9b,%r14b + xorb %r8b,%r13b + xorb %dl,%r12b + xorb %cl,%r11b + xorb %bl,%r10b + + pushq %r9 + pushq %r8 + + leaq .Lkmasklut(%rip),%r8 + + movb %r14b,%r9b + andq $0xf,%r14 + vpsubq .Lmask52x4(%rip),%ymm3,%ymm13 + shlq $5,%r14 + vmovapd (%r8,%r14), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm3,%ymm3 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm4,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm4,%ymm4 + + movb %r13b,%r9b + andq $0xf,%r13 + vpsubq .Lmask52x4(%rip),%ymm5,%ymm13 + shlq $5,%r13 + vmovapd (%r8,%r13), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm5,%ymm5 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm6,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm6,%ymm6 + + movb %r12b,%r9b + andq $0xf,%r12 + vpsubq .Lmask52x4(%rip),%ymm7,%ymm13 + shlq $5,%r12 + vmovapd (%r8,%r12), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm7,%ymm7 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq 
.Lmask52x4(%rip),%ymm8,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm8,%ymm8 + + movb %r11b,%r9b + andq $0xf,%r11 + vpsubq .Lmask52x4(%rip),%ymm9,%ymm13 + shlq $5,%r11 + vmovapd (%r8,%r11), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm9,%ymm9 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm10,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm10,%ymm10 + + movb %r10b,%r9b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm11,%ymm13 + shlq $5,%r10 + vmovapd (%r8,%r10), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm11,%ymm11 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm12,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm12,%ymm12 + + popq %r8 + popq %r9 + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + vpand .Lmask52x4(%rip),%ymm11,%ymm11 + vpand .Lmask52x4(%rip),%ymm12,%ymm12 + + vmovdqu %ymm3,0(%rdi) + vmovdqu %ymm4,32(%rdi) + vmovdqu %ymm5,64(%rdi) + vmovdqu %ymm6,96(%rdi) + vmovdqu %ymm7,128(%rdi) + vmovdqu %ymm8,160(%rdi) + vmovdqu %ymm9,192(%rdi) + vmovdqu %ymm10,224(%rdi) + vmovdqu %ymm11,256(%rdi) + vmovdqu %ymm12,288(%rdi) + + vzeroupper + leaq (%rsp),%rax +.cfi_def_cfa_register %rax + movq 0(%rax),%r15 +.cfi_restore %r15 + movq 8(%rax),%r14 +.cfi_restore %r14 + movq 16(%rax),%r13 +.cfi_restore %r13 + movq 24(%rax),%r12 +.cfi_restore %r12 + movq 32(%rax),%rbp +.cfi_restore %rbp + movq 40(%rax),%rbx +.cfi_restore %rbx + leaq 48(%rax),%rsp +.cfi_def_cfa %rsp,8 +.Lossl_rsaz_amm52x40_x1_avxifma256_epilogue: + + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_rsaz_amm52x40_x1_avxifma256, .-ossl_rsaz_amm52x40_x1_avxifma256 +.section .rodata +.align 32 +.Lmask52x4: +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.Lhigh64x3: +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.Lkmasklut: + +.quad 0x0 +.quad 0x0 +.quad 0x0 +.quad 0x0 + +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0x0 +.quad 0x0 + +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0x0 + +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0x0 + +.quad 0x0 +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0x0 + +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0x0 + +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0x0 + +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0x0 + +.quad 0x0 +.quad 0x0 +.quad 0x0 +.quad 0xffffffffffffffff + +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0x0 +.quad 0xffffffffffffffff + +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0xffffffffffffffff + +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0xffffffffffffffff + +.quad 0x0 +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff + +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff + +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff + +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.text + +.globl ossl_rsaz_amm52x40_x2_avxifma256 +.type ossl_rsaz_amm52x40_x2_avxifma256,@function +.align 32 +ossl_rsaz_amm52x40_x2_avxifma256: +.cfi_startproc +.byte 243,15,30,250 + 
pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + vpxor %ymm0,%ymm0,%ymm0 + vmovapd %ymm0,%ymm3 + vmovapd %ymm0,%ymm4 + vmovapd %ymm0,%ymm5 + vmovapd %ymm0,%ymm6 + vmovapd %ymm0,%ymm7 + vmovapd %ymm0,%ymm8 + vmovapd %ymm0,%ymm9 + vmovapd %ymm0,%ymm10 + vmovapd %ymm0,%ymm11 + vmovapd %ymm0,%ymm12 + + xorl %r9d,%r9d + + movq %rdx,%r11 + movq $0xfffffffffffff,%rax + + movl $40,%ebx + +.align 32 +.Lloop40: + movq 0(%r11),%r13 + + vpbroadcastq 0(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq (%r8),%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -328(%rsp),%rsp + +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10 +{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm11 +{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 +{vex} vpmadd52luq 256(%rcx), %ymm2, %ymm11 +{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm12 + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + vmovdqu %ymm11,256(%rsp) + vmovdqu %ymm12,288(%rsp) + movq $0,320(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + vmovdqu 264(%rsp),%ymm11 + vmovdqu 296(%rsp),%ymm12 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 +{vex} vpmadd52huq 256(%rsi), %ymm1, %ymm11 +{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 +{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm11 +{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm12 + leaq 328(%rsp),%rsp + leaq 8(%r11),%r11 + decl %ebx + jne .Lloop40 + + pushq %r11 + pushq %rsi + pushq %rcx + pushq %r8 + + vmovq %r9,%xmm0 + vpbroadcastq 
%xmm0,%ymm0 + vpblendd $3,%ymm0,%ymm3,%ymm3 + + leaq -640(%rsp),%rsp + vmovupd %ymm3,0(%rsp) + vmovupd %ymm4,32(%rsp) + vmovupd %ymm5,64(%rsp) + vmovupd %ymm6,96(%rsp) + vmovupd %ymm7,128(%rsp) + vmovupd %ymm8,160(%rsp) + vmovupd %ymm9,192(%rsp) + vmovupd %ymm10,224(%rsp) + vmovupd %ymm11,256(%rsp) + vmovupd %ymm12,288(%rsp) + + + + vpsrlq $52,%ymm3,%ymm3 + vpsrlq $52,%ymm4,%ymm4 + vpsrlq $52,%ymm5,%ymm5 + vpsrlq $52,%ymm6,%ymm6 + vpsrlq $52,%ymm7,%ymm7 + vpsrlq $52,%ymm8,%ymm8 + vpsrlq $52,%ymm9,%ymm9 + vpsrlq $52,%ymm10,%ymm10 + vpsrlq $52,%ymm11,%ymm11 + vpsrlq $52,%ymm12,%ymm12 + + + vpermq $144,%ymm12,%ymm12 + vpermq $3,%ymm11,%ymm13 + vblendpd $1,%ymm13,%ymm12,%ymm12 + + vpermq $144,%ymm11,%ymm11 + vpermq $3,%ymm10,%ymm13 + vblendpd $1,%ymm13,%ymm11,%ymm11 + + vpermq $144,%ymm10,%ymm10 + vpermq $3,%ymm9,%ymm13 + vblendpd $1,%ymm13,%ymm10,%ymm10 + + vpermq $144,%ymm9,%ymm9 + vpermq $3,%ymm8,%ymm13 + vblendpd $1,%ymm13,%ymm9,%ymm9 + + vpermq $144,%ymm8,%ymm8 + vpermq $3,%ymm7,%ymm13 + vblendpd $1,%ymm13,%ymm8,%ymm8 + + vpermq $144,%ymm7,%ymm7 + vpermq $3,%ymm6,%ymm13 + vblendpd $1,%ymm13,%ymm7,%ymm7 + + vpermq $144,%ymm6,%ymm6 + vpermq $3,%ymm5,%ymm13 + vblendpd $1,%ymm13,%ymm6,%ymm6 + + vpermq $144,%ymm5,%ymm5 + vpermq $3,%ymm4,%ymm13 + vblendpd $1,%ymm13,%ymm5,%ymm5 + + vpermq $144,%ymm4,%ymm4 + vpermq $3,%ymm3,%ymm13 + vblendpd $1,%ymm13,%ymm4,%ymm4 + + vpermq $144,%ymm3,%ymm3 + vpand .Lhigh64x3(%rip),%ymm3,%ymm3 + + vmovupd %ymm3,320(%rsp) + vmovupd %ymm4,352(%rsp) + vmovupd %ymm5,384(%rsp) + vmovupd %ymm6,416(%rsp) + vmovupd %ymm7,448(%rsp) + vmovupd %ymm8,480(%rsp) + vmovupd %ymm9,512(%rsp) + vmovupd %ymm10,544(%rsp) + vmovupd %ymm11,576(%rsp) + vmovupd %ymm12,608(%rsp) + + vmovupd 0(%rsp),%ymm3 + vmovupd 32(%rsp),%ymm4 + vmovupd 64(%rsp),%ymm5 + vmovupd 96(%rsp),%ymm6 + vmovupd 128(%rsp),%ymm7 + vmovupd 160(%rsp),%ymm8 + vmovupd 192(%rsp),%ymm9 + vmovupd 224(%rsp),%ymm10 + vmovupd 256(%rsp),%ymm11 + vmovupd 288(%rsp),%ymm12 + + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + vpand .Lmask52x4(%rip),%ymm11,%ymm11 + vpand .Lmask52x4(%rip),%ymm12,%ymm12 + + + vpaddq 320(%rsp),%ymm3,%ymm3 + vpaddq 352(%rsp),%ymm4,%ymm4 + vpaddq 384(%rsp),%ymm5,%ymm5 + vpaddq 416(%rsp),%ymm6,%ymm6 + vpaddq 448(%rsp),%ymm7,%ymm7 + vpaddq 480(%rsp),%ymm8,%ymm8 + vpaddq 512(%rsp),%ymm9,%ymm9 + vpaddq 544(%rsp),%ymm10,%ymm10 + vpaddq 576(%rsp),%ymm11,%ymm11 + vpaddq 608(%rsp),%ymm12,%ymm12 + + leaq 640(%rsp),%rsp + + + + vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm13 + vmovmskpd %ymm13,%r14d + vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm13 + vmovmskpd %ymm13,%r13d + shlb $4,%r13b + orb %r13b,%r14b + + vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm13 + vmovmskpd %ymm13,%r13d + vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm13 + vmovmskpd %ymm13,%r12d + shlb $4,%r12b + orb %r12b,%r13b + + vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm13 + vmovmskpd %ymm13,%r12d + vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm13 + vmovmskpd %ymm13,%r11d + shlb $4,%r11b + orb %r11b,%r12b + + vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm13 + vmovmskpd %ymm13,%r11d + vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm13 + vmovmskpd %ymm13,%r10d + shlb $4,%r10b + orb %r10b,%r11b + + vpcmpgtq .Lmask52x4(%rip),%ymm11,%ymm13 + vmovmskpd %ymm13,%r10d + vpcmpgtq .Lmask52x4(%rip),%ymm12,%ymm13 + vmovmskpd %ymm13,%r9d + shlb $4,%r9b + orb %r9b,%r10b + + addb 
%r14b,%r14b + adcb %r13b,%r13b + adcb %r12b,%r12b + adcb %r11b,%r11b + adcb %r10b,%r10b + + + vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm13 + vmovmskpd %ymm13,%r9d + vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm13 + vmovmskpd %ymm13,%r8d + shlb $4,%r8b + orb %r8b,%r9b + + vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm13 + vmovmskpd %ymm13,%r8d + vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm13 + vmovmskpd %ymm13,%edx + shlb $4,%dl + orb %dl,%r8b + + vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm13 + vmovmskpd %ymm13,%edx + vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm13 + vmovmskpd %ymm13,%ecx + shlb $4,%cl + orb %cl,%dl + + vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm13 + vmovmskpd %ymm13,%ecx + vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm13 + vmovmskpd %ymm13,%ebx + shlb $4,%bl + orb %bl,%cl + + vpcmpeqq .Lmask52x4(%rip),%ymm11,%ymm13 + vmovmskpd %ymm13,%ebx + vpcmpeqq .Lmask52x4(%rip),%ymm12,%ymm13 + vmovmskpd %ymm13,%eax + shlb $4,%al + orb %al,%bl + + addb %r9b,%r14b + adcb %r8b,%r13b + adcb %dl,%r12b + adcb %cl,%r11b + adcb %bl,%r10b + + xorb %r9b,%r14b + xorb %r8b,%r13b + xorb %dl,%r12b + xorb %cl,%r11b + xorb %bl,%r10b + + pushq %r9 + pushq %r8 + + leaq .Lkmasklut(%rip),%r8 + + movb %r14b,%r9b + andq $0xf,%r14 + vpsubq .Lmask52x4(%rip),%ymm3,%ymm13 + shlq $5,%r14 + vmovapd (%r8,%r14), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm3,%ymm3 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm4,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm4,%ymm4 + + movb %r13b,%r9b + andq $0xf,%r13 + vpsubq .Lmask52x4(%rip),%ymm5,%ymm13 + shlq $5,%r13 + vmovapd (%r8,%r13), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm5,%ymm5 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm6,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm6,%ymm6 + + movb %r12b,%r9b + andq $0xf,%r12 + vpsubq .Lmask52x4(%rip),%ymm7,%ymm13 + shlq $5,%r12 + vmovapd (%r8,%r12), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm7,%ymm7 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm8,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm8,%ymm8 + + movb %r11b,%r9b + andq $0xf,%r11 + vpsubq .Lmask52x4(%rip),%ymm9,%ymm13 + shlq $5,%r11 + vmovapd (%r8,%r11), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm9,%ymm9 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm10,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm10,%ymm10 + + movb %r10b,%r9b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm11,%ymm13 + shlq $5,%r10 + vmovapd (%r8,%r10), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm11,%ymm11 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm12,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm12,%ymm12 + + popq %r8 + popq %r9 + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + vpand .Lmask52x4(%rip),%ymm11,%ymm11 + vpand .Lmask52x4(%rip),%ymm12,%ymm12 + + popq %r8 + popq %rcx + popq %rsi + popq %r11 + + vmovdqu %ymm3,0(%rdi) + vmovdqu %ymm4,32(%rdi) + vmovdqu %ymm5,64(%rdi) + vmovdqu %ymm6,96(%rdi) + vmovdqu %ymm7,128(%rdi) + vmovdqu %ymm8,160(%rdi) + vmovdqu %ymm9,192(%rdi) + vmovdqu %ymm10,224(%rdi) + vmovdqu %ymm11,256(%rdi) + vmovdqu %ymm12,288(%rdi) + + xorl %r15d,%r15d + + movq $0xfffffffffffff,%rax + + movl $40,%ebx + + vpxor %ymm0,%ymm0,%ymm0 + vmovapd %ymm0,%ymm3 + vmovapd %ymm0,%ymm4 + vmovapd 
%ymm0,%ymm5 + vmovapd %ymm0,%ymm6 + vmovapd %ymm0,%ymm7 + vmovapd %ymm0,%ymm8 + vmovapd %ymm0,%ymm9 + vmovapd %ymm0,%ymm10 + vmovapd %ymm0,%ymm11 + vmovapd %ymm0,%ymm12 +.align 32 +.Lloop40_1: + movq 0(%r11),%r13 + + vpbroadcastq 0(%r11),%ymm1 + movq 320(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq 8(%r8),%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 320(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -328(%rsp),%rsp + +{vex} vpmadd52luq 320(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 352(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 384(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 416(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 448(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 480(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 512(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 544(%rsi), %ymm1, %ymm10 +{vex} vpmadd52luq 576(%rsi), %ymm1, %ymm11 +{vex} vpmadd52luq 608(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52luq 320(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 352(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 384(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 416(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 448(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 480(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 512(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 544(%rcx), %ymm2, %ymm10 +{vex} vpmadd52luq 576(%rcx), %ymm2, %ymm11 +{vex} vpmadd52luq 608(%rcx), %ymm2, %ymm12 + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + vmovdqu %ymm11,256(%rsp) + vmovdqu %ymm12,288(%rsp) + movq $0,320(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + vmovdqu 264(%rsp),%ymm11 + vmovdqu 296(%rsp),%ymm12 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 320(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 352(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 384(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 416(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 448(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 480(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 512(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 544(%rsi), %ymm1, %ymm10 +{vex} vpmadd52huq 576(%rsi), %ymm1, %ymm11 +{vex} vpmadd52huq 608(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52huq 320(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 352(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 384(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 416(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 448(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 480(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 512(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 544(%rcx), %ymm2, %ymm10 +{vex} vpmadd52huq 576(%rcx), %ymm2, %ymm11 +{vex} vpmadd52huq 608(%rcx), %ymm2, %ymm12 + leaq 328(%rsp),%rsp + leaq 8(%r11),%r11 + decl %ebx + jne .Lloop40_1 + + vmovq %r9,%xmm0 + vpbroadcastq %xmm0,%ymm0 + vpblendd $3,%ymm0,%ymm3,%ymm3 + + leaq -640(%rsp),%rsp + vmovupd %ymm3,0(%rsp) + vmovupd %ymm4,32(%rsp) + vmovupd %ymm5,64(%rsp) + vmovupd %ymm6,96(%rsp) + vmovupd %ymm7,128(%rsp) + vmovupd %ymm8,160(%rsp) + vmovupd %ymm9,192(%rsp) + vmovupd %ymm10,224(%rsp) + vmovupd %ymm11,256(%rsp) + vmovupd %ymm12,288(%rsp) + + + + vpsrlq $52,%ymm3,%ymm3 + vpsrlq $52,%ymm4,%ymm4 + vpsrlq $52,%ymm5,%ymm5 + vpsrlq $52,%ymm6,%ymm6 + vpsrlq $52,%ymm7,%ymm7 + vpsrlq $52,%ymm8,%ymm8 + vpsrlq $52,%ymm9,%ymm9 + vpsrlq $52,%ymm10,%ymm10 + vpsrlq 
$52,%ymm11,%ymm11 + vpsrlq $52,%ymm12,%ymm12 + + + vpermq $144,%ymm12,%ymm12 + vpermq $3,%ymm11,%ymm13 + vblendpd $1,%ymm13,%ymm12,%ymm12 + + vpermq $144,%ymm11,%ymm11 + vpermq $3,%ymm10,%ymm13 + vblendpd $1,%ymm13,%ymm11,%ymm11 + + vpermq $144,%ymm10,%ymm10 + vpermq $3,%ymm9,%ymm13 + vblendpd $1,%ymm13,%ymm10,%ymm10 + + vpermq $144,%ymm9,%ymm9 + vpermq $3,%ymm8,%ymm13 + vblendpd $1,%ymm13,%ymm9,%ymm9 + + vpermq $144,%ymm8,%ymm8 + vpermq $3,%ymm7,%ymm13 + vblendpd $1,%ymm13,%ymm8,%ymm8 + + vpermq $144,%ymm7,%ymm7 + vpermq $3,%ymm6,%ymm13 + vblendpd $1,%ymm13,%ymm7,%ymm7 + + vpermq $144,%ymm6,%ymm6 + vpermq $3,%ymm5,%ymm13 + vblendpd $1,%ymm13,%ymm6,%ymm6 + + vpermq $144,%ymm5,%ymm5 + vpermq $3,%ymm4,%ymm13 + vblendpd $1,%ymm13,%ymm5,%ymm5 + + vpermq $144,%ymm4,%ymm4 + vpermq $3,%ymm3,%ymm13 + vblendpd $1,%ymm13,%ymm4,%ymm4 + + vpermq $144,%ymm3,%ymm3 + vpand .Lhigh64x3(%rip),%ymm3,%ymm3 + + vmovupd %ymm3,320(%rsp) + vmovupd %ymm4,352(%rsp) + vmovupd %ymm5,384(%rsp) + vmovupd %ymm6,416(%rsp) + vmovupd %ymm7,448(%rsp) + vmovupd %ymm8,480(%rsp) + vmovupd %ymm9,512(%rsp) + vmovupd %ymm10,544(%rsp) + vmovupd %ymm11,576(%rsp) + vmovupd %ymm12,608(%rsp) + + vmovupd 0(%rsp),%ymm3 + vmovupd 32(%rsp),%ymm4 + vmovupd 64(%rsp),%ymm5 + vmovupd 96(%rsp),%ymm6 + vmovupd 128(%rsp),%ymm7 + vmovupd 160(%rsp),%ymm8 + vmovupd 192(%rsp),%ymm9 + vmovupd 224(%rsp),%ymm10 + vmovupd 256(%rsp),%ymm11 + vmovupd 288(%rsp),%ymm12 + + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + vpand .Lmask52x4(%rip),%ymm11,%ymm11 + vpand .Lmask52x4(%rip),%ymm12,%ymm12 + + + vpaddq 320(%rsp),%ymm3,%ymm3 + vpaddq 352(%rsp),%ymm4,%ymm4 + vpaddq 384(%rsp),%ymm5,%ymm5 + vpaddq 416(%rsp),%ymm6,%ymm6 + vpaddq 448(%rsp),%ymm7,%ymm7 + vpaddq 480(%rsp),%ymm8,%ymm8 + vpaddq 512(%rsp),%ymm9,%ymm9 + vpaddq 544(%rsp),%ymm10,%ymm10 + vpaddq 576(%rsp),%ymm11,%ymm11 + vpaddq 608(%rsp),%ymm12,%ymm12 + + leaq 640(%rsp),%rsp + + + + vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm13 + vmovmskpd %ymm13,%r14d + vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm13 + vmovmskpd %ymm13,%r13d + shlb $4,%r13b + orb %r13b,%r14b + + vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm13 + vmovmskpd %ymm13,%r13d + vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm13 + vmovmskpd %ymm13,%r12d + shlb $4,%r12b + orb %r12b,%r13b + + vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm13 + vmovmskpd %ymm13,%r12d + vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm13 + vmovmskpd %ymm13,%r11d + shlb $4,%r11b + orb %r11b,%r12b + + vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm13 + vmovmskpd %ymm13,%r11d + vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm13 + vmovmskpd %ymm13,%r10d + shlb $4,%r10b + orb %r10b,%r11b + + vpcmpgtq .Lmask52x4(%rip),%ymm11,%ymm13 + vmovmskpd %ymm13,%r10d + vpcmpgtq .Lmask52x4(%rip),%ymm12,%ymm13 + vmovmskpd %ymm13,%r9d + shlb $4,%r9b + orb %r9b,%r10b + + addb %r14b,%r14b + adcb %r13b,%r13b + adcb %r12b,%r12b + adcb %r11b,%r11b + adcb %r10b,%r10b + + + vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm13 + vmovmskpd %ymm13,%r9d + vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm13 + vmovmskpd %ymm13,%r8d + shlb $4,%r8b + orb %r8b,%r9b + + vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm13 + vmovmskpd %ymm13,%r8d + vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm13 + vmovmskpd %ymm13,%edx + shlb $4,%dl + orb %dl,%r8b + + vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm13 + vmovmskpd %ymm13,%edx + vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm13 + vmovmskpd %ymm13,%ecx + shlb 
$4,%cl + orb %cl,%dl + + vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm13 + vmovmskpd %ymm13,%ecx + vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm13 + vmovmskpd %ymm13,%ebx + shlb $4,%bl + orb %bl,%cl + + vpcmpeqq .Lmask52x4(%rip),%ymm11,%ymm13 + vmovmskpd %ymm13,%ebx + vpcmpeqq .Lmask52x4(%rip),%ymm12,%ymm13 + vmovmskpd %ymm13,%eax + shlb $4,%al + orb %al,%bl + + addb %r9b,%r14b + adcb %r8b,%r13b + adcb %dl,%r12b + adcb %cl,%r11b + adcb %bl,%r10b + + xorb %r9b,%r14b + xorb %r8b,%r13b + xorb %dl,%r12b + xorb %cl,%r11b + xorb %bl,%r10b + + pushq %r9 + pushq %r8 + + leaq .Lkmasklut(%rip),%r8 + + movb %r14b,%r9b + andq $0xf,%r14 + vpsubq .Lmask52x4(%rip),%ymm3,%ymm13 + shlq $5,%r14 + vmovapd (%r8,%r14), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm3,%ymm3 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm4,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm4,%ymm4 + + movb %r13b,%r9b + andq $0xf,%r13 + vpsubq .Lmask52x4(%rip),%ymm5,%ymm13 + shlq $5,%r13 + vmovapd (%r8,%r13), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm5,%ymm5 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm6,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm6,%ymm6 + + movb %r12b,%r9b + andq $0xf,%r12 + vpsubq .Lmask52x4(%rip),%ymm7,%ymm13 + shlq $5,%r12 + vmovapd (%r8,%r12), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm7,%ymm7 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm8,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm8,%ymm8 + + movb %r11b,%r9b + andq $0xf,%r11 + vpsubq .Lmask52x4(%rip),%ymm9,%ymm13 + shlq $5,%r11 + vmovapd (%r8,%r11), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm9,%ymm9 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm10,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm10,%ymm10 + + movb %r10b,%r9b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm11,%ymm13 + shlq $5,%r10 + vmovapd (%r8,%r10), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm11,%ymm11 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm12,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9), %ymm14 + vblendvpd %ymm14,%ymm13,%ymm12,%ymm12 + + popq %r8 + popq %r9 + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + vpand .Lmask52x4(%rip),%ymm11,%ymm11 + vpand .Lmask52x4(%rip),%ymm12,%ymm12 + + vmovdqu %ymm3,320(%rdi) + vmovdqu %ymm4,352(%rdi) + vmovdqu %ymm5,384(%rdi) + vmovdqu %ymm6,416(%rdi) + vmovdqu %ymm7,448(%rdi) + vmovdqu %ymm8,480(%rdi) + vmovdqu %ymm9,512(%rdi) + vmovdqu %ymm10,544(%rdi) + vmovdqu %ymm11,576(%rdi) + vmovdqu %ymm12,608(%rdi) + + vzeroupper + leaq (%rsp),%rax +.cfi_def_cfa_register %rax + movq 0(%rax),%r15 +.cfi_restore %r15 + movq 8(%rax),%r14 +.cfi_restore %r14 + movq 16(%rax),%r13 +.cfi_restore %r13 + movq 24(%rax),%r12 +.cfi_restore %r12 + movq 32(%rax),%rbp +.cfi_restore %rbp + movq 40(%rax),%rbx +.cfi_restore %rbx + leaq 48(%rax),%rsp +.cfi_def_cfa %rsp,8 +.Lossl_rsaz_amm52x40_x2_avxifma256_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_rsaz_amm52x40_x2_avxifma256, .-ossl_rsaz_amm52x40_x2_avxifma256 +.text + +.align 32 +.globl ossl_extract_multiplier_2x40_win5_avx +.type ossl_extract_multiplier_2x40_win5_avx,@function +ossl_extract_multiplier_2x40_win5_avx: +.cfi_startproc +.byte 243,15,30,250 + vmovapd .Lones(%rip),%ymm14 + vmovq %rdx,%xmm10 + 
vpbroadcastq %xmm10,%ymm12 + vmovq %rcx,%xmm10 + vpbroadcastq %xmm10,%ymm13 + leaq 20480(%rsi),%rax + + + movq %rsi,%r10 + + + vpxor %xmm0,%xmm0,%xmm0 + vmovapd %ymm0,%ymm1 + vmovapd %ymm0,%ymm2 + vmovapd %ymm0,%ymm3 + vmovapd %ymm0,%ymm4 + vmovapd %ymm0,%ymm5 + vmovapd %ymm0,%ymm6 + vmovapd %ymm0,%ymm7 + vmovapd %ymm0,%ymm8 + vmovapd %ymm0,%ymm9 + vpxor %ymm11,%ymm11,%ymm11 +.align 32 +.Lloop_0: + vpcmpeqq %ymm11,%ymm12,%ymm15 + vmovdqu 0(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm0,%ymm0 + vmovdqu 32(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm1,%ymm1 + vmovdqu 64(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm2,%ymm2 + vmovdqu 96(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm3,%ymm3 + vmovdqu 128(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm4,%ymm4 + vmovdqu 160(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm5,%ymm5 + vmovdqu 192(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm6,%ymm6 + vmovdqu 224(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm7,%ymm7 + vmovdqu 256(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm8,%ymm8 + vmovdqu 288(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm9,%ymm9 + vpaddq %ymm14,%ymm11,%ymm11 + addq $640,%rsi + cmpq %rsi,%rax + jne .Lloop_0 + vmovdqu %ymm0,0(%rdi) + vmovdqu %ymm1,32(%rdi) + vmovdqu %ymm2,64(%rdi) + vmovdqu %ymm3,96(%rdi) + vmovdqu %ymm4,128(%rdi) + vmovdqu %ymm5,160(%rdi) + vmovdqu %ymm6,192(%rdi) + vmovdqu %ymm7,224(%rdi) + vmovdqu %ymm8,256(%rdi) + vmovdqu %ymm9,288(%rdi) + movq %r10,%rsi + vpxor %ymm11,%ymm11,%ymm11 +.align 32 +.Lloop_320: + vpcmpeqq %ymm11,%ymm13,%ymm15 + vmovdqu 320(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm0,%ymm0 + vmovdqu 352(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm1,%ymm1 + vmovdqu 384(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm2,%ymm2 + vmovdqu 416(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm3,%ymm3 + vmovdqu 448(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm4,%ymm4 + vmovdqu 480(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm5,%ymm5 + vmovdqu 512(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm6,%ymm6 + vmovdqu 544(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm7,%ymm7 + vmovdqu 576(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm8,%ymm8 + vmovdqu 608(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm9,%ymm9 + vpaddq %ymm14,%ymm11,%ymm11 + addq $640,%rsi + cmpq %rsi,%rax + jne .Lloop_320 + vmovdqu %ymm0,320(%rdi) + vmovdqu %ymm1,352(%rdi) + vmovdqu %ymm2,384(%rdi) + vmovdqu %ymm3,416(%rdi) + vmovdqu %ymm4,448(%rdi) + vmovdqu %ymm5,480(%rdi) + vmovdqu %ymm6,512(%rdi) + vmovdqu %ymm7,544(%rdi) + vmovdqu %ymm8,576(%rdi) + vmovdqu %ymm9,608(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_extract_multiplier_2x40_win5_avx, .-ossl_extract_multiplier_2x40_win5_avx +.section .rodata +.align 32 +.Lones: +.quad 1,1,1,1 +.Lzeros: +.quad 0,0,0,0 + .section ".note.gnu.property", "a" + .p2align 3 + .long 1f - 0f + .long 4f - 1f + .long 5 +0: + # "GNU" encoded with .byte, since .asciz isn't supported + # on Solaris. 
+ .byte 0x47 + .byte 0x4e + .byte 0x55 + .byte 0 +1: + .p2align 3 + .long 0xc0000002 + .long 3f - 2f +2: + .long 3 +3: + .p2align 3 +4: diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/md5/md5-x86_64.s b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/md5/md5-x86_64.s index 40bfc69f38..27a5a80375 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/md5/md5-x86_64.s +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/md5/md5-x86_64.s @@ -201,7 +201,7 @@ ossl_md5_block_asm_data_order: leal -165796510(%rax,%r10,1),%eax andl %ecx,%r11d movl 24(%rsi),%r10d - orl %r11d,%r12d + addl %r11d,%eax movl %ecx,%r11d addl %r12d,%eax movl %ecx,%r12d @@ -212,7 +212,7 @@ ossl_md5_block_asm_data_order: leal -1069501632(%rdx,%r10,1),%edx andl %ebx,%r11d movl 44(%rsi),%r10d - orl %r11d,%r12d + addl %r11d,%edx movl %ebx,%r11d addl %r12d,%edx movl %ebx,%r12d @@ -223,7 +223,7 @@ ossl_md5_block_asm_data_order: leal 643717713(%rcx,%r10,1),%ecx andl %eax,%r11d movl 0(%rsi),%r10d - orl %r11d,%r12d + addl %r11d,%ecx movl %eax,%r11d addl %r12d,%ecx movl %eax,%r12d @@ -234,7 +234,7 @@ ossl_md5_block_asm_data_order: leal -373897302(%rbx,%r10,1),%ebx andl %edx,%r11d movl 20(%rsi),%r10d - orl %r11d,%r12d + addl %r11d,%ebx movl %edx,%r11d addl %r12d,%ebx movl %edx,%r12d @@ -245,7 +245,7 @@ ossl_md5_block_asm_data_order: leal -701558691(%rax,%r10,1),%eax andl %ecx,%r11d movl 40(%rsi),%r10d - orl %r11d,%r12d + addl %r11d,%eax movl %ecx,%r11d addl %r12d,%eax movl %ecx,%r12d @@ -256,7 +256,7 @@ ossl_md5_block_asm_data_order: leal 38016083(%rdx,%r10,1),%edx andl %ebx,%r11d movl 60(%rsi),%r10d - orl %r11d,%r12d + addl %r11d,%edx movl %ebx,%r11d addl %r12d,%edx movl %ebx,%r12d @@ -267,7 +267,7 @@ ossl_md5_block_asm_data_order: leal -660478335(%rcx,%r10,1),%ecx andl %eax,%r11d movl 16(%rsi),%r10d - orl %r11d,%r12d + addl %r11d,%ecx movl %eax,%r11d addl %r12d,%ecx movl %eax,%r12d @@ -278,7 +278,7 @@ ossl_md5_block_asm_data_order: leal -405537848(%rbx,%r10,1),%ebx andl %edx,%r11d movl 36(%rsi),%r10d - orl %r11d,%r12d + addl %r11d,%ebx movl %edx,%r11d addl %r12d,%ebx movl %edx,%r12d @@ -289,7 +289,7 @@ ossl_md5_block_asm_data_order: leal 568446438(%rax,%r10,1),%eax andl %ecx,%r11d movl 56(%rsi),%r10d - orl %r11d,%r12d + addl %r11d,%eax movl %ecx,%r11d addl %r12d,%eax movl %ecx,%r12d @@ -300,7 +300,7 @@ ossl_md5_block_asm_data_order: leal -1019803690(%rdx,%r10,1),%edx andl %ebx,%r11d movl 12(%rsi),%r10d - orl %r11d,%r12d + addl %r11d,%edx movl %ebx,%r11d addl %r12d,%edx movl %ebx,%r12d @@ -311,7 +311,7 @@ ossl_md5_block_asm_data_order: leal -187363961(%rcx,%r10,1),%ecx andl %eax,%r11d movl 32(%rsi),%r10d - orl %r11d,%r12d + addl %r11d,%ecx movl %eax,%r11d addl %r12d,%ecx movl %eax,%r12d @@ -322,7 +322,7 @@ ossl_md5_block_asm_data_order: leal 1163531501(%rbx,%r10,1),%ebx andl %edx,%r11d movl 52(%rsi),%r10d - orl %r11d,%r12d + addl %r11d,%ebx movl %edx,%r11d addl %r12d,%ebx movl %edx,%r12d @@ -333,7 +333,7 @@ ossl_md5_block_asm_data_order: leal -1444681467(%rax,%r10,1),%eax andl %ecx,%r11d movl 8(%rsi),%r10d - orl %r11d,%r12d + addl %r11d,%eax movl %ecx,%r11d addl %r12d,%eax movl %ecx,%r12d @@ -344,7 +344,7 @@ ossl_md5_block_asm_data_order: leal -51403784(%rdx,%r10,1),%edx andl %ebx,%r11d movl 28(%rsi),%r10d - orl %r11d,%r12d + addl %r11d,%edx movl %ebx,%r11d addl %r12d,%edx movl %ebx,%r12d @@ -355,7 +355,7 @@ ossl_md5_block_asm_data_order: leal 1735328473(%rcx,%r10,1),%ecx andl %eax,%r11d movl 48(%rsi),%r10d - orl %r11d,%r12d + addl %r11d,%ecx movl %eax,%r11d addl %r12d,%ecx movl 
%eax,%r12d @@ -366,7 +366,7 @@ ossl_md5_block_asm_data_order: leal -1926607734(%rbx,%r10,1),%ebx andl %edx,%r11d movl 20(%rsi),%r10d - orl %r11d,%r12d + addl %r11d,%ebx movl %edx,%r11d addl %r12d,%ebx movl %edx,%r12d diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/x86_64cpuid.s b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/x86_64cpuid.s index 5fda386d1d..4fb26cc6e2 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/x86_64cpuid.s +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-GCC/crypto/x86_64cpuid.s @@ -1,11 +1,11 @@ + .hidden OPENSSL_cpuid_setup .section .init call OPENSSL_cpuid_setup .hidden OPENSSL_ia32cap_P -.comm OPENSSL_ia32cap_P,16,4 - +.comm OPENSSL_ia32cap_P,40,4 .text .globl OPENSSL_atomic_add @@ -163,6 +163,7 @@ OPENSSL_ia32_cpuid: movl $7,%eax xorl %ecx,%ecx cpuid + movd %eax,%xmm1 btl $26,%r9d jc .Lnotknights andl $0xfff7ffff,%ebx @@ -173,9 +174,31 @@ OPENSSL_ia32_cpuid: jne .Lnotskylakex andl $0xfffeffff,%ebx + .Lnotskylakex: movl %ebx,8(%rdi) movl %ecx,12(%rdi) + movl %edx,16(%rdi) + + movd %xmm1,%eax + cmpl $0x1,%eax + jb .Lno_extended_info + movl $0x7,%eax + movl $0x1,%ecx + cpuid + movl %eax,20(%rdi) + movl %edx,24(%rdi) + movl %ebx,28(%rdi) + movl %ecx,32(%rdi) + + andl $0x80000,%edx + cmpl $0x0,%edx + je .Lno_extended_info + movl $0x24,%eax + movl $0x0,%ecx + cpuid + movl %ebx,36(%rdi) + .Lno_extended_info: btl $27,%r9d @@ -194,6 +217,9 @@ OPENSSL_ia32_cpuid: cmpl $6,%eax je .Ldone .Lclear_avx: + andl $0xff7fffff,20(%rdi) + + movl $0xefffe7ff,%eax andl %eax,%r9d movl $0x3fdeffdf,%eax diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/aes/aesni-xts-avx512.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/aes/aesni-xts-avx512.nasm new file mode 100644 index 0000000000..cf6644f9e4 --- /dev/null +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/aes/aesni-xts-avx512.nasm @@ -0,0 +1,8350 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + +EXTERN OPENSSL_ia32cap_P +global aesni_xts_avx512_eligible + +ALIGN 32 +aesni_xts_avx512_eligible: + mov ecx,DWORD[((OPENSSL_ia32cap_P+8))] + xor eax,eax + + and ecx,0xc0030000 + cmp ecx,0xc0030000 + jne NEAR $L$_done + mov ecx,DWORD[((OPENSSL_ia32cap_P+12))] + + and ecx,0x640 + cmp ecx,0x640 + cmove eax,ecx +$L$_done: + DB 0F3h,0C3h ;repret + +global aesni_xts_128_encrypt_avx512 + + +ALIGN 32 +aesni_xts_128_encrypt_avx512: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_xts_128_encrypt_avx512: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + +DB 243,15,30,250 + push rbp + mov rbp,rsp + sub rsp,312 + and rsp,0xffffffffffffffc0 + mov QWORD[288+rsp],rbx + mov QWORD[((288 + 8))+rsp],rdi + mov QWORD[((288 + 16))+rsp],rsi + vmovdqa XMMWORD[(128 + 0)+rsp],xmm6 + vmovdqa XMMWORD[(128 + 16)+rsp],xmm7 + vmovdqa XMMWORD[(128 + 32)+rsp],xmm8 + vmovdqa XMMWORD[(128 + 48)+rsp],xmm9 + vmovdqa XMMWORD[(128 + 64)+rsp],xmm10 + vmovdqa XMMWORD[(128 + 80)+rsp],xmm11 + vmovdqa XMMWORD[(128 + 96)+rsp],xmm12 + vmovdqa XMMWORD[(128 + 112)+rsp],xmm13 + vmovdqa XMMWORD[(128 + 128)+rsp],xmm14 + vmovdqa XMMWORD[(128 + 144)+rsp],xmm15 + mov r10,0x87 + vmovdqu xmm1,XMMWORD[r9] + vpxor xmm1,xmm1,XMMWORD[r8] + vaesenc xmm1,xmm1,XMMWORD[16+r8] + vaesenc xmm1,xmm1,XMMWORD[32+r8] + vaesenc xmm1,xmm1,XMMWORD[48+r8] + vaesenc xmm1,xmm1,XMMWORD[64+r8] + vaesenc xmm1,xmm1,XMMWORD[80+r8] + vaesenc xmm1,xmm1,XMMWORD[96+r8] + vaesenc 
xmm1,xmm1,XMMWORD[112+r8] + vaesenc xmm1,xmm1,XMMWORD[128+r8] + vaesenc xmm1,xmm1,XMMWORD[144+r8] + vaesenclast xmm1,xmm1,XMMWORD[160+r8] + vmovdqa XMMWORD[rsp],xmm1 + mov QWORD[((8 + 40))+rbp],rdi + mov QWORD[((8 + 48))+rbp],rsi + + cmp rdx,0x80 + jl NEAR $L$_less_than_128_bytes_hEgxyDlCngwrfFe + vpbroadcastq zmm25,r10 + cmp rdx,0x100 + jge NEAR $L$_start_by16_hEgxyDlCngwrfFe + cmp rdx,0x80 + jge NEAR $L$_start_by8_hEgxyDlCngwrfFe + +$L$_do_n_blocks_hEgxyDlCngwrfFe: + cmp rdx,0x0 + je NEAR $L$_ret_hEgxyDlCngwrfFe + cmp rdx,0x70 + jge NEAR $L$_remaining_num_blocks_is_7_hEgxyDlCngwrfFe + cmp rdx,0x60 + jge NEAR $L$_remaining_num_blocks_is_6_hEgxyDlCngwrfFe + cmp rdx,0x50 + jge NEAR $L$_remaining_num_blocks_is_5_hEgxyDlCngwrfFe + cmp rdx,0x40 + jge NEAR $L$_remaining_num_blocks_is_4_hEgxyDlCngwrfFe + cmp rdx,0x30 + jge NEAR $L$_remaining_num_blocks_is_3_hEgxyDlCngwrfFe + cmp rdx,0x20 + jge NEAR $L$_remaining_num_blocks_is_2_hEgxyDlCngwrfFe + cmp rdx,0x10 + jge NEAR $L$_remaining_num_blocks_is_1_hEgxyDlCngwrfFe + vmovdqa xmm8,xmm0 + vmovdqa xmm0,xmm9 + jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe + +$L$_remaining_num_blocks_is_7_hEgxyDlCngwrfFe: + mov r8,0x0000ffffffffffff + kmovq k1,r8 + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu8 zmm2{k1},[64+rdi] + add rdi,0x70 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpternlogq zmm1,zmm9,zmm0,0x96 + vpternlogq zmm2,zmm10,zmm0,0x96 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesenclast zmm1,zmm1,zmm0 + vaesenclast zmm2,zmm2,zmm0 + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 ZMMWORD[64+rsi]{k1},zmm2 + add rsi,0x70 + vextracti32x4 xmm8,zmm2,0x2 + vextracti32x4 xmm0,zmm10,0x3 + and rdx,0xf + je NEAR $L$_ret_hEgxyDlCngwrfFe + jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe + +$L$_remaining_num_blocks_is_6_hEgxyDlCngwrfFe: + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu8 ymm2,YMMWORD[64+rdi] + add rdi,0x60 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpternlogq zmm1,zmm9,zmm0,0x96 + vpternlogq zmm2,zmm10,zmm0,0x96 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc 
zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesenclast zmm1,zmm1,zmm0 + vaesenclast zmm2,zmm2,zmm0 + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 YMMWORD[64+rsi],ymm2 + add rsi,0x60 + vextracti32x4 xmm8,zmm2,0x1 + vextracti32x4 xmm0,zmm10,0x2 + and rdx,0xf + je NEAR $L$_ret_hEgxyDlCngwrfFe + jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe + +$L$_remaining_num_blocks_is_5_hEgxyDlCngwrfFe: + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu xmm2,XMMWORD[64+rdi] + add rdi,0x50 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpternlogq zmm1,zmm9,zmm0,0x96 + vpternlogq zmm2,zmm10,zmm0,0x96 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesenclast zmm1,zmm1,zmm0 + vaesenclast zmm2,zmm2,zmm0 + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu XMMWORD[64+rsi],xmm2 + add rsi,0x50 + vmovdqa xmm8,xmm2 + vextracti32x4 xmm0,zmm10,0x1 + and rdx,0xf + je NEAR $L$_ret_hEgxyDlCngwrfFe + jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe + +$L$_remaining_num_blocks_is_4_hEgxyDlCngwrfFe: + vmovdqu8 zmm1,ZMMWORD[rdi] + add rdi,0x40 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpternlogq zmm1,zmm9,zmm0,0x96 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesenclast zmm1,zmm1,zmm0 + vpxorq zmm1,zmm1,zmm9 + vmovdqu8 ZMMWORD[rsi],zmm1 + add rsi,0x40 + vextracti32x4 xmm8,zmm1,0x3 + vmovdqa64 xmm0,xmm10 + and rdx,0xf + je NEAR $L$_ret_hEgxyDlCngwrfFe + jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe +$L$_remaining_num_blocks_is_3_hEgxyDlCngwrfFe: + mov r8,-1 + shr r8,0x10 + kmovq k1,r8 + vmovdqu8 zmm1{k1},[rdi] + add rdi,0x30 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpternlogq zmm1,zmm9,zmm0,0x96 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 
zmm0,ZMMWORD[112+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesenclast zmm1,zmm1,zmm0 + vpxorq zmm1,zmm1,zmm9 + vmovdqu8 ZMMWORD[rsi]{k1},zmm1 + add rsi,0x30 + vextracti32x4 xmm8,zmm1,0x2 + vextracti32x4 xmm0,zmm9,0x3 + and rdx,0xf + je NEAR $L$_ret_hEgxyDlCngwrfFe + jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe +$L$_remaining_num_blocks_is_2_hEgxyDlCngwrfFe: + vmovdqu8 ymm1,YMMWORD[rdi] + add rdi,0x20 + vbroadcasti32x4 ymm0,YMMWORD[rcx] + vpternlogq ymm1,ymm9,ymm0,0x96 + vbroadcasti32x4 ymm0,YMMWORD[16+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[32+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[48+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[64+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[80+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[96+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[112+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[128+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[144+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[160+rcx] + vaesenclast ymm1,ymm1,ymm0 + vpxorq ymm1,ymm1,ymm9 + vmovdqu YMMWORD[rsi],ymm1 + add rsi,0x20 + vextracti32x4 xmm8,zmm1,0x1 + vextracti32x4 xmm0,zmm9,0x2 + and rdx,0xf + je NEAR $L$_ret_hEgxyDlCngwrfFe + jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe +$L$_remaining_num_blocks_is_1_hEgxyDlCngwrfFe: + vmovdqu xmm1,XMMWORD[rdi] + add rdi,0x10 + vpxor xmm1,xmm1,xmm9 + vpxor xmm1,xmm1,XMMWORD[rcx] + vaesenc xmm1,xmm1,XMMWORD[16+rcx] + vaesenc xmm1,xmm1,XMMWORD[32+rcx] + vaesenc xmm1,xmm1,XMMWORD[48+rcx] + vaesenc xmm1,xmm1,XMMWORD[64+rcx] + vaesenc xmm1,xmm1,XMMWORD[80+rcx] + vaesenc xmm1,xmm1,XMMWORD[96+rcx] + vaesenc xmm1,xmm1,XMMWORD[112+rcx] + vaesenc xmm1,xmm1,XMMWORD[128+rcx] + vaesenc xmm1,xmm1,XMMWORD[144+rcx] + vaesenclast xmm1,xmm1,XMMWORD[160+rcx] + vpxor xmm1,xmm1,xmm9 + vmovdqu XMMWORD[rsi],xmm1 + add rsi,0x10 + vmovdqa xmm8,xmm1 + vextracti32x4 xmm0,zmm9,0x1 + and rdx,0xf + je NEAR $L$_ret_hEgxyDlCngwrfFe + jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe + + +$L$_start_by16_hEgxyDlCngwrfFe: + vbroadcasti32x4 zmm0,ZMMWORD[rsp] + vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7] + mov r8,0xaa + kmovq k2,r8 + vpshufb zmm1,zmm0,zmm8 + vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] + vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] + vpclmulqdq zmm3,zmm2,zmm25,0x0 + vpxorq zmm4{k2},zmm4,zmm2 + vpxord zmm9,zmm3,zmm4 + vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654] + vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234] + vpclmulqdq zmm7,zmm6,zmm25,0x0 + vpxorq zmm5{k2},zmm5,zmm6 + vpxord zmm10,zmm7,zmm5 + vpsrldq zmm13,zmm9,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm11,zmm9,0x1 + vpxord zmm11,zmm11,zmm14 + vpsrldq zmm15,zmm10,0xf + vpclmulqdq zmm16,zmm15,zmm25,0x0 + vpslldq zmm12,zmm10,0x1 + vpxord zmm12,zmm12,zmm16 + +$L$_main_loop_run_16_hEgxyDlCngwrfFe: + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu8 zmm2,ZMMWORD[64+rdi] + vmovdqu8 zmm3,ZMMWORD[128+rdi] + vmovdqu8 zmm4,ZMMWORD[192+rdi] + add rdi,0x100 + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + vpxorq zmm3,zmm3,zmm11 + vpxorq zmm4,zmm4,zmm12 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpxorq zmm1,zmm1,zmm0 + vpxorq zmm2,zmm2,zmm0 + vpxorq zmm3,zmm3,zmm0 + vpxorq zmm4,zmm4,zmm0 + vpsrldq zmm13,zmm11,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm15,zmm11,0x1 + vpxord zmm15,zmm15,zmm14 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesenc zmm1,zmm1,zmm0 + 
vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vpsrldq zmm13,zmm12,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm16,zmm12,0x1 + vpxord zmm16,zmm16,zmm14 + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vpsrldq zmm13,zmm15,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm17,zmm15,0x1 + vpxord zmm17,zmm17,zmm14 + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vpsrldq zmm13,zmm16,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm18,zmm16,0x1 + vpxord zmm18,zmm18,zmm14 + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesenclast zmm1,zmm1,zmm0 + vaesenclast zmm2,zmm2,zmm0 + vaesenclast zmm3,zmm3,zmm0 + vaesenclast zmm4,zmm4,zmm0 + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + vpxorq zmm3,zmm3,zmm11 + vpxorq zmm4,zmm4,zmm12 + + vmovdqa32 zmm9,zmm15 + vmovdqa32 zmm10,zmm16 + vmovdqa32 zmm11,zmm17 + vmovdqa32 zmm12,zmm18 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 ZMMWORD[64+rsi],zmm2 + vmovdqu8 ZMMWORD[128+rsi],zmm3 + vmovdqu8 ZMMWORD[192+rsi],zmm4 + add rsi,0x100 + sub rdx,0x100 + cmp rdx,0x100 + jae NEAR $L$_main_loop_run_16_hEgxyDlCngwrfFe + cmp rdx,0x80 + jae NEAR $L$_main_loop_run_8_hEgxyDlCngwrfFe + vextracti32x4 xmm0,zmm4,0x3 + jmp NEAR $L$_do_n_blocks_hEgxyDlCngwrfFe + +$L$_start_by8_hEgxyDlCngwrfFe: + vbroadcasti32x4 zmm0,ZMMWORD[rsp] + vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7] + mov r8,0xaa + kmovq k2,r8 + vpshufb zmm1,zmm0,zmm8 + vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] + vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] + vpclmulqdq zmm3,zmm2,zmm25,0x0 + vpxorq zmm4{k2},zmm4,zmm2 + vpxord zmm9,zmm3,zmm4 + vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654] + vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234] + vpclmulqdq zmm7,zmm6,zmm25,0x0 + vpxorq zmm5{k2},zmm5,zmm6 + vpxord zmm10,zmm7,zmm5 + +$L$_main_loop_run_8_hEgxyDlCngwrfFe: + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu8 zmm2,ZMMWORD[64+rdi] + add rdi,0x80 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpternlogq zmm1,zmm9,zmm0,0x96 + vpternlogq zmm2,zmm10,zmm0,0x96 + vpsrldq zmm13,zmm9,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm15,zmm9,0x1 + vpxord zmm15,zmm15,zmm14 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vpsrldq zmm13,zmm10,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm16,zmm10,0x1 + vpxord zmm16,zmm16,zmm14 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc 
zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesenclast zmm1,zmm1,zmm0 + vaesenclast zmm2,zmm2,zmm0 + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + vmovdqa32 zmm9,zmm15 + vmovdqa32 zmm10,zmm16 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 ZMMWORD[64+rsi],zmm2 + add rsi,0x80 + sub rdx,0x80 + cmp rdx,0x80 + jae NEAR $L$_main_loop_run_8_hEgxyDlCngwrfFe + vextracti32x4 xmm0,zmm2,0x3 + jmp NEAR $L$_do_n_blocks_hEgxyDlCngwrfFe + +$L$_steal_cipher_hEgxyDlCngwrfFe: + vmovdqa xmm2,xmm8 + lea rax,[vpshufb_shf_table] + vmovdqu xmm10,XMMWORD[rdx*1+rax] + vpshufb xmm8,xmm8,xmm10 + vmovdqu xmm3,XMMWORD[((-16))+rdx*1+rdi] + vmovdqu XMMWORD[(-16)+rdx*1+rsi],xmm8 + lea rax,[vpshufb_shf_table] + add rax,16 + sub rax,rdx + vmovdqu xmm10,XMMWORD[rax] + vpxor xmm10,xmm10,XMMWORD[mask1] + vpshufb xmm3,xmm3,xmm10 + vpblendvb xmm3,xmm3,xmm2,xmm10 + vpxor xmm8,xmm3,xmm0 + vpxor xmm8,xmm8,XMMWORD[rcx] + vaesenc xmm8,xmm8,XMMWORD[16+rcx] + vaesenc xmm8,xmm8,XMMWORD[32+rcx] + vaesenc xmm8,xmm8,XMMWORD[48+rcx] + vaesenc xmm8,xmm8,XMMWORD[64+rcx] + vaesenc xmm8,xmm8,XMMWORD[80+rcx] + vaesenc xmm8,xmm8,XMMWORD[96+rcx] + vaesenc xmm8,xmm8,XMMWORD[112+rcx] + vaesenc xmm8,xmm8,XMMWORD[128+rcx] + vaesenc xmm8,xmm8,XMMWORD[144+rcx] + vaesenclast xmm8,xmm8,XMMWORD[160+rcx] + vpxor xmm8,xmm8,xmm0 + vmovdqu XMMWORD[(-16)+rsi],xmm8 +$L$_ret_hEgxyDlCngwrfFe: + mov rbx,QWORD[288+rsp] + xor r8,r8 + mov QWORD[288+rsp],r8 + + vpxorq zmm0,zmm0,zmm0 + mov rdi,QWORD[((288 + 8))+rsp] + mov QWORD[((288 + 8))+rsp],r8 + mov rsi,QWORD[((288 + 16))+rsp] + mov QWORD[((288 + 16))+rsp],r8 + + vmovdqa xmm6,XMMWORD[((128 + 0))+rsp] + vmovdqa xmm7,XMMWORD[((128 + 16))+rsp] + vmovdqa xmm8,XMMWORD[((128 + 32))+rsp] + vmovdqa xmm9,XMMWORD[((128 + 48))+rsp] + + + vmovdqa64 ZMMWORD[128+rsp],zmm0 + + vmovdqa xmm10,XMMWORD[((128 + 64))+rsp] + vmovdqa xmm11,XMMWORD[((128 + 80))+rsp] + vmovdqa xmm12,XMMWORD[((128 + 96))+rsp] + vmovdqa xmm13,XMMWORD[((128 + 112))+rsp] + + + vmovdqa64 ZMMWORD[(128 + 64)+rsp],zmm0 + + vmovdqa xmm14,XMMWORD[((128 + 128))+rsp] + vmovdqa xmm15,XMMWORD[((128 + 144))+rsp] + + + + vmovdqa YMMWORD[(128 + 128)+rsp],ymm0 + mov rsp,rbp + pop rbp + vzeroupper + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$_less_than_128_bytes_hEgxyDlCngwrfFe: + vpbroadcastq zmm25,r10 + cmp rdx,0x10 + jb NEAR $L$_ret_hEgxyDlCngwrfFe + vbroadcasti32x4 zmm0,ZMMWORD[rsp] + vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7] + mov r8d,0xaa + kmovq k2,r8 + mov r8,rdx + and r8,0x70 + cmp r8,0x60 + je NEAR $L$_num_blocks_is_6_hEgxyDlCngwrfFe + cmp r8,0x50 + je NEAR $L$_num_blocks_is_5_hEgxyDlCngwrfFe + cmp r8,0x40 + je NEAR $L$_num_blocks_is_4_hEgxyDlCngwrfFe + cmp r8,0x30 + je NEAR $L$_num_blocks_is_3_hEgxyDlCngwrfFe + cmp r8,0x20 + je NEAR $L$_num_blocks_is_2_hEgxyDlCngwrfFe + cmp r8,0x10 + je NEAR $L$_num_blocks_is_1_hEgxyDlCngwrfFe + +$L$_num_blocks_is_7_hEgxyDlCngwrfFe: + vpshufb zmm1,zmm0,zmm8 + vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] + vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] + vpclmulqdq zmm3,zmm2,zmm25,0x00 + vpxorq zmm4{k2},zmm4,zmm2 + vpxord 
zmm9,zmm3,zmm4 + vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654] + vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234] + vpclmulqdq zmm7,zmm6,zmm25,0x00 + vpxorq zmm5{k2},zmm5,zmm6 + vpxord zmm10,zmm7,zmm5 + mov r8,0x0000ffffffffffff + kmovq k1,r8 + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu8 zmm2{k1},[64+rdi] + + add rdi,0x70 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpternlogq zmm1,zmm9,zmm0,0x96 + vpternlogq zmm2,zmm10,zmm0,0x96 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesenclast zmm1,zmm1,zmm0 + vaesenclast zmm2,zmm2,zmm0 + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 ZMMWORD[64+rsi]{k1},zmm2 + add rsi,0x70 + vextracti32x4 xmm8,zmm2,0x2 + vextracti32x4 xmm0,zmm10,0x3 + and rdx,0xf + je NEAR $L$_ret_hEgxyDlCngwrfFe + jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe +$L$_num_blocks_is_6_hEgxyDlCngwrfFe: + vpshufb zmm1,zmm0,zmm8 + vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] + vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] + vpclmulqdq zmm3,zmm2,zmm25,0x00 + vpxorq zmm4{k2},zmm4,zmm2 + vpxord zmm9,zmm3,zmm4 + vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654] + vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234] + vpclmulqdq zmm7,zmm6,zmm25,0x00 + vpxorq zmm5{k2},zmm5,zmm6 + vpxord zmm10,zmm7,zmm5 + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu8 ymm2,YMMWORD[64+rdi] + add rdi,96 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpternlogq zmm1,zmm9,zmm0,0x96 + vpternlogq zmm2,zmm10,zmm0,0x96 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesenclast zmm1,zmm1,zmm0 + vaesenclast zmm2,zmm2,zmm0 + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 YMMWORD[64+rsi],ymm2 + add rsi,96 + + vextracti32x4 xmm8,ymm2,0x1 + vextracti32x4 xmm0,zmm10,0x2 + and rdx,0xf + je NEAR $L$_ret_hEgxyDlCngwrfFe + jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe +$L$_num_blocks_is_5_hEgxyDlCngwrfFe: + vpshufb zmm1,zmm0,zmm8 + vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] + vpsrlvq 
zmm2,zmm1,ZMMWORD[const_dq5678] + vpclmulqdq zmm3,zmm2,zmm25,0x00 + vpxorq zmm4{k2},zmm4,zmm2 + vpxord zmm9,zmm3,zmm4 + vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654] + vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234] + vpclmulqdq zmm7,zmm6,zmm25,0x00 + vpxorq zmm5{k2},zmm5,zmm6 + vpxord zmm10,zmm7,zmm5 + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu8 xmm2,XMMWORD[64+rdi] + add rdi,80 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpternlogq zmm1,zmm9,zmm0,0x96 + vpternlogq zmm2,zmm10,zmm0,0x96 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesenclast zmm1,zmm1,zmm0 + vaesenclast zmm2,zmm2,zmm0 + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 XMMWORD[64+rsi],xmm2 + add rsi,80 + + vmovdqa xmm8,xmm2 + vextracti32x4 xmm0,zmm10,0x1 + and rdx,0xf + je NEAR $L$_ret_hEgxyDlCngwrfFe + jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe +$L$_num_blocks_is_4_hEgxyDlCngwrfFe: + vpshufb zmm1,zmm0,zmm8 + vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] + vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] + vpclmulqdq zmm3,zmm2,zmm25,0x00 + vpxorq zmm4{k2},zmm4,zmm2 + vpxord zmm9,zmm3,zmm4 + vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654] + vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234] + vpclmulqdq zmm7,zmm6,zmm25,0x00 + vpxorq zmm5{k2},zmm5,zmm6 + vpxord zmm10,zmm7,zmm5 + vmovdqu8 zmm1,ZMMWORD[rdi] + add rdi,64 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpternlogq zmm1,zmm9,zmm0,0x96 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesenclast zmm1,zmm1,zmm0 + vpxorq zmm1,zmm1,zmm9 + vmovdqu8 ZMMWORD[rsi],zmm1 + add rsi,64 + vextracti32x4 xmm8,zmm1,0x3 + vmovdqa xmm0,xmm10 + and rdx,0xf + je NEAR $L$_ret_hEgxyDlCngwrfFe + jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe +$L$_num_blocks_is_3_hEgxyDlCngwrfFe: + vpshufb zmm1,zmm0,zmm8 + vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] + vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] + vpclmulqdq zmm3,zmm2,zmm25,0x00 + vpxorq zmm4{k2},zmm4,zmm2 + vpxord zmm9,zmm3,zmm4 + mov r8,0x0000ffffffffffff + kmovq k1,r8 + vmovdqu8 zmm1{k1},[rdi] + add rdi,48 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpternlogq zmm1,zmm9,zmm0,0x96 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesenc zmm1,zmm1,zmm0 + 
vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesenclast zmm1,zmm1,zmm0 + vpxorq zmm1,zmm1,zmm9 + vmovdqu8 ZMMWORD[rsi]{k1},zmm1 + add rsi,48 + vextracti32x4 xmm8,zmm1,2 + vextracti32x4 xmm0,zmm9,3 + and rdx,0xf + je NEAR $L$_ret_hEgxyDlCngwrfFe + jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe +$L$_num_blocks_is_2_hEgxyDlCngwrfFe: + vpshufb zmm1,zmm0,zmm8 + vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] + vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] + vpclmulqdq zmm3,zmm2,zmm25,0x00 + vpxorq zmm4{k2},zmm4,zmm2 + vpxord zmm9,zmm3,zmm4 + + vmovdqu8 ymm1,YMMWORD[rdi] + add rdi,32 + vbroadcasti32x4 ymm0,YMMWORD[rcx] + vpternlogq ymm1,ymm9,ymm0,0x96 + vbroadcasti32x4 ymm0,YMMWORD[16+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[32+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[48+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[64+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[80+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[96+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[112+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[128+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[144+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[160+rcx] + vaesenclast ymm1,ymm1,ymm0 + vpxorq ymm1,ymm1,ymm9 + vmovdqu8 YMMWORD[rsi],ymm1 + add rsi,32 + + vextracti32x4 xmm8,ymm1,1 + vextracti32x4 xmm0,zmm9,2 + and rdx,0xf + je NEAR $L$_ret_hEgxyDlCngwrfFe + jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe +$L$_num_blocks_is_1_hEgxyDlCngwrfFe: + vpshufb zmm1,zmm0,zmm8 + vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] + vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] + vpclmulqdq zmm3,zmm2,zmm25,0x00 + vpxorq zmm4{k2},zmm4,zmm2 + vpxord zmm9,zmm3,zmm4 + + vmovdqu8 xmm1,XMMWORD[rdi] + add rdi,16 + vbroadcasti32x4 ymm0,YMMWORD[rcx] + vpternlogq ymm1,ymm9,ymm0,0x96 + vbroadcasti32x4 ymm0,YMMWORD[16+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[32+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[48+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[64+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[80+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[96+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[112+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[128+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[144+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[160+rcx] + vaesenclast ymm1,ymm1,ymm0 + vpxorq ymm1,ymm1,ymm9 + vmovdqu8 XMMWORD[rsi],xmm1 + add rsi,16 + + vmovdqa xmm8,xmm1 + vextracti32x4 xmm0,zmm9,1 + and rdx,0xf + je NEAR $L$_ret_hEgxyDlCngwrfFe + jmp NEAR $L$_steal_cipher_hEgxyDlCngwrfFe + +global aesni_xts_128_decrypt_avx512 + + +ALIGN 32 +aesni_xts_128_decrypt_avx512: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_xts_128_decrypt_avx512: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + +DB 243,15,30,250 + push rbp + mov rbp,rsp + sub rsp,312 + and rsp,0xffffffffffffffc0 + mov 
QWORD[288+rsp],rbx + mov QWORD[((288 + 8))+rsp],rdi + mov QWORD[((288 + 16))+rsp],rsi + vmovdqa XMMWORD[(128 + 0)+rsp],xmm6 + vmovdqa XMMWORD[(128 + 16)+rsp],xmm7 + vmovdqa XMMWORD[(128 + 32)+rsp],xmm8 + vmovdqa XMMWORD[(128 + 48)+rsp],xmm9 + vmovdqa XMMWORD[(128 + 64)+rsp],xmm10 + vmovdqa XMMWORD[(128 + 80)+rsp],xmm11 + vmovdqa XMMWORD[(128 + 96)+rsp],xmm12 + vmovdqa XMMWORD[(128 + 112)+rsp],xmm13 + vmovdqa XMMWORD[(128 + 128)+rsp],xmm14 + vmovdqa XMMWORD[(128 + 144)+rsp],xmm15 + mov r10,0x87 + vmovdqu xmm1,XMMWORD[r9] + vpxor xmm1,xmm1,XMMWORD[r8] + vaesenc xmm1,xmm1,XMMWORD[16+r8] + vaesenc xmm1,xmm1,XMMWORD[32+r8] + vaesenc xmm1,xmm1,XMMWORD[48+r8] + vaesenc xmm1,xmm1,XMMWORD[64+r8] + vaesenc xmm1,xmm1,XMMWORD[80+r8] + vaesenc xmm1,xmm1,XMMWORD[96+r8] + vaesenc xmm1,xmm1,XMMWORD[112+r8] + vaesenc xmm1,xmm1,XMMWORD[128+r8] + vaesenc xmm1,xmm1,XMMWORD[144+r8] + vaesenclast xmm1,xmm1,XMMWORD[160+r8] + vmovdqa XMMWORD[rsp],xmm1 + mov QWORD[((8 + 40))+rbp],rdi + mov QWORD[((8 + 48))+rbp],rsi + + cmp rdx,0x80 + jb NEAR $L$_less_than_128_bytes_amivrujEyduiFoi + vpbroadcastq zmm25,r10 + cmp rdx,0x100 + jge NEAR $L$_start_by16_amivrujEyduiFoi + jmp NEAR $L$_start_by8_amivrujEyduiFoi + +$L$_do_n_blocks_amivrujEyduiFoi: + cmp rdx,0x0 + je NEAR $L$_ret_amivrujEyduiFoi + cmp rdx,0x70 + jge NEAR $L$_remaining_num_blocks_is_7_amivrujEyduiFoi + cmp rdx,0x60 + jge NEAR $L$_remaining_num_blocks_is_6_amivrujEyduiFoi + cmp rdx,0x50 + jge NEAR $L$_remaining_num_blocks_is_5_amivrujEyduiFoi + cmp rdx,0x40 + jge NEAR $L$_remaining_num_blocks_is_4_amivrujEyduiFoi + cmp rdx,0x30 + jge NEAR $L$_remaining_num_blocks_is_3_amivrujEyduiFoi + cmp rdx,0x20 + jge NEAR $L$_remaining_num_blocks_is_2_amivrujEyduiFoi + cmp rdx,0x10 + jge NEAR $L$_remaining_num_blocks_is_1_amivrujEyduiFoi + + + vmovdqu xmm1,xmm5 + + vpxor xmm1,xmm1,xmm9 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vpxor xmm1,xmm1,xmm9 + vmovdqu XMMWORD[(-16)+rsi],xmm1 + vmovdqa xmm8,xmm1 + + + mov r8,0x1 + kmovq k1,r8 + vpsllq xmm13,xmm9,0x3f + vpsraq xmm14,xmm13,0x3f + vpandq xmm5,xmm14,xmm25 + vpxorq xmm9{k1},xmm9,xmm5 + vpsrldq xmm10,xmm9,0x8 +DB 98,211,181,8,115,194,1 + vpslldq xmm13,xmm13,0x8 + vpxorq xmm0,xmm0,xmm13 + jmp NEAR $L$_steal_cipher_amivrujEyduiFoi + +$L$_remaining_num_blocks_is_7_amivrujEyduiFoi: + mov r8,0xffffffffffffffff + shr r8,0x10 + kmovq k1,r8 + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu8 zmm2{k1},[64+rdi] + add rdi,0x70 + and rdx,0xf + je NEAR $L$_done_7_remain_amivrujEyduiFoi + vextracti32x4 xmm12,zmm10,0x2 + vextracti32x4 xmm13,zmm10,0x3 + vinserti32x4 zmm10,zmm10,xmm13,0x2 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpxorq zmm1,zmm1,zmm0 + vpxorq zmm2,zmm2,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec 
zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vmovdqa32 zmm9,zmm15 + vmovdqa32 zmm10,zmm16 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 ZMMWORD[64+rsi]{k1},zmm2 + add rsi,0x70 + vextracti32x4 xmm8,zmm2,0x2 + vmovdqa xmm0,xmm12 + jmp NEAR $L$_steal_cipher_amivrujEyduiFoi + +$L$_done_7_remain_amivrujEyduiFoi: + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpxorq zmm1,zmm1,zmm0 + vpxorq zmm2,zmm2,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vmovdqa32 zmm9,zmm15 + vmovdqa32 zmm10,zmm16 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 ZMMWORD[64+rsi]{k1},zmm2 + jmp NEAR $L$_ret_amivrujEyduiFoi + +$L$_remaining_num_blocks_is_6_amivrujEyduiFoi: + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu8 ymm2,YMMWORD[64+rdi] + add rdi,0x60 + and rdx,0xf + je NEAR $L$_done_6_remain_amivrujEyduiFoi + vextracti32x4 xmm12,zmm10,0x1 + vextracti32x4 xmm13,zmm10,0x2 + vinserti32x4 zmm10,zmm10,xmm13,0x1 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpxorq zmm1,zmm1,zmm0 + vpxorq zmm2,zmm2,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 
zmm0,ZMMWORD[160+rcx] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vmovdqa32 zmm9,zmm15 + vmovdqa32 zmm10,zmm16 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 YMMWORD[64+rsi],ymm2 + add rsi,0x60 + vextracti32x4 xmm8,zmm2,0x1 + vmovdqa xmm0,xmm12 + jmp NEAR $L$_steal_cipher_amivrujEyduiFoi + +$L$_done_6_remain_amivrujEyduiFoi: + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpxorq zmm1,zmm1,zmm0 + vpxorq zmm2,zmm2,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vmovdqa32 zmm9,zmm15 + vmovdqa32 zmm10,zmm16 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 YMMWORD[64+rsi],ymm2 + jmp NEAR $L$_ret_amivrujEyduiFoi + +$L$_remaining_num_blocks_is_5_amivrujEyduiFoi: + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu xmm2,XMMWORD[64+rdi] + add rdi,0x50 + and rdx,0xf + je NEAR $L$_done_5_remain_amivrujEyduiFoi + vmovdqa xmm12,xmm10 + vextracti32x4 xmm10,zmm10,0x1 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpxorq zmm1,zmm1,zmm0 + vpxorq zmm2,zmm2,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vmovdqa32 zmm9,zmm15 + vmovdqa32 zmm10,zmm16 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu XMMWORD[64+rsi],xmm2 + add rsi,0x50 + vmovdqa xmm8,xmm2 + vmovdqa xmm0,xmm12 + jmp NEAR $L$_steal_cipher_amivrujEyduiFoi + +$L$_done_5_remain_amivrujEyduiFoi: + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpxorq zmm1,zmm1,zmm0 + vpxorq zmm2,zmm2,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + 
vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vmovdqa32 zmm9,zmm15 + vmovdqa32 zmm10,zmm16 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 XMMWORD[64+rsi],xmm2 + jmp NEAR $L$_ret_amivrujEyduiFoi + +$L$_remaining_num_blocks_is_4_amivrujEyduiFoi: + vmovdqu8 zmm1,ZMMWORD[rdi] + add rdi,0x40 + and rdx,0xf + je NEAR $L$_done_4_remain_amivrujEyduiFoi + vextracti32x4 xmm12,zmm9,0x3 + vinserti32x4 zmm9,zmm9,xmm10,0x3 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpxorq zmm1,zmm1,zmm0 + vpxorq zmm2,zmm2,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vmovdqa32 zmm9,zmm15 + vmovdqa32 zmm10,zmm16 + vmovdqu8 ZMMWORD[rsi],zmm1 + add rsi,0x40 + vextracti32x4 xmm8,zmm1,0x3 + vmovdqa xmm0,xmm12 + jmp NEAR $L$_steal_cipher_amivrujEyduiFoi + +$L$_done_4_remain_amivrujEyduiFoi: + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpxorq zmm1,zmm1,zmm0 + vpxorq zmm2,zmm2,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + 
vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vmovdqa32 zmm9,zmm15 + vmovdqa32 zmm10,zmm16 + vmovdqu8 ZMMWORD[rsi],zmm1 + jmp NEAR $L$_ret_amivrujEyduiFoi + +$L$_remaining_num_blocks_is_3_amivrujEyduiFoi: + vmovdqu xmm1,XMMWORD[rdi] + vmovdqu xmm2,XMMWORD[16+rdi] + vmovdqu xmm3,XMMWORD[32+rdi] + add rdi,0x30 + and rdx,0xf + je NEAR $L$_done_3_remain_amivrujEyduiFoi + vextracti32x4 xmm13,zmm9,0x2 + vextracti32x4 xmm10,zmm9,0x1 + vextracti32x4 xmm11,zmm9,0x3 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vpxor xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + vmovdqu XMMWORD[32+rsi],xmm3 + add rsi,0x30 + vmovdqa xmm8,xmm3 + vmovdqa xmm0,xmm13 + jmp NEAR $L$_steal_cipher_amivrujEyduiFoi + +$L$_done_3_remain_amivrujEyduiFoi: + vextracti32x4 xmm10,zmm9,0x1 + vextracti32x4 xmm11,zmm9,0x2 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vpxor xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + 
vmovdqu XMMWORD[32+rsi],xmm3 + jmp NEAR $L$_ret_amivrujEyduiFoi + +$L$_remaining_num_blocks_is_2_amivrujEyduiFoi: + vmovdqu xmm1,XMMWORD[rdi] + vmovdqu xmm2,XMMWORD[16+rdi] + add rdi,0x20 + and rdx,0xf + je NEAR $L$_done_2_remain_amivrujEyduiFoi + vextracti32x4 xmm10,zmm9,0x2 + vextracti32x4 xmm12,zmm9,0x1 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + add rsi,0x20 + vmovdqa xmm8,xmm2 + vmovdqa xmm0,xmm12 + jmp NEAR $L$_steal_cipher_amivrujEyduiFoi + +$L$_done_2_remain_amivrujEyduiFoi: + vextracti32x4 xmm10,zmm9,0x1 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + jmp NEAR $L$_ret_amivrujEyduiFoi + +$L$_remaining_num_blocks_is_1_amivrujEyduiFoi: + vmovdqu xmm1,XMMWORD[rdi] + add rdi,0x10 + and rdx,0xf + je NEAR $L$_done_1_remain_amivrujEyduiFoi + vextracti32x4 xmm11,zmm9,0x1 + vpxor xmm1,xmm1,xmm11 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vpxor xmm1,xmm1,xmm11 + vmovdqu XMMWORD[rsi],xmm1 + add rsi,0x10 + vmovdqa xmm8,xmm1 + vmovdqa xmm0,xmm9 + jmp NEAR 
$L$_steal_cipher_amivrujEyduiFoi + +$L$_done_1_remain_amivrujEyduiFoi: + vpxor xmm1,xmm1,xmm9 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vpxor xmm1,xmm1,xmm9 + vmovdqu XMMWORD[rsi],xmm1 + jmp NEAR $L$_ret_amivrujEyduiFoi + +$L$_start_by16_amivrujEyduiFoi: + vbroadcasti32x4 zmm0,ZMMWORD[rsp] + vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7] + mov r8,0xaa + kmovq k2,r8 + + + vpshufb zmm1,zmm0,zmm8 + vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] + vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] + vpclmulqdq zmm3,zmm2,zmm25,0x0 + vpxorq zmm4{k2},zmm4,zmm2 + vpxord zmm9,zmm3,zmm4 + + + vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654] + vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234] + vpclmulqdq zmm7,zmm6,zmm25,0x0 + vpxorq zmm5{k2},zmm5,zmm6 + vpxord zmm10,zmm7,zmm5 + + + vpsrldq zmm13,zmm9,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm11,zmm9,0x1 + vpxord zmm11,zmm11,zmm14 + + vpsrldq zmm15,zmm10,0xf + vpclmulqdq zmm16,zmm15,zmm25,0x0 + vpslldq zmm12,zmm10,0x1 + vpxord zmm12,zmm12,zmm16 + +$L$_main_loop_run_16_amivrujEyduiFoi: + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu8 zmm2,ZMMWORD[64+rdi] + vmovdqu8 zmm3,ZMMWORD[128+rdi] + vmovdqu8 zmm4,ZMMWORD[192+rdi] + vmovdqu8 xmm5,XMMWORD[240+rdi] + add rdi,0x100 + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + vpxorq zmm3,zmm3,zmm11 + vpxorq zmm4,zmm4,zmm12 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpxorq zmm1,zmm1,zmm0 + vpxorq zmm2,zmm2,zmm0 + vpxorq zmm3,zmm3,zmm0 + vpxorq zmm4,zmm4,zmm0 + vpsrldq zmm13,zmm11,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm15,zmm11,0x1 + vpxord zmm15,zmm15,zmm14 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vpsrldq zmm13,zmm12,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm16,zmm12,0x1 + vpxord zmm16,zmm16,zmm14 + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vpsrldq zmm13,zmm15,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm17,zmm15,0x1 + vpxord zmm17,zmm17,zmm14 + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec 
zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vpsrldq zmm13,zmm16,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm18,zmm16,0x1 + vpxord zmm18,zmm18,zmm14 + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 + vaesdeclast zmm3,zmm3,zmm0 + vaesdeclast zmm4,zmm4,zmm0 + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + vpxorq zmm3,zmm3,zmm11 + vpxorq zmm4,zmm4,zmm12 + + vmovdqa32 zmm9,zmm15 + vmovdqa32 zmm10,zmm16 + vmovdqa32 zmm11,zmm17 + vmovdqa32 zmm12,zmm18 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 ZMMWORD[64+rsi],zmm2 + vmovdqu8 ZMMWORD[128+rsi],zmm3 + vmovdqu8 ZMMWORD[192+rsi],zmm4 + add rsi,0x100 + sub rdx,0x100 + cmp rdx,0x100 + jge NEAR $L$_main_loop_run_16_amivrujEyduiFoi + + cmp rdx,0x80 + jge NEAR $L$_main_loop_run_8_amivrujEyduiFoi + jmp NEAR $L$_do_n_blocks_amivrujEyduiFoi + +$L$_start_by8_amivrujEyduiFoi: + + vbroadcasti32x4 zmm0,ZMMWORD[rsp] + vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7] + mov r8,0xaa + kmovq k2,r8 + + + vpshufb zmm1,zmm0,zmm8 + vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] + vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] + vpclmulqdq zmm3,zmm2,zmm25,0x0 + vpxorq zmm4{k2},zmm4,zmm2 + vpxord zmm9,zmm3,zmm4 + + + vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654] + vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234] + vpclmulqdq zmm7,zmm6,zmm25,0x0 + vpxorq zmm5{k2},zmm5,zmm6 + vpxord zmm10,zmm7,zmm5 + +$L$_main_loop_run_8_amivrujEyduiFoi: + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu8 zmm2,ZMMWORD[64+rdi] + vmovdqu8 xmm5,XMMWORD[112+rdi] + add rdi,0x80 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpxorq zmm1,zmm1,zmm0 + vpxorq zmm2,zmm2,zmm0 + vpsrldq zmm13,zmm9,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm15,zmm9,0x1 + vpxord zmm15,zmm15,zmm14 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vpsrldq zmm13,zmm10,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm16,zmm10,0x1 + vpxord zmm16,zmm16,zmm14 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vmovdqa32 zmm9,zmm15 + vmovdqa32 zmm10,zmm16 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 ZMMWORD[64+rsi],zmm2 + add rsi,0x80 + sub rdx,0x80 + cmp rdx,0x80 + jge NEAR $L$_main_loop_run_8_amivrujEyduiFoi + jmp NEAR $L$_do_n_blocks_amivrujEyduiFoi + +$L$_steal_cipher_amivrujEyduiFoi: + + vmovdqa xmm2,xmm8 + + + lea rax,[vpshufb_shf_table] + vmovdqu xmm10,XMMWORD[rdx*1+rax] + vpshufb xmm8,xmm8,xmm10 + + + vmovdqu xmm3,XMMWORD[((-16))+rdx*1+rdi] + vmovdqu XMMWORD[(-16)+rdx*1+rsi],xmm8 + + + lea rax,[vpshufb_shf_table] + add rax,16 + sub rax,rdx + vmovdqu xmm10,XMMWORD[rax] + vpxor xmm10,xmm10,XMMWORD[mask1] + vpshufb xmm3,xmm3,xmm10 + + vpblendvb xmm3,xmm3,xmm2,xmm10 + + + vpxor xmm8,xmm3,xmm0 + 
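(Editor's aside.) The $L$_steal_cipher block just completed above implements XTS ciphertext stealing: the partial tail is written out from the last full block, and a final 16-byte block is assembled from the short input tail plus the bytes "stolen" back from that last block, using the vpshufb_shf_table/mask1 shuffles and vpblendvb instead of a byte loop. A simplified byte-level C sketch of the marshalling step (cts_build_final_block is a made-up name; r is the 1..15-byte tail length):

    #include <stdint.h>
    #include <string.h>

    /* Build the block that is then XORed with the saved tweak (xmm0
     * above) and pushed through the cipher: r tail bytes of input,
     * completed with the trailing 16-r bytes of the last full block. */
    static void cts_build_final_block(uint8_t out[16],
                                      const uint8_t last_full[16],
                                      const uint8_t *tail_in, size_t r)
    {
        memcpy(out, tail_in, r);                 /* partial tail bytes */
        memcpy(out + r, last_full + r, 16 - r);  /* stolen bytes       */
    }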
+ + vpxor xmm8,xmm8,XMMWORD[rcx] + vaesdec xmm8,xmm8,XMMWORD[16+rcx] + vaesdec xmm8,xmm8,XMMWORD[32+rcx] + vaesdec xmm8,xmm8,XMMWORD[48+rcx] + vaesdec xmm8,xmm8,XMMWORD[64+rcx] + vaesdec xmm8,xmm8,XMMWORD[80+rcx] + vaesdec xmm8,xmm8,XMMWORD[96+rcx] + vaesdec xmm8,xmm8,XMMWORD[112+rcx] + vaesdec xmm8,xmm8,XMMWORD[128+rcx] + vaesdec xmm8,xmm8,XMMWORD[144+rcx] + vaesdeclast xmm8,xmm8,XMMWORD[160+rcx] + + vpxor xmm8,xmm8,xmm0 + +$L$_done_amivrujEyduiFoi: + + vmovdqu XMMWORD[(-16)+rsi],xmm8 +$L$_ret_amivrujEyduiFoi: + mov rbx,QWORD[288+rsp] + xor r8,r8 + mov QWORD[288+rsp],r8 + + vpxorq zmm0,zmm0,zmm0 + mov rdi,QWORD[((288 + 8))+rsp] + mov QWORD[((288 + 8))+rsp],r8 + mov rsi,QWORD[((288 + 16))+rsp] + mov QWORD[((288 + 16))+rsp],r8 + + vmovdqa xmm6,XMMWORD[((128 + 0))+rsp] + vmovdqa xmm7,XMMWORD[((128 + 16))+rsp] + vmovdqa xmm8,XMMWORD[((128 + 32))+rsp] + vmovdqa xmm9,XMMWORD[((128 + 48))+rsp] + + + vmovdqa64 ZMMWORD[128+rsp],zmm0 + + vmovdqa xmm10,XMMWORD[((128 + 64))+rsp] + vmovdqa xmm11,XMMWORD[((128 + 80))+rsp] + vmovdqa xmm12,XMMWORD[((128 + 96))+rsp] + vmovdqa xmm13,XMMWORD[((128 + 112))+rsp] + + + vmovdqa64 ZMMWORD[(128 + 64)+rsp],zmm0 + + vmovdqa xmm14,XMMWORD[((128 + 128))+rsp] + vmovdqa xmm15,XMMWORD[((128 + 144))+rsp] + + + + vmovdqa YMMWORD[(128 + 128)+rsp],ymm0 + mov rsp,rbp + pop rbp + vzeroupper + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$_less_than_128_bytes_amivrujEyduiFoi: + cmp rdx,0x10 + jb NEAR $L$_ret_amivrujEyduiFoi + + mov r8,rdx + and r8,0x70 + cmp r8,0x60 + je NEAR $L$_num_blocks_is_6_amivrujEyduiFoi + cmp r8,0x50 + je NEAR $L$_num_blocks_is_5_amivrujEyduiFoi + cmp r8,0x40 + je NEAR $L$_num_blocks_is_4_amivrujEyduiFoi + cmp r8,0x30 + je NEAR $L$_num_blocks_is_3_amivrujEyduiFoi + cmp r8,0x20 + je NEAR $L$_num_blocks_is_2_amivrujEyduiFoi + cmp r8,0x10 + je NEAR $L$_num_blocks_is_1_amivrujEyduiFoi + +$L$_num_blocks_is_7_amivrujEyduiFoi: + vmovdqa xmm9,XMMWORD[rsp] + mov rax,QWORD[rsp] + mov rbx,QWORD[8+rsp] + vmovdqu xmm1,XMMWORD[rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[((16 + 8))+rsp],rbx + vmovdqa xmm10,XMMWORD[16+rsp] + vmovdqu xmm2,XMMWORD[16+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[32+rsp],rax + mov QWORD[((32 + 8))+rsp],rbx + vmovdqa xmm11,XMMWORD[32+rsp] + vmovdqu xmm3,XMMWORD[32+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[48+rsp],rax + mov QWORD[((48 + 8))+rsp],rbx + vmovdqa xmm12,XMMWORD[48+rsp] + vmovdqu xmm4,XMMWORD[48+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[64+rsp],rax + mov QWORD[((64 + 8))+rsp],rbx + vmovdqa xmm13,XMMWORD[64+rsp] + vmovdqu xmm5,XMMWORD[64+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[80+rsp],rax + mov QWORD[((80 + 8))+rsp],rbx + vmovdqa xmm14,XMMWORD[80+rsp] + vmovdqu xmm6,XMMWORD[80+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[96+rsp],rax + mov QWORD[((96 + 8))+rsp],rbx + vmovdqa xmm15,XMMWORD[96+rsp] + vmovdqu xmm7,XMMWORD[96+rdi] + add rdi,0x70 + and rdx,0xf + je NEAR $L$_done_7_amivrujEyduiFoi + +$L$_steal_cipher_7_amivrujEyduiFoi: + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[24+rsp],rbx + vmovdqa64 xmm16,xmm15 + vmovdqa xmm15,XMMWORD[16+rsp] + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vpxor 
xmm5,xmm5,xmm13 + vpxor xmm6,xmm6,xmm14 + vpxor xmm7,xmm7,xmm15 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vpxor xmm3,xmm3,xmm0 + vpxor xmm4,xmm4,xmm0 + vpxor xmm5,xmm5,xmm0 + vpxor xmm6,xmm6,xmm0 + vpxor xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vaesdeclast xmm4,xmm4,xmm0 + vaesdeclast xmm5,xmm5,xmm0 + vaesdeclast xmm6,xmm6,xmm0 + vaesdeclast xmm7,xmm7,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vpxor xmm5,xmm5,xmm13 + vpxor xmm6,xmm6,xmm14 + vpxor xmm7,xmm7,xmm15 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + vmovdqu XMMWORD[32+rsi],xmm3 + vmovdqu XMMWORD[48+rsi],xmm4 + vmovdqu XMMWORD[64+rsi],xmm5 + vmovdqu XMMWORD[80+rsi],xmm6 + add rsi,0x70 + vmovdqa64 xmm0,xmm16 + vmovdqa xmm8,xmm7 + jmp NEAR $L$_steal_cipher_amivrujEyduiFoi + +$L$_done_7_amivrujEyduiFoi: + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vpxor xmm5,xmm5,xmm13 + vpxor xmm6,xmm6,xmm14 + vpxor xmm7,xmm7,xmm15 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vpxor xmm3,xmm3,xmm0 + vpxor xmm4,xmm4,xmm0 + vpxor xmm5,xmm5,xmm0 + vpxor xmm6,xmm6,xmm0 + vpxor xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + 
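(Editor's aside.) Each tweak in the scalar $L$_num_blocks_is_N paths above is derived from the previous one by the repeated group xor r11,r11 / shl rax,1 / adc rbx,rbx / cmovc r11,r10 / xor rax,r11, with r10 = 0x87: a 128-bit left shift whose carry-out folds back in as the XTS reduction constant. A C sketch of the same multiply-by-x in GF(2^128), assuming the tweak is held as two little-endian 64-bit halves like the rax/rbx pair:

    #include <stdint.h>

    /* Multiply an XTS tweak by x (alpha) modulo x^128 + x^7 + x^2 + x + 1,
     * mirroring the shl/adc/cmovc/xor group above: t is {low, high}. */
    static void xts_mul_alpha(uint64_t t[2])
    {
        uint64_t carry = t[1] >> 63;            /* bit leaving x^127 */
        t[1] = (t[1] << 1) | (t[0] >> 63);      /* 128-bit shift left */
        t[0] = (t[0] << 1) ^ (carry * 0x87);    /* conditional reduce */
    }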
vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vaesdeclast xmm4,xmm4,xmm0 + vaesdeclast xmm5,xmm5,xmm0 + vaesdeclast xmm6,xmm6,xmm0 + vaesdeclast xmm7,xmm7,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vpxor xmm5,xmm5,xmm13 + vpxor xmm6,xmm6,xmm14 + vpxor xmm7,xmm7,xmm15 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + vmovdqu XMMWORD[32+rsi],xmm3 + vmovdqu XMMWORD[48+rsi],xmm4 + vmovdqu XMMWORD[64+rsi],xmm5 + vmovdqu XMMWORD[80+rsi],xmm6 + add rsi,0x70 + vmovdqa xmm8,xmm7 + jmp NEAR $L$_done_amivrujEyduiFoi + +$L$_num_blocks_is_6_amivrujEyduiFoi: + vmovdqa xmm9,XMMWORD[rsp] + mov rax,QWORD[rsp] + mov rbx,QWORD[8+rsp] + vmovdqu xmm1,XMMWORD[rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[((16 + 8))+rsp],rbx + vmovdqa xmm10,XMMWORD[16+rsp] + vmovdqu xmm2,XMMWORD[16+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[32+rsp],rax + mov QWORD[((32 + 8))+rsp],rbx + vmovdqa xmm11,XMMWORD[32+rsp] + vmovdqu xmm3,XMMWORD[32+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[48+rsp],rax + mov QWORD[((48 + 8))+rsp],rbx + vmovdqa xmm12,XMMWORD[48+rsp] + vmovdqu xmm4,XMMWORD[48+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[64+rsp],rax + mov QWORD[((64 + 8))+rsp],rbx + vmovdqa xmm13,XMMWORD[64+rsp] + vmovdqu xmm5,XMMWORD[64+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[80+rsp],rax + mov QWORD[((80 + 8))+rsp],rbx + vmovdqa xmm14,XMMWORD[80+rsp] + vmovdqu xmm6,XMMWORD[80+rdi] + add rdi,0x60 + and rdx,0xf + je NEAR $L$_done_6_amivrujEyduiFoi + +$L$_steal_cipher_6_amivrujEyduiFoi: + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[24+rsp],rbx + vmovdqa64 xmm15,xmm14 + vmovdqa xmm14,XMMWORD[16+rsp] + vpxor xmm1,xmm1,xmm9 + vpxor 
xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vpxor xmm5,xmm5,xmm13 + vpxor xmm6,xmm6,xmm14 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vpxor xmm3,xmm3,xmm0 + vpxor xmm4,xmm4,xmm0 + vpxor xmm5,xmm5,xmm0 + vpxor xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vaesdeclast xmm4,xmm4,xmm0 + vaesdeclast xmm5,xmm5,xmm0 + vaesdeclast xmm6,xmm6,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vpxor xmm5,xmm5,xmm13 + vpxor xmm6,xmm6,xmm14 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + vmovdqu XMMWORD[32+rsi],xmm3 + vmovdqu XMMWORD[48+rsi],xmm4 + vmovdqu XMMWORD[64+rsi],xmm5 + add rsi,0x60 + vmovdqa xmm0,xmm15 + vmovdqa xmm8,xmm6 + jmp NEAR $L$_steal_cipher_amivrujEyduiFoi + +$L$_done_6_amivrujEyduiFoi: + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vpxor xmm5,xmm5,xmm13 + vpxor xmm6,xmm6,xmm14 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vpxor xmm3,xmm3,xmm0 + vpxor xmm4,xmm4,xmm0 + vpxor xmm5,xmm5,xmm0 + vpxor xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + 
vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vaesdeclast xmm4,xmm4,xmm0 + vaesdeclast xmm5,xmm5,xmm0 + vaesdeclast xmm6,xmm6,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vpxor xmm5,xmm5,xmm13 + vpxor xmm6,xmm6,xmm14 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + vmovdqu XMMWORD[32+rsi],xmm3 + vmovdqu XMMWORD[48+rsi],xmm4 + vmovdqu XMMWORD[64+rsi],xmm5 + add rsi,0x60 + vmovdqa xmm8,xmm6 + jmp NEAR $L$_done_amivrujEyduiFoi + +$L$_num_blocks_is_5_amivrujEyduiFoi: + vmovdqa xmm9,XMMWORD[rsp] + mov rax,QWORD[rsp] + mov rbx,QWORD[8+rsp] + vmovdqu xmm1,XMMWORD[rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[((16 + 8))+rsp],rbx + vmovdqa xmm10,XMMWORD[16+rsp] + vmovdqu xmm2,XMMWORD[16+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[32+rsp],rax + mov QWORD[((32 + 8))+rsp],rbx + vmovdqa xmm11,XMMWORD[32+rsp] + vmovdqu xmm3,XMMWORD[32+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[48+rsp],rax + mov QWORD[((48 + 8))+rsp],rbx + vmovdqa xmm12,XMMWORD[48+rsp] + vmovdqu xmm4,XMMWORD[48+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[64+rsp],rax + mov QWORD[((64 + 8))+rsp],rbx + vmovdqa xmm13,XMMWORD[64+rsp] + vmovdqu xmm5,XMMWORD[64+rdi] + add rdi,0x50 + and rdx,0xf + je NEAR $L$_done_5_amivrujEyduiFoi + +$L$_steal_cipher_5_amivrujEyduiFoi: + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[24+rsp],rbx + vmovdqa64 xmm14,xmm13 + vmovdqa xmm13,XMMWORD[16+rsp] + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vpxor xmm5,xmm5,xmm13 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vpxor xmm3,xmm3,xmm0 + vpxor xmm4,xmm4,xmm0 + vpxor xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec 
xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vaesdeclast xmm4,xmm4,xmm0 + vaesdeclast xmm5,xmm5,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vpxor xmm5,xmm5,xmm13 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + vmovdqu XMMWORD[32+rsi],xmm3 + vmovdqu XMMWORD[48+rsi],xmm4 + add rsi,0x50 + vmovdqa xmm0,xmm14 + vmovdqa xmm8,xmm5 + jmp NEAR $L$_steal_cipher_amivrujEyduiFoi + +$L$_done_5_amivrujEyduiFoi: + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vpxor xmm5,xmm5,xmm13 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vpxor xmm3,xmm3,xmm0 + vpxor xmm4,xmm4,xmm0 + vpxor xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vaesdeclast xmm4,xmm4,xmm0 + vaesdeclast xmm5,xmm5,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vpxor xmm5,xmm5,xmm13 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + vmovdqu XMMWORD[32+rsi],xmm3 + vmovdqu XMMWORD[48+rsi],xmm4 + add rsi,0x50 + vmovdqa xmm8,xmm5 + jmp NEAR $L$_done_amivrujEyduiFoi + +$L$_num_blocks_is_4_amivrujEyduiFoi: + vmovdqa 
xmm9,XMMWORD[rsp] + mov rax,QWORD[rsp] + mov rbx,QWORD[8+rsp] + vmovdqu xmm1,XMMWORD[rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[((16 + 8))+rsp],rbx + vmovdqa xmm10,XMMWORD[16+rsp] + vmovdqu xmm2,XMMWORD[16+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[32+rsp],rax + mov QWORD[((32 + 8))+rsp],rbx + vmovdqa xmm11,XMMWORD[32+rsp] + vmovdqu xmm3,XMMWORD[32+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[48+rsp],rax + mov QWORD[((48 + 8))+rsp],rbx + vmovdqa xmm12,XMMWORD[48+rsp] + vmovdqu xmm4,XMMWORD[48+rdi] + add rdi,0x40 + and rdx,0xf + je NEAR $L$_done_4_amivrujEyduiFoi + +$L$_steal_cipher_4_amivrujEyduiFoi: + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[24+rsp],rbx + vmovdqa64 xmm13,xmm12 + vmovdqa xmm12,XMMWORD[16+rsp] + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vpxor xmm3,xmm3,xmm0 + vpxor xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vaesdeclast xmm4,xmm4,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + vmovdqu XMMWORD[32+rsi],xmm3 + add rsi,0x40 + vmovdqa xmm0,xmm13 + vmovdqa xmm8,xmm4 + jmp NEAR $L$_steal_cipher_amivrujEyduiFoi + +$L$_done_4_amivrujEyduiFoi: + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vpxor xmm3,xmm3,xmm0 + vpxor xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu 
xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vaesdeclast xmm4,xmm4,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + vmovdqu XMMWORD[32+rsi],xmm3 + add rsi,0x40 + vmovdqa xmm8,xmm4 + jmp NEAR $L$_done_amivrujEyduiFoi + +$L$_num_blocks_is_3_amivrujEyduiFoi: + vmovdqa xmm9,XMMWORD[rsp] + mov rax,QWORD[rsp] + mov rbx,QWORD[8+rsp] + vmovdqu xmm1,XMMWORD[rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[((16 + 8))+rsp],rbx + vmovdqa xmm10,XMMWORD[16+rsp] + vmovdqu xmm2,XMMWORD[16+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[32+rsp],rax + mov QWORD[((32 + 8))+rsp],rbx + vmovdqa xmm11,XMMWORD[32+rsp] + vmovdqu xmm3,XMMWORD[32+rdi] + add rdi,0x30 + and rdx,0xf + je NEAR $L$_done_3_amivrujEyduiFoi + +$L$_steal_cipher_3_amivrujEyduiFoi: + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[24+rsp],rbx + vmovdqa64 xmm12,xmm11 + vmovdqa xmm11,XMMWORD[16+rsp] + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vpxor xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + add rsi,0x30 + vmovdqa xmm0,xmm12 + vmovdqa xmm8,xmm3 + jmp NEAR $L$_steal_cipher_amivrujEyduiFoi + +$L$_done_3_amivrujEyduiFoi: + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vpxor xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + 
vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + add rsi,0x30 + vmovdqa xmm8,xmm3 + jmp NEAR $L$_done_amivrujEyduiFoi + +$L$_num_blocks_is_2_amivrujEyduiFoi: + vmovdqa xmm9,XMMWORD[rsp] + mov rax,QWORD[rsp] + mov rbx,QWORD[8+rsp] + vmovdqu xmm1,XMMWORD[rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[((16 + 8))+rsp],rbx + vmovdqa xmm10,XMMWORD[16+rsp] + vmovdqu xmm2,XMMWORD[16+rdi] + add rdi,0x20 + and rdx,0xf + je NEAR $L$_done_2_amivrujEyduiFoi + +$L$_steal_cipher_2_amivrujEyduiFoi: + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[24+rsp],rbx + vmovdqa64 xmm11,xmm10 + vmovdqa xmm10,XMMWORD[16+rsp] + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vmovdqu XMMWORD[rsi],xmm1 + add rsi,0x20 + vmovdqa xmm0,xmm11 + vmovdqa xmm8,xmm2 + jmp NEAR $L$_steal_cipher_amivrujEyduiFoi + +$L$_done_2_amivrujEyduiFoi: + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu 
xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vmovdqu XMMWORD[rsi],xmm1 + add rsi,0x20 + vmovdqa xmm8,xmm2 + jmp NEAR $L$_done_amivrujEyduiFoi + +$L$_num_blocks_is_1_amivrujEyduiFoi: + vmovdqa xmm9,XMMWORD[rsp] + mov rax,QWORD[rsp] + mov rbx,QWORD[8+rsp] + vmovdqu xmm1,XMMWORD[rdi] + add rdi,0x10 + and rdx,0xf + je NEAR $L$_done_1_amivrujEyduiFoi + +$L$_steal_cipher_1_amivrujEyduiFoi: + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[24+rsp],rbx + vmovdqa64 xmm10,xmm9 + vmovdqa xmm9,XMMWORD[16+rsp] + vpxor xmm1,xmm1,xmm9 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vpxor xmm1,xmm1,xmm9 + add rsi,0x10 + vmovdqa xmm0,xmm10 + vmovdqa xmm8,xmm1 + jmp NEAR $L$_steal_cipher_amivrujEyduiFoi + +$L$_done_1_amivrujEyduiFoi: + vpxor xmm1,xmm1,xmm9 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vpxor xmm1,xmm1,xmm9 + add rsi,0x10 + vmovdqa xmm8,xmm1 + jmp NEAR $L$_done_amivrujEyduiFoi + +global aesni_xts_256_encrypt_avx512 + + +ALIGN 32 +aesni_xts_256_encrypt_avx512: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_xts_256_encrypt_avx512: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + +DB 243,15,30,250 + push rbp + mov rbp,rsp + sub rsp,312 + and rsp,0xffffffffffffffc0 + mov QWORD[288+rsp],rbx + mov QWORD[((288 + 8))+rsp],rdi + mov QWORD[((288 + 16))+rsp],rsi + vmovdqa XMMWORD[(128 + 0)+rsp],xmm6 + vmovdqa XMMWORD[(128 + 16)+rsp],xmm7 + vmovdqa XMMWORD[(128 + 32)+rsp],xmm8 + vmovdqa XMMWORD[(128 + 48)+rsp],xmm9 + vmovdqa XMMWORD[(128 + 64)+rsp],xmm10 + vmovdqa XMMWORD[(128 + 80)+rsp],xmm11 + vmovdqa XMMWORD[(128 + 96)+rsp],xmm12 + vmovdqa XMMWORD[(128 + 112)+rsp],xmm13 + vmovdqa XMMWORD[(128 + 128)+rsp],xmm14 + vmovdqa XMMWORD[(128 + 144)+rsp],xmm15 + mov r10,0x87 + vmovdqu xmm1,XMMWORD[r9] + vpxor xmm1,xmm1,XMMWORD[r8] + vaesenc xmm1,xmm1,XMMWORD[16+r8] + vaesenc xmm1,xmm1,XMMWORD[32+r8] + vaesenc 
xmm1,xmm1,XMMWORD[48+r8] + vaesenc xmm1,xmm1,XMMWORD[64+r8] + vaesenc xmm1,xmm1,XMMWORD[80+r8] + vaesenc xmm1,xmm1,XMMWORD[96+r8] + vaesenc xmm1,xmm1,XMMWORD[112+r8] + vaesenc xmm1,xmm1,XMMWORD[128+r8] + vaesenc xmm1,xmm1,XMMWORD[144+r8] + vaesenc xmm1,xmm1,XMMWORD[160+r8] + vaesenc xmm1,xmm1,XMMWORD[176+r8] + vaesenc xmm1,xmm1,XMMWORD[192+r8] + vaesenc xmm1,xmm1,XMMWORD[208+r8] + vaesenclast xmm1,xmm1,XMMWORD[224+r8] + vmovdqa XMMWORD[rsp],xmm1 + mov QWORD[((8 + 40))+rbp],rdi + mov QWORD[((8 + 48))+rbp],rsi + + cmp rdx,0x80 + jl NEAR $L$_less_than_128_bytes_wcpqaDvsGlbjGoe + vpbroadcastq zmm25,r10 + cmp rdx,0x100 + jge NEAR $L$_start_by16_wcpqaDvsGlbjGoe + cmp rdx,0x80 + jge NEAR $L$_start_by8_wcpqaDvsGlbjGoe + +$L$_do_n_blocks_wcpqaDvsGlbjGoe: + cmp rdx,0x0 + je NEAR $L$_ret_wcpqaDvsGlbjGoe + cmp rdx,0x70 + jge NEAR $L$_remaining_num_blocks_is_7_wcpqaDvsGlbjGoe + cmp rdx,0x60 + jge NEAR $L$_remaining_num_blocks_is_6_wcpqaDvsGlbjGoe + cmp rdx,0x50 + jge NEAR $L$_remaining_num_blocks_is_5_wcpqaDvsGlbjGoe + cmp rdx,0x40 + jge NEAR $L$_remaining_num_blocks_is_4_wcpqaDvsGlbjGoe + cmp rdx,0x30 + jge NEAR $L$_remaining_num_blocks_is_3_wcpqaDvsGlbjGoe + cmp rdx,0x20 + jge NEAR $L$_remaining_num_blocks_is_2_wcpqaDvsGlbjGoe + cmp rdx,0x10 + jge NEAR $L$_remaining_num_blocks_is_1_wcpqaDvsGlbjGoe + vmovdqa xmm8,xmm0 + vmovdqa xmm0,xmm9 + jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe + +$L$_remaining_num_blocks_is_7_wcpqaDvsGlbjGoe: + mov r8,0x0000ffffffffffff + kmovq k1,r8 + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu8 zmm2{k1},[64+rdi] + add rdi,0x70 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpternlogq zmm1,zmm9,zmm0,0x96 + vpternlogq zmm2,zmm10,zmm0,0x96 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[176+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[192+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[208+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[224+rcx] + vaesenclast zmm1,zmm1,zmm0 + vaesenclast zmm2,zmm2,zmm0 + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 ZMMWORD[64+rsi]{k1},zmm2 + add rsi,0x70 + vextracti32x4 xmm8,zmm2,0x2 + vextracti32x4 xmm0,zmm10,0x3 + and rdx,0xf + je NEAR $L$_ret_wcpqaDvsGlbjGoe + jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe + +$L$_remaining_num_blocks_is_6_wcpqaDvsGlbjGoe: + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu8 ymm2,YMMWORD[64+rdi] + add rdi,0x60 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpternlogq zmm1,zmm9,zmm0,0x96 + vpternlogq zmm2,zmm10,zmm0,0x96 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc 
zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[176+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[192+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[208+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[224+rcx] + vaesenclast zmm1,zmm1,zmm0 + vaesenclast zmm2,zmm2,zmm0 + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 YMMWORD[64+rsi],ymm2 + add rsi,0x60 + vextracti32x4 xmm8,zmm2,0x1 + vextracti32x4 xmm0,zmm10,0x2 + and rdx,0xf + je NEAR $L$_ret_wcpqaDvsGlbjGoe + jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe + +$L$_remaining_num_blocks_is_5_wcpqaDvsGlbjGoe: + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu xmm2,XMMWORD[64+rdi] + add rdi,0x50 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpternlogq zmm1,zmm9,zmm0,0x96 + vpternlogq zmm2,zmm10,zmm0,0x96 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[176+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[192+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[208+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[224+rcx] + vaesenclast zmm1,zmm1,zmm0 + vaesenclast zmm2,zmm2,zmm0 + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu XMMWORD[64+rsi],xmm2 + add rsi,0x50 + vmovdqa xmm8,xmm2 + vextracti32x4 xmm0,zmm10,0x1 + and rdx,0xf + je NEAR $L$_ret_wcpqaDvsGlbjGoe + jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe + +$L$_remaining_num_blocks_is_4_wcpqaDvsGlbjGoe: + vmovdqu8 zmm1,ZMMWORD[rdi] + add rdi,0x40 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpternlogq zmm1,zmm9,zmm0,0x96 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesenc zmm1,zmm1,zmm0 + 
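(Editor's aside.) The vpternlogq ...,0x96 instructions in the encrypt paths above fold the XTS tweak XOR and the round-0 key whitening into a single operation: immediate 0x96 is the truth table of the three-way XOR. A standalone C check of that identity (nothing here comes from the generated code):

    #include <stdint.h>
    #include <stdio.h>

    /* imm8 = 0x96 = 0b10010110 indexes per bit by (a<<2)|(b<<1)|c and is
     * set exactly when an odd number of inputs are set, i.e. a ^ b ^ c. */
    static uint64_t ternlog96(uint64_t a, uint64_t b, uint64_t c)
    {
        uint64_t r = 0;
        for (int i = 0; i < 64; i++) {
            unsigned idx = (((a >> i) & 1) << 2) | (((b >> i) & 1) << 1)
                         | ((c >> i) & 1);
            r |= (uint64_t)((0x96 >> idx) & 1) << i;
        }
        return r;
    }

    int main(void)
    {
        uint64_t a = 0x0123456789abcdefULL, b = 0xfedcba9876543210ULL,
                 c = 0x00ff00ff00ff00ffULL;
        printf("%d\n", ternlog96(a, b, c) == (a ^ b ^ c));  /* prints 1 */
        return 0;
    }

Note also that this encrypt routine (aesni_xts_256_encrypt_avx512) runs fourteen rounds, with round keys at offsets 0 through 224, versus the ten-round AES-128 ladders in the decrypt paths earlier.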
vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[176+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[192+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[208+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[224+rcx] + vaesenclast zmm1,zmm1,zmm0 + vpxorq zmm1,zmm1,zmm9 + vmovdqu8 ZMMWORD[rsi],zmm1 + add rsi,0x40 + vextracti32x4 xmm8,zmm1,0x3 + vmovdqa64 xmm0,xmm10 + and rdx,0xf + je NEAR $L$_ret_wcpqaDvsGlbjGoe + jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe +$L$_remaining_num_blocks_is_3_wcpqaDvsGlbjGoe: + mov r8,-1 + shr r8,0x10 + kmovq k1,r8 + vmovdqu8 zmm1{k1},[rdi] + add rdi,0x30 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpternlogq zmm1,zmm9,zmm0,0x96 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[176+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[192+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[208+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[224+rcx] + vaesenclast zmm1,zmm1,zmm0 + vpxorq zmm1,zmm1,zmm9 + vmovdqu8 ZMMWORD[rsi]{k1},zmm1 + add rsi,0x30 + vextracti32x4 xmm8,zmm1,0x2 + vextracti32x4 xmm0,zmm9,0x3 + and rdx,0xf + je NEAR $L$_ret_wcpqaDvsGlbjGoe + jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe +$L$_remaining_num_blocks_is_2_wcpqaDvsGlbjGoe: + vmovdqu8 ymm1,YMMWORD[rdi] + add rdi,0x20 + vbroadcasti32x4 ymm0,YMMWORD[rcx] + vpternlogq ymm1,ymm9,ymm0,0x96 + vbroadcasti32x4 ymm0,YMMWORD[16+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[32+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[48+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[64+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[80+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[96+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[112+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[128+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[144+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[160+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[176+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[192+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[208+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[224+rcx] + vaesenclast ymm1,ymm1,ymm0 + vpxorq ymm1,ymm1,ymm9 + vmovdqu YMMWORD[rsi],ymm1 + add rsi,0x20 + vextracti32x4 xmm8,zmm1,0x1 + 
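(Editor's aside.) For tails that are not a multiple of 64 bytes, the code above builds a byte mask (for example mov r8,-1 / shr r8,0x10 leaves 48 set bits for three 16-byte blocks) and pairs kmovq with a masked vmovdqu8 so only the valid bytes are read or written; masked-out bytes are fault-suppressed. A hedged intrinsics sketch of the same idea (AVX512BW; load_tail_blocks is a made-up helper name):

    #include <immintrin.h>
    #include <stdint.h>

    /* Load n (1..4) consecutive 16-byte blocks into one ZMM register with
     * a zero-masked byte load, so no bytes past the buffer are touched --
     * the same trick as the kmovq/vmovdqu8{k1} pairs above. */
    static __m512i load_tail_blocks(const uint8_t *p, unsigned n)
    {
        __mmask64 m = (n >= 4) ? ~(__mmask64)0
                               : (((__mmask64)1 << (16 * n)) - 1);
        return _mm512_maskz_loadu_epi8(m, p);
    }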
vextracti32x4 xmm0,zmm9,0x2 + and rdx,0xf + je NEAR $L$_ret_wcpqaDvsGlbjGoe + jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe +$L$_remaining_num_blocks_is_1_wcpqaDvsGlbjGoe: + vmovdqu xmm1,XMMWORD[rdi] + add rdi,0x10 + vpxor xmm1,xmm1,xmm9 + vpxor xmm1,xmm1,XMMWORD[rcx] + vaesenc xmm1,xmm1,XMMWORD[16+rcx] + vaesenc xmm1,xmm1,XMMWORD[32+rcx] + vaesenc xmm1,xmm1,XMMWORD[48+rcx] + vaesenc xmm1,xmm1,XMMWORD[64+rcx] + vaesenc xmm1,xmm1,XMMWORD[80+rcx] + vaesenc xmm1,xmm1,XMMWORD[96+rcx] + vaesenc xmm1,xmm1,XMMWORD[112+rcx] + vaesenc xmm1,xmm1,XMMWORD[128+rcx] + vaesenc xmm1,xmm1,XMMWORD[144+rcx] + vaesenc xmm1,xmm1,XMMWORD[160+rcx] + vaesenc xmm1,xmm1,XMMWORD[176+rcx] + vaesenc xmm1,xmm1,XMMWORD[192+rcx] + vaesenc xmm1,xmm1,XMMWORD[208+rcx] + vaesenclast xmm1,xmm1,XMMWORD[224+rcx] + vpxor xmm1,xmm1,xmm9 + vmovdqu XMMWORD[rsi],xmm1 + add rsi,0x10 + vmovdqa xmm8,xmm1 + vextracti32x4 xmm0,zmm9,0x1 + and rdx,0xf + je NEAR $L$_ret_wcpqaDvsGlbjGoe + jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe + + +$L$_start_by16_wcpqaDvsGlbjGoe: + vbroadcasti32x4 zmm0,ZMMWORD[rsp] + vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7] + mov r8,0xaa + kmovq k2,r8 + vpshufb zmm1,zmm0,zmm8 + vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] + vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] + vpclmulqdq zmm3,zmm2,zmm25,0x0 + vpxorq zmm4{k2},zmm4,zmm2 + vpxord zmm9,zmm3,zmm4 + vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654] + vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234] + vpclmulqdq zmm7,zmm6,zmm25,0x0 + vpxorq zmm5{k2},zmm5,zmm6 + vpxord zmm10,zmm7,zmm5 + vpsrldq zmm13,zmm9,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm11,zmm9,0x1 + vpxord zmm11,zmm11,zmm14 + vpsrldq zmm15,zmm10,0xf + vpclmulqdq zmm16,zmm15,zmm25,0x0 + vpslldq zmm12,zmm10,0x1 + vpxord zmm12,zmm12,zmm16 + +$L$_main_loop_run_16_wcpqaDvsGlbjGoe: + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu8 zmm2,ZMMWORD[64+rdi] + vmovdqu8 zmm3,ZMMWORD[128+rdi] + vmovdqu8 zmm4,ZMMWORD[192+rdi] + add rdi,0x100 + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + vpxorq zmm3,zmm3,zmm11 + vpxorq zmm4,zmm4,zmm12 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpxorq zmm1,zmm1,zmm0 + vpxorq zmm2,zmm2,zmm0 + vpxorq zmm3,zmm3,zmm0 + vpxorq zmm4,zmm4,zmm0 + vpsrldq zmm13,zmm11,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm15,zmm11,0x1 + vpxord zmm15,zmm15,zmm14 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vpsrldq zmm13,zmm12,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm16,zmm12,0x1 + vpxord zmm16,zmm16,zmm14 + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vpsrldq zmm13,zmm15,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm17,zmm15,0x1 + vpxord zmm17,zmm17,zmm14 + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc 
zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vpsrldq zmm13,zmm16,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm18,zmm16,0x1 + vpxord zmm18,zmm18,zmm14 + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[176+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[192+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[208+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[224+rcx] + vaesenclast zmm1,zmm1,zmm0 + vaesenclast zmm2,zmm2,zmm0 + vaesenclast zmm3,zmm3,zmm0 + vaesenclast zmm4,zmm4,zmm0 + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + vpxorq zmm3,zmm3,zmm11 + vpxorq zmm4,zmm4,zmm12 + + vmovdqa32 zmm9,zmm15 + vmovdqa32 zmm10,zmm16 + vmovdqa32 zmm11,zmm17 + vmovdqa32 zmm12,zmm18 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 ZMMWORD[64+rsi],zmm2 + vmovdqu8 ZMMWORD[128+rsi],zmm3 + vmovdqu8 ZMMWORD[192+rsi],zmm4 + add rsi,0x100 + sub rdx,0x100 + cmp rdx,0x100 + jae NEAR $L$_main_loop_run_16_wcpqaDvsGlbjGoe + cmp rdx,0x80 + jae NEAR $L$_main_loop_run_8_wcpqaDvsGlbjGoe + vextracti32x4 xmm0,zmm4,0x3 + jmp NEAR $L$_do_n_blocks_wcpqaDvsGlbjGoe + +$L$_start_by8_wcpqaDvsGlbjGoe: + vbroadcasti32x4 zmm0,ZMMWORD[rsp] + vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7] + mov r8,0xaa + kmovq k2,r8 + vpshufb zmm1,zmm0,zmm8 + vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] + vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] + vpclmulqdq zmm3,zmm2,zmm25,0x0 + vpxorq zmm4{k2},zmm4,zmm2 + vpxord zmm9,zmm3,zmm4 + vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654] + vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234] + vpclmulqdq zmm7,zmm6,zmm25,0x0 + vpxorq zmm5{k2},zmm5,zmm6 + vpxord zmm10,zmm7,zmm5 + +$L$_main_loop_run_8_wcpqaDvsGlbjGoe: + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu8 zmm2,ZMMWORD[64+rdi] + add rdi,0x80 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpternlogq zmm1,zmm9,zmm0,0x96 + vpternlogq zmm2,zmm10,zmm0,0x96 + vpsrldq zmm13,zmm9,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm15,zmm9,0x1 + vpxord zmm15,zmm15,zmm14 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vpsrldq zmm13,zmm10,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm16,zmm10,0x1 + vpxord zmm16,zmm16,zmm14 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[176+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + 
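+; Round keys at offsets 192/208/224 are the last three of the AES-256
+; schedule; the vaesenclast below applies the final round, after which the
+; tweaks are XORed back in to complete XTS encryption of this 8-block chunk.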
vbroadcasti32x4 zmm0,ZMMWORD[192+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[208+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[224+rcx] + vaesenclast zmm1,zmm1,zmm0 + vaesenclast zmm2,zmm2,zmm0 + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + vmovdqa32 zmm9,zmm15 + vmovdqa32 zmm10,zmm16 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 ZMMWORD[64+rsi],zmm2 + add rsi,0x80 + sub rdx,0x80 + cmp rdx,0x80 + jae NEAR $L$_main_loop_run_8_wcpqaDvsGlbjGoe + vextracti32x4 xmm0,zmm2,0x3 + jmp NEAR $L$_do_n_blocks_wcpqaDvsGlbjGoe + +$L$_steal_cipher_wcpqaDvsGlbjGoe: + vmovdqa xmm2,xmm8 + lea rax,[vpshufb_shf_table] + vmovdqu xmm10,XMMWORD[rdx*1+rax] + vpshufb xmm8,xmm8,xmm10 + vmovdqu xmm3,XMMWORD[((-16))+rdx*1+rdi] + vmovdqu XMMWORD[(-16)+rdx*1+rsi],xmm8 + lea rax,[vpshufb_shf_table] + add rax,16 + sub rax,rdx + vmovdqu xmm10,XMMWORD[rax] + vpxor xmm10,xmm10,XMMWORD[mask1] + vpshufb xmm3,xmm3,xmm10 + vpblendvb xmm3,xmm3,xmm2,xmm10 + vpxor xmm8,xmm3,xmm0 + vpxor xmm8,xmm8,XMMWORD[rcx] + vaesenc xmm8,xmm8,XMMWORD[16+rcx] + vaesenc xmm8,xmm8,XMMWORD[32+rcx] + vaesenc xmm8,xmm8,XMMWORD[48+rcx] + vaesenc xmm8,xmm8,XMMWORD[64+rcx] + vaesenc xmm8,xmm8,XMMWORD[80+rcx] + vaesenc xmm8,xmm8,XMMWORD[96+rcx] + vaesenc xmm8,xmm8,XMMWORD[112+rcx] + vaesenc xmm8,xmm8,XMMWORD[128+rcx] + vaesenc xmm8,xmm8,XMMWORD[144+rcx] + vaesenc xmm8,xmm8,XMMWORD[160+rcx] + vaesenc xmm8,xmm8,XMMWORD[176+rcx] + vaesenc xmm8,xmm8,XMMWORD[192+rcx] + vaesenc xmm8,xmm8,XMMWORD[208+rcx] + vaesenclast xmm8,xmm8,XMMWORD[224+rcx] + vpxor xmm8,xmm8,xmm0 + vmovdqu XMMWORD[(-16)+rsi],xmm8 +$L$_ret_wcpqaDvsGlbjGoe: + mov rbx,QWORD[288+rsp] + xor r8,r8 + mov QWORD[288+rsp],r8 + + vpxorq zmm0,zmm0,zmm0 + mov rdi,QWORD[((288 + 8))+rsp] + mov QWORD[((288 + 8))+rsp],r8 + mov rsi,QWORD[((288 + 16))+rsp] + mov QWORD[((288 + 16))+rsp],r8 + + vmovdqa xmm6,XMMWORD[((128 + 0))+rsp] + vmovdqa xmm7,XMMWORD[((128 + 16))+rsp] + vmovdqa xmm8,XMMWORD[((128 + 32))+rsp] + vmovdqa xmm9,XMMWORD[((128 + 48))+rsp] + + + vmovdqa64 ZMMWORD[128+rsp],zmm0 + + vmovdqa xmm10,XMMWORD[((128 + 64))+rsp] + vmovdqa xmm11,XMMWORD[((128 + 80))+rsp] + vmovdqa xmm12,XMMWORD[((128 + 96))+rsp] + vmovdqa xmm13,XMMWORD[((128 + 112))+rsp] + + + vmovdqa64 ZMMWORD[(128 + 64)+rsp],zmm0 + + vmovdqa xmm14,XMMWORD[((128 + 128))+rsp] + vmovdqa xmm15,XMMWORD[((128 + 144))+rsp] + + + + vmovdqa YMMWORD[(128 + 128)+rsp],ymm0 + mov rsp,rbp + pop rbp + vzeroupper + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$_less_than_128_bytes_wcpqaDvsGlbjGoe: + vpbroadcastq zmm25,r10 + cmp rdx,0x10 + jb NEAR $L$_ret_wcpqaDvsGlbjGoe + vbroadcasti32x4 zmm0,ZMMWORD[rsp] + vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7] + mov r8d,0xaa + kmovq k2,r8 + mov r8,rdx + and r8,0x70 + cmp r8,0x60 + je NEAR $L$_num_blocks_is_6_wcpqaDvsGlbjGoe + cmp r8,0x50 + je NEAR $L$_num_blocks_is_5_wcpqaDvsGlbjGoe + cmp r8,0x40 + je NEAR $L$_num_blocks_is_4_wcpqaDvsGlbjGoe + cmp r8,0x30 + je NEAR $L$_num_blocks_is_3_wcpqaDvsGlbjGoe + cmp r8,0x20 + je NEAR $L$_num_blocks_is_2_wcpqaDvsGlbjGoe + cmp r8,0x10 + je NEAR $L$_num_blocks_is_1_wcpqaDvsGlbjGoe + +$L$_num_blocks_is_7_wcpqaDvsGlbjGoe: + vpshufb zmm1,zmm0,zmm8 + vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] + vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] + vpclmulqdq zmm3,zmm2,zmm25,0x00 + vpxorq zmm4{k2},zmm4,zmm2 + vpxord zmm9,zmm3,zmm4 + vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654] + vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234] + vpclmulqdq zmm7,zmm6,zmm25,0x00 + vpxorq 
zmm5{k2},zmm5,zmm6 + vpxord zmm10,zmm7,zmm5 + mov r8,0x0000ffffffffffff + kmovq k1,r8 + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu8 zmm2{k1},[64+rdi] + + add rdi,0x70 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpternlogq zmm1,zmm9,zmm0,0x96 + vpternlogq zmm2,zmm10,zmm0,0x96 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[176+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[192+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[208+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[224+rcx] + vaesenclast zmm1,zmm1,zmm0 + vaesenclast zmm2,zmm2,zmm0 + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 ZMMWORD[64+rsi]{k1},zmm2 + add rsi,0x70 + vextracti32x4 xmm8,zmm2,0x2 + vextracti32x4 xmm0,zmm10,0x3 + and rdx,0xf + je NEAR $L$_ret_wcpqaDvsGlbjGoe + jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe +$L$_num_blocks_is_6_wcpqaDvsGlbjGoe: + vpshufb zmm1,zmm0,zmm8 + vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] + vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] + vpclmulqdq zmm3,zmm2,zmm25,0x00 + vpxorq zmm4{k2},zmm4,zmm2 + vpxord zmm9,zmm3,zmm4 + vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654] + vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234] + vpclmulqdq zmm7,zmm6,zmm25,0x00 + vpxorq zmm5{k2},zmm5,zmm6 + vpxord zmm10,zmm7,zmm5 + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu8 ymm2,YMMWORD[64+rdi] + add rdi,96 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpternlogq zmm1,zmm9,zmm0,0x96 + vpternlogq zmm2,zmm10,zmm0,0x96 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[176+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[192+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + 
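+; Two more rounds (the 208-offset key, then vaesenclast with the 224 key)
+; finish the 6-block path; ymm2 carries blocks 5-6 alongside the four in zmm1.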
+ vbroadcasti32x4 zmm0,ZMMWORD[208+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[224+rcx] + vaesenclast zmm1,zmm1,zmm0 + vaesenclast zmm2,zmm2,zmm0 + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 YMMWORD[64+rsi],ymm2 + add rsi,96 + + vextracti32x4 xmm8,ymm2,0x1 + vextracti32x4 xmm0,zmm10,0x2 + and rdx,0xf + je NEAR $L$_ret_wcpqaDvsGlbjGoe + jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe +$L$_num_blocks_is_5_wcpqaDvsGlbjGoe: + vpshufb zmm1,zmm0,zmm8 + vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] + vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] + vpclmulqdq zmm3,zmm2,zmm25,0x00 + vpxorq zmm4{k2},zmm4,zmm2 + vpxord zmm9,zmm3,zmm4 + vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654] + vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234] + vpclmulqdq zmm7,zmm6,zmm25,0x00 + vpxorq zmm5{k2},zmm5,zmm6 + vpxord zmm10,zmm7,zmm5 + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu8 xmm2,XMMWORD[64+rdi] + add rdi,80 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpternlogq zmm1,zmm9,zmm0,0x96 + vpternlogq zmm2,zmm10,zmm0,0x96 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[176+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[192+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[208+rcx] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[224+rcx] + vaesenclast zmm1,zmm1,zmm0 + vaesenclast zmm2,zmm2,zmm0 + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 XMMWORD[64+rsi],xmm2 + add rsi,80 + + vmovdqa xmm8,xmm2 + vextracti32x4 xmm0,zmm10,0x1 + and rdx,0xf + je NEAR $L$_ret_wcpqaDvsGlbjGoe + jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe +$L$_num_blocks_is_4_wcpqaDvsGlbjGoe: + vpshufb zmm1,zmm0,zmm8 + vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] + vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] + vpclmulqdq zmm3,zmm2,zmm25,0x00 + vpxorq zmm4{k2},zmm4,zmm2 + vpxord zmm9,zmm3,zmm4 + vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654] + vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234] + vpclmulqdq zmm7,zmm6,zmm25,0x00 + vpxorq zmm5{k2},zmm5,zmm6 + vpxord zmm10,zmm7,zmm5 + vmovdqu8 zmm1,ZMMWORD[rdi] + add rdi,64 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpternlogq zmm1,zmm9,zmm0,0x96 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 
zmm0,ZMMWORD[112+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[176+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[192+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[208+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[224+rcx] + vaesenclast zmm1,zmm1,zmm0 + vpxorq zmm1,zmm1,zmm9 + vmovdqu8 ZMMWORD[rsi],zmm1 + add rsi,64 + vextracti32x4 xmm8,zmm1,0x3 + vmovdqa xmm0,xmm10 + and rdx,0xf + je NEAR $L$_ret_wcpqaDvsGlbjGoe + jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe +$L$_num_blocks_is_3_wcpqaDvsGlbjGoe: + vpshufb zmm1,zmm0,zmm8 + vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] + vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] + vpclmulqdq zmm3,zmm2,zmm25,0x00 + vpxorq zmm4{k2},zmm4,zmm2 + vpxord zmm9,zmm3,zmm4 + mov r8,0x0000ffffffffffff + kmovq k1,r8 + vmovdqu8 zmm1{k1},[rdi] + add rdi,48 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpternlogq zmm1,zmm9,zmm0,0x96 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[176+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[192+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[208+rcx] + vaesenc zmm1,zmm1,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[224+rcx] + vaesenclast zmm1,zmm1,zmm0 + vpxorq zmm1,zmm1,zmm9 + vmovdqu8 ZMMWORD[rsi]{k1},zmm1 + add rsi,48 + vextracti32x4 xmm8,zmm1,2 + vextracti32x4 xmm0,zmm9,3 + and rdx,0xf + je NEAR $L$_ret_wcpqaDvsGlbjGoe + jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe +$L$_num_blocks_is_2_wcpqaDvsGlbjGoe: + vpshufb zmm1,zmm0,zmm8 + vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] + vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] + vpclmulqdq zmm3,zmm2,zmm25,0x00 + vpxorq zmm4{k2},zmm4,zmm2 + vpxord zmm9,zmm3,zmm4 + + vmovdqu8 ymm1,YMMWORD[rdi] + add rdi,32 + vbroadcasti32x4 ymm0,YMMWORD[rcx] + vpternlogq ymm1,ymm9,ymm0,0x96 + vbroadcasti32x4 ymm0,YMMWORD[16+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[32+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[48+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[64+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[80+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[96+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[112+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[128+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[144+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[160+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[176+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[192+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[208+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[224+rcx] + vaesenclast ymm1,ymm1,ymm0 + vpxorq ymm1,ymm1,ymm9 + vmovdqu8 YMMWORD[rsi],ymm1 + add rsi,32 + + 
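+; Save the last ciphertext block in xmm8 and the next tweak in xmm0 so the
+; ciphertext-stealing path can consume them if a partial tail block remains.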
vextracti32x4 xmm8,ymm1,1 + vextracti32x4 xmm0,zmm9,2 + and rdx,0xf + je NEAR $L$_ret_wcpqaDvsGlbjGoe + jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe +$L$_num_blocks_is_1_wcpqaDvsGlbjGoe: + vpshufb zmm1,zmm0,zmm8 + vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] + vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] + vpclmulqdq zmm3,zmm2,zmm25,0x00 + vpxorq zmm4{k2},zmm4,zmm2 + vpxord zmm9,zmm3,zmm4 + + vmovdqu8 xmm1,XMMWORD[rdi] + add rdi,16 + vbroadcasti32x4 ymm0,YMMWORD[rcx] + vpternlogq ymm1,ymm9,ymm0,0x96 + vbroadcasti32x4 ymm0,YMMWORD[16+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[32+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[48+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[64+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[80+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[96+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[112+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[128+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[144+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[160+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[176+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[192+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[208+rcx] + vaesenc ymm1,ymm1,ymm0 + vbroadcasti32x4 ymm0,YMMWORD[224+rcx] + vaesenclast ymm1,ymm1,ymm0 + vpxorq ymm1,ymm1,ymm9 + vmovdqu8 XMMWORD[rsi],xmm1 + add rsi,16 + + vmovdqa xmm8,xmm1 + vextracti32x4 xmm0,zmm9,1 + and rdx,0xf + je NEAR $L$_ret_wcpqaDvsGlbjGoe + jmp NEAR $L$_steal_cipher_wcpqaDvsGlbjGoe + +global aesni_xts_256_decrypt_avx512 + + +ALIGN 32 +aesni_xts_256_decrypt_avx512: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_xts_256_decrypt_avx512: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + +DB 243,15,30,250 + push rbp + mov rbp,rsp + sub rsp,312 + and rsp,0xffffffffffffffc0 + mov QWORD[288+rsp],rbx + mov QWORD[((288 + 8))+rsp],rdi + mov QWORD[((288 + 16))+rsp],rsi + vmovdqa XMMWORD[(128 + 0)+rsp],xmm6 + vmovdqa XMMWORD[(128 + 16)+rsp],xmm7 + vmovdqa XMMWORD[(128 + 32)+rsp],xmm8 + vmovdqa XMMWORD[(128 + 48)+rsp],xmm9 + vmovdqa XMMWORD[(128 + 64)+rsp],xmm10 + vmovdqa XMMWORD[(128 + 80)+rsp],xmm11 + vmovdqa XMMWORD[(128 + 96)+rsp],xmm12 + vmovdqa XMMWORD[(128 + 112)+rsp],xmm13 + vmovdqa XMMWORD[(128 + 128)+rsp],xmm14 + vmovdqa XMMWORD[(128 + 144)+rsp],xmm15 + mov r10,0x87 + vmovdqu xmm1,XMMWORD[r9] + vpxor xmm1,xmm1,XMMWORD[r8] + vaesenc xmm1,xmm1,XMMWORD[16+r8] + vaesenc xmm1,xmm1,XMMWORD[32+r8] + vaesenc xmm1,xmm1,XMMWORD[48+r8] + vaesenc xmm1,xmm1,XMMWORD[64+r8] + vaesenc xmm1,xmm1,XMMWORD[80+r8] + vaesenc xmm1,xmm1,XMMWORD[96+r8] + vaesenc xmm1,xmm1,XMMWORD[112+r8] + vaesenc xmm1,xmm1,XMMWORD[128+r8] + vaesenc xmm1,xmm1,XMMWORD[144+r8] + vaesenc xmm1,xmm1,XMMWORD[160+r8] + vaesenc xmm1,xmm1,XMMWORD[176+r8] + vaesenc xmm1,xmm1,XMMWORD[192+r8] + vaesenc xmm1,xmm1,XMMWORD[208+r8] + vaesenclast xmm1,xmm1,XMMWORD[224+r8] + vmovdqa XMMWORD[rsp],xmm1 + mov QWORD[((8 + 40))+rbp],rdi + mov QWORD[((8 + 48))+rbp],rsi + + cmp rdx,0x80 + jb NEAR $L$_less_than_128_bytes_EmbgEptodyewbFa + vpbroadcastq zmm25,r10 + cmp rdx,0x100 + jge NEAR $L$_start_by16_EmbgEptodyewbFa + jmp NEAR $L$_start_by8_EmbgEptodyewbFa + +$L$_do_n_blocks_EmbgEptodyewbFa: + cmp rdx,0x0 + je NEAR $L$_ret_EmbgEptodyewbFa + cmp rdx,0x70 + jge NEAR $L$_remaining_num_blocks_is_7_EmbgEptodyewbFa + cmp rdx,0x60 + jge NEAR 
$L$_remaining_num_blocks_is_6_EmbgEptodyewbFa + cmp rdx,0x50 + jge NEAR $L$_remaining_num_blocks_is_5_EmbgEptodyewbFa + cmp rdx,0x40 + jge NEAR $L$_remaining_num_blocks_is_4_EmbgEptodyewbFa + cmp rdx,0x30 + jge NEAR $L$_remaining_num_blocks_is_3_EmbgEptodyewbFa + cmp rdx,0x20 + jge NEAR $L$_remaining_num_blocks_is_2_EmbgEptodyewbFa + cmp rdx,0x10 + jge NEAR $L$_remaining_num_blocks_is_1_EmbgEptodyewbFa + + + vmovdqu xmm1,xmm5 + + vpxor xmm1,xmm1,xmm9 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[176+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[192+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[208+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[224+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vpxor xmm1,xmm1,xmm9 + vmovdqu XMMWORD[(-16)+rsi],xmm1 + vmovdqa xmm8,xmm1 + + + mov r8,0x1 + kmovq k1,r8 + vpsllq xmm13,xmm9,0x3f + vpsraq xmm14,xmm13,0x3f + vpandq xmm5,xmm14,xmm25 + vpxorq xmm9{k1},xmm9,xmm5 + vpsrldq xmm10,xmm9,0x8 +DB 98,211,181,8,115,194,1 + vpslldq xmm13,xmm13,0x8 + vpxorq xmm0,xmm0,xmm13 + jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa + +$L$_remaining_num_blocks_is_7_EmbgEptodyewbFa: + mov r8,0xffffffffffffffff + shr r8,0x10 + kmovq k1,r8 + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu8 zmm2{k1},[64+rdi] + add rdi,0x70 + and rdx,0xf + je NEAR $L$_done_7_remain_EmbgEptodyewbFa + vextracti32x4 xmm12,zmm10,0x2 + vextracti32x4 xmm13,zmm10,0x3 + vinserti32x4 zmm10,zmm10,xmm13,0x2 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpxorq zmm1,zmm1,zmm0 + vpxorq zmm2,zmm2,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[176+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[192+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[208+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[224+rcx] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vmovdqa32 zmm9,zmm15 + vmovdqa32 zmm10,zmm16 + 
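+; Store the seven decrypted blocks (k1 masks off the unused top lane of
+; zmm2), then fall into ciphertext stealing with the saved block and tweak.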
vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 ZMMWORD[64+rsi]{k1},zmm2 + add rsi,0x70 + vextracti32x4 xmm8,zmm2,0x2 + vmovdqa xmm0,xmm12 + jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa + +$L$_done_7_remain_EmbgEptodyewbFa: + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpxorq zmm1,zmm1,zmm0 + vpxorq zmm2,zmm2,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[176+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[192+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[208+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[224+rcx] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vmovdqa32 zmm9,zmm15 + vmovdqa32 zmm10,zmm16 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 ZMMWORD[64+rsi]{k1},zmm2 + jmp NEAR $L$_ret_EmbgEptodyewbFa + +$L$_remaining_num_blocks_is_6_EmbgEptodyewbFa: + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu8 ymm2,YMMWORD[64+rdi] + add rdi,0x60 + and rdx,0xf + je NEAR $L$_done_6_remain_EmbgEptodyewbFa + vextracti32x4 xmm12,zmm10,0x1 + vextracti32x4 xmm13,zmm10,0x2 + vinserti32x4 zmm10,zmm10,xmm13,0x1 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpxorq zmm1,zmm1,zmm0 + vpxorq zmm2,zmm2,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[176+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[192+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[208+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[224+rcx] + 
vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vmovdqa32 zmm9,zmm15 + vmovdqa32 zmm10,zmm16 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 YMMWORD[64+rsi],ymm2 + add rsi,0x60 + vextracti32x4 xmm8,zmm2,0x1 + vmovdqa xmm0,xmm12 + jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa + +$L$_done_6_remain_EmbgEptodyewbFa: + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpxorq zmm1,zmm1,zmm0 + vpxorq zmm2,zmm2,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[176+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[192+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[208+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[224+rcx] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vmovdqa32 zmm9,zmm15 + vmovdqa32 zmm10,zmm16 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 YMMWORD[64+rsi],ymm2 + jmp NEAR $L$_ret_EmbgEptodyewbFa + +$L$_remaining_num_blocks_is_5_EmbgEptodyewbFa: + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu xmm2,XMMWORD[64+rdi] + add rdi,0x50 + and rdx,0xf + je NEAR $L$_done_5_remain_EmbgEptodyewbFa + vmovdqa xmm12,xmm10 + vextracti32x4 xmm10,zmm10,0x1 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpxorq zmm1,zmm1,zmm0 + vpxorq zmm2,zmm2,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[176+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[192+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[208+rcx] + 
vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[224+rcx] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vmovdqa32 zmm9,zmm15 + vmovdqa32 zmm10,zmm16 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu XMMWORD[64+rsi],xmm2 + add rsi,0x50 + vmovdqa xmm8,xmm2 + vmovdqa xmm0,xmm12 + jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa + +$L$_done_5_remain_EmbgEptodyewbFa: + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpxorq zmm1,zmm1,zmm0 + vpxorq zmm2,zmm2,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[176+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[192+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[208+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[224+rcx] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vmovdqa32 zmm9,zmm15 + vmovdqa32 zmm10,zmm16 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 XMMWORD[64+rsi],xmm2 + jmp NEAR $L$_ret_EmbgEptodyewbFa + +$L$_remaining_num_blocks_is_4_EmbgEptodyewbFa: + vmovdqu8 zmm1,ZMMWORD[rdi] + add rdi,0x40 + and rdx,0xf + je NEAR $L$_done_4_remain_EmbgEptodyewbFa + vextracti32x4 xmm12,zmm9,0x3 + vinserti32x4 zmm9,zmm9,xmm10,0x3 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpxorq zmm1,zmm1,zmm0 + vpxorq zmm2,zmm2,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[176+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[192+rcx] + vaesdec zmm1,zmm1,zmm0 + 
vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[208+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[224+rcx] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vmovdqa32 zmm9,zmm15 + vmovdqa32 zmm10,zmm16 + vmovdqu8 ZMMWORD[rsi],zmm1 + add rsi,0x40 + vextracti32x4 xmm8,zmm1,0x3 + vmovdqa xmm0,xmm12 + jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa + +$L$_done_4_remain_EmbgEptodyewbFa: + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpxorq zmm1,zmm1,zmm0 + vpxorq zmm2,zmm2,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[176+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[192+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[208+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[224+rcx] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vmovdqa32 zmm9,zmm15 + vmovdqa32 zmm10,zmm16 + vmovdqu8 ZMMWORD[rsi],zmm1 + jmp NEAR $L$_ret_EmbgEptodyewbFa + +$L$_remaining_num_blocks_is_3_EmbgEptodyewbFa: + vmovdqu xmm1,XMMWORD[rdi] + vmovdqu xmm2,XMMWORD[16+rdi] + vmovdqu xmm3,XMMWORD[32+rdi] + add rdi,0x30 + and rdx,0xf + je NEAR $L$_done_3_remain_EmbgEptodyewbFa + vextracti32x4 xmm13,zmm9,0x2 + vextracti32x4 xmm10,zmm9,0x1 + vextracti32x4 xmm11,zmm9,0x3 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vpxor xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + 
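+; The remaining round keys (offsets 160..224) are applied to the three
+; blocks held in individual XMM registers; this tail path stays off the
+; ZMM pipeline.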
vmovdqu xmm0,XMMWORD[160+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[176+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[192+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[208+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[224+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + vmovdqu XMMWORD[32+rsi],xmm3 + add rsi,0x30 + vmovdqa xmm8,xmm3 + vmovdqa xmm0,xmm13 + jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa + +$L$_done_3_remain_EmbgEptodyewbFa: + vextracti32x4 xmm10,zmm9,0x1 + vextracti32x4 xmm11,zmm9,0x2 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vpxor xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[176+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[192+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[208+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[224+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + vmovdqu XMMWORD[32+rsi],xmm3 + jmp NEAR $L$_ret_EmbgEptodyewbFa + +$L$_remaining_num_blocks_is_2_EmbgEptodyewbFa: + vmovdqu xmm1,XMMWORD[rdi] + vmovdqu xmm2,XMMWORD[16+rdi] + add rdi,0x20 + and rdx,0xf + je NEAR $L$_done_2_remain_EmbgEptodyewbFa + vextracti32x4 xmm10,zmm9,0x2 + vextracti32x4 xmm12,zmm9,0x1 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu 
xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[176+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[192+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[208+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[224+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + add rsi,0x20 + vmovdqa xmm8,xmm2 + vmovdqa xmm0,xmm12 + jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa + +$L$_done_2_remain_EmbgEptodyewbFa: + vextracti32x4 xmm10,zmm9,0x1 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[176+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[192+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[208+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[224+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + jmp NEAR $L$_ret_EmbgEptodyewbFa + +$L$_remaining_num_blocks_is_1_EmbgEptodyewbFa: + vmovdqu xmm1,XMMWORD[rdi] + add rdi,0x10 + and rdx,0xf + je NEAR $L$_done_1_remain_EmbgEptodyewbFa + vextracti32x4 xmm11,zmm9,0x1 + vpxor xmm1,xmm1,xmm11 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[176+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[192+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[208+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[224+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vpxor xmm1,xmm1,xmm11 + vmovdqu XMMWORD[rsi],xmm1 + add rsi,0x10 + vmovdqa 
xmm8,xmm1 + vmovdqa xmm0,xmm9 + jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa + +$L$_done_1_remain_EmbgEptodyewbFa: + vpxor xmm1,xmm1,xmm9 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[176+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[192+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[208+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[224+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vpxor xmm1,xmm1,xmm9 + vmovdqu XMMWORD[rsi],xmm1 + jmp NEAR $L$_ret_EmbgEptodyewbFa + +$L$_start_by16_EmbgEptodyewbFa: + vbroadcasti32x4 zmm0,ZMMWORD[rsp] + vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7] + mov r8,0xaa + kmovq k2,r8 + + + vpshufb zmm1,zmm0,zmm8 + vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] + vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] + vpclmulqdq zmm3,zmm2,zmm25,0x0 + vpxorq zmm4{k2},zmm4,zmm2 + vpxord zmm9,zmm3,zmm4 + + + vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654] + vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234] + vpclmulqdq zmm7,zmm6,zmm25,0x0 + vpxorq zmm5{k2},zmm5,zmm6 + vpxord zmm10,zmm7,zmm5 + + + vpsrldq zmm13,zmm9,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm11,zmm9,0x1 + vpxord zmm11,zmm11,zmm14 + + vpsrldq zmm15,zmm10,0xf + vpclmulqdq zmm16,zmm15,zmm25,0x0 + vpslldq zmm12,zmm10,0x1 + vpxord zmm12,zmm12,zmm16 + +$L$_main_loop_run_16_EmbgEptodyewbFa: + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu8 zmm2,ZMMWORD[64+rdi] + vmovdqu8 zmm3,ZMMWORD[128+rdi] + vmovdqu8 zmm4,ZMMWORD[192+rdi] + vmovdqu8 xmm5,XMMWORD[240+rdi] + add rdi,0x100 + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + vpxorq zmm3,zmm3,zmm11 + vpxorq zmm4,zmm4,zmm12 + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpxorq zmm1,zmm1,zmm0 + vpxorq zmm2,zmm2,zmm0 + vpxorq zmm3,zmm3,zmm0 + vpxorq zmm4,zmm4,zmm0 + vpsrldq zmm13,zmm11,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm15,zmm11,0x1 + vpxord zmm15,zmm15,zmm14 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vpsrldq zmm13,zmm12,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm16,zmm12,0x1 + vpxord zmm16,zmm16,zmm14 + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vpsrldq zmm13,zmm15,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm17,zmm15,0x1 + vpxord zmm17,zmm17,zmm14 + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + 
vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vpsrldq zmm13,zmm16,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm18,zmm16,0x1 + vpxord zmm18,zmm18,zmm14 + vbroadcasti32x4 zmm0,ZMMWORD[160+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[176+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[192+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[208+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[224+rcx] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 + vaesdeclast zmm3,zmm3,zmm0 + vaesdeclast zmm4,zmm4,zmm0 + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + vpxorq zmm3,zmm3,zmm11 + vpxorq zmm4,zmm4,zmm12 + + vmovdqa32 zmm9,zmm15 + vmovdqa32 zmm10,zmm16 + vmovdqa32 zmm11,zmm17 + vmovdqa32 zmm12,zmm18 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 ZMMWORD[64+rsi],zmm2 + vmovdqu8 ZMMWORD[128+rsi],zmm3 + vmovdqu8 ZMMWORD[192+rsi],zmm4 + add rsi,0x100 + sub rdx,0x100 + cmp rdx,0x100 + jge NEAR $L$_main_loop_run_16_EmbgEptodyewbFa + + cmp rdx,0x80 + jge NEAR $L$_main_loop_run_8_EmbgEptodyewbFa + jmp NEAR $L$_do_n_blocks_EmbgEptodyewbFa + +$L$_start_by8_EmbgEptodyewbFa: + + vbroadcasti32x4 zmm0,ZMMWORD[rsp] + vbroadcasti32x4 zmm8,ZMMWORD[shufb_15_7] + mov r8,0xaa + kmovq k2,r8 + + + vpshufb zmm1,zmm0,zmm8 + vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] + vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] + vpclmulqdq zmm3,zmm2,zmm25,0x0 + vpxorq zmm4{k2},zmm4,zmm2 + vpxord zmm9,zmm3,zmm4 + + + vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654] + vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234] + vpclmulqdq zmm7,zmm6,zmm25,0x0 + vpxorq zmm5{k2},zmm5,zmm6 + vpxord zmm10,zmm7,zmm5 + +$L$_main_loop_run_8_EmbgEptodyewbFa: + vmovdqu8 zmm1,ZMMWORD[rdi] + vmovdqu8 zmm2,ZMMWORD[64+rdi] + vmovdqu8 xmm5,XMMWORD[112+rdi] + add rdi,0x80 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vbroadcasti32x4 zmm0,ZMMWORD[rcx] + vpxorq zmm1,zmm1,zmm0 + vpxorq zmm2,zmm2,zmm0 + vpsrldq zmm13,zmm9,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm15,zmm9,0x1 + vpxord zmm15,zmm15,zmm14 + vbroadcasti32x4 zmm0,ZMMWORD[16+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[32+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[48+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vpsrldq zmm13,zmm10,0xf + vpclmulqdq zmm14,zmm13,zmm25,0x0 + vpslldq zmm16,zmm10,0x1 + vpxord zmm16,zmm16,zmm14 + + vbroadcasti32x4 zmm0,ZMMWORD[64+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[80+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[96+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[112+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[128+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[144+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 
zmm0,ZMMWORD[160+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[176+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[192+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[208+rcx] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + + + vbroadcasti32x4 zmm0,ZMMWORD[224+rcx] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 + + vpxorq zmm1,zmm1,zmm9 + vpxorq zmm2,zmm2,zmm10 + + + vmovdqa32 zmm9,zmm15 + vmovdqa32 zmm10,zmm16 + vmovdqu8 ZMMWORD[rsi],zmm1 + vmovdqu8 ZMMWORD[64+rsi],zmm2 + add rsi,0x80 + sub rdx,0x80 + cmp rdx,0x80 + jge NEAR $L$_main_loop_run_8_EmbgEptodyewbFa + jmp NEAR $L$_do_n_blocks_EmbgEptodyewbFa + +$L$_steal_cipher_EmbgEptodyewbFa: + + vmovdqa xmm2,xmm8 + + + lea rax,[vpshufb_shf_table] + vmovdqu xmm10,XMMWORD[rdx*1+rax] + vpshufb xmm8,xmm8,xmm10 + + + vmovdqu xmm3,XMMWORD[((-16))+rdx*1+rdi] + vmovdqu XMMWORD[(-16)+rdx*1+rsi],xmm8 + + + lea rax,[vpshufb_shf_table] + add rax,16 + sub rax,rdx + vmovdqu xmm10,XMMWORD[rax] + vpxor xmm10,xmm10,XMMWORD[mask1] + vpshufb xmm3,xmm3,xmm10 + + vpblendvb xmm3,xmm3,xmm2,xmm10 + + + vpxor xmm8,xmm3,xmm0 + + + vpxor xmm8,xmm8,XMMWORD[rcx] + vaesdec xmm8,xmm8,XMMWORD[16+rcx] + vaesdec xmm8,xmm8,XMMWORD[32+rcx] + vaesdec xmm8,xmm8,XMMWORD[48+rcx] + vaesdec xmm8,xmm8,XMMWORD[64+rcx] + vaesdec xmm8,xmm8,XMMWORD[80+rcx] + vaesdec xmm8,xmm8,XMMWORD[96+rcx] + vaesdec xmm8,xmm8,XMMWORD[112+rcx] + vaesdec xmm8,xmm8,XMMWORD[128+rcx] + vaesdec xmm8,xmm8,XMMWORD[144+rcx] + vaesdec xmm8,xmm8,XMMWORD[160+rcx] + vaesdec xmm8,xmm8,XMMWORD[176+rcx] + vaesdec xmm8,xmm8,XMMWORD[192+rcx] + vaesdec xmm8,xmm8,XMMWORD[208+rcx] + vaesdeclast xmm8,xmm8,XMMWORD[224+rcx] + + vpxor xmm8,xmm8,xmm0 + +$L$_done_EmbgEptodyewbFa: + + vmovdqu XMMWORD[(-16)+rsi],xmm8 +$L$_ret_EmbgEptodyewbFa: + mov rbx,QWORD[288+rsp] + xor r8,r8 + mov QWORD[288+rsp],r8 + + vpxorq zmm0,zmm0,zmm0 + mov rdi,QWORD[((288 + 8))+rsp] + mov QWORD[((288 + 8))+rsp],r8 + mov rsi,QWORD[((288 + 16))+rsp] + mov QWORD[((288 + 16))+rsp],r8 + + vmovdqa xmm6,XMMWORD[((128 + 0))+rsp] + vmovdqa xmm7,XMMWORD[((128 + 16))+rsp] + vmovdqa xmm8,XMMWORD[((128 + 32))+rsp] + vmovdqa xmm9,XMMWORD[((128 + 48))+rsp] + + + vmovdqa64 ZMMWORD[128+rsp],zmm0 + + vmovdqa xmm10,XMMWORD[((128 + 64))+rsp] + vmovdqa xmm11,XMMWORD[((128 + 80))+rsp] + vmovdqa xmm12,XMMWORD[((128 + 96))+rsp] + vmovdqa xmm13,XMMWORD[((128 + 112))+rsp] + + + vmovdqa64 ZMMWORD[(128 + 64)+rsp],zmm0 + + vmovdqa xmm14,XMMWORD[((128 + 128))+rsp] + vmovdqa xmm15,XMMWORD[((128 + 144))+rsp] + + + + vmovdqa YMMWORD[(128 + 128)+rsp],ymm0 + mov rsp,rbp + pop rbp + vzeroupper + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$_less_than_128_bytes_EmbgEptodyewbFa: + cmp rdx,0x10 + jb NEAR $L$_ret_EmbgEptodyewbFa + + mov r8,rdx + and r8,0x70 + cmp r8,0x60 + je NEAR $L$_num_blocks_is_6_EmbgEptodyewbFa + cmp r8,0x50 + je NEAR $L$_num_blocks_is_5_EmbgEptodyewbFa + cmp r8,0x40 + je NEAR $L$_num_blocks_is_4_EmbgEptodyewbFa + cmp r8,0x30 + je NEAR $L$_num_blocks_is_3_EmbgEptodyewbFa + cmp r8,0x20 + je NEAR $L$_num_blocks_is_2_EmbgEptodyewbFa + cmp r8,0x10 + je NEAR $L$_num_blocks_is_1_EmbgEptodyewbFa + +$L$_num_blocks_is_7_EmbgEptodyewbFa: + vmovdqa xmm9,XMMWORD[rsp] + mov rax,QWORD[rsp] + mov rbx,QWORD[8+rsp] + vmovdqu xmm1,XMMWORD[rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[((16 + 8))+rsp],rbx + vmovdqa xmm10,XMMWORD[16+rsp] + 
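+; Each subsequent tweak is the previous one multiplied by x in GF(2^128):
+; shl/adc shift the 128-bit value left by one bit, and cmovc selects the
+; 0x87 reduction constant (held in r10) to XOR in on carry-out.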
vmovdqu xmm2,XMMWORD[16+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[32+rsp],rax + mov QWORD[((32 + 8))+rsp],rbx + vmovdqa xmm11,XMMWORD[32+rsp] + vmovdqu xmm3,XMMWORD[32+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[48+rsp],rax + mov QWORD[((48 + 8))+rsp],rbx + vmovdqa xmm12,XMMWORD[48+rsp] + vmovdqu xmm4,XMMWORD[48+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[64+rsp],rax + mov QWORD[((64 + 8))+rsp],rbx + vmovdqa xmm13,XMMWORD[64+rsp] + vmovdqu xmm5,XMMWORD[64+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[80+rsp],rax + mov QWORD[((80 + 8))+rsp],rbx + vmovdqa xmm14,XMMWORD[80+rsp] + vmovdqu xmm6,XMMWORD[80+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[96+rsp],rax + mov QWORD[((96 + 8))+rsp],rbx + vmovdqa xmm15,XMMWORD[96+rsp] + vmovdqu xmm7,XMMWORD[96+rdi] + add rdi,0x70 + and rdx,0xf + je NEAR $L$_done_7_EmbgEptodyewbFa + +$L$_steal_cipher_7_EmbgEptodyewbFa: + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[24+rsp],rbx + vmovdqa64 xmm16,xmm15 + vmovdqa xmm15,XMMWORD[16+rsp] + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vpxor xmm5,xmm5,xmm13 + vpxor xmm6,xmm6,xmm14 + vpxor xmm7,xmm7,xmm15 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vpxor xmm3,xmm3,xmm0 + vpxor xmm4,xmm4,xmm0 + vpxor xmm5,xmm5,xmm0 + vpxor xmm6,xmm6,xmm0 + vpxor xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + 
vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[176+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[192+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[208+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[224+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vaesdeclast xmm4,xmm4,xmm0 + vaesdeclast xmm5,xmm5,xmm0 + vaesdeclast xmm6,xmm6,xmm0 + vaesdeclast xmm7,xmm7,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vpxor xmm5,xmm5,xmm13 + vpxor xmm6,xmm6,xmm14 + vpxor xmm7,xmm7,xmm15 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + vmovdqu XMMWORD[32+rsi],xmm3 + vmovdqu XMMWORD[48+rsi],xmm4 + vmovdqu XMMWORD[64+rsi],xmm5 + vmovdqu XMMWORD[80+rsi],xmm6 + add rsi,0x70 + vmovdqa64 xmm0,xmm16 + vmovdqa xmm8,xmm7 + jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa + +$L$_done_7_EmbgEptodyewbFa: + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vpxor xmm5,xmm5,xmm13 + vpxor xmm6,xmm6,xmm14 + vpxor xmm7,xmm7,xmm15 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vpxor xmm3,xmm3,xmm0 + vpxor xmm4,xmm4,xmm0 + vpxor xmm5,xmm5,xmm0 + vpxor xmm6,xmm6,xmm0 + vpxor xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + 
vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[176+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[192+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[208+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[224+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vaesdeclast xmm4,xmm4,xmm0 + vaesdeclast xmm5,xmm5,xmm0 + vaesdeclast xmm6,xmm6,xmm0 + vaesdeclast xmm7,xmm7,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vpxor xmm5,xmm5,xmm13 + vpxor xmm6,xmm6,xmm14 + vpxor xmm7,xmm7,xmm15 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + vmovdqu XMMWORD[32+rsi],xmm3 + vmovdqu XMMWORD[48+rsi],xmm4 + vmovdqu XMMWORD[64+rsi],xmm5 + vmovdqu XMMWORD[80+rsi],xmm6 + add rsi,0x70 + vmovdqa xmm8,xmm7 + jmp NEAR $L$_done_EmbgEptodyewbFa + +$L$_num_blocks_is_6_EmbgEptodyewbFa: + vmovdqa xmm9,XMMWORD[rsp] + mov rax,QWORD[rsp] + mov rbx,QWORD[8+rsp] + vmovdqu xmm1,XMMWORD[rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[((16 + 8))+rsp],rbx + vmovdqa xmm10,XMMWORD[16+rsp] + vmovdqu xmm2,XMMWORD[16+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[32+rsp],rax + mov QWORD[((32 + 8))+rsp],rbx + vmovdqa xmm11,XMMWORD[32+rsp] + vmovdqu xmm3,XMMWORD[32+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[48+rsp],rax + mov QWORD[((48 + 8))+rsp],rbx + vmovdqa xmm12,XMMWORD[48+rsp] + vmovdqu xmm4,XMMWORD[48+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[64+rsp],rax + mov QWORD[((64 + 8))+rsp],rbx + vmovdqa xmm13,XMMWORD[64+rsp] + vmovdqu xmm5,XMMWORD[64+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[80+rsp],rax + mov QWORD[((80 + 8))+rsp],rbx + vmovdqa xmm14,XMMWORD[80+rsp] + vmovdqu xmm6,XMMWORD[80+rdi] + add rdi,0x60 + and rdx,0xf + je NEAR $L$_done_6_EmbgEptodyewbFa + +$L$_steal_cipher_6_EmbgEptodyewbFa: + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[24+rsp],rbx + vmovdqa64 xmm15,xmm14 + vmovdqa xmm14,XMMWORD[16+rsp] + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vpxor xmm5,xmm5,xmm13 + vpxor xmm6,xmm6,xmm14 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vpxor xmm3,xmm3,xmm0 + vpxor xmm4,xmm4,xmm0 + vpxor xmm5,xmm5,xmm0 + vpxor xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec 
xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[176+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[192+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[208+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[224+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vaesdeclast xmm4,xmm4,xmm0 + vaesdeclast xmm5,xmm5,xmm0 + vaesdeclast xmm6,xmm6,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vpxor xmm5,xmm5,xmm13 + vpxor xmm6,xmm6,xmm14 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + vmovdqu XMMWORD[32+rsi],xmm3 + vmovdqu XMMWORD[48+rsi],xmm4 + vmovdqu XMMWORD[64+rsi],xmm5 + add rsi,0x60 + vmovdqa xmm0,xmm15 + vmovdqa xmm8,xmm6 + jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa + +$L$_done_6_EmbgEptodyewbFa: + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vpxor xmm5,xmm5,xmm13 + vpxor xmm6,xmm6,xmm14 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vpxor xmm3,xmm3,xmm0 + vpxor xmm4,xmm4,xmm0 + vpxor xmm5,xmm5,xmm0 + vpxor xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + 
vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[176+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[192+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[208+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[224+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vaesdeclast xmm4,xmm4,xmm0 + vaesdeclast xmm5,xmm5,xmm0 + vaesdeclast xmm6,xmm6,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vpxor xmm5,xmm5,xmm13 + vpxor xmm6,xmm6,xmm14 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + vmovdqu XMMWORD[32+rsi],xmm3 + vmovdqu XMMWORD[48+rsi],xmm4 + vmovdqu XMMWORD[64+rsi],xmm5 + add rsi,0x60 + vmovdqa xmm8,xmm6 + jmp NEAR $L$_done_EmbgEptodyewbFa + +$L$_num_blocks_is_5_EmbgEptodyewbFa: + vmovdqa xmm9,XMMWORD[rsp] + mov rax,QWORD[rsp] + mov rbx,QWORD[8+rsp] + vmovdqu xmm1,XMMWORD[rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[((16 + 8))+rsp],rbx + vmovdqa xmm10,XMMWORD[16+rsp] + vmovdqu xmm2,XMMWORD[16+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[32+rsp],rax + mov QWORD[((32 + 8))+rsp],rbx + vmovdqa xmm11,XMMWORD[32+rsp] + vmovdqu xmm3,XMMWORD[32+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[48+rsp],rax + mov QWORD[((48 + 8))+rsp],rbx + vmovdqa xmm12,XMMWORD[48+rsp] + vmovdqu xmm4,XMMWORD[48+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[64+rsp],rax + mov QWORD[((64 + 8))+rsp],rbx + vmovdqa xmm13,XMMWORD[64+rsp] + vmovdqu xmm5,XMMWORD[64+rdi] + add rdi,0x50 + and rdx,0xf + je NEAR $L$_done_5_EmbgEptodyewbFa + +$L$_steal_cipher_5_EmbgEptodyewbFa: + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[24+rsp],rbx + vmovdqa64 xmm14,xmm13 
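+ ; Ciphertext-stealing path (editorial annotation, not generator output):
+ ; the tweak for the short trailing block is parked in xmm14 while the
+ ; freshly derived tweak is loaded into xmm13 for the last full block;
+ ; XTS decryption consumes these two tweaks in swapped order when
+ ; stealing.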
+ vmovdqa xmm13,XMMWORD[16+rsp] + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vpxor xmm5,xmm5,xmm13 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vpxor xmm3,xmm3,xmm0 + vpxor xmm4,xmm4,xmm0 + vpxor xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[176+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[192+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[208+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[224+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vaesdeclast xmm4,xmm4,xmm0 + vaesdeclast xmm5,xmm5,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vpxor xmm5,xmm5,xmm13 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + vmovdqu XMMWORD[32+rsi],xmm3 + vmovdqu XMMWORD[48+rsi],xmm4 + add rsi,0x50 + vmovdqa xmm0,xmm14 + vmovdqa xmm8,xmm5 + jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa + +$L$_done_5_EmbgEptodyewbFa: + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vpxor xmm5,xmm5,xmm13 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vpxor xmm3,xmm3,xmm0 + vpxor xmm4,xmm4,xmm0 + vpxor xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec 
xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[176+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[192+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[208+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[224+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vaesdeclast xmm4,xmm4,xmm0 + vaesdeclast xmm5,xmm5,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vpxor xmm5,xmm5,xmm13 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + vmovdqu XMMWORD[32+rsi],xmm3 + vmovdqu XMMWORD[48+rsi],xmm4 + add rsi,0x50 + vmovdqa xmm8,xmm5 + jmp NEAR $L$_done_EmbgEptodyewbFa + +$L$_num_blocks_is_4_EmbgEptodyewbFa: + vmovdqa xmm9,XMMWORD[rsp] + mov rax,QWORD[rsp] + mov rbx,QWORD[8+rsp] + vmovdqu xmm1,XMMWORD[rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[((16 + 8))+rsp],rbx + vmovdqa xmm10,XMMWORD[16+rsp] + vmovdqu xmm2,XMMWORD[16+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[32+rsp],rax + mov QWORD[((32 + 8))+rsp],rbx + vmovdqa xmm11,XMMWORD[32+rsp] + vmovdqu xmm3,XMMWORD[32+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[48+rsp],rax + mov QWORD[((48 + 8))+rsp],rbx + vmovdqa xmm12,XMMWORD[48+rsp] + vmovdqu xmm4,XMMWORD[48+rdi] + add rdi,0x40 + and rdx,0xf + je NEAR $L$_done_4_EmbgEptodyewbFa + +$L$_steal_cipher_4_EmbgEptodyewbFa: + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[24+rsp],rbx + vmovdqa64 xmm13,xmm12 + vmovdqa xmm12,XMMWORD[16+rsp] + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vpxor xmm3,xmm3,xmm0 + vpxor xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec 
xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[176+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[192+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[208+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[224+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vaesdeclast xmm4,xmm4,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + vmovdqu XMMWORD[32+rsi],xmm3 + add rsi,0x40 + vmovdqa xmm0,xmm13 + vmovdqa xmm8,xmm4 + jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa + +$L$_done_4_EmbgEptodyewbFa: + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vpxor xmm3,xmm3,xmm0 + vpxor xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[176+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec 
xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[192+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[208+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[224+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vaesdeclast xmm4,xmm4,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vpxor xmm4,xmm4,xmm12 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + vmovdqu XMMWORD[32+rsi],xmm3 + add rsi,0x40 + vmovdqa xmm8,xmm4 + jmp NEAR $L$_done_EmbgEptodyewbFa + +$L$_num_blocks_is_3_EmbgEptodyewbFa: + vmovdqa xmm9,XMMWORD[rsp] + mov rax,QWORD[rsp] + mov rbx,QWORD[8+rsp] + vmovdqu xmm1,XMMWORD[rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[((16 + 8))+rsp],rbx + vmovdqa xmm10,XMMWORD[16+rsp] + vmovdqu xmm2,XMMWORD[16+rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[32+rsp],rax + mov QWORD[((32 + 8))+rsp],rbx + vmovdqa xmm11,XMMWORD[32+rsp] + vmovdqu xmm3,XMMWORD[32+rdi] + add rdi,0x30 + and rdx,0xf + je NEAR $L$_done_3_EmbgEptodyewbFa + +$L$_steal_cipher_3_EmbgEptodyewbFa: + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[24+rsp],rbx + vmovdqa64 xmm12,xmm11 + vmovdqa xmm11,XMMWORD[16+rsp] + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vpxor xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[176+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[192+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[208+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[224+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + add rsi,0x30 + vmovdqa xmm0,xmm12 + vmovdqa xmm8,xmm3 + jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa + +$L$_done_3_EmbgEptodyewbFa: + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vmovdqu 
xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vpxor xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[176+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[192+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[208+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[224+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vpxor xmm3,xmm3,xmm11 + vmovdqu XMMWORD[rsi],xmm1 + vmovdqu XMMWORD[16+rsi],xmm2 + add rsi,0x30 + vmovdqa xmm8,xmm3 + jmp NEAR $L$_done_EmbgEptodyewbFa + +$L$_num_blocks_is_2_EmbgEptodyewbFa: + vmovdqa xmm9,XMMWORD[rsp] + mov rax,QWORD[rsp] + mov rbx,QWORD[8+rsp] + vmovdqu xmm1,XMMWORD[rdi] + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[((16 + 8))+rsp],rbx + vmovdqa xmm10,XMMWORD[16+rsp] + vmovdqu xmm2,XMMWORD[16+rdi] + add rdi,0x20 + and rdx,0xf + je NEAR $L$_done_2_EmbgEptodyewbFa + +$L$_steal_cipher_2_EmbgEptodyewbFa: + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[24+rsp],rbx + vmovdqa64 xmm11,xmm10 + vmovdqa xmm10,XMMWORD[16+rsp] + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[176+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[192+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[208+rcx] + vaesdec 
xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[224+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vmovdqu XMMWORD[rsi],xmm1 + add rsi,0x20 + vmovdqa xmm0,xmm11 + vmovdqa xmm8,xmm2 + jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa + +$L$_done_2_EmbgEptodyewbFa: + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vpxor xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[176+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[192+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[208+rcx] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[224+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vpxor xmm1,xmm1,xmm9 + vpxor xmm2,xmm2,xmm10 + vmovdqu XMMWORD[rsi],xmm1 + add rsi,0x20 + vmovdqa xmm8,xmm2 + jmp NEAR $L$_done_EmbgEptodyewbFa + +$L$_num_blocks_is_1_EmbgEptodyewbFa: + vmovdqa xmm9,XMMWORD[rsp] + mov rax,QWORD[rsp] + mov rbx,QWORD[8+rsp] + vmovdqu xmm1,XMMWORD[rdi] + add rdi,0x10 + and rdx,0xf + je NEAR $L$_done_1_EmbgEptodyewbFa + +$L$_steal_cipher_1_EmbgEptodyewbFa: + xor r11,r11 + shl rax,1 + adc rbx,rbx + cmovc r11,r10 + xor rax,r11 + mov QWORD[16+rsp],rax + mov QWORD[24+rsp],rbx + vmovdqa64 xmm10,xmm9 + vmovdqa xmm9,XMMWORD[16+rsp] + vpxor xmm1,xmm1,xmm9 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[176+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[192+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[208+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[224+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vpxor xmm1,xmm1,xmm9 + add rsi,0x10 + vmovdqa xmm0,xmm10 + vmovdqa xmm8,xmm1 + jmp NEAR $L$_steal_cipher_EmbgEptodyewbFa + +$L$_done_1_EmbgEptodyewbFa: + vpxor xmm1,xmm1,xmm9 + vmovdqu xmm0,XMMWORD[rcx] + vpxor xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[16+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[32+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[48+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[64+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[80+rcx] + vaesdec xmm1,xmm1,xmm0 + 
vmovdqu xmm0,XMMWORD[96+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[112+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[128+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[144+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[160+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[176+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[192+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[208+rcx] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[224+rcx] + vaesdeclast xmm1,xmm1,xmm0 + vpxor xmm1,xmm1,xmm9 + add rsi,0x10 + vmovdqa xmm8,xmm1 + jmp NEAR $L$_done_EmbgEptodyewbFa + +section .rdata rdata align=8 +ALIGN 16 + +vpshufb_shf_table: + DQ 0x8786858483828100,0x8f8e8d8c8b8a8988 + DQ 0x0706050403020100,0x000e0d0c0b0a0908 + +mask1: + DQ 0x8080808080808080,0x8080808080808080 + +const_dq3210: + DQ 0,0,1,1,2,2,3,3 +const_dq5678: + DQ 8,8,7,7,6,6,5,5 +const_dq7654: + DQ 4,4,5,5,6,6,7,7 +const_dq1234: + DQ 4,4,3,3,2,2,1,1 + +shufb_15_7: +DB 15,0xff,0xff,0xff,0xff,0xff,0xff,0xff,7,0xff,0xff +DB 0xff,0xff,0xff,0xff,0xff + +section .text code align=64 + diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-2k-avxifma.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-2k-avxifma.nasm new file mode 100644 index 0000000000..6c1ccfb458 --- /dev/null +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-2k-avxifma.nasm @@ -0,0 +1,1276 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + +EXTERN OPENSSL_ia32cap_P +global ossl_rsaz_avxifma_eligible + +ALIGN 32 +ossl_rsaz_avxifma_eligible: + mov ecx,DWORD[((OPENSSL_ia32cap_P+20))] + xor eax,eax + and ecx,8388608 + cmp ecx,8388608 + cmove eax,ecx + DB 0F3h,0C3h ;repret + +section .text code align=64 + + +global ossl_rsaz_amm52x20_x1_avxifma256 + +ALIGN 32 +ossl_rsaz_amm52x20_x1_avxifma256: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ossl_rsaz_amm52x20_x1_avxifma256: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + + + +DB 243,15,30,250 + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + +$L$ossl_rsaz_amm52x20_x1_avxifma256_body: + + + vpxor ymm0,ymm0,ymm0 + vmovapd ymm3,ymm0 + vmovapd ymm5,ymm0 + vmovapd ymm6,ymm0 + vmovapd ymm7,ymm0 + vmovapd ymm8,ymm0 + + xor r9d,r9d + + mov r11,rdx + mov rax,0xfffffffffffff + + + mov ebx,5 + +ALIGN 32 +$L$loop5: + mov r13,QWORD[r11] + + vpbroadcastq ymm1,QWORD[r11] + mov rdx,QWORD[rsi] + mulx r12,r13,r13 + add r9,r13 + mov r10,r12 + adc r10,0 + + mov r13,r8 + imul r13,r9 + and r13,rax + + vmovq xmm2,r13 + vpbroadcastq ymm2,xmm2 + mov rdx,QWORD[rcx] + mulx r12,r13,r13 + add r9,r13 + adc r10,r12 + + shr r9,52 + sal r10,12 + or r9,r10 + + lea rsp,[((-168))+rsp] +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm8 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm8 + + + vmovdqu YMMWORD[rsp],ymm3 + vmovdqu YMMWORD[32+rsp],ymm5 + vmovdqu YMMWORD[64+rsp],ymm6 + vmovdqu YMMWORD[96+rsp],ymm7 + vmovdqu YMMWORD[128+rsp],ymm8 + mov QWORD[160+rsp],0 + + vmovdqu ymm3,YMMWORD[8+rsp] + vmovdqu ymm5,YMMWORD[40+rsp] + vmovdqu ymm6,YMMWORD[72+rsp] + vmovdqu ymm7,YMMWORD[104+rsp] + vmovdqu 
ymm8,YMMWORD[136+rsp] + + add r9,QWORD[8+rsp] + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm8 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm8 + lea rsp,[168+rsp] + mov r13,QWORD[8+r11] + + vpbroadcastq ymm1,QWORD[8+r11] + mov rdx,QWORD[rsi] + mulx r12,r13,r13 + add r9,r13 + mov r10,r12 + adc r10,0 + + mov r13,r8 + imul r13,r9 + and r13,rax + + vmovq xmm2,r13 + vpbroadcastq ymm2,xmm2 + mov rdx,QWORD[rcx] + mulx r12,r13,r13 + add r9,r13 + adc r10,r12 + + shr r9,52 + sal r10,12 + or r9,r10 + + lea rsp,[((-168))+rsp] +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm8 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm8 + + + vmovdqu YMMWORD[rsp],ymm3 + vmovdqu YMMWORD[32+rsp],ymm5 + vmovdqu YMMWORD[64+rsp],ymm6 + vmovdqu YMMWORD[96+rsp],ymm7 + vmovdqu YMMWORD[128+rsp],ymm8 + mov QWORD[160+rsp],0 + + vmovdqu ymm3,YMMWORD[8+rsp] + vmovdqu ymm5,YMMWORD[40+rsp] + vmovdqu ymm6,YMMWORD[72+rsp] + vmovdqu ymm7,YMMWORD[104+rsp] + vmovdqu ymm8,YMMWORD[136+rsp] + + add r9,QWORD[8+rsp] + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm8 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm8 + lea rsp,[168+rsp] + mov r13,QWORD[16+r11] + + vpbroadcastq ymm1,QWORD[16+r11] + mov rdx,QWORD[rsi] + mulx r12,r13,r13 + add r9,r13 + mov r10,r12 + adc r10,0 + + mov r13,r8 + imul r13,r9 + and r13,rax + + vmovq xmm2,r13 + vpbroadcastq ymm2,xmm2 + mov rdx,QWORD[rcx] + mulx r12,r13,r13 + add r9,r13 + adc r10,r12 + + shr r9,52 + sal r10,12 + or r9,r10 + + lea rsp,[((-168))+rsp] +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm8 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm8 + + + vmovdqu YMMWORD[rsp],ymm3 + vmovdqu YMMWORD[32+rsp],ymm5 + vmovdqu YMMWORD[64+rsp],ymm6 + vmovdqu YMMWORD[96+rsp],ymm7 + vmovdqu YMMWORD[128+rsp],ymm8 + mov QWORD[160+rsp],0 + + vmovdqu ymm3,YMMWORD[8+rsp] + vmovdqu ymm5,YMMWORD[40+rsp] + vmovdqu ymm6,YMMWORD[72+rsp] + vmovdqu ymm7,YMMWORD[104+rsp] + vmovdqu ymm8,YMMWORD[136+rsp] + + add r9,QWORD[8+rsp] + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm8 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 64(%rcx), %ymm2, 
%ymm6 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm8 + lea rsp,[168+rsp] + mov r13,QWORD[24+r11] + + vpbroadcastq ymm1,QWORD[24+r11] + mov rdx,QWORD[rsi] + mulx r12,r13,r13 + add r9,r13 + mov r10,r12 + adc r10,0 + + mov r13,r8 + imul r13,r9 + and r13,rax + + vmovq xmm2,r13 + vpbroadcastq ymm2,xmm2 + mov rdx,QWORD[rcx] + mulx r12,r13,r13 + add r9,r13 + adc r10,r12 + + shr r9,52 + sal r10,12 + or r9,r10 + + lea rsp,[((-168))+rsp] +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm8 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm8 + + + vmovdqu YMMWORD[rsp],ymm3 + vmovdqu YMMWORD[32+rsp],ymm5 + vmovdqu YMMWORD[64+rsp],ymm6 + vmovdqu YMMWORD[96+rsp],ymm7 + vmovdqu YMMWORD[128+rsp],ymm8 + mov QWORD[160+rsp],0 + + vmovdqu ymm3,YMMWORD[8+rsp] + vmovdqu ymm5,YMMWORD[40+rsp] + vmovdqu ymm6,YMMWORD[72+rsp] + vmovdqu ymm7,YMMWORD[104+rsp] + vmovdqu ymm8,YMMWORD[136+rsp] + + add r9,QWORD[8+rsp] + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm8 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm8 + lea rsp,[168+rsp] + lea r11,[32+r11] + dec ebx + jne NEAR $L$loop5 + + vmovq xmm0,r9 + vpbroadcastq ymm0,xmm0 + vpblendd ymm3,ymm3,ymm0,3 + + + + vpsrlq ymm0,ymm3,52 + vpsrlq ymm1,ymm5,52 + vpsrlq ymm2,ymm6,52 + vpsrlq ymm13,ymm7,52 + vpsrlq ymm14,ymm8,52 + + + vpermq ymm14,ymm14,144 + vpermq ymm15,ymm13,3 + vblendpd ymm14,ymm14,ymm15,1 + + vpermq ymm13,ymm13,144 + vpermq ymm15,ymm2,3 + vblendpd ymm13,ymm13,ymm15,1 + + vpermq ymm2,ymm2,144 + vpermq ymm15,ymm1,3 + vblendpd ymm2,ymm2,ymm15,1 + + vpermq ymm1,ymm1,144 + vpermq ymm15,ymm0,3 + vblendpd ymm1,ymm1,ymm15,1 + + vpermq ymm0,ymm0,144 + vpand ymm0,ymm0,YMMWORD[$L$high64x3] + + + vpand ymm3,ymm3,YMMWORD[$L$mask52x4] + vpand ymm5,ymm5,YMMWORD[$L$mask52x4] + vpand ymm6,ymm6,YMMWORD[$L$mask52x4] + vpand ymm7,ymm7,YMMWORD[$L$mask52x4] + vpand ymm8,ymm8,YMMWORD[$L$mask52x4] + + + vpaddq ymm3,ymm3,ymm0 + vpaddq ymm5,ymm5,ymm1 + vpaddq ymm6,ymm6,ymm2 + vpaddq ymm7,ymm7,ymm13 + vpaddq ymm8,ymm8,ymm14 + + + + vpcmpgtq ymm0,ymm3,YMMWORD[$L$mask52x4] + vpcmpgtq ymm1,ymm5,YMMWORD[$L$mask52x4] + vpcmpgtq ymm2,ymm6,YMMWORD[$L$mask52x4] + vpcmpgtq ymm13,ymm7,YMMWORD[$L$mask52x4] + vpcmpgtq ymm14,ymm8,YMMWORD[$L$mask52x4] + vmovmskpd r14d,ymm0 + vmovmskpd r13d,ymm1 + vmovmskpd r12d,ymm2 + vmovmskpd r11d,ymm13 + vmovmskpd r10d,ymm14 + + + vpcmpeqq ymm0,ymm3,YMMWORD[$L$mask52x4] + vpcmpeqq ymm1,ymm5,YMMWORD[$L$mask52x4] + vpcmpeqq ymm2,ymm6,YMMWORD[$L$mask52x4] + vpcmpeqq ymm13,ymm7,YMMWORD[$L$mask52x4] + vpcmpeqq ymm14,ymm8,YMMWORD[$L$mask52x4] + vmovmskpd r9d,ymm0 + vmovmskpd r8d,ymm1 + vmovmskpd ebx,ymm2 + vmovmskpd ecx,ymm13 + vmovmskpd edx,ymm14 + + + + shl r13b,4 + or r14b,r13b + shl r11b,4 + or r12b,r11b + + add r14b,r14b + adc r12b,r12b + adc r10b,r10b + + shl r8b,4 + or r9b,r8b + shl cl,4 + or bl,cl + + add r14b,r9b + adc r12b,bl + adc r10b,dl + + xor r14b,r9b + xor r12b,bl + xor r10b,dl + + lea 
rdx,[$L$kmasklut] + + mov r13b,r14b + and r14,0xf + vpsubq ymm0,ymm3,YMMWORD[$L$mask52x4] + shl r14,5 + vmovapd r14,(%rdx), %ymm2 + vblendvpd ymm3,ymm3,ymm0,ymm2 + + shr r13b,4 + and r13,0xf + vpsubq ymm0,ymm5,YMMWORD[$L$mask52x4] + shl r13,5 + vmovapd r13,(%rdx), %ymm2 + vblendvpd ymm5,ymm5,ymm0,ymm2 + + mov r11b,r12b + and r12,0xf + vpsubq ymm0,ymm6,YMMWORD[$L$mask52x4] + shl r12,5 + vmovapd r12,(%rdx), %ymm2 + vblendvpd ymm6,ymm6,ymm0,ymm2 + + shr r11b,4 + and r11,0xf + vpsubq ymm0,ymm7,YMMWORD[$L$mask52x4] + shl r11,5 + vmovapd r11,(%rdx), %ymm2 + vblendvpd ymm7,ymm7,ymm0,ymm2 + + and r10,0xf + vpsubq ymm0,ymm8,YMMWORD[$L$mask52x4] + shl r10,5 + vmovapd r10,(%rdx), %ymm2 + vblendvpd ymm8,ymm8,ymm0,ymm2 + + + vpand ymm3,ymm3,YMMWORD[$L$mask52x4] + vpand ymm5,ymm5,YMMWORD[$L$mask52x4] + vpand ymm6,ymm6,YMMWORD[$L$mask52x4] + vpand ymm7,ymm7,YMMWORD[$L$mask52x4] + vpand ymm8,ymm8,YMMWORD[$L$mask52x4] + + vmovdqu YMMWORD[rdi],ymm3 + vmovdqu YMMWORD[32+rdi],ymm5 + vmovdqu YMMWORD[64+rdi],ymm6 + vmovdqu YMMWORD[96+rdi],ymm7 + vmovdqu YMMWORD[128+rdi],ymm8 + + vzeroupper + mov r15,QWORD[rsp] + + mov r14,QWORD[8+rsp] + + mov r13,QWORD[16+rsp] + + mov r12,QWORD[24+rsp] + + mov rbp,QWORD[32+rsp] + + mov rbx,QWORD[40+rsp] + + lea rsp,[48+rsp] + +$L$ossl_rsaz_amm52x20_x1_avxifma256_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_ossl_rsaz_amm52x20_x1_avxifma256: +section .rdata rdata align=32 +ALIGN 32 +$L$mask52x4: + DQ 0xfffffffffffff + DQ 0xfffffffffffff + DQ 0xfffffffffffff + DQ 0xfffffffffffff +$L$high64x3: + DQ 0x0 + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff +$L$kmasklut: + + DQ 0x0 + DQ 0x0 + DQ 0x0 + DQ 0x0 + + DQ 0xffffffffffffffff + DQ 0x0 + DQ 0x0 + DQ 0x0 + + DQ 0x0 + DQ 0xffffffffffffffff + DQ 0x0 + DQ 0x0 + + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0x0 + DQ 0x0 + + DQ 0x0 + DQ 0x0 + DQ 0xffffffffffffffff + DQ 0x0 + + DQ 0xffffffffffffffff + DQ 0x0 + DQ 0xffffffffffffffff + DQ 0x0 + + DQ 0x0 + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0x0 + + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0x0 + + DQ 0x0 + DQ 0x0 + DQ 0x0 + DQ 0xffffffffffffffff + + DQ 0xffffffffffffffff + DQ 0x0 + DQ 0x0 + DQ 0xffffffffffffffff + + DQ 0x0 + DQ 0xffffffffffffffff + DQ 0x0 + DQ 0xffffffffffffffff + + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0x0 + DQ 0xffffffffffffffff + + DQ 0x0 + DQ 0x0 + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + + DQ 0xffffffffffffffff + DQ 0x0 + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + + DQ 0x0 + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff +section .text code align=64 + + +global ossl_rsaz_amm52x20_x2_avxifma256 + +ALIGN 32 +ossl_rsaz_amm52x20_x2_avxifma256: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ossl_rsaz_amm52x20_x2_avxifma256: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + + + +DB 243,15,30,250 + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + +$L$ossl_rsaz_amm52x20_x2_avxifma256_body: + + + vpxor ymm0,ymm0,ymm0 + vmovapd ymm3,ymm0 + vmovapd ymm5,ymm0 + vmovapd ymm6,ymm0 + vmovapd ymm7,ymm0 + vmovapd ymm8,ymm0 + vmovapd ymm4,ymm0 + vmovapd ymm9,ymm0 + vmovapd ymm10,ymm0 + vmovapd ymm11,ymm0 + vmovapd ymm12,ymm0 + + xor r9d,r9d + xor r15d,r15d + + mov r11,rdx + mov rax,0xfffffffffffff + + mov 
ebx,20 + +ALIGN 32 +$L$loop20: + mov r13,QWORD[r11] + + vpbroadcastq ymm1,QWORD[r11] + mov rdx,QWORD[rsi] + mulx r12,r13,r13 + add r9,r13 + mov r10,r12 + adc r10,0 + + mov r13,QWORD[r8] + imul r13,r9 + and r13,rax + + vmovq xmm2,r13 + vpbroadcastq ymm2,xmm2 + mov rdx,QWORD[rcx] + mulx r12,r13,r13 + add r9,r13 + adc r10,r12 + + shr r9,52 + sal r10,12 + or r9,r10 + + lea rsp,[((-168))+rsp] +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm8 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm8 + + + vmovdqu YMMWORD[rsp],ymm3 + vmovdqu YMMWORD[32+rsp],ymm5 + vmovdqu YMMWORD[64+rsp],ymm6 + vmovdqu YMMWORD[96+rsp],ymm7 + vmovdqu YMMWORD[128+rsp],ymm8 + mov QWORD[160+rsp],0 + + vmovdqu ymm3,YMMWORD[8+rsp] + vmovdqu ymm5,YMMWORD[40+rsp] + vmovdqu ymm6,YMMWORD[72+rsp] + vmovdqu ymm7,YMMWORD[104+rsp] + vmovdqu ymm8,YMMWORD[136+rsp] + + add r9,QWORD[8+rsp] + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm8 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm8 + lea rsp,[168+rsp] + mov r13,QWORD[160+r11] + + vpbroadcastq ymm1,QWORD[160+r11] + mov rdx,QWORD[160+rsi] + mulx r12,r13,r13 + add r15,r13 + mov r10,r12 + adc r10,0 + + mov r13,QWORD[8+r8] + imul r13,r15 + and r13,rax + + vmovq xmm2,r13 + vpbroadcastq ymm2,xmm2 + mov rdx,QWORD[160+rcx] + mulx r12,r13,r13 + add r15,r13 + adc r10,r12 + + shr r15,52 + sal r10,12 + or r15,r10 + + lea rsp,[((-168))+rsp] +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10 +{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm11 +{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 +{vex} vpmadd52luq 256(%rcx), %ymm2, %ymm11 +{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm12 + + + vmovdqu YMMWORD[rsp],ymm4 + vmovdqu YMMWORD[32+rsp],ymm9 + vmovdqu YMMWORD[64+rsp],ymm10 + vmovdqu YMMWORD[96+rsp],ymm11 + vmovdqu YMMWORD[128+rsp],ymm12 + mov QWORD[160+rsp],0 + + vmovdqu ymm4,YMMWORD[8+rsp] + vmovdqu ymm9,YMMWORD[40+rsp] + vmovdqu ymm10,YMMWORD[72+rsp] + vmovdqu ymm11,YMMWORD[104+rsp] + vmovdqu ymm12,YMMWORD[136+rsp] + + add r15,QWORD[8+rsp] + +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 +{vex} vpmadd52huq 256(%rsi), %ymm1, %ymm11 +{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 +{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm11 +{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm12 + lea rsp,[168+rsp] + lea r11,[8+r11] + dec ebx + jne NEAR $L$loop20 + + vmovq xmm0,r9 + vpbroadcastq ymm0,xmm0 + vpblendd ymm3,ymm3,ymm0,3 + + + + vpsrlq ymm0,ymm3,52 + vpsrlq ymm1,ymm5,52 + vpsrlq ymm2,ymm6,52 + vpsrlq ymm13,ymm7,52 + vpsrlq ymm14,ymm8,52 + + + vpermq ymm14,ymm14,144 + vpermq 
ymm15,ymm13,3 + vblendpd ymm14,ymm14,ymm15,1 + + vpermq ymm13,ymm13,144 + vpermq ymm15,ymm2,3 + vblendpd ymm13,ymm13,ymm15,1 + + vpermq ymm2,ymm2,144 + vpermq ymm15,ymm1,3 + vblendpd ymm2,ymm2,ymm15,1 + + vpermq ymm1,ymm1,144 + vpermq ymm15,ymm0,3 + vblendpd ymm1,ymm1,ymm15,1 + + vpermq ymm0,ymm0,144 + vpand ymm0,ymm0,YMMWORD[$L$high64x3] + + + vpand ymm3,ymm3,YMMWORD[$L$mask52x4] + vpand ymm5,ymm5,YMMWORD[$L$mask52x4] + vpand ymm6,ymm6,YMMWORD[$L$mask52x4] + vpand ymm7,ymm7,YMMWORD[$L$mask52x4] + vpand ymm8,ymm8,YMMWORD[$L$mask52x4] + + + vpaddq ymm3,ymm3,ymm0 + vpaddq ymm5,ymm5,ymm1 + vpaddq ymm6,ymm6,ymm2 + vpaddq ymm7,ymm7,ymm13 + vpaddq ymm8,ymm8,ymm14 + + + + vpcmpgtq ymm0,ymm3,YMMWORD[$L$mask52x4] + vpcmpgtq ymm1,ymm5,YMMWORD[$L$mask52x4] + vpcmpgtq ymm2,ymm6,YMMWORD[$L$mask52x4] + vpcmpgtq ymm13,ymm7,YMMWORD[$L$mask52x4] + vpcmpgtq ymm14,ymm8,YMMWORD[$L$mask52x4] + vmovmskpd r14d,ymm0 + vmovmskpd r13d,ymm1 + vmovmskpd r12d,ymm2 + vmovmskpd r11d,ymm13 + vmovmskpd r10d,ymm14 + + + vpcmpeqq ymm0,ymm3,YMMWORD[$L$mask52x4] + vpcmpeqq ymm1,ymm5,YMMWORD[$L$mask52x4] + vpcmpeqq ymm2,ymm6,YMMWORD[$L$mask52x4] + vpcmpeqq ymm13,ymm7,YMMWORD[$L$mask52x4] + vpcmpeqq ymm14,ymm8,YMMWORD[$L$mask52x4] + vmovmskpd r9d,ymm0 + vmovmskpd r8d,ymm1 + vmovmskpd ebx,ymm2 + vmovmskpd ecx,ymm13 + vmovmskpd edx,ymm14 + + + + shl r13b,4 + or r14b,r13b + shl r11b,4 + or r12b,r11b + + add r14b,r14b + adc r12b,r12b + adc r10b,r10b + + shl r8b,4 + or r9b,r8b + shl cl,4 + or bl,cl + + add r14b,r9b + adc r12b,bl + adc r10b,dl + + xor r14b,r9b + xor r12b,bl + xor r10b,dl + + lea rdx,[$L$kmasklut] + + mov r13b,r14b + and r14,0xf + vpsubq ymm0,ymm3,YMMWORD[$L$mask52x4] + shl r14,5 + vmovapd r14,(%rdx), %ymm2 + vblendvpd ymm3,ymm3,ymm0,ymm2 + + shr r13b,4 + and r13,0xf + vpsubq ymm0,ymm5,YMMWORD[$L$mask52x4] + shl r13,5 + vmovapd r13,(%rdx), %ymm2 + vblendvpd ymm5,ymm5,ymm0,ymm2 + + mov r11b,r12b + and r12,0xf + vpsubq ymm0,ymm6,YMMWORD[$L$mask52x4] + shl r12,5 + vmovapd r12,(%rdx), %ymm2 + vblendvpd ymm6,ymm6,ymm0,ymm2 + + shr r11b,4 + and r11,0xf + vpsubq ymm0,ymm7,YMMWORD[$L$mask52x4] + shl r11,5 + vmovapd r11,(%rdx), %ymm2 + vblendvpd ymm7,ymm7,ymm0,ymm2 + + and r10,0xf + vpsubq ymm0,ymm8,YMMWORD[$L$mask52x4] + shl r10,5 + vmovapd r10,(%rdx), %ymm2 + vblendvpd ymm8,ymm8,ymm0,ymm2 + + + vpand ymm3,ymm3,YMMWORD[$L$mask52x4] + vpand ymm5,ymm5,YMMWORD[$L$mask52x4] + vpand ymm6,ymm6,YMMWORD[$L$mask52x4] + vpand ymm7,ymm7,YMMWORD[$L$mask52x4] + vpand ymm8,ymm8,YMMWORD[$L$mask52x4] + + vmovq xmm0,r15 + vpbroadcastq ymm0,xmm0 + vpblendd ymm4,ymm4,ymm0,3 + + + + vpsrlq ymm0,ymm4,52 + vpsrlq ymm1,ymm9,52 + vpsrlq ymm2,ymm10,52 + vpsrlq ymm13,ymm11,52 + vpsrlq ymm14,ymm12,52 + + + vpermq ymm14,ymm14,144 + vpermq ymm15,ymm13,3 + vblendpd ymm14,ymm14,ymm15,1 + + vpermq ymm13,ymm13,144 + vpermq ymm15,ymm2,3 + vblendpd ymm13,ymm13,ymm15,1 + + vpermq ymm2,ymm2,144 + vpermq ymm15,ymm1,3 + vblendpd ymm2,ymm2,ymm15,1 + + vpermq ymm1,ymm1,144 + vpermq ymm15,ymm0,3 + vblendpd ymm1,ymm1,ymm15,1 + + vpermq ymm0,ymm0,144 + vpand ymm0,ymm0,YMMWORD[$L$high64x3] + + + vpand ymm4,ymm4,YMMWORD[$L$mask52x4] + vpand ymm9,ymm9,YMMWORD[$L$mask52x4] + vpand ymm10,ymm10,YMMWORD[$L$mask52x4] + vpand ymm11,ymm11,YMMWORD[$L$mask52x4] + vpand ymm12,ymm12,YMMWORD[$L$mask52x4] + + + vpaddq ymm4,ymm4,ymm0 + vpaddq ymm9,ymm9,ymm1 + vpaddq ymm10,ymm10,ymm2 + vpaddq ymm11,ymm11,ymm13 + vpaddq ymm12,ymm12,ymm14 + + + + vpcmpgtq ymm0,ymm4,YMMWORD[$L$mask52x4] + vpcmpgtq ymm1,ymm9,YMMWORD[$L$mask52x4] + vpcmpgtq ymm2,ymm10,YMMWORD[$L$mask52x4] + vpcmpgtq 
ymm13,ymm11,YMMWORD[$L$mask52x4] + vpcmpgtq ymm14,ymm12,YMMWORD[$L$mask52x4] + vmovmskpd r14d,ymm0 + vmovmskpd r13d,ymm1 + vmovmskpd r12d,ymm2 + vmovmskpd r11d,ymm13 + vmovmskpd r10d,ymm14 + + + vpcmpeqq ymm0,ymm4,YMMWORD[$L$mask52x4] + vpcmpeqq ymm1,ymm9,YMMWORD[$L$mask52x4] + vpcmpeqq ymm2,ymm10,YMMWORD[$L$mask52x4] + vpcmpeqq ymm13,ymm11,YMMWORD[$L$mask52x4] + vpcmpeqq ymm14,ymm12,YMMWORD[$L$mask52x4] + vmovmskpd r9d,ymm0 + vmovmskpd r8d,ymm1 + vmovmskpd ebx,ymm2 + vmovmskpd ecx,ymm13 + vmovmskpd edx,ymm14 + + + + shl r13b,4 + or r14b,r13b + shl r11b,4 + or r12b,r11b + + add r14b,r14b + adc r12b,r12b + adc r10b,r10b + + shl r8b,4 + or r9b,r8b + shl cl,4 + or bl,cl + + add r14b,r9b + adc r12b,bl + adc r10b,dl + + xor r14b,r9b + xor r12b,bl + xor r10b,dl + + lea rdx,[$L$kmasklut] + + mov r13b,r14b + and r14,0xf + vpsubq ymm0,ymm4,YMMWORD[$L$mask52x4] + shl r14,5 + vmovapd r14,(%rdx), %ymm2 + vblendvpd ymm4,ymm4,ymm0,ymm2 + + shr r13b,4 + and r13,0xf + vpsubq ymm0,ymm9,YMMWORD[$L$mask52x4] + shl r13,5 + vmovapd r13,(%rdx), %ymm2 + vblendvpd ymm9,ymm9,ymm0,ymm2 + + mov r11b,r12b + and r12,0xf + vpsubq ymm0,ymm10,YMMWORD[$L$mask52x4] + shl r12,5 + vmovapd r12,(%rdx), %ymm2 + vblendvpd ymm10,ymm10,ymm0,ymm2 + + shr r11b,4 + and r11,0xf + vpsubq ymm0,ymm11,YMMWORD[$L$mask52x4] + shl r11,5 + vmovapd r11,(%rdx), %ymm2 + vblendvpd ymm11,ymm11,ymm0,ymm2 + + and r10,0xf + vpsubq ymm0,ymm12,YMMWORD[$L$mask52x4] + shl r10,5 + vmovapd r10,(%rdx), %ymm2 + vblendvpd ymm12,ymm12,ymm0,ymm2 + + + vpand ymm4,ymm4,YMMWORD[$L$mask52x4] + vpand ymm9,ymm9,YMMWORD[$L$mask52x4] + vpand ymm10,ymm10,YMMWORD[$L$mask52x4] + vpand ymm11,ymm11,YMMWORD[$L$mask52x4] + vpand ymm12,ymm12,YMMWORD[$L$mask52x4] + + vmovdqu YMMWORD[rdi],ymm3 + vmovdqu YMMWORD[32+rdi],ymm5 + vmovdqu YMMWORD[64+rdi],ymm6 + vmovdqu YMMWORD[96+rdi],ymm7 + vmovdqu YMMWORD[128+rdi],ymm8 + + vmovdqu YMMWORD[160+rdi],ymm4 + vmovdqu YMMWORD[192+rdi],ymm9 + vmovdqu YMMWORD[224+rdi],ymm10 + vmovdqu YMMWORD[256+rdi],ymm11 + vmovdqu YMMWORD[288+rdi],ymm12 + + vzeroupper + mov r15,QWORD[rsp] + + mov r14,QWORD[8+rsp] + + mov r13,QWORD[16+rsp] + + mov r12,QWORD[24+rsp] + + mov rbp,QWORD[32+rsp] + + mov rbx,QWORD[40+rsp] + + lea rsp,[48+rsp] + +$L$ossl_rsaz_amm52x20_x2_avxifma256_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_ossl_rsaz_amm52x20_x2_avxifma256: +section .text code align=64 + + +ALIGN 32 +global ossl_extract_multiplier_2x20_win5_avx + +ossl_extract_multiplier_2x20_win5_avx: + +DB 243,15,30,250 + vmovapd ymm14,YMMWORD[$L$ones] + vmovq xmm10,r8 + vpbroadcastq ymm12,xmm10 + vmovq xmm10,r9 + vpbroadcastq ymm13,xmm10 + lea rax,[10240+rdx] + + + vpxor xmm0,xmm0,xmm0 + vmovapd ymm11,ymm0 + vmovapd ymm1,ymm0 + vmovapd ymm2,ymm0 + vmovapd ymm3,ymm0 + vmovapd ymm4,ymm0 + vmovapd ymm5,ymm0 + vmovapd ymm6,ymm0 + vmovapd ymm7,ymm0 + vmovapd ymm8,ymm0 + vmovapd ymm9,ymm0 + +ALIGN 32 +$L$loop: + vpcmpeqq ymm15,ymm12,ymm11 + vmovdqu ymm10,YMMWORD[rdx] + vblendvpd ymm0,ymm0,ymm10,ymm15 + vmovdqu ymm10,YMMWORD[32+rdx] + vblendvpd ymm1,ymm1,ymm10,ymm15 + vmovdqu ymm10,YMMWORD[64+rdx] + vblendvpd ymm2,ymm2,ymm10,ymm15 + vmovdqu ymm10,YMMWORD[96+rdx] + vblendvpd ymm3,ymm3,ymm10,ymm15 + vmovdqu ymm10,YMMWORD[128+rdx] + vblendvpd ymm4,ymm4,ymm10,ymm15 + vpcmpeqq ymm15,ymm13,ymm11 + vmovdqu ymm10,YMMWORD[160+rdx] + vblendvpd ymm5,ymm5,ymm10,ymm15 + vmovdqu ymm10,YMMWORD[192+rdx] + vblendvpd ymm6,ymm6,ymm10,ymm15 + vmovdqu ymm10,YMMWORD[224+rdx] + vblendvpd ymm7,ymm7,ymm10,ymm15 + vmovdqu 
ymm10,YMMWORD[256+rdx] + vblendvpd ymm8,ymm8,ymm10,ymm15 + vmovdqu ymm10,YMMWORD[288+rdx] + vblendvpd ymm9,ymm9,ymm10,ymm15 + vpaddq ymm11,ymm11,ymm14 + add rdx,320 + cmp rax,rdx + jne NEAR $L$loop + vmovdqu YMMWORD[rcx],ymm0 + vmovdqu YMMWORD[32+rcx],ymm1 + vmovdqu YMMWORD[64+rcx],ymm2 + vmovdqu YMMWORD[96+rcx],ymm3 + vmovdqu YMMWORD[128+rcx],ymm4 + vmovdqu YMMWORD[160+rcx],ymm5 + vmovdqu YMMWORD[192+rcx],ymm6 + vmovdqu YMMWORD[224+rcx],ymm7 + vmovdqu YMMWORD[256+rcx],ymm8 + vmovdqu YMMWORD[288+rcx],ymm9 + DB 0F3h,0C3h ;repret + + +section .rdata rdata align=32 +ALIGN 32 +$L$ones: + DQ 1,1,1,1 +$L$zeros: + DQ 0,0,0,0 +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +rsaz_def_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + lea rax,[48+rax] + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov r15,QWORD[((-48))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + +$L$common_seh_tail: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_ossl_rsaz_amm52x20_x1_avxifma256 wrt ..imagebase + DD $L$SEH_end_ossl_rsaz_amm52x20_x1_avxifma256 wrt ..imagebase + DD $L$SEH_info_ossl_rsaz_amm52x20_x1_avxifma256 wrt ..imagebase + + DD $L$SEH_begin_ossl_rsaz_amm52x20_x2_avxifma256 wrt ..imagebase + DD $L$SEH_end_ossl_rsaz_amm52x20_x2_avxifma256 wrt ..imagebase + DD $L$SEH_info_ossl_rsaz_amm52x20_x2_avxifma256 wrt ..imagebase + +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_ossl_rsaz_amm52x20_x1_avxifma256: +DB 9,0,0,0 + DD rsaz_def_handler wrt ..imagebase + DD $L$ossl_rsaz_amm52x20_x1_avxifma256_body wrt ..imagebase,$L$ossl_rsaz_amm52x20_x1_avxifma256_epilogue wrt ..imagebase +$L$SEH_info_ossl_rsaz_amm52x20_x2_avxifma256: +DB 9,0,0,0 + DD rsaz_def_handler wrt ..imagebase + DD $L$ossl_rsaz_amm52x20_x2_avxifma256_body wrt ..imagebase,$L$ossl_rsaz_amm52x20_x2_avxifma256_epilogue wrt ..imagebase diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-3k-avxifma.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-3k-avxifma.nasm new file mode 100644 index 0000000000..c9e1700b25 --- /dev/null +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-3k-avxifma.nasm @@ -0,0 +1,1927 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + + +global ossl_rsaz_amm52x30_x1_avxifma256 + +ALIGN 32 +ossl_rsaz_amm52x30_x1_avxifma256: + mov QWORD[8+rsp],rdi 
;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ossl_rsaz_amm52x30_x1_avxifma256: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + + + +DB 243,15,30,250 + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + lea rsp,[((-168))+rsp] + vmovapd XMMWORD[rsp],xmm6 + vmovapd XMMWORD[16+rsp],xmm7 + vmovapd XMMWORD[32+rsp],xmm8 + vmovapd XMMWORD[48+rsp],xmm9 + vmovapd XMMWORD[64+rsp],xmm10 + vmovapd XMMWORD[80+rsp],xmm11 + vmovapd XMMWORD[96+rsp],xmm12 + vmovapd XMMWORD[112+rsp],xmm13 + vmovapd XMMWORD[128+rsp],xmm14 + vmovapd XMMWORD[144+rsp],xmm15 +$L$ossl_rsaz_amm52x30_x1_avxifma256_body: + + vpxor ymm0,ymm0,ymm0 + vmovapd ymm3,ymm0 + vmovapd ymm4,ymm0 + vmovapd ymm5,ymm0 + vmovapd ymm6,ymm0 + vmovapd ymm7,ymm0 + vmovapd ymm8,ymm0 + vmovapd ymm9,ymm0 + vmovapd ymm10,ymm0 + + xor r9d,r9d + + mov r11,rdx + mov rax,0xfffffffffffff + + + mov ebx,7 + +ALIGN 32 +$L$loop7: + mov r13,QWORD[r11] + + vpbroadcastq ymm1,QWORD[r11] + mov rdx,QWORD[rsi] + mulx r12,r13,r13 + add r9,r13 + mov r10,r12 + adc r10,0 + + mov r13,r8 + imul r13,r9 + and r13,rax + + vmovq xmm2,r13 + vpbroadcastq ymm2,xmm2 + mov rdx,QWORD[rcx] + mulx r12,r13,r13 + add r9,r13 + adc r10,r12 + + shr r9,52 + sal r10,12 + or r9,r10 + + lea rsp,[((-264))+rsp] + +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 + + + vmovdqu YMMWORD[rsp],ymm3 + vmovdqu YMMWORD[32+rsp],ymm4 + vmovdqu YMMWORD[64+rsp],ymm5 + vmovdqu YMMWORD[96+rsp],ymm6 + vmovdqu YMMWORD[128+rsp],ymm7 + vmovdqu YMMWORD[160+rsp],ymm8 + vmovdqu YMMWORD[192+rsp],ymm9 + vmovdqu YMMWORD[224+rsp],ymm10 + mov QWORD[256+rsp],0 + + vmovdqu ymm3,YMMWORD[8+rsp] + vmovdqu ymm4,YMMWORD[40+rsp] + vmovdqu ymm5,YMMWORD[72+rsp] + vmovdqu ymm6,YMMWORD[104+rsp] + vmovdqu ymm7,YMMWORD[136+rsp] + vmovdqu ymm8,YMMWORD[168+rsp] + vmovdqu ymm9,YMMWORD[200+rsp] + vmovdqu ymm10,YMMWORD[232+rsp] + + add r9,QWORD[8+rsp] + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 + + lea rsp,[264+rsp] + mov r13,QWORD[8+r11] + + vpbroadcastq ymm1,QWORD[8+r11] + mov rdx,QWORD[rsi] + mulx r12,r13,r13 + add r9,r13 + mov r10,r12 + adc r10,0 + + mov r13,r8 + imul r13,r9 + and r13,rax + + vmovq xmm2,r13 + vpbroadcastq ymm2,xmm2 + mov rdx,QWORD[rcx] + mulx r12,r13,r13 + add r9,r13 + adc r10,r12 + + shr r9,52 + 
sal r10,12 + or r9,r10 + + lea rsp,[((-264))+rsp] + +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 + + + vmovdqu YMMWORD[rsp],ymm3 + vmovdqu YMMWORD[32+rsp],ymm4 + vmovdqu YMMWORD[64+rsp],ymm5 + vmovdqu YMMWORD[96+rsp],ymm6 + vmovdqu YMMWORD[128+rsp],ymm7 + vmovdqu YMMWORD[160+rsp],ymm8 + vmovdqu YMMWORD[192+rsp],ymm9 + vmovdqu YMMWORD[224+rsp],ymm10 + mov QWORD[256+rsp],0 + + vmovdqu ymm3,YMMWORD[8+rsp] + vmovdqu ymm4,YMMWORD[40+rsp] + vmovdqu ymm5,YMMWORD[72+rsp] + vmovdqu ymm6,YMMWORD[104+rsp] + vmovdqu ymm7,YMMWORD[136+rsp] + vmovdqu ymm8,YMMWORD[168+rsp] + vmovdqu ymm9,YMMWORD[200+rsp] + vmovdqu ymm10,YMMWORD[232+rsp] + + add r9,QWORD[8+rsp] + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 + + lea rsp,[264+rsp] + mov r13,QWORD[16+r11] + + vpbroadcastq ymm1,QWORD[16+r11] + mov rdx,QWORD[rsi] + mulx r12,r13,r13 + add r9,r13 + mov r10,r12 + adc r10,0 + + mov r13,r8 + imul r13,r9 + and r13,rax + + vmovq xmm2,r13 + vpbroadcastq ymm2,xmm2 + mov rdx,QWORD[rcx] + mulx r12,r13,r13 + add r9,r13 + adc r10,r12 + + shr r9,52 + sal r10,12 + or r9,r10 + + lea rsp,[((-264))+rsp] + +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 + + + vmovdqu YMMWORD[rsp],ymm3 + vmovdqu YMMWORD[32+rsp],ymm4 + vmovdqu YMMWORD[64+rsp],ymm5 + vmovdqu YMMWORD[96+rsp],ymm6 + vmovdqu YMMWORD[128+rsp],ymm7 + vmovdqu YMMWORD[160+rsp],ymm8 + vmovdqu YMMWORD[192+rsp],ymm9 + vmovdqu YMMWORD[224+rsp],ymm10 + mov QWORD[256+rsp],0 + + vmovdqu ymm3,YMMWORD[8+rsp] + vmovdqu ymm4,YMMWORD[40+rsp] + vmovdqu ymm5,YMMWORD[72+rsp] + vmovdqu ymm6,YMMWORD[104+rsp] + vmovdqu ymm7,YMMWORD[136+rsp] + vmovdqu ymm8,YMMWORD[168+rsp] + vmovdqu ymm9,YMMWORD[200+rsp] + vmovdqu 
ymm10,YMMWORD[232+rsp] + + add r9,QWORD[8+rsp] + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 + + lea rsp,[264+rsp] + mov r13,QWORD[24+r11] + + vpbroadcastq ymm1,QWORD[24+r11] + mov rdx,QWORD[rsi] + mulx r12,r13,r13 + add r9,r13 + mov r10,r12 + adc r10,0 + + mov r13,r8 + imul r13,r9 + and r13,rax + + vmovq xmm2,r13 + vpbroadcastq ymm2,xmm2 + mov rdx,QWORD[rcx] + mulx r12,r13,r13 + add r9,r13 + adc r10,r12 + + shr r9,52 + sal r10,12 + or r9,r10 + + lea rsp,[((-264))+rsp] + +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 + + + vmovdqu YMMWORD[rsp],ymm3 + vmovdqu YMMWORD[32+rsp],ymm4 + vmovdqu YMMWORD[64+rsp],ymm5 + vmovdqu YMMWORD[96+rsp],ymm6 + vmovdqu YMMWORD[128+rsp],ymm7 + vmovdqu YMMWORD[160+rsp],ymm8 + vmovdqu YMMWORD[192+rsp],ymm9 + vmovdqu YMMWORD[224+rsp],ymm10 + mov QWORD[256+rsp],0 + + vmovdqu ymm3,YMMWORD[8+rsp] + vmovdqu ymm4,YMMWORD[40+rsp] + vmovdqu ymm5,YMMWORD[72+rsp] + vmovdqu ymm6,YMMWORD[104+rsp] + vmovdqu ymm7,YMMWORD[136+rsp] + vmovdqu ymm8,YMMWORD[168+rsp] + vmovdqu ymm9,YMMWORD[200+rsp] + vmovdqu ymm10,YMMWORD[232+rsp] + + add r9,QWORD[8+rsp] + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 + + lea rsp,[264+rsp] + lea r11,[32+r11] + dec ebx + jne NEAR $L$loop7 + mov r13,QWORD[r11] + + vpbroadcastq ymm1,QWORD[r11] + mov rdx,QWORD[rsi] + mulx r12,r13,r13 + add r9,r13 + mov r10,r12 + adc r10,0 + + mov r13,r8 + imul r13,r9 + and r13,rax + + vmovq xmm2,r13 + vpbroadcastq ymm2,xmm2 + mov rdx,QWORD[rcx] + mulx r12,r13,r13 + add r9,r13 + adc r10,r12 + + shr r9,52 + sal r10,12 + or r9,r10 + + lea rsp,[((-264))+rsp] + +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4 +{vex} 
vpmadd52luq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 + + + vmovdqu YMMWORD[rsp],ymm3 + vmovdqu YMMWORD[32+rsp],ymm4 + vmovdqu YMMWORD[64+rsp],ymm5 + vmovdqu YMMWORD[96+rsp],ymm6 + vmovdqu YMMWORD[128+rsp],ymm7 + vmovdqu YMMWORD[160+rsp],ymm8 + vmovdqu YMMWORD[192+rsp],ymm9 + vmovdqu YMMWORD[224+rsp],ymm10 + mov QWORD[256+rsp],0 + + vmovdqu ymm3,YMMWORD[8+rsp] + vmovdqu ymm4,YMMWORD[40+rsp] + vmovdqu ymm5,YMMWORD[72+rsp] + vmovdqu ymm6,YMMWORD[104+rsp] + vmovdqu ymm7,YMMWORD[136+rsp] + vmovdqu ymm8,YMMWORD[168+rsp] + vmovdqu ymm9,YMMWORD[200+rsp] + vmovdqu ymm10,YMMWORD[232+rsp] + + add r9,QWORD[8+rsp] + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 + + lea rsp,[264+rsp] + mov r13,QWORD[8+r11] + + vpbroadcastq ymm1,QWORD[8+r11] + mov rdx,QWORD[rsi] + mulx r12,r13,r13 + add r9,r13 + mov r10,r12 + adc r10,0 + + mov r13,r8 + imul r13,r9 + and r13,rax + + vmovq xmm2,r13 + vpbroadcastq ymm2,xmm2 + mov rdx,QWORD[rcx] + mulx r12,r13,r13 + add r9,r13 + adc r10,r12 + + shr r9,52 + sal r10,12 + or r9,r10 + + lea rsp,[((-264))+rsp] + +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 + + + vmovdqu YMMWORD[rsp],ymm3 + vmovdqu YMMWORD[32+rsp],ymm4 + vmovdqu YMMWORD[64+rsp],ymm5 + vmovdqu YMMWORD[96+rsp],ymm6 + vmovdqu YMMWORD[128+rsp],ymm7 + vmovdqu YMMWORD[160+rsp],ymm8 + vmovdqu YMMWORD[192+rsp],ymm9 + vmovdqu YMMWORD[224+rsp],ymm10 + mov QWORD[256+rsp],0 + + vmovdqu ymm3,YMMWORD[8+rsp] + vmovdqu ymm4,YMMWORD[40+rsp] + vmovdqu ymm5,YMMWORD[72+rsp] + vmovdqu ymm6,YMMWORD[104+rsp] + vmovdqu ymm7,YMMWORD[136+rsp] + vmovdqu ymm8,YMMWORD[168+rsp] + vmovdqu ymm9,YMMWORD[200+rsp] + vmovdqu ymm10,YMMWORD[232+rsp] + + add r9,QWORD[8+rsp] + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 
64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 + + lea rsp,[264+rsp] + + vmovq xmm0,r9 + vpbroadcastq ymm0,xmm0 + vpblendd ymm3,ymm3,ymm0,3 + + + + vpsrlq ymm0,ymm3,52 + vpsrlq ymm1,ymm4,52 + vpsrlq ymm2,ymm5,52 + vpsrlq ymm11,ymm6,52 + vpsrlq ymm12,ymm7,52 + vpsrlq ymm13,ymm8,52 + vpsrlq ymm14,ymm9,52 + vpsrlq ymm15,ymm10,52 + + lea rsp,[((-32))+rsp] + vmovupd YMMWORD[rsp],ymm3 + + + vpermq ymm15,ymm15,144 + vpermq ymm3,ymm14,3 + vblendpd ymm15,ymm15,ymm3,1 + + vpermq ymm14,ymm14,144 + vpermq ymm3,ymm13,3 + vblendpd ymm14,ymm14,ymm3,1 + + vpermq ymm13,ymm13,144 + vpermq ymm3,ymm12,3 + vblendpd ymm13,ymm13,ymm3,1 + + vpermq ymm12,ymm12,144 + vpermq ymm3,ymm11,3 + vblendpd ymm12,ymm12,ymm3,1 + + vpermq ymm11,ymm11,144 + vpermq ymm3,ymm2,3 + vblendpd ymm11,ymm11,ymm3,1 + + vpermq ymm2,ymm2,144 + vpermq ymm3,ymm1,3 + vblendpd ymm2,ymm2,ymm3,1 + + vpermq ymm1,ymm1,144 + vpermq ymm3,ymm0,3 + vblendpd ymm1,ymm1,ymm3,1 + + vpermq ymm0,ymm0,144 + vpand ymm0,ymm0,YMMWORD[$L$high64x3] + + vmovupd ymm3,YMMWORD[rsp] + lea rsp,[32+rsp] + + + vpand ymm3,ymm3,YMMWORD[$L$mask52x4] + vpand ymm4,ymm4,YMMWORD[$L$mask52x4] + vpand ymm5,ymm5,YMMWORD[$L$mask52x4] + vpand ymm6,ymm6,YMMWORD[$L$mask52x4] + vpand ymm7,ymm7,YMMWORD[$L$mask52x4] + vpand ymm8,ymm8,YMMWORD[$L$mask52x4] + vpand ymm9,ymm9,YMMWORD[$L$mask52x4] + vpand ymm10,ymm10,YMMWORD[$L$mask52x4] + + + vpaddq ymm3,ymm3,ymm0 + vpaddq ymm4,ymm4,ymm1 + vpaddq ymm5,ymm5,ymm2 + vpaddq ymm6,ymm6,ymm11 + vpaddq ymm7,ymm7,ymm12 + vpaddq ymm8,ymm8,ymm13 + vpaddq ymm9,ymm9,ymm14 + vpaddq ymm10,ymm10,ymm15 + + + + vpcmpgtq ymm0,ymm3,YMMWORD[$L$mask52x4] + vpcmpgtq ymm1,ymm4,YMMWORD[$L$mask52x4] + vmovmskpd r14d,ymm0 + vmovmskpd r13d,ymm1 + shl r13b,4 + or r14b,r13b + + vpcmpgtq ymm2,ymm5,YMMWORD[$L$mask52x4] + vpcmpgtq ymm11,ymm6,YMMWORD[$L$mask52x4] + vmovmskpd r13d,ymm2 + vmovmskpd r12d,ymm11 + shl r12b,4 + or r13b,r12b + + vpcmpgtq ymm12,ymm7,YMMWORD[$L$mask52x4] + vpcmpgtq ymm13,ymm8,YMMWORD[$L$mask52x4] + vmovmskpd r12d,ymm12 + vmovmskpd r11d,ymm13 + shl r11b,4 + or r12b,r11b + + vpcmpgtq ymm14,ymm9,YMMWORD[$L$mask52x4] + vpcmpgtq ymm15,ymm10,YMMWORD[$L$mask52x4] + vmovmskpd r11d,ymm14 + vmovmskpd r10d,ymm15 + shl r10b,4 + or r11b,r10b + + add r14b,r14b + adc r13b,r13b + adc r12b,r12b + adc r11b,r11b + + + vpcmpeqq ymm0,ymm3,YMMWORD[$L$mask52x4] + vpcmpeqq ymm1,ymm4,YMMWORD[$L$mask52x4] + vmovmskpd r9d,ymm0 + vmovmskpd r8d,ymm1 + shl r8b,4 + or r9b,r8b + + vpcmpeqq ymm2,ymm5,YMMWORD[$L$mask52x4] + vpcmpeqq ymm11,ymm6,YMMWORD[$L$mask52x4] + vmovmskpd r8d,ymm2 + vmovmskpd edx,ymm11 + shl dl,4 + or r8b,dl + + vpcmpeqq ymm12,ymm7,YMMWORD[$L$mask52x4] + vpcmpeqq ymm13,ymm8,YMMWORD[$L$mask52x4] + vmovmskpd edx,ymm12 + vmovmskpd ecx,ymm13 + shl cl,4 + or dl,cl + + vpcmpeqq ymm14,ymm9,YMMWORD[$L$mask52x4] + vpcmpeqq ymm15,ymm10,YMMWORD[$L$mask52x4] + vmovmskpd ecx,ymm14 + vmovmskpd ebx,ymm15 + shl bl,4 + or cl,bl + + add r14b,r9b + adc r13b,r8b + adc r12b,dl + adc r11b,cl + + xor r14b,r9b + xor r13b,r8b + xor r12b,dl + xor r11b,cl + + lea 
rdx,[$L$kmasklut] + + mov r10b,r14b + and r14,0xf + vpsubq ymm0,ymm3,YMMWORD[$L$mask52x4] + shl r14,5 + vmovapd ymm2,YMMWORD[r14*1+rdx] + vblendvpd ymm3,ymm3,ymm0,ymm2 + + shr r10b,4 + and r10,0xf + vpsubq ymm0,ymm4,YMMWORD[$L$mask52x4] + shl r10,5 + vmovapd ymm2,YMMWORD[r10*1+rdx] + vblendvpd ymm4,ymm4,ymm0,ymm2 + + mov r10b,r13b + and r13,0xf + vpsubq ymm0,ymm5,YMMWORD[$L$mask52x4] + shl r13,5 + vmovapd ymm2,YMMWORD[r13*1+rdx] + vblendvpd ymm5,ymm5,ymm0,ymm2 + + shr r10b,4 + and r10,0xf + vpsubq ymm0,ymm6,YMMWORD[$L$mask52x4] + shl r10,5 + vmovapd ymm2,YMMWORD[r10*1+rdx] + vblendvpd ymm6,ymm6,ymm0,ymm2 + + mov r10b,r12b + and r12,0xf + vpsubq ymm0,ymm7,YMMWORD[$L$mask52x4] + shl r12,5 + vmovapd ymm2,YMMWORD[r12*1+rdx] + vblendvpd ymm7,ymm7,ymm0,ymm2 + + shr r10b,4 + and r10,0xf + vpsubq ymm0,ymm8,YMMWORD[$L$mask52x4] + shl r10,5 + vmovapd ymm2,YMMWORD[r10*1+rdx] + vblendvpd ymm8,ymm8,ymm0,ymm2 + + mov r10b,r11b + and r11,0xf + vpsubq ymm0,ymm9,YMMWORD[$L$mask52x4] + shl r11,5 + vmovapd ymm2,YMMWORD[r11*1+rdx] + vblendvpd ymm9,ymm9,ymm0,ymm2 + + shr r10b,4 + and r10,0xf + vpsubq ymm0,ymm10,YMMWORD[$L$mask52x4] + shl r10,5 + vmovapd ymm2,YMMWORD[r10*1+rdx] + vblendvpd ymm10,ymm10,ymm0,ymm2 + + vpand ymm3,ymm3,YMMWORD[$L$mask52x4] + vpand ymm4,ymm4,YMMWORD[$L$mask52x4] + vpand ymm5,ymm5,YMMWORD[$L$mask52x4] + vpand ymm6,ymm6,YMMWORD[$L$mask52x4] + vpand ymm7,ymm7,YMMWORD[$L$mask52x4] + vpand ymm8,ymm8,YMMWORD[$L$mask52x4] + vpand ymm9,ymm9,YMMWORD[$L$mask52x4] + + vpand ymm10,ymm10,YMMWORD[$L$mask52x4] + + vmovdqu YMMWORD[rdi],ymm3 + vmovdqu YMMWORD[32+rdi],ymm4 + vmovdqu YMMWORD[64+rdi],ymm5 + vmovdqu YMMWORD[96+rdi],ymm6 + vmovdqu YMMWORD[128+rdi],ymm7 + vmovdqu YMMWORD[160+rdi],ymm8 + vmovdqu YMMWORD[192+rdi],ymm9 + vmovdqu YMMWORD[224+rdi],ymm10 + + vzeroupper + lea rax,[rsp] + + vmovapd xmm6,XMMWORD[rax] + vmovapd xmm7,XMMWORD[16+rax] + vmovapd xmm8,XMMWORD[32+rax] + vmovapd xmm9,XMMWORD[48+rax] + vmovapd xmm10,XMMWORD[64+rax] + vmovapd xmm11,XMMWORD[80+rax] + vmovapd xmm12,XMMWORD[96+rax] + vmovapd xmm13,XMMWORD[112+rax] + vmovapd xmm14,XMMWORD[128+rax] + vmovapd xmm15,XMMWORD[144+rax] + lea rax,[168+rsp] + mov r15,QWORD[rax] + + mov r14,QWORD[8+rax] + + mov r13,QWORD[16+rax] + + mov r12,QWORD[24+rax] + + mov rbp,QWORD[32+rax] + + mov rbx,QWORD[40+rax] + + lea rsp,[48+rax] + +$L$ossl_rsaz_amm52x30_x1_avxifma256_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_ossl_rsaz_amm52x30_x1_avxifma256: +section .rdata rdata align=32 +ALIGN 32 +$L$mask52x4: + DQ 0xfffffffffffff + DQ 0xfffffffffffff + DQ 0xfffffffffffff + DQ 0xfffffffffffff +$L$high64x3: + DQ 0x0 + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff +$L$kmasklut: + + DQ 0x0 + DQ 0x0 + DQ 0x0 + DQ 0x0 + + DQ 0xffffffffffffffff + DQ 0x0 + DQ 0x0 + DQ 0x0 + + DQ 0x0 + DQ 0xffffffffffffffff + DQ 0x0 + DQ 0x0 + + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0x0 + DQ 0x0 + + DQ 0x0 + DQ 0x0 + DQ 0xffffffffffffffff + DQ 0x0 + + DQ 0xffffffffffffffff + DQ 0x0 + DQ 0xffffffffffffffff + DQ 0x0 + + DQ 0x0 + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0x0 + + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0x0 + + DQ 0x0 + DQ 0x0 + DQ 0x0 + DQ 0xffffffffffffffff + + DQ 0xffffffffffffffff + DQ 0x0 + DQ 0x0 + DQ 0xffffffffffffffff + + DQ 0x0 + DQ 0xffffffffffffffff + DQ 0x0 + DQ 0xffffffffffffffff + + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0x0 + DQ 0xffffffffffffffff + + DQ 0x0 + DQ 0x0 + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + + DQ 0xffffffffffffffff +
DQ 0x0 + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + + DQ 0x0 + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff +section .text code align=64 + + +global ossl_rsaz_amm52x30_x2_avxifma256 + +ALIGN 32 +ossl_rsaz_amm52x30_x2_avxifma256: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ossl_rsaz_amm52x30_x2_avxifma256: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + + + +DB 243,15,30,250 + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + lea rsp,[((-168))+rsp] + vmovapd XMMWORD[rsp],xmm6 + vmovapd XMMWORD[16+rsp],xmm7 + vmovapd XMMWORD[32+rsp],xmm8 + vmovapd XMMWORD[48+rsp],xmm9 + vmovapd XMMWORD[64+rsp],xmm10 + vmovapd XMMWORD[80+rsp],xmm11 + vmovapd XMMWORD[96+rsp],xmm12 + vmovapd XMMWORD[112+rsp],xmm13 + vmovapd XMMWORD[128+rsp],xmm14 + vmovapd XMMWORD[144+rsp],xmm15 +$L$ossl_rsaz_amm52x30_x2_avxifma256_body: + + vpxor ymm0,ymm0,ymm0 + vmovapd ymm3,ymm0 + vmovapd ymm4,ymm0 + vmovapd ymm5,ymm0 + vmovapd ymm6,ymm0 + vmovapd ymm7,ymm0 + vmovapd ymm8,ymm0 + vmovapd ymm9,ymm0 + vmovapd ymm10,ymm0 + + xor r9d,r9d + + mov r11,rdx + mov rax,0xfffffffffffff + + mov ebx,30 + +ALIGN 32 +$L$loop30: + mov r13,QWORD[r11] + + vpbroadcastq ymm1,QWORD[r11] + mov rdx,QWORD[rsi] + mulx r12,r13,r13 + add r9,r13 + mov r10,r12 + adc r10,0 + + mov r13,QWORD[r8] + imul r13,r9 + and r13,rax + + vmovq xmm2,r13 + vpbroadcastq ymm2,xmm2 + mov rdx,QWORD[rcx] + mulx r12,r13,r13 + add r9,r13 + adc r10,r12 + + shr r9,52 + sal r10,12 + or r9,r10 + + lea rsp,[((-264))+rsp] + +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 + + + vmovdqu YMMWORD[rsp],ymm3 + vmovdqu YMMWORD[32+rsp],ymm4 + vmovdqu YMMWORD[64+rsp],ymm5 + vmovdqu YMMWORD[96+rsp],ymm6 + vmovdqu YMMWORD[128+rsp],ymm7 + vmovdqu YMMWORD[160+rsp],ymm8 + vmovdqu YMMWORD[192+rsp],ymm9 + vmovdqu YMMWORD[224+rsp],ymm10 + mov QWORD[256+rsp],0 + + vmovdqu ymm3,YMMWORD[8+rsp] + vmovdqu ymm4,YMMWORD[40+rsp] + vmovdqu ymm5,YMMWORD[72+rsp] + vmovdqu ymm6,YMMWORD[104+rsp] + vmovdqu ymm7,YMMWORD[136+rsp] + vmovdqu ymm8,YMMWORD[168+rsp] + vmovdqu ymm9,YMMWORD[200+rsp] + vmovdqu ymm10,YMMWORD[232+rsp] + + add r9,QWORD[8+rsp] + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 192(%rcx), 
%ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 + + lea rsp,[264+rsp] + lea r11,[8+r11] + dec ebx + jne NEAR $L$loop30 + + push r11 + push rsi + push rcx + push r8 + + vmovq xmm0,r9 + vpbroadcastq ymm0,xmm0 + vpblendd ymm3,ymm3,ymm0,3 + + + + vpsrlq ymm0,ymm3,52 + vpsrlq ymm1,ymm4,52 + vpsrlq ymm2,ymm5,52 + vpsrlq ymm11,ymm6,52 + vpsrlq ymm12,ymm7,52 + vpsrlq ymm13,ymm8,52 + vpsrlq ymm14,ymm9,52 + vpsrlq ymm15,ymm10,52 + + lea rsp,[((-32))+rsp] + vmovupd YMMWORD[rsp],ymm3 + + + vpermq ymm15,ymm15,144 + vpermq ymm3,ymm14,3 + vblendpd ymm15,ymm15,ymm3,1 + + vpermq ymm14,ymm14,144 + vpermq ymm3,ymm13,3 + vblendpd ymm14,ymm14,ymm3,1 + + vpermq ymm13,ymm13,144 + vpermq ymm3,ymm12,3 + vblendpd ymm13,ymm13,ymm3,1 + + vpermq ymm12,ymm12,144 + vpermq ymm3,ymm11,3 + vblendpd ymm12,ymm12,ymm3,1 + + vpermq ymm11,ymm11,144 + vpermq ymm3,ymm2,3 + vblendpd ymm11,ymm11,ymm3,1 + + vpermq ymm2,ymm2,144 + vpermq ymm3,ymm1,3 + vblendpd ymm2,ymm2,ymm3,1 + + vpermq ymm1,ymm1,144 + vpermq ymm3,ymm0,3 + vblendpd ymm1,ymm1,ymm3,1 + + vpermq ymm0,ymm0,144 + vpand ymm0,ymm0,YMMWORD[$L$high64x3] + + vmovupd ymm3,YMMWORD[rsp] + lea rsp,[32+rsp] + + + vpand ymm3,ymm3,YMMWORD[$L$mask52x4] + vpand ymm4,ymm4,YMMWORD[$L$mask52x4] + vpand ymm5,ymm5,YMMWORD[$L$mask52x4] + vpand ymm6,ymm6,YMMWORD[$L$mask52x4] + vpand ymm7,ymm7,YMMWORD[$L$mask52x4] + vpand ymm8,ymm8,YMMWORD[$L$mask52x4] + vpand ymm9,ymm9,YMMWORD[$L$mask52x4] + vpand ymm10,ymm10,YMMWORD[$L$mask52x4] + + + vpaddq ymm3,ymm3,ymm0 + vpaddq ymm4,ymm4,ymm1 + vpaddq ymm5,ymm5,ymm2 + vpaddq ymm6,ymm6,ymm11 + vpaddq ymm7,ymm7,ymm12 + vpaddq ymm8,ymm8,ymm13 + vpaddq ymm9,ymm9,ymm14 + vpaddq ymm10,ymm10,ymm15 + + + + vpcmpgtq ymm0,ymm3,YMMWORD[$L$mask52x4] + vpcmpgtq ymm1,ymm4,YMMWORD[$L$mask52x4] + vmovmskpd r14d,ymm0 + vmovmskpd r13d,ymm1 + shl r13b,4 + or r14b,r13b + + vpcmpgtq ymm2,ymm5,YMMWORD[$L$mask52x4] + vpcmpgtq ymm11,ymm6,YMMWORD[$L$mask52x4] + vmovmskpd r13d,ymm2 + vmovmskpd r12d,ymm11 + shl r12b,4 + or r13b,r12b + + vpcmpgtq ymm12,ymm7,YMMWORD[$L$mask52x4] + vpcmpgtq ymm13,ymm8,YMMWORD[$L$mask52x4] + vmovmskpd r12d,ymm12 + vmovmskpd r11d,ymm13 + shl r11b,4 + or r12b,r11b + + vpcmpgtq ymm14,ymm9,YMMWORD[$L$mask52x4] + vpcmpgtq ymm15,ymm10,YMMWORD[$L$mask52x4] + vmovmskpd r11d,ymm14 + vmovmskpd r10d,ymm15 + shl r10b,4 + or r11b,r10b + + add r14b,r14b + adc r13b,r13b + adc r12b,r12b + adc r11b,r11b + + + vpcmpeqq ymm0,ymm3,YMMWORD[$L$mask52x4] + vpcmpeqq ymm1,ymm4,YMMWORD[$L$mask52x4] + vmovmskpd r9d,ymm0 + vmovmskpd r8d,ymm1 + shl r8b,4 + or r9b,r8b + + vpcmpeqq ymm2,ymm5,YMMWORD[$L$mask52x4] + vpcmpeqq ymm11,ymm6,YMMWORD[$L$mask52x4] + vmovmskpd r8d,ymm2 + vmovmskpd edx,ymm11 + shl dl,4 + or r8b,dl + + vpcmpeqq ymm12,ymm7,YMMWORD[$L$mask52x4] + vpcmpeqq ymm13,ymm8,YMMWORD[$L$mask52x4] + vmovmskpd edx,ymm12 + vmovmskpd ecx,ymm13 + shl cl,4 + or dl,cl + + vpcmpeqq ymm14,ymm9,YMMWORD[$L$mask52x4] + vpcmpeqq ymm15,ymm10,YMMWORD[$L$mask52x4] + vmovmskpd ecx,ymm14 + vmovmskpd ebx,ymm15 + shl bl,4 + or cl,bl + + add r14b,r9b + adc r13b,r8b + adc r12b,dl + adc r11b,cl + + xor r14b,r9b + xor r13b,r8b + xor r12b,dl + xor r11b,cl + + lea rdx,[$L$kmasklut] + + mov r10b,r14b + and r14,0xf + vpsubq ymm0,ymm3,YMMWORD[$L$mask52x4] + shl r14,5 + vmovapd ymm2,YMMWORD[r14*1+rdx] + vblendvpd ymm3,ymm3,ymm0,ymm2 + + shr r10b,4 + and r10,0xf + vpsubq ymm0,ymm4,YMMWORD[$L$mask52x4] + shl r10,5 + vmovapd ymm2,YMMWORD[r10*1+rdx] + vblendvpd ymm4,ymm4,ymm0,ymm2 + + mov r10b,r13b + and r13,0xf + vpsubq ymm0,ymm5,YMMWORD[$L$mask52x4] + shl r13,5 + vmovapd ymm2,YMMWORD[r13*1+rdx] + vblendvpd ymm5,ymm5,ymm0,ymm2 + + shr r10b,4 + and r10,0xf + vpsubq ymm0,ymm6,YMMWORD[$L$mask52x4] + shl r10,5 + vmovapd ymm2,YMMWORD[r10*1+rdx] + vblendvpd ymm6,ymm6,ymm0,ymm2 + + mov r10b,r12b + and r12,0xf + vpsubq ymm0,ymm7,YMMWORD[$L$mask52x4] + shl r12,5 + vmovapd ymm2,YMMWORD[r12*1+rdx] + vblendvpd ymm7,ymm7,ymm0,ymm2 + + shr r10b,4 + and r10,0xf + vpsubq ymm0,ymm8,YMMWORD[$L$mask52x4] + shl r10,5 + vmovapd ymm2,YMMWORD[r10*1+rdx] + vblendvpd ymm8,ymm8,ymm0,ymm2 + + mov r10b,r11b + and r11,0xf + vpsubq ymm0,ymm9,YMMWORD[$L$mask52x4] + shl r11,5 + vmovapd ymm2,YMMWORD[r11*1+rdx] + vblendvpd ymm9,ymm9,ymm0,ymm2 + + shr r10b,4 + and r10,0xf + vpsubq ymm0,ymm10,YMMWORD[$L$mask52x4] + shl r10,5 + vmovapd ymm2,YMMWORD[r10*1+rdx] + vblendvpd ymm10,ymm10,ymm0,ymm2 + + vpand ymm3,ymm3,YMMWORD[$L$mask52x4] + vpand ymm4,ymm4,YMMWORD[$L$mask52x4] + vpand ymm5,ymm5,YMMWORD[$L$mask52x4] + vpand ymm6,ymm6,YMMWORD[$L$mask52x4] + vpand ymm7,ymm7,YMMWORD[$L$mask52x4] + vpand ymm8,ymm8,YMMWORD[$L$mask52x4] + vpand ymm9,ymm9,YMMWORD[$L$mask52x4] + + vpand ymm10,ymm10,YMMWORD[$L$mask52x4] + pop r8 + pop rcx + pop rsi + pop r11 + + vmovdqu YMMWORD[rdi],ymm3 + vmovdqu YMMWORD[32+rdi],ymm4 + vmovdqu YMMWORD[64+rdi],ymm5 + vmovdqu YMMWORD[96+rdi],ymm6 + vmovdqu YMMWORD[128+rdi],ymm7 + vmovdqu YMMWORD[160+rdi],ymm8 + vmovdqu YMMWORD[192+rdi],ymm9 + vmovdqu YMMWORD[224+rdi],ymm10 + + xor r15d,r15d + + lea r11,[16+r11] + mov rax,0xfffffffffffff + + mov ebx,30 + + vpxor ymm0,ymm0,ymm0 + vmovapd ymm3,ymm0 + vmovapd ymm4,ymm0 + vmovapd ymm5,ymm0 + vmovapd ymm6,ymm0 + vmovapd ymm7,ymm0 + vmovapd ymm8,ymm0 + vmovapd ymm9,ymm0 + vmovapd ymm10,ymm0 +ALIGN 32 +$L$loop40: + mov r13,QWORD[r11] + + vpbroadcastq ymm1,QWORD[r11] + mov rdx,QWORD[256+rsi] + mulx r12,r13,r13 + add r9,r13 + mov r10,r12 + adc r10,0 + + mov r13,QWORD[8+r8] + imul r13,r9 + and r13,rax + + vmovq xmm2,r13 + vpbroadcastq ymm2,xmm2 + mov rdx,QWORD[256+rcx] + mulx r12,r13,r13 + add r9,r13 + adc r10,r12 + + shr r9,52 + sal r10,12 + or r9,r10 + + lea rsp,[((-264))+rsp] + +{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 320(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 352(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 384(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 416(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 448(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 480(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52luq 256(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 320(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 352(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 384(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 416(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 448(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 480(%rcx), %ymm2, %ymm10 + + + vmovdqu YMMWORD[rsp],ymm3 + vmovdqu YMMWORD[32+rsp],ymm4 + vmovdqu YMMWORD[64+rsp],ymm5 + vmovdqu YMMWORD[96+rsp],ymm6 + vmovdqu YMMWORD[128+rsp],ymm7 + vmovdqu YMMWORD[160+rsp],ymm8 + vmovdqu YMMWORD[192+rsp],ymm9 + vmovdqu YMMWORD[224+rsp],ymm10 + mov QWORD[256+rsp],0 + + vmovdqu ymm3,YMMWORD[8+rsp] + vmovdqu ymm4,YMMWORD[40+rsp] + vmovdqu ymm5,YMMWORD[72+rsp] + vmovdqu ymm6,YMMWORD[104+rsp] + vmovdqu ymm7,YMMWORD[136+rsp] + vmovdqu ymm8,YMMWORD[168+rsp] + vmovdqu ymm9,YMMWORD[200+rsp] + vmovdqu ymm10,YMMWORD[232+rsp] + + add r9,QWORD[8+rsp] + +{vex} vpmadd52huq 256(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 320(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 352(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 384(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 416(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 448(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 480(%rsi), %ymm1, %ymm10 + +{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 320(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 352(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 384(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 416(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 448(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 480(%rcx), %ymm2, %ymm10 + + lea rsp,[264+rsp] + lea r11,[8+r11] + dec ebx + jne NEAR $L$loop40 + + vmovq xmm0,r9 + vpbroadcastq ymm0,xmm0 + vpblendd ymm3,ymm3,ymm0,3 + + + + vpsrlq ymm0,ymm3,52 + vpsrlq ymm1,ymm4,52 + vpsrlq ymm2,ymm5,52 + vpsrlq ymm11,ymm6,52 + vpsrlq ymm12,ymm7,52 + vpsrlq ymm13,ymm8,52 + vpsrlq ymm14,ymm9,52 + vpsrlq ymm15,ymm10,52 + + lea rsp,[((-32))+rsp] + vmovupd YMMWORD[rsp],ymm3 + + + vpermq ymm15,ymm15,144 + vpermq ymm3,ymm14,3 + vblendpd ymm15,ymm15,ymm3,1 + + vpermq ymm14,ymm14,144 + vpermq ymm3,ymm13,3 + vblendpd ymm14,ymm14,ymm3,1 + + vpermq ymm13,ymm13,144 + vpermq ymm3,ymm12,3 + vblendpd ymm13,ymm13,ymm3,1 + + vpermq ymm12,ymm12,144 + vpermq ymm3,ymm11,3 + vblendpd ymm12,ymm12,ymm3,1 + + vpermq ymm11,ymm11,144 + vpermq ymm3,ymm2,3 + vblendpd ymm11,ymm11,ymm3,1 + + vpermq ymm2,ymm2,144 + vpermq ymm3,ymm1,3 + vblendpd ymm2,ymm2,ymm3,1 + + vpermq ymm1,ymm1,144 + vpermq ymm3,ymm0,3 + vblendpd ymm1,ymm1,ymm3,1 + + vpermq ymm0,ymm0,144 + vpand ymm0,ymm0,YMMWORD[$L$high64x3] + + vmovupd ymm3,YMMWORD[rsp] + lea rsp,[32+rsp] + + + vpand ymm3,ymm3,YMMWORD[$L$mask52x4] + vpand ymm4,ymm4,YMMWORD[$L$mask52x4] + vpand ymm5,ymm5,YMMWORD[$L$mask52x4] + vpand ymm6,ymm6,YMMWORD[$L$mask52x4] + vpand ymm7,ymm7,YMMWORD[$L$mask52x4] + vpand ymm8,ymm8,YMMWORD[$L$mask52x4] + vpand ymm9,ymm9,YMMWORD[$L$mask52x4] + vpand ymm10,ymm10,YMMWORD[$L$mask52x4] + + + vpaddq ymm3,ymm3,ymm0 + vpaddq ymm4,ymm4,ymm1 + vpaddq ymm5,ymm5,ymm2 + vpaddq ymm6,ymm6,ymm11 + vpaddq ymm7,ymm7,ymm12 + vpaddq ymm8,ymm8,ymm13 + vpaddq ymm9,ymm9,ymm14 + vpaddq ymm10,ymm10,ymm15 + + + + vpcmpgtq ymm0,ymm3,YMMWORD[$L$mask52x4] + vpcmpgtq ymm1,ymm4,YMMWORD[$L$mask52x4] + vmovmskpd r14d,ymm0 + vmovmskpd r13d,ymm1 + shl r13b,4 + or r14b,r13b + + vpcmpgtq ymm2,ymm5,YMMWORD[$L$mask52x4] + vpcmpgtq ymm11,ymm6,YMMWORD[$L$mask52x4] + vmovmskpd r13d,ymm2 + vmovmskpd r12d,ymm11 + shl r12b,4 + or r13b,r12b + + vpcmpgtq ymm12,ymm7,YMMWORD[$L$mask52x4] + vpcmpgtq ymm13,ymm8,YMMWORD[$L$mask52x4] + vmovmskpd r12d,ymm12 + vmovmskpd r11d,ymm13 + shl r11b,4 + or r12b,r11b + + vpcmpgtq ymm14,ymm9,YMMWORD[$L$mask52x4] + vpcmpgtq ymm15,ymm10,YMMWORD[$L$mask52x4] + vmovmskpd r11d,ymm14 + vmovmskpd r10d,ymm15 + shl r10b,4 + or r11b,r10b + + add r14b,r14b + adc r13b,r13b + adc r12b,r12b + adc r11b,r11b + + + vpcmpeqq ymm0,ymm3,YMMWORD[$L$mask52x4] + vpcmpeqq ymm1,ymm4,YMMWORD[$L$mask52x4] + vmovmskpd r9d,ymm0 + vmovmskpd r8d,ymm1 + shl r8b,4 + or r9b,r8b + + vpcmpeqq ymm2,ymm5,YMMWORD[$L$mask52x4] + vpcmpeqq ymm11,ymm6,YMMWORD[$L$mask52x4] + vmovmskpd r8d,ymm2 + vmovmskpd edx,ymm11 + shl dl,4 + or r8b,dl + + vpcmpeqq ymm12,ymm7,YMMWORD[$L$mask52x4] + vpcmpeqq ymm13,ymm8,YMMWORD[$L$mask52x4] + vmovmskpd edx,ymm12 + vmovmskpd ecx,ymm13 + shl cl,4 + or dl,cl + + vpcmpeqq ymm14,ymm9,YMMWORD[$L$mask52x4] + vpcmpeqq ymm15,ymm10,YMMWORD[$L$mask52x4] + vmovmskpd ecx,ymm14 + vmovmskpd ebx,ymm15 + shl bl,4 + or cl,bl + + add r14b,r9b + adc r13b,r8b + adc r12b,dl + adc r11b,cl + + xor r14b,r9b + xor r13b,r8b + xor r12b,dl + xor r11b,cl + + lea rdx,[$L$kmasklut] + + mov r10b,r14b + and r14,0xf + vpsubq ymm0,ymm3,YMMWORD[$L$mask52x4] + shl r14,5 + vmovapd ymm2,YMMWORD[r14*1+rdx] + vblendvpd ymm3,ymm3,ymm0,ymm2 + + shr r10b,4 + and r10,0xf + vpsubq ymm0,ymm4,YMMWORD[$L$mask52x4] + shl r10,5 + vmovapd ymm2,YMMWORD[r10*1+rdx] + vblendvpd ymm4,ymm4,ymm0,ymm2 + + mov r10b,r13b + and r13,0xf + vpsubq ymm0,ymm5,YMMWORD[$L$mask52x4] + shl r13,5 + vmovapd ymm2,YMMWORD[r13*1+rdx] + vblendvpd ymm5,ymm5,ymm0,ymm2 + + shr r10b,4 + and r10,0xf + vpsubq ymm0,ymm6,YMMWORD[$L$mask52x4] + shl r10,5 + vmovapd ymm2,YMMWORD[r10*1+rdx] + vblendvpd ymm6,ymm6,ymm0,ymm2 + + mov r10b,r12b + and r12,0xf + vpsubq ymm0,ymm7,YMMWORD[$L$mask52x4] + shl r12,5 + vmovapd ymm2,YMMWORD[r12*1+rdx] + vblendvpd ymm7,ymm7,ymm0,ymm2 + + shr r10b,4 + and r10,0xf + vpsubq ymm0,ymm8,YMMWORD[$L$mask52x4] + shl r10,5 + vmovapd ymm2,YMMWORD[r10*1+rdx] + vblendvpd ymm8,ymm8,ymm0,ymm2 + + mov r10b,r11b + and r11,0xf + vpsubq ymm0,ymm9,YMMWORD[$L$mask52x4] + shl r11,5 + vmovapd ymm2,YMMWORD[r11*1+rdx] + vblendvpd ymm9,ymm9,ymm0,ymm2 + + shr r10b,4 + and r10,0xf + vpsubq ymm0,ymm10,YMMWORD[$L$mask52x4] + shl r10,5 + vmovapd ymm2,YMMWORD[r10*1+rdx] + vblendvpd ymm10,ymm10,ymm0,ymm2 + + vpand ymm3,ymm3,YMMWORD[$L$mask52x4] + vpand ymm4,ymm4,YMMWORD[$L$mask52x4] + vpand ymm5,ymm5,YMMWORD[$L$mask52x4] + vpand ymm6,ymm6,YMMWORD[$L$mask52x4] + vpand ymm7,ymm7,YMMWORD[$L$mask52x4] + vpand ymm8,ymm8,YMMWORD[$L$mask52x4] + vpand ymm9,ymm9,YMMWORD[$L$mask52x4] + + vpand ymm10,ymm10,YMMWORD[$L$mask52x4] + + vmovdqu YMMWORD[256+rdi],ymm3 + vmovdqu YMMWORD[288+rdi],ymm4 + vmovdqu YMMWORD[320+rdi],ymm5 + vmovdqu YMMWORD[352+rdi],ymm6 + vmovdqu YMMWORD[384+rdi],ymm7 + vmovdqu YMMWORD[416+rdi],ymm8 + vmovdqu YMMWORD[448+rdi],ymm9 + vmovdqu YMMWORD[480+rdi],ymm10 + + vzeroupper + lea rax,[rsp] + + vmovapd xmm6,XMMWORD[rax] + vmovapd xmm7,XMMWORD[16+rax] + vmovapd xmm8,XMMWORD[32+rax] + vmovapd xmm9,XMMWORD[48+rax] + vmovapd xmm10,XMMWORD[64+rax] + vmovapd xmm11,XMMWORD[80+rax] + vmovapd xmm12,XMMWORD[96+rax] + vmovapd xmm13,XMMWORD[112+rax] + vmovapd xmm14,XMMWORD[128+rax] + vmovapd xmm15,XMMWORD[144+rax] + lea rax,[168+rsp] + mov r15,QWORD[rax] + + mov r14,QWORD[8+rax] + + mov r13,QWORD[16+rax] + + mov r12,QWORD[24+rax] + + mov rbp,QWORD[32+rax] + + mov rbx,QWORD[40+rax] + + lea rsp,[48+rax] + +$L$ossl_rsaz_amm52x30_x2_avxifma256_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_ossl_rsaz_amm52x30_x2_avxifma256: +section .text code align=64 + + +ALIGN 32 +global ossl_extract_multiplier_2x30_win5_avx + +ossl_extract_multiplier_2x30_win5_avx: + +DB 243,15,30,250 + vmovapd ymm12,YMMWORD[$L$ones] + vmovq xmm8,r8 + vpbroadcastq ymm10,xmm8 + vmovq xmm8,r9 + vpbroadcastq ymm11,xmm8 + lea rax,[16384+rdx] + + + vpxor xmm0,xmm0,xmm0 + vmovapd ymm9,ymm0 + vmovapd ymm1,ymm0 + vmovapd ymm2,ymm0 + vmovapd ymm3,ymm0 + vmovapd ymm4,ymm0 + vmovapd ymm5,ymm0 + vmovapd ymm6,ymm0 + vmovapd ymm7,ymm0 + +ALIGN 32 +$L$loop: + vpcmpeqq ymm13,ymm10,ymm9 + vmovdqu ymm8,YMMWORD[rdx] + + vblendvpd ymm0,ymm0,ymm8,ymm13 + vmovdqu ymm8,YMMWORD[32+rdx] + + vblendvpd ymm1,ymm1,ymm8,ymm13 + vmovdqu ymm8,YMMWORD[64+rdx] + + vblendvpd ymm2,ymm2,ymm8,ymm13 + vmovdqu ymm8,YMMWORD[96+rdx] + + vblendvpd ymm3,ymm3,ymm8,ymm13 + vmovdqu ymm8,YMMWORD[128+rdx] + + vblendvpd ymm4,ymm4,ymm8,ymm13 + vmovdqu ymm8,YMMWORD[160+rdx] + + vblendvpd ymm5,ymm5,ymm8,ymm13 + vmovdqu ymm8,YMMWORD[192+rdx] + + vblendvpd ymm6,ymm6,ymm8,ymm13 + vmovdqu ymm8,YMMWORD[224+rdx] + + vblendvpd ymm7,ymm7,ymm8,ymm13 + vpaddq ymm9,ymm9,ymm12 + add rdx,512 + cmp rax,rdx + jne NEAR $L$loop + vmovdqu YMMWORD[rcx],ymm0 + vmovdqu YMMWORD[32+rcx],ymm1 + vmovdqu YMMWORD[64+rcx],ymm2 + vmovdqu
YMMWORD[96+rcx],ymm3 + vmovdqu YMMWORD[128+rcx],ymm4 + vmovdqu YMMWORD[160+rcx],ymm5 + vmovdqu YMMWORD[192+rcx],ymm6 + vmovdqu YMMWORD[224+rcx],ymm7 + lea rdx,[((-16384))+rax] + + + vpxor xmm0,xmm0,xmm0 + vmovapd ymm9,ymm0 + vmovapd ymm0,ymm0 + vmovapd ymm1,ymm0 + vmovapd ymm2,ymm0 + vmovapd ymm3,ymm0 + vmovapd ymm4,ymm0 + vmovapd ymm5,ymm0 + vmovapd ymm6,ymm0 + vmovapd ymm7,ymm0 + +ALIGN 32 +$L$loop_8_15: + vpcmpeqq ymm13,ymm11,ymm9 + vmovdqu ymm8,YMMWORD[256+rdx] + + vblendvpd ymm0,ymm0,ymm8,ymm13 + vmovdqu ymm8,YMMWORD[288+rdx] + + vblendvpd ymm1,ymm1,ymm8,ymm13 + vmovdqu ymm8,YMMWORD[320+rdx] + + vblendvpd ymm2,ymm2,ymm8,ymm13 + vmovdqu ymm8,YMMWORD[352+rdx] + + vblendvpd ymm3,ymm3,ymm8,ymm13 + vmovdqu ymm8,YMMWORD[384+rdx] + + vblendvpd ymm4,ymm4,ymm8,ymm13 + vmovdqu ymm8,YMMWORD[416+rdx] + + vblendvpd ymm5,ymm5,ymm8,ymm13 + vmovdqu ymm8,YMMWORD[448+rdx] + + vblendvpd ymm6,ymm6,ymm8,ymm13 + vmovdqu ymm8,YMMWORD[480+rdx] + + vblendvpd ymm7,ymm7,ymm8,ymm13 + vpaddq ymm9,ymm9,ymm12 + add rdx,512 + cmp rax,rdx + jne NEAR $L$loop_8_15 + vmovdqu YMMWORD[256+rcx],ymm0 + vmovdqu YMMWORD[288+rcx],ymm1 + vmovdqu YMMWORD[320+rcx],ymm2 + vmovdqu YMMWORD[352+rcx],ymm3 + vmovdqu YMMWORD[384+rcx],ymm4 + vmovdqu YMMWORD[416+rcx],ymm5 + vmovdqu YMMWORD[448+rcx],ymm6 + vmovdqu YMMWORD[480+rcx],ymm7 + + DB 0F3h,0C3h ;repret + + +section .rdata rdata align=32 +ALIGN 32 +$L$ones: + DQ 1,1,1,1 +$L$zeros: + DQ 0,0,0,0 +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +rsaz_avx_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + mov rax,QWORD[152+r8] + + lea rsi,[rax] + lea rdi,[512+r8] + mov ecx,20 + DD 0xa548f3fc + + lea rax,[216+rax] + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov r15,QWORD[((-48))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + +$L$common_seh_tail: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_ossl_rsaz_amm52x30_x1_avxifma256 wrt ..imagebase + DD $L$SEH_end_ossl_rsaz_amm52x30_x1_avxifma256 wrt ..imagebase + DD $L$SEH_info_ossl_rsaz_amm52x30_x1_avxifma256 wrt ..imagebase + + DD $L$SEH_begin_ossl_rsaz_amm52x30_x2_avxifma256 wrt ..imagebase + DD $L$SEH_end_ossl_rsaz_amm52x30_x2_avxifma256 wrt ..imagebase + DD $L$SEH_info_ossl_rsaz_amm52x30_x2_avxifma256 wrt ..imagebase + +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_ossl_rsaz_amm52x30_x1_avxifma256: +DB 9,0,0,0 + DD rsaz_avx_handler wrt ..imagebase + DD 
$L$ossl_rsaz_amm52x30_x1_avxifma256_body wrt ..imagebase,$L$ossl_rsaz_amm52x30_x1_avxifma256_epilogue wrt ..imagebase +$L$SEH_info_ossl_rsaz_amm52x30_x2_avxifma256: +DB 9,0,0,0 + DD rsaz_avx_handler wrt ..imagebase + DD $L$ossl_rsaz_amm52x30_x2_avxifma256_body wrt ..imagebase,$L$ossl_rsaz_amm52x30_x2_avxifma256_epilogue wrt ..imagebase diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-4k-avxifma.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-4k-avxifma.nasm new file mode 100644 index 0000000000..ec91662d3b --- /dev/null +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-4k-avxifma.nasm @@ -0,0 +1,2081 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + + +global ossl_rsaz_amm52x40_x1_avxifma256 + +ALIGN 32 +ossl_rsaz_amm52x40_x1_avxifma256: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ossl_rsaz_amm52x40_x1_avxifma256: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + + + +DB 243,15,30,250 + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + lea rsp,[((-168))+rsp] + vmovapd XMMWORD[rsp],xmm6 + vmovapd XMMWORD[16+rsp],xmm7 + vmovapd XMMWORD[32+rsp],xmm8 + vmovapd XMMWORD[48+rsp],xmm9 + vmovapd XMMWORD[64+rsp],xmm10 + vmovapd XMMWORD[80+rsp],xmm11 + vmovapd XMMWORD[96+rsp],xmm12 + vmovapd XMMWORD[112+rsp],xmm13 + vmovapd XMMWORD[128+rsp],xmm14 + vmovapd XMMWORD[144+rsp],xmm15 +$L$ossl_rsaz_amm52x40_x1_avxifma256_body: + + vpxor ymm0,ymm0,ymm0 + vmovapd ymm3,ymm0 + vmovapd ymm4,ymm0 + vmovapd ymm5,ymm0 + vmovapd ymm6,ymm0 + vmovapd ymm7,ymm0 + vmovapd ymm8,ymm0 + vmovapd ymm9,ymm0 + vmovapd ymm10,ymm0 + vmovapd ymm11,ymm0 + vmovapd ymm12,ymm0 + + xor r9d,r9d + + mov r11,rdx + mov rax,0xfffffffffffff + + + mov ebx,10 + +ALIGN 32 +$L$loop10: + mov r13,QWORD[r11] + + vpbroadcastq ymm1,QWORD[r11] + mov rdx,QWORD[rsi] + mulx r12,r13,r13 + add r9,r13 + mov r10,r12 + adc r10,0 + + mov r13,r8 + imul r13,r9 + and r13,rax + + vmovq xmm2,r13 + vpbroadcastq ymm2,xmm2 + mov rdx,QWORD[rcx] + mulx r12,r13,r13 + add r9,r13 + adc r10,r12 + + shr r9,52 + sal r10,12 + or r9,r10 + + lea rsp,[((-328))+rsp] + +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10 +{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm11 +{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 +{vex} vpmadd52luq 256(%rcx), %ymm2, %ymm11 +{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm12 + vmovdqu YMMWORD[rsp],ymm3 + vmovdqu YMMWORD[32+rsp],ymm4 + vmovdqu YMMWORD[64+rsp],ymm5 + vmovdqu YMMWORD[96+rsp],ymm6 + vmovdqu YMMWORD[128+rsp],ymm7 + vmovdqu YMMWORD[160+rsp],ymm8 + vmovdqu YMMWORD[192+rsp],ymm9 + vmovdqu YMMWORD[224+rsp],ymm10 + vmovdqu YMMWORD[256+rsp],ymm11 + vmovdqu YMMWORD[288+rsp],ymm12 + mov QWORD[320+rsp],0 + + vmovdqu ymm3,YMMWORD[8+rsp] + vmovdqu ymm4,YMMWORD[40+rsp] + vmovdqu ymm5,YMMWORD[72+rsp] + vmovdqu 
ymm6,YMMWORD[104+rsp] + vmovdqu ymm7,YMMWORD[136+rsp] + vmovdqu ymm8,YMMWORD[168+rsp] + vmovdqu ymm9,YMMWORD[200+rsp] + vmovdqu ymm10,YMMWORD[232+rsp] + vmovdqu ymm11,YMMWORD[264+rsp] + vmovdqu ymm12,YMMWORD[296+rsp] + + add r9,QWORD[8+rsp] + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 +{vex} vpmadd52huq 256(%rsi), %ymm1, %ymm11 +{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 +{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm11 +{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm12 + lea rsp,[328+rsp] + mov r13,QWORD[8+r11] + + vpbroadcastq ymm1,QWORD[8+r11] + mov rdx,QWORD[rsi] + mulx r12,r13,r13 + add r9,r13 + mov r10,r12 + adc r10,0 + + mov r13,r8 + imul r13,r9 + and r13,rax + + vmovq xmm2,r13 + vpbroadcastq ymm2,xmm2 + mov rdx,QWORD[rcx] + mulx r12,r13,r13 + add r9,r13 + adc r10,r12 + + shr r9,52 + sal r10,12 + or r9,r10 + + lea rsp,[((-328))+rsp] + +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10 +{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm11 +{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 +{vex} vpmadd52luq 256(%rcx), %ymm2, %ymm11 +{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm12 + vmovdqu YMMWORD[rsp],ymm3 + vmovdqu YMMWORD[32+rsp],ymm4 + vmovdqu YMMWORD[64+rsp],ymm5 + vmovdqu YMMWORD[96+rsp],ymm6 + vmovdqu YMMWORD[128+rsp],ymm7 + vmovdqu YMMWORD[160+rsp],ymm8 + vmovdqu YMMWORD[192+rsp],ymm9 + vmovdqu YMMWORD[224+rsp],ymm10 + vmovdqu YMMWORD[256+rsp],ymm11 + vmovdqu YMMWORD[288+rsp],ymm12 + mov QWORD[320+rsp],0 + + vmovdqu ymm3,YMMWORD[8+rsp] + vmovdqu ymm4,YMMWORD[40+rsp] + vmovdqu ymm5,YMMWORD[72+rsp] + vmovdqu ymm6,YMMWORD[104+rsp] + vmovdqu ymm7,YMMWORD[136+rsp] + vmovdqu ymm8,YMMWORD[168+rsp] + vmovdqu ymm9,YMMWORD[200+rsp] + vmovdqu ymm10,YMMWORD[232+rsp] + vmovdqu ymm11,YMMWORD[264+rsp] + vmovdqu ymm12,YMMWORD[296+rsp] + + add r9,QWORD[8+rsp] + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 +{vex} vpmadd52huq 256(%rsi), %ymm1, %ymm11 +{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4 +{vex} 
vpmadd52huq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 +{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm11 +{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm12 + lea rsp,[328+rsp] + mov r13,QWORD[16+r11] + + vpbroadcastq ymm1,QWORD[16+r11] + mov rdx,QWORD[rsi] + mulx r12,r13,r13 + add r9,r13 + mov r10,r12 + adc r10,0 + + mov r13,r8 + imul r13,r9 + and r13,rax + + vmovq xmm2,r13 + vpbroadcastq ymm2,xmm2 + mov rdx,QWORD[rcx] + mulx r12,r13,r13 + add r9,r13 + adc r10,r12 + + shr r9,52 + sal r10,12 + or r9,r10 + + lea rsp,[((-328))+rsp] + +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10 +{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm11 +{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 +{vex} vpmadd52luq 256(%rcx), %ymm2, %ymm11 +{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm12 + vmovdqu YMMWORD[rsp],ymm3 + vmovdqu YMMWORD[32+rsp],ymm4 + vmovdqu YMMWORD[64+rsp],ymm5 + vmovdqu YMMWORD[96+rsp],ymm6 + vmovdqu YMMWORD[128+rsp],ymm7 + vmovdqu YMMWORD[160+rsp],ymm8 + vmovdqu YMMWORD[192+rsp],ymm9 + vmovdqu YMMWORD[224+rsp],ymm10 + vmovdqu YMMWORD[256+rsp],ymm11 + vmovdqu YMMWORD[288+rsp],ymm12 + mov QWORD[320+rsp],0 + + vmovdqu ymm3,YMMWORD[8+rsp] + vmovdqu ymm4,YMMWORD[40+rsp] + vmovdqu ymm5,YMMWORD[72+rsp] + vmovdqu ymm6,YMMWORD[104+rsp] + vmovdqu ymm7,YMMWORD[136+rsp] + vmovdqu ymm8,YMMWORD[168+rsp] + vmovdqu ymm9,YMMWORD[200+rsp] + vmovdqu ymm10,YMMWORD[232+rsp] + vmovdqu ymm11,YMMWORD[264+rsp] + vmovdqu ymm12,YMMWORD[296+rsp] + + add r9,QWORD[8+rsp] + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 +{vex} vpmadd52huq 256(%rsi), %ymm1, %ymm11 +{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 +{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm11 +{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm12 + lea rsp,[328+rsp] + mov r13,QWORD[24+r11] + + vpbroadcastq ymm1,QWORD[24+r11] + mov rdx,QWORD[rsi] + mulx r12,r13,r13 + add r9,r13 + mov r10,r12 + adc r10,0 + + mov r13,r8 + imul r13,r9 + and r13,rax + + vmovq xmm2,r13 + vpbroadcastq ymm2,xmm2 + mov rdx,QWORD[rcx] + mulx r12,r13,r13 + add r9,r13 + adc r10,r12 + + shr r9,52 + sal r10,12 + or r9,r10 + + lea rsp,[((-328))+rsp] + +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} 
vpmadd52luq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10 +{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm11 +{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 +{vex} vpmadd52luq 256(%rcx), %ymm2, %ymm11 +{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm12 + vmovdqu YMMWORD[rsp],ymm3 + vmovdqu YMMWORD[32+rsp],ymm4 + vmovdqu YMMWORD[64+rsp],ymm5 + vmovdqu YMMWORD[96+rsp],ymm6 + vmovdqu YMMWORD[128+rsp],ymm7 + vmovdqu YMMWORD[160+rsp],ymm8 + vmovdqu YMMWORD[192+rsp],ymm9 + vmovdqu YMMWORD[224+rsp],ymm10 + vmovdqu YMMWORD[256+rsp],ymm11 + vmovdqu YMMWORD[288+rsp],ymm12 + mov QWORD[320+rsp],0 + + vmovdqu ymm3,YMMWORD[8+rsp] + vmovdqu ymm4,YMMWORD[40+rsp] + vmovdqu ymm5,YMMWORD[72+rsp] + vmovdqu ymm6,YMMWORD[104+rsp] + vmovdqu ymm7,YMMWORD[136+rsp] + vmovdqu ymm8,YMMWORD[168+rsp] + vmovdqu ymm9,YMMWORD[200+rsp] + vmovdqu ymm10,YMMWORD[232+rsp] + vmovdqu ymm11,YMMWORD[264+rsp] + vmovdqu ymm12,YMMWORD[296+rsp] + + add r9,QWORD[8+rsp] + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 +{vex} vpmadd52huq 256(%rsi), %ymm1, %ymm11 +{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 +{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm11 +{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm12 + lea rsp,[328+rsp] + lea r11,[32+r11] + dec ebx + jne NEAR $L$loop10 + + vmovq xmm0,r9 + vpbroadcastq ymm0,xmm0 + vpblendd ymm3,ymm3,ymm0,3 + + lea rsp,[((-640))+rsp] + vmovupd YMMWORD[rsp],ymm3 + vmovupd YMMWORD[32+rsp],ymm4 + vmovupd YMMWORD[64+rsp],ymm5 + vmovupd YMMWORD[96+rsp],ymm6 + vmovupd YMMWORD[128+rsp],ymm7 + vmovupd YMMWORD[160+rsp],ymm8 + vmovupd YMMWORD[192+rsp],ymm9 + vmovupd YMMWORD[224+rsp],ymm10 + vmovupd YMMWORD[256+rsp],ymm11 + vmovupd YMMWORD[288+rsp],ymm12 + + + + vpsrlq ymm3,ymm3,52 + vpsrlq ymm4,ymm4,52 + vpsrlq ymm5,ymm5,52 + vpsrlq ymm6,ymm6,52 + vpsrlq ymm7,ymm7,52 + vpsrlq ymm8,ymm8,52 + vpsrlq ymm9,ymm9,52 + vpsrlq ymm10,ymm10,52 + vpsrlq ymm11,ymm11,52 + vpsrlq ymm12,ymm12,52 + + + vpermq ymm12,ymm12,144 + vpermq ymm13,ymm11,3 + vblendpd ymm12,ymm12,ymm13,1 + + vpermq ymm11,ymm11,144 + vpermq ymm13,ymm10,3 + vblendpd ymm11,ymm11,ymm13,1 + + vpermq ymm10,ymm10,144 + vpermq ymm13,ymm9,3 + vblendpd ymm10,ymm10,ymm13,1 + + vpermq ymm9,ymm9,144 + vpermq ymm13,ymm8,3 + vblendpd ymm9,ymm9,ymm13,1 + + vpermq ymm8,ymm8,144 + vpermq ymm13,ymm7,3 + vblendpd ymm8,ymm8,ymm13,1 + + vpermq ymm7,ymm7,144 + vpermq ymm13,ymm6,3 + vblendpd ymm7,ymm7,ymm13,1 + + vpermq 
ymm6,ymm6,144 + vpermq ymm13,ymm5,3 + vblendpd ymm6,ymm6,ymm13,1 + + vpermq ymm5,ymm5,144 + vpermq ymm13,ymm4,3 + vblendpd ymm5,ymm5,ymm13,1 + + vpermq ymm4,ymm4,144 + vpermq ymm13,ymm3,3 + vblendpd ymm4,ymm4,ymm13,1 + + vpermq ymm3,ymm3,144 + vpand ymm3,ymm3,YMMWORD[$L$high64x3] + + vmovupd YMMWORD[320+rsp],ymm3 + vmovupd YMMWORD[352+rsp],ymm4 + vmovupd YMMWORD[384+rsp],ymm5 + vmovupd YMMWORD[416+rsp],ymm6 + vmovupd YMMWORD[448+rsp],ymm7 + vmovupd YMMWORD[480+rsp],ymm8 + vmovupd YMMWORD[512+rsp],ymm9 + vmovupd YMMWORD[544+rsp],ymm10 + vmovupd YMMWORD[576+rsp],ymm11 + vmovupd YMMWORD[608+rsp],ymm12 + + vmovupd ymm3,YMMWORD[rsp] + vmovupd ymm4,YMMWORD[32+rsp] + vmovupd ymm5,YMMWORD[64+rsp] + vmovupd ymm6,YMMWORD[96+rsp] + vmovupd ymm7,YMMWORD[128+rsp] + vmovupd ymm8,YMMWORD[160+rsp] + vmovupd ymm9,YMMWORD[192+rsp] + vmovupd ymm10,YMMWORD[224+rsp] + vmovupd ymm11,YMMWORD[256+rsp] + vmovupd ymm12,YMMWORD[288+rsp] + + + vpand ymm3,ymm3,YMMWORD[$L$mask52x4] + vpand ymm4,ymm4,YMMWORD[$L$mask52x4] + vpand ymm5,ymm5,YMMWORD[$L$mask52x4] + vpand ymm6,ymm6,YMMWORD[$L$mask52x4] + vpand ymm7,ymm7,YMMWORD[$L$mask52x4] + vpand ymm8,ymm8,YMMWORD[$L$mask52x4] + vpand ymm9,ymm9,YMMWORD[$L$mask52x4] + vpand ymm10,ymm10,YMMWORD[$L$mask52x4] + vpand ymm11,ymm11,YMMWORD[$L$mask52x4] + vpand ymm12,ymm12,YMMWORD[$L$mask52x4] + + + vpaddq ymm3,ymm3,YMMWORD[320+rsp] + vpaddq ymm4,ymm4,YMMWORD[352+rsp] + vpaddq ymm5,ymm5,YMMWORD[384+rsp] + vpaddq ymm6,ymm6,YMMWORD[416+rsp] + vpaddq ymm7,ymm7,YMMWORD[448+rsp] + vpaddq ymm8,ymm8,YMMWORD[480+rsp] + vpaddq ymm9,ymm9,YMMWORD[512+rsp] + vpaddq ymm10,ymm10,YMMWORD[544+rsp] + vpaddq ymm11,ymm11,YMMWORD[576+rsp] + vpaddq ymm12,ymm12,YMMWORD[608+rsp] + + lea rsp,[640+rsp] + + + + vpcmpgtq ymm13,ymm3,YMMWORD[$L$mask52x4] + vmovmskpd r14d,ymm13 + vpcmpgtq ymm13,ymm4,YMMWORD[$L$mask52x4] + vmovmskpd r13d,ymm13 + shl r13b,4 + or r14b,r13b + + vpcmpgtq ymm13,ymm5,YMMWORD[$L$mask52x4] + vmovmskpd r13d,ymm13 + vpcmpgtq ymm13,ymm6,YMMWORD[$L$mask52x4] + vmovmskpd r12d,ymm13 + shl r12b,4 + or r13b,r12b + + vpcmpgtq ymm13,ymm7,YMMWORD[$L$mask52x4] + vmovmskpd r12d,ymm13 + vpcmpgtq ymm13,ymm8,YMMWORD[$L$mask52x4] + vmovmskpd r11d,ymm13 + shl r11b,4 + or r12b,r11b + + vpcmpgtq ymm13,ymm9,YMMWORD[$L$mask52x4] + vmovmskpd r11d,ymm13 + vpcmpgtq ymm13,ymm10,YMMWORD[$L$mask52x4] + vmovmskpd r10d,ymm13 + shl r10b,4 + or r11b,r10b + + vpcmpgtq ymm13,ymm11,YMMWORD[$L$mask52x4] + vmovmskpd r10d,ymm13 + vpcmpgtq ymm13,ymm12,YMMWORD[$L$mask52x4] + vmovmskpd r9d,ymm13 + shl r9b,4 + or r10b,r9b + + add r14b,r14b + adc r13b,r13b + adc r12b,r12b + adc r11b,r11b + adc r10b,r10b + + + vpcmpeqq ymm13,ymm3,YMMWORD[$L$mask52x4] + vmovmskpd r9d,ymm13 + vpcmpeqq ymm13,ymm4,YMMWORD[$L$mask52x4] + vmovmskpd r8d,ymm13 + shl r8b,4 + or r9b,r8b + + vpcmpeqq ymm13,ymm5,YMMWORD[$L$mask52x4] + vmovmskpd r8d,ymm13 + vpcmpeqq ymm13,ymm6,YMMWORD[$L$mask52x4] + vmovmskpd edx,ymm13 + shl dl,4 + or r8b,dl + + vpcmpeqq ymm13,ymm7,YMMWORD[$L$mask52x4] + vmovmskpd edx,ymm13 + vpcmpeqq ymm13,ymm8,YMMWORD[$L$mask52x4] + vmovmskpd ecx,ymm13 + shl cl,4 + or dl,cl + + vpcmpeqq ymm13,ymm9,YMMWORD[$L$mask52x4] + vmovmskpd ecx,ymm13 + vpcmpeqq ymm13,ymm10,YMMWORD[$L$mask52x4] + vmovmskpd ebx,ymm13 + shl bl,4 + or cl,bl + + vpcmpeqq ymm13,ymm11,YMMWORD[$L$mask52x4] + vmovmskpd ebx,ymm13 + vpcmpeqq ymm13,ymm12,YMMWORD[$L$mask52x4] + vmovmskpd eax,ymm13 + shl al,4 + or bl,al + + add r14b,r9b + adc r13b,r8b + adc r12b,dl + adc r11b,cl + adc r10b,bl + + xor r14b,r9b + xor r13b,r8b + xor r12b,dl + xor r11b,cl + xor r10b,bl + + push r9 + 
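+; Branchless carry fix-up (kmasklut select): each nibble of the overflow map assembled above picks a 32-byte mask from $L$kmasklut (hence the shl-by-5 index); vblendvpd then keeps either the limb or limb-(2^52-1) from ymm13, and the final vpand against $L$mask52x4 renormalizes every limb to 52 bits.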
push r8 + + lea r8,[$L$kmasklut] + + mov r9b,r14b + and r14,0xf + vpsubq ymm13,ymm3,YMMWORD[$L$mask52x4] + shl r14,5 + vmovapd ymm14,YMMWORD[r14*1+r8] + vblendvpd ymm3,ymm3,ymm13,ymm14 + + shr r9b,4 + and r9,0xf + vpsubq ymm13,ymm4,YMMWORD[$L$mask52x4] + shl r9,5 + vmovapd ymm14,YMMWORD[r9*1+r8] + vblendvpd ymm4,ymm4,ymm13,ymm14 + + mov r9b,r13b + and r13,0xf + vpsubq ymm13,ymm5,YMMWORD[$L$mask52x4] + shl r13,5 + vmovapd ymm14,YMMWORD[r13*1+r8] + vblendvpd ymm5,ymm5,ymm13,ymm14 + + shr r9b,4 + and r9,0xf + vpsubq ymm13,ymm6,YMMWORD[$L$mask52x4] + shl r9,5 + vmovapd ymm14,YMMWORD[r9*1+r8] + vblendvpd ymm6,ymm6,ymm13,ymm14 + + mov r9b,r12b + and r12,0xf + vpsubq ymm13,ymm7,YMMWORD[$L$mask52x4] + shl r12,5 + vmovapd ymm14,YMMWORD[r12*1+r8] + vblendvpd ymm7,ymm7,ymm13,ymm14 + + shr r9b,4 + and r9,0xf + vpsubq ymm13,ymm8,YMMWORD[$L$mask52x4] + shl r9,5 + vmovapd ymm14,YMMWORD[r9*1+r8] + vblendvpd ymm8,ymm8,ymm13,ymm14 + + mov r9b,r11b + and r11,0xf + vpsubq ymm13,ymm9,YMMWORD[$L$mask52x4] + shl r11,5 + vmovapd ymm14,YMMWORD[r11*1+r8] + vblendvpd ymm9,ymm9,ymm13,ymm14 + + shr r9b,4 + and r9,0xf + vpsubq ymm13,ymm10,YMMWORD[$L$mask52x4] + shl r9,5 + vmovapd ymm14,YMMWORD[r9*1+r8] + vblendvpd ymm10,ymm10,ymm13,ymm14 + + mov r9b,r10b + and r10,0xf + vpsubq ymm13,ymm11,YMMWORD[$L$mask52x4] + shl r10,5 + vmovapd ymm14,YMMWORD[r10*1+r8] + vblendvpd ymm11,ymm11,ymm13,ymm14 + + shr r9b,4 + and r9,0xf + vpsubq ymm13,ymm12,YMMWORD[$L$mask52x4] + shl r9,5 + vmovapd ymm14,YMMWORD[r9*1+r8] + vblendvpd ymm12,ymm12,ymm13,ymm14 + + pop r8 + pop r9 + + vpand ymm3,ymm3,YMMWORD[$L$mask52x4] + vpand ymm4,ymm4,YMMWORD[$L$mask52x4] + vpand ymm5,ymm5,YMMWORD[$L$mask52x4] + vpand ymm6,ymm6,YMMWORD[$L$mask52x4] + vpand ymm7,ymm7,YMMWORD[$L$mask52x4] + vpand ymm8,ymm8,YMMWORD[$L$mask52x4] + vpand ymm9,ymm9,YMMWORD[$L$mask52x4] + + vpand ymm10,ymm10,YMMWORD[$L$mask52x4] + vpand ymm11,ymm11,YMMWORD[$L$mask52x4] + vpand ymm12,ymm12,YMMWORD[$L$mask52x4] + + vmovdqu YMMWORD[rdi],ymm3 + vmovdqu YMMWORD[32+rdi],ymm4 + vmovdqu YMMWORD[64+rdi],ymm5 + vmovdqu YMMWORD[96+rdi],ymm6 + vmovdqu YMMWORD[128+rdi],ymm7 + vmovdqu YMMWORD[160+rdi],ymm8 + vmovdqu YMMWORD[192+rdi],ymm9 + vmovdqu YMMWORD[224+rdi],ymm10 + vmovdqu YMMWORD[256+rdi],ymm11 + vmovdqu YMMWORD[288+rdi],ymm12 + + vzeroupper + lea rax,[rsp] + + vmovapd xmm6,XMMWORD[rax] + vmovapd xmm7,XMMWORD[16+rax] + vmovapd xmm8,XMMWORD[32+rax] + vmovapd xmm9,XMMWORD[48+rax] + vmovapd xmm10,XMMWORD[64+rax] + vmovapd xmm11,XMMWORD[80+rax] + vmovapd xmm12,XMMWORD[96+rax] + vmovapd xmm13,XMMWORD[112+rax] + vmovapd xmm14,XMMWORD[128+rax] + vmovapd xmm15,XMMWORD[144+rax] + lea rax,[168+rsp] + mov r15,QWORD[rax] + + mov r14,QWORD[8+rax] + + mov r13,QWORD[16+rax] + + mov r12,QWORD[24+rax] + + mov rbp,QWORD[32+rax] + + mov rbx,QWORD[40+rax] + + lea rsp,[48+rax] + +$L$ossl_rsaz_amm52x40_x1_avxifma256_epilogue: + + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_ossl_rsaz_amm52x40_x1_avxifma256: +section .rdata rdata align=32 +ALIGN 32 +$L$mask52x4: + DQ 0xfffffffffffff + DQ 0xfffffffffffff + DQ 0xfffffffffffff + DQ 0xfffffffffffff +$L$high64x3: + DQ 0x0 + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff +$L$kmasklut: + + DQ 0x0 + DQ 0x0 + DQ 0x0 + DQ 0x0 + + DQ 0xffffffffffffffff + DQ 0x0 + DQ 0x0 + DQ 0x0 + + DQ 0x0 + DQ 0xffffffffffffffff + DQ 0x0 + DQ 0x0 + + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0x0 + DQ 0x0 + + DQ 0x0 + DQ 0x0 + DQ 0xffffffffffffffff + DQ 0x0 + + DQ 0xffffffffffffffff + DQ 0x0 + DQ 0xffffffffffffffff + DQ 0x0 + + DQ 0x0 + DQ 0xffffffffffffffff + 
DQ 0xffffffffffffffff + DQ 0x0 + + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0x0 + + DQ 0x0 + DQ 0x0 + DQ 0x0 + DQ 0xffffffffffffffff + + DQ 0xffffffffffffffff + DQ 0x0 + DQ 0x0 + DQ 0xffffffffffffffff + + DQ 0x0 + DQ 0xffffffffffffffff + DQ 0x0 + DQ 0xffffffffffffffff + + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0x0 + DQ 0xffffffffffffffff + + DQ 0x0 + DQ 0x0 + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + + DQ 0xffffffffffffffff + DQ 0x0 + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + + DQ 0x0 + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff + DQ 0xffffffffffffffff +section .text code align=64 + + +global ossl_rsaz_amm52x40_x2_avxifma256 + +ALIGN 32 +ossl_rsaz_amm52x40_x2_avxifma256: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ossl_rsaz_amm52x40_x2_avxifma256: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + + + +DB 243,15,30,250 + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + lea rsp,[((-168))+rsp] + vmovapd XMMWORD[rsp],xmm6 + vmovapd XMMWORD[16+rsp],xmm7 + vmovapd XMMWORD[32+rsp],xmm8 + vmovapd XMMWORD[48+rsp],xmm9 + vmovapd XMMWORD[64+rsp],xmm10 + vmovapd XMMWORD[80+rsp],xmm11 + vmovapd XMMWORD[96+rsp],xmm12 + vmovapd XMMWORD[112+rsp],xmm13 + vmovapd XMMWORD[128+rsp],xmm14 + vmovapd XMMWORD[144+rsp],xmm15 +$L$ossl_rsaz_amm52x40_x2_avxifma256_body: + + vpxor ymm0,ymm0,ymm0 + vmovapd ymm3,ymm0 + vmovapd ymm4,ymm0 + vmovapd ymm5,ymm0 + vmovapd ymm6,ymm0 + vmovapd ymm7,ymm0 + vmovapd ymm8,ymm0 + vmovapd ymm9,ymm0 + vmovapd ymm10,ymm0 + vmovapd ymm11,ymm0 + vmovapd ymm12,ymm0 + + xor r9d,r9d + + mov r11,rdx + mov rax,0xfffffffffffff + + mov ebx,40 + +ALIGN 32 +$L$loop40: + mov r13,QWORD[r11] + + vpbroadcastq ymm1,QWORD[r11] + mov rdx,QWORD[rsi] + mulx r12,r13,r13 + add r9,r13 + mov r10,r12 + adc r10,0 + + mov r13,QWORD[r8] + imul r13,r9 + and r13,rax + + vmovq xmm2,r13 + vpbroadcastq ymm2,xmm2 + mov rdx,QWORD[rcx] + mulx r12,r13,r13 + add r9,r13 + adc r10,r12 + + shr r9,52 + sal r10,12 + or r9,r10 + + lea rsp,[((-328))+rsp] + +{vex} vpmadd52luq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 224(%rsi), %ymm1, %ymm10 +{vex} vpmadd52luq 256(%rsi), %ymm1, %ymm11 +{vex} vpmadd52luq 288(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52luq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 224(%rcx), %ymm2, %ymm10 +{vex} vpmadd52luq 256(%rcx), %ymm2, %ymm11 +{vex} vpmadd52luq 288(%rcx), %ymm2, %ymm12 + vmovdqu YMMWORD[rsp],ymm3 + vmovdqu YMMWORD[32+rsp],ymm4 + vmovdqu YMMWORD[64+rsp],ymm5 + vmovdqu YMMWORD[96+rsp],ymm6 + vmovdqu YMMWORD[128+rsp],ymm7 + vmovdqu YMMWORD[160+rsp],ymm8 + vmovdqu YMMWORD[192+rsp],ymm9 + vmovdqu YMMWORD[224+rsp],ymm10 + vmovdqu YMMWORD[256+rsp],ymm11 + vmovdqu YMMWORD[288+rsp],ymm12 + mov QWORD[320+rsp],0 + + vmovdqu ymm3,YMMWORD[8+rsp] + vmovdqu ymm4,YMMWORD[40+rsp] + vmovdqu ymm5,YMMWORD[72+rsp] + vmovdqu ymm6,YMMWORD[104+rsp] + vmovdqu 
ymm7,YMMWORD[136+rsp] + vmovdqu ymm8,YMMWORD[168+rsp] + vmovdqu ymm9,YMMWORD[200+rsp] + vmovdqu ymm10,YMMWORD[232+rsp] + vmovdqu ymm11,YMMWORD[264+rsp] + vmovdqu ymm12,YMMWORD[296+rsp] + + add r9,QWORD[8+rsp] + +{vex} vpmadd52huq 0(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 32(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 64(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 96(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 128(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 160(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 192(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 224(%rsi), %ymm1, %ymm10 +{vex} vpmadd52huq 256(%rsi), %ymm1, %ymm11 +{vex} vpmadd52huq 288(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52huq 0(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 32(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 64(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 96(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 128(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 160(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 192(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 224(%rcx), %ymm2, %ymm10 +{vex} vpmadd52huq 256(%rcx), %ymm2, %ymm11 +{vex} vpmadd52huq 288(%rcx), %ymm2, %ymm12 + lea rsp,[328+rsp] + lea r11,[8+r11] + dec ebx + jne NEAR $L$loop40 + + push r11 + push rsi + push rcx + push r8 + + vmovq xmm0,r9 + vpbroadcastq ymm0,xmm0 + vpblendd ymm3,ymm3,ymm0,3 + + lea rsp,[((-640))+rsp] + vmovupd YMMWORD[rsp],ymm3 + vmovupd YMMWORD[32+rsp],ymm4 + vmovupd YMMWORD[64+rsp],ymm5 + vmovupd YMMWORD[96+rsp],ymm6 + vmovupd YMMWORD[128+rsp],ymm7 + vmovupd YMMWORD[160+rsp],ymm8 + vmovupd YMMWORD[192+rsp],ymm9 + vmovupd YMMWORD[224+rsp],ymm10 + vmovupd YMMWORD[256+rsp],ymm11 + vmovupd YMMWORD[288+rsp],ymm12 + + + + vpsrlq ymm3,ymm3,52 + vpsrlq ymm4,ymm4,52 + vpsrlq ymm5,ymm5,52 + vpsrlq ymm6,ymm6,52 + vpsrlq ymm7,ymm7,52 + vpsrlq ymm8,ymm8,52 + vpsrlq ymm9,ymm9,52 + vpsrlq ymm10,ymm10,52 + vpsrlq ymm11,ymm11,52 + vpsrlq ymm12,ymm12,52 + + + vpermq ymm12,ymm12,144 + vpermq ymm13,ymm11,3 + vblendpd ymm12,ymm12,ymm13,1 + + vpermq ymm11,ymm11,144 + vpermq ymm13,ymm10,3 + vblendpd ymm11,ymm11,ymm13,1 + + vpermq ymm10,ymm10,144 + vpermq ymm13,ymm9,3 + vblendpd ymm10,ymm10,ymm13,1 + + vpermq ymm9,ymm9,144 + vpermq ymm13,ymm8,3 + vblendpd ymm9,ymm9,ymm13,1 + + vpermq ymm8,ymm8,144 + vpermq ymm13,ymm7,3 + vblendpd ymm8,ymm8,ymm13,1 + + vpermq ymm7,ymm7,144 + vpermq ymm13,ymm6,3 + vblendpd ymm7,ymm7,ymm13,1 + + vpermq ymm6,ymm6,144 + vpermq ymm13,ymm5,3 + vblendpd ymm6,ymm6,ymm13,1 + + vpermq ymm5,ymm5,144 + vpermq ymm13,ymm4,3 + vblendpd ymm5,ymm5,ymm13,1 + + vpermq ymm4,ymm4,144 + vpermq ymm13,ymm3,3 + vblendpd ymm4,ymm4,ymm13,1 + + vpermq ymm3,ymm3,144 + vpand ymm3,ymm3,YMMWORD[$L$high64x3] + + vmovupd YMMWORD[320+rsp],ymm3 + vmovupd YMMWORD[352+rsp],ymm4 + vmovupd YMMWORD[384+rsp],ymm5 + vmovupd YMMWORD[416+rsp],ymm6 + vmovupd YMMWORD[448+rsp],ymm7 + vmovupd YMMWORD[480+rsp],ymm8 + vmovupd YMMWORD[512+rsp],ymm9 + vmovupd YMMWORD[544+rsp],ymm10 + vmovupd YMMWORD[576+rsp],ymm11 + vmovupd YMMWORD[608+rsp],ymm12 + + vmovupd ymm3,YMMWORD[rsp] + vmovupd ymm4,YMMWORD[32+rsp] + vmovupd ymm5,YMMWORD[64+rsp] + vmovupd ymm6,YMMWORD[96+rsp] + vmovupd ymm7,YMMWORD[128+rsp] + vmovupd ymm8,YMMWORD[160+rsp] + vmovupd ymm9,YMMWORD[192+rsp] + vmovupd ymm10,YMMWORD[224+rsp] + vmovupd ymm11,YMMWORD[256+rsp] + vmovupd ymm12,YMMWORD[288+rsp] + + + vpand ymm3,ymm3,YMMWORD[$L$mask52x4] + vpand ymm4,ymm4,YMMWORD[$L$mask52x4] + vpand ymm5,ymm5,YMMWORD[$L$mask52x4] + vpand ymm6,ymm6,YMMWORD[$L$mask52x4] + vpand ymm7,ymm7,YMMWORD[$L$mask52x4] + vpand ymm8,ymm8,YMMWORD[$L$mask52x4] + vpand ymm9,ymm9,YMMWORD[$L$mask52x4] + vpand ymm10,ymm10,YMMWORD[$L$mask52x4] 
+ vpand ymm11,ymm11,YMMWORD[$L$mask52x4] + vpand ymm12,ymm12,YMMWORD[$L$mask52x4] + + + vpaddq ymm3,ymm3,YMMWORD[320+rsp] + vpaddq ymm4,ymm4,YMMWORD[352+rsp] + vpaddq ymm5,ymm5,YMMWORD[384+rsp] + vpaddq ymm6,ymm6,YMMWORD[416+rsp] + vpaddq ymm7,ymm7,YMMWORD[448+rsp] + vpaddq ymm8,ymm8,YMMWORD[480+rsp] + vpaddq ymm9,ymm9,YMMWORD[512+rsp] + vpaddq ymm10,ymm10,YMMWORD[544+rsp] + vpaddq ymm11,ymm11,YMMWORD[576+rsp] + vpaddq ymm12,ymm12,YMMWORD[608+rsp] + + lea rsp,[640+rsp] + + + + vpcmpgtq ymm13,ymm3,YMMWORD[$L$mask52x4] + vmovmskpd r14d,ymm13 + vpcmpgtq ymm13,ymm4,YMMWORD[$L$mask52x4] + vmovmskpd r13d,ymm13 + shl r13b,4 + or r14b,r13b + + vpcmpgtq ymm13,ymm5,YMMWORD[$L$mask52x4] + vmovmskpd r13d,ymm13 + vpcmpgtq ymm13,ymm6,YMMWORD[$L$mask52x4] + vmovmskpd r12d,ymm13 + shl r12b,4 + or r13b,r12b + + vpcmpgtq ymm13,ymm7,YMMWORD[$L$mask52x4] + vmovmskpd r12d,ymm13 + vpcmpgtq ymm13,ymm8,YMMWORD[$L$mask52x4] + vmovmskpd r11d,ymm13 + shl r11b,4 + or r12b,r11b + + vpcmpgtq ymm13,ymm9,YMMWORD[$L$mask52x4] + vmovmskpd r11d,ymm13 + vpcmpgtq ymm13,ymm10,YMMWORD[$L$mask52x4] + vmovmskpd r10d,ymm13 + shl r10b,4 + or r11b,r10b + + vpcmpgtq ymm13,ymm11,YMMWORD[$L$mask52x4] + vmovmskpd r10d,ymm13 + vpcmpgtq ymm13,ymm12,YMMWORD[$L$mask52x4] + vmovmskpd r9d,ymm13 + shl r9b,4 + or r10b,r9b + + add r14b,r14b + adc r13b,r13b + adc r12b,r12b + adc r11b,r11b + adc r10b,r10b + + + vpcmpeqq ymm13,ymm3,YMMWORD[$L$mask52x4] + vmovmskpd r9d,ymm13 + vpcmpeqq ymm13,ymm4,YMMWORD[$L$mask52x4] + vmovmskpd r8d,ymm13 + shl r8b,4 + or r9b,r8b + + vpcmpeqq ymm13,ymm5,YMMWORD[$L$mask52x4] + vmovmskpd r8d,ymm13 + vpcmpeqq ymm13,ymm6,YMMWORD[$L$mask52x4] + vmovmskpd edx,ymm13 + shl dl,4 + or r8b,dl + + vpcmpeqq ymm13,ymm7,YMMWORD[$L$mask52x4] + vmovmskpd edx,ymm13 + vpcmpeqq ymm13,ymm8,YMMWORD[$L$mask52x4] + vmovmskpd ecx,ymm13 + shl cl,4 + or dl,cl + + vpcmpeqq ymm13,ymm9,YMMWORD[$L$mask52x4] + vmovmskpd ecx,ymm13 + vpcmpeqq ymm13,ymm10,YMMWORD[$L$mask52x4] + vmovmskpd ebx,ymm13 + shl bl,4 + or cl,bl + + vpcmpeqq ymm13,ymm11,YMMWORD[$L$mask52x4] + vmovmskpd ebx,ymm13 + vpcmpeqq ymm13,ymm12,YMMWORD[$L$mask52x4] + vmovmskpd eax,ymm13 + shl al,4 + or bl,al + + add r14b,r9b + adc r13b,r8b + adc r12b,dl + adc r11b,cl + adc r10b,bl + + xor r14b,r9b + xor r13b,r8b + xor r12b,dl + xor r11b,cl + xor r10b,bl + +
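+; Same kmasklut-driven carry fix-up as in the x1 routine above, applied here to the first of the two 40-limb results produced by the _x2 variant.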
push r9 + push r8 + + lea r8,[$L$kmasklut] + + mov r9b,r14b + and r14,0xf + vpsubq ymm13,ymm3,YMMWORD[$L$mask52x4] + shl r14,5 + vmovapd ymm14,YMMWORD[r14*1+r8] + vblendvpd ymm3,ymm3,ymm13,ymm14 + + shr r9b,4 + and r9,0xf + vpsubq ymm13,ymm4,YMMWORD[$L$mask52x4] + shl r9,5 + vmovapd ymm14,YMMWORD[r9*1+r8] + vblendvpd ymm4,ymm4,ymm13,ymm14 + + mov r9b,r13b + and r13,0xf + vpsubq ymm13,ymm5,YMMWORD[$L$mask52x4] + shl r13,5 + vmovapd ymm14,YMMWORD[r13*1+r8] + vblendvpd ymm5,ymm5,ymm13,ymm14 + + shr r9b,4 + and r9,0xf + vpsubq ymm13,ymm6,YMMWORD[$L$mask52x4] + shl r9,5 + vmovapd ymm14,YMMWORD[r9*1+r8] + vblendvpd ymm6,ymm6,ymm13,ymm14 + + mov r9b,r12b + and r12,0xf + vpsubq ymm13,ymm7,YMMWORD[$L$mask52x4] + shl r12,5 + vmovapd ymm14,YMMWORD[r12*1+r8] + vblendvpd ymm7,ymm7,ymm13,ymm14 + + shr r9b,4 + and r9,0xf + vpsubq ymm13,ymm8,YMMWORD[$L$mask52x4] + shl r9,5 + vmovapd ymm14,YMMWORD[r9*1+r8] + vblendvpd ymm8,ymm8,ymm13,ymm14 + + mov r9b,r11b + and r11,0xf + vpsubq ymm13,ymm9,YMMWORD[$L$mask52x4] + shl r11,5 + vmovapd ymm14,YMMWORD[r11*1+r8] + vblendvpd ymm9,ymm9,ymm13,ymm14 + + shr r9b,4 + and r9,0xf + vpsubq ymm13,ymm10,YMMWORD[$L$mask52x4] + shl r9,5 + vmovapd ymm14,YMMWORD[r9*1+r8] + vblendvpd ymm10,ymm10,ymm13,ymm14 + + mov r9b,r10b + and r10,0xf + vpsubq ymm13,ymm11,YMMWORD[$L$mask52x4] + shl r10,5 + vmovapd ymm14,YMMWORD[r10*1+r8] + vblendvpd ymm11,ymm11,ymm13,ymm14 + + shr r9b,4 + and r9,0xf + vpsubq ymm13,ymm12,YMMWORD[$L$mask52x4] + shl r9,5 + vmovapd ymm14,YMMWORD[r9*1+r8] + vblendvpd ymm12,ymm12,ymm13,ymm14 + + pop r8 + pop r9 + + vpand ymm3,ymm3,YMMWORD[$L$mask52x4] + vpand ymm4,ymm4,YMMWORD[$L$mask52x4] + vpand ymm5,ymm5,YMMWORD[$L$mask52x4] + vpand ymm6,ymm6,YMMWORD[$L$mask52x4] + vpand ymm7,ymm7,YMMWORD[$L$mask52x4] + vpand ymm8,ymm8,YMMWORD[$L$mask52x4] + vpand ymm9,ymm9,YMMWORD[$L$mask52x4] + + vpand ymm10,ymm10,YMMWORD[$L$mask52x4] + vpand ymm11,ymm11,YMMWORD[$L$mask52x4] + vpand ymm12,ymm12,YMMWORD[$L$mask52x4] + + pop r8 + pop rcx + pop rsi + pop r11 + + vmovdqu YMMWORD[rdi],ymm3 + vmovdqu YMMWORD[32+rdi],ymm4 + vmovdqu YMMWORD[64+rdi],ymm5 + vmovdqu YMMWORD[96+rdi],ymm6 + vmovdqu YMMWORD[128+rdi],ymm7 + vmovdqu YMMWORD[160+rdi],ymm8 + vmovdqu YMMWORD[192+rdi],ymm9 + vmovdqu YMMWORD[224+rdi],ymm10 + vmovdqu YMMWORD[256+rdi],ymm11 + vmovdqu YMMWORD[288+rdi],ymm12 + + xor r15d,r15d + + mov rax,0xfffffffffffff + + mov ebx,40 + + vpxor ymm0,ymm0,ymm0 + vmovapd ymm3,ymm0 + vmovapd ymm4,ymm0 + vmovapd ymm5,ymm0 + vmovapd ymm6,ymm0 + vmovapd ymm7,ymm0 + vmovapd ymm8,ymm0 + vmovapd ymm9,ymm0 + vmovapd ymm10,ymm0 + vmovapd ymm11,ymm0 + vmovapd ymm12,ymm0 +ALIGN 32 +$L$loop40_1: + mov r13,QWORD[r11] + + vpbroadcastq ymm1,QWORD[r11] + mov rdx,QWORD[320+rsi] + mulx r12,r13,r13 + add r9,r13 + mov r10,r12 + adc r10,0 + + mov r13,QWORD[8+r8] + imul r13,r9 + and r13,rax + + vmovq xmm2,r13 + vpbroadcastq ymm2,xmm2 + mov rdx,QWORD[320+rcx] + mulx r12,r13,r13 + add r9,r13 + adc r10,r12 + + shr r9,52 + sal r10,12 + or r9,r10 + + lea rsp,[((-328))+rsp] + +{vex} vpmadd52luq 320(%rsi), %ymm1, %ymm3 +{vex} vpmadd52luq 352(%rsi), %ymm1, %ymm4 +{vex} vpmadd52luq 384(%rsi), %ymm1, %ymm5 +{vex} vpmadd52luq 416(%rsi), %ymm1, %ymm6 +{vex} vpmadd52luq 448(%rsi), %ymm1, %ymm7 +{vex} vpmadd52luq 480(%rsi), %ymm1, %ymm8 +{vex} vpmadd52luq 512(%rsi), %ymm1, %ymm9 +{vex} vpmadd52luq 544(%rsi), %ymm1, %ymm10 +{vex} vpmadd52luq 576(%rsi), %ymm1, %ymm11 +{vex} vpmadd52luq 608(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52luq 320(%rcx), %ymm2, %ymm3 +{vex} vpmadd52luq 352(%rcx), %ymm2, %ymm4 +{vex} vpmadd52luq 384(%rcx), %ymm2, %ymm5 +{vex} vpmadd52luq 416(%rcx), %ymm2, %ymm6 +{vex} vpmadd52luq 448(%rcx), %ymm2, %ymm7 +{vex} vpmadd52luq 480(%rcx), %ymm2, %ymm8 +{vex} vpmadd52luq 512(%rcx), %ymm2, %ymm9 +{vex} vpmadd52luq 544(%rcx), %ymm2, %ymm10 +{vex} vpmadd52luq 576(%rcx), %ymm2, %ymm11 +{vex} vpmadd52luq 608(%rcx), %ymm2, %ymm12 + vmovdqu YMMWORD[rsp],ymm3 + vmovdqu YMMWORD[32+rsp],ymm4 + vmovdqu YMMWORD[64+rsp],ymm5 + vmovdqu YMMWORD[96+rsp],ymm6 + vmovdqu YMMWORD[128+rsp],ymm7 + vmovdqu YMMWORD[160+rsp],ymm8 + vmovdqu YMMWORD[192+rsp],ymm9 + vmovdqu YMMWORD[224+rsp],ymm10 + vmovdqu YMMWORD[256+rsp],ymm11 + vmovdqu YMMWORD[288+rsp],ymm12 + mov QWORD[320+rsp],0 + + vmovdqu ymm3,YMMWORD[8+rsp] + vmovdqu ymm4,YMMWORD[40+rsp] + vmovdqu ymm5,YMMWORD[72+rsp] + vmovdqu ymm6,YMMWORD[104+rsp] + vmovdqu ymm7,YMMWORD[136+rsp] + vmovdqu ymm8,YMMWORD[168+rsp] + vmovdqu ymm9,YMMWORD[200+rsp] + vmovdqu ymm10,YMMWORD[232+rsp] + vmovdqu ymm11,YMMWORD[264+rsp] + vmovdqu ymm12,YMMWORD[296+rsp] + + add r9,QWORD[8+rsp] + +{vex} vpmadd52huq 320(%rsi), %ymm1, %ymm3 +{vex} vpmadd52huq 352(%rsi), %ymm1, %ymm4 +{vex} vpmadd52huq 384(%rsi), %ymm1, %ymm5 +{vex} vpmadd52huq 416(%rsi), %ymm1, %ymm6 +{vex} vpmadd52huq 448(%rsi), %ymm1, %ymm7 +{vex} vpmadd52huq 480(%rsi), %ymm1, %ymm8 +{vex} vpmadd52huq 
512(%rsi), %ymm1, %ymm9 +{vex} vpmadd52huq 544(%rsi), %ymm1, %ymm10 +{vex} vpmadd52huq 576(%rsi), %ymm1, %ymm11 +{vex} vpmadd52huq 608(%rsi), %ymm1, %ymm12 + +{vex} vpmadd52huq 320(%rcx), %ymm2, %ymm3 +{vex} vpmadd52huq 352(%rcx), %ymm2, %ymm4 +{vex} vpmadd52huq 384(%rcx), %ymm2, %ymm5 +{vex} vpmadd52huq 416(%rcx), %ymm2, %ymm6 +{vex} vpmadd52huq 448(%rcx), %ymm2, %ymm7 +{vex} vpmadd52huq 480(%rcx), %ymm2, %ymm8 +{vex} vpmadd52huq 512(%rcx), %ymm2, %ymm9 +{vex} vpmadd52huq 544(%rcx), %ymm2, %ymm10 +{vex} vpmadd52huq 576(%rcx), %ymm2, %ymm11 +{vex} vpmadd52huq 608(%rcx), %ymm2, %ymm12 + lea rsp,[328+rsp] + lea r11,[8+r11] + dec ebx + jne NEAR $L$loop40_1 + + vmovq xmm0,r9 + vpbroadcastq ymm0,xmm0 + vpblendd ymm3,ymm3,ymm0,3 + + lea rsp,[((-640))+rsp] + vmovupd YMMWORD[rsp],ymm3 + vmovupd YMMWORD[32+rsp],ymm4 + vmovupd YMMWORD[64+rsp],ymm5 + vmovupd YMMWORD[96+rsp],ymm6 + vmovupd YMMWORD[128+rsp],ymm7 + vmovupd YMMWORD[160+rsp],ymm8 + vmovupd YMMWORD[192+rsp],ymm9 + vmovupd YMMWORD[224+rsp],ymm10 + vmovupd YMMWORD[256+rsp],ymm11 + vmovupd YMMWORD[288+rsp],ymm12 + + + + vpsrlq ymm3,ymm3,52 + vpsrlq ymm4,ymm4,52 + vpsrlq ymm5,ymm5,52 + vpsrlq ymm6,ymm6,52 + vpsrlq ymm7,ymm7,52 + vpsrlq ymm8,ymm8,52 + vpsrlq ymm9,ymm9,52 + vpsrlq ymm10,ymm10,52 + vpsrlq ymm11,ymm11,52 + vpsrlq ymm12,ymm12,52 + + + vpermq ymm12,ymm12,144 + vpermq ymm13,ymm11,3 + vblendpd ymm12,ymm12,ymm13,1 + + vpermq ymm11,ymm11,144 + vpermq ymm13,ymm10,3 + vblendpd ymm11,ymm11,ymm13,1 + + vpermq ymm10,ymm10,144 + vpermq ymm13,ymm9,3 + vblendpd ymm10,ymm10,ymm13,1 + + vpermq ymm9,ymm9,144 + vpermq ymm13,ymm8,3 + vblendpd ymm9,ymm9,ymm13,1 + + vpermq ymm8,ymm8,144 + vpermq ymm13,ymm7,3 + vblendpd ymm8,ymm8,ymm13,1 + + vpermq ymm7,ymm7,144 + vpermq ymm13,ymm6,3 + vblendpd ymm7,ymm7,ymm13,1 + + vpermq ymm6,ymm6,144 + vpermq ymm13,ymm5,3 + vblendpd ymm6,ymm6,ymm13,1 + + vpermq ymm5,ymm5,144 + vpermq ymm13,ymm4,3 + vblendpd ymm5,ymm5,ymm13,1 + + vpermq ymm4,ymm4,144 + vpermq ymm13,ymm3,3 + vblendpd ymm4,ymm4,ymm13,1 + + vpermq ymm3,ymm3,144 + vpand ymm3,ymm3,YMMWORD[$L$high64x3] + + vmovupd YMMWORD[320+rsp],ymm3 + vmovupd YMMWORD[352+rsp],ymm4 + vmovupd YMMWORD[384+rsp],ymm5 + vmovupd YMMWORD[416+rsp],ymm6 + vmovupd YMMWORD[448+rsp],ymm7 + vmovupd YMMWORD[480+rsp],ymm8 + vmovupd YMMWORD[512+rsp],ymm9 + vmovupd YMMWORD[544+rsp],ymm10 + vmovupd YMMWORD[576+rsp],ymm11 + vmovupd YMMWORD[608+rsp],ymm12 + + vmovupd ymm3,YMMWORD[rsp] + vmovupd ymm4,YMMWORD[32+rsp] + vmovupd ymm5,YMMWORD[64+rsp] + vmovupd ymm6,YMMWORD[96+rsp] + vmovupd ymm7,YMMWORD[128+rsp] + vmovupd ymm8,YMMWORD[160+rsp] + vmovupd ymm9,YMMWORD[192+rsp] + vmovupd ymm10,YMMWORD[224+rsp] + vmovupd ymm11,YMMWORD[256+rsp] + vmovupd ymm12,YMMWORD[288+rsp] + + + vpand ymm3,ymm3,YMMWORD[$L$mask52x4] + vpand ymm4,ymm4,YMMWORD[$L$mask52x4] + vpand ymm5,ymm5,YMMWORD[$L$mask52x4] + vpand ymm6,ymm6,YMMWORD[$L$mask52x4] + vpand ymm7,ymm7,YMMWORD[$L$mask52x4] + vpand ymm8,ymm8,YMMWORD[$L$mask52x4] + vpand ymm9,ymm9,YMMWORD[$L$mask52x4] + vpand ymm10,ymm10,YMMWORD[$L$mask52x4] + vpand ymm11,ymm11,YMMWORD[$L$mask52x4] + vpand ymm12,ymm12,YMMWORD[$L$mask52x4] + + + vpaddq ymm3,ymm3,YMMWORD[320+rsp] + vpaddq ymm4,ymm4,YMMWORD[352+rsp] + vpaddq ymm5,ymm5,YMMWORD[384+rsp] + vpaddq ymm6,ymm6,YMMWORD[416+rsp] + vpaddq ymm7,ymm7,YMMWORD[448+rsp] + vpaddq ymm8,ymm8,YMMWORD[480+rsp] + vpaddq ymm9,ymm9,YMMWORD[512+rsp] + vpaddq ymm10,ymm10,YMMWORD[544+rsp] + vpaddq ymm11,ymm11,YMMWORD[576+rsp] + vpaddq ymm12,ymm12,YMMWORD[608+rsp] + + lea rsp,[640+rsp] + + + + vpcmpgtq 
ymm13,ymm3,YMMWORD[$L$mask52x4] + vmovmskpd r14d,ymm13 + vpcmpgtq ymm13,ymm4,YMMWORD[$L$mask52x4] + vmovmskpd r13d,ymm13 + shl r13b,4 + or r14b,r13b + + vpcmpgtq ymm13,ymm5,YMMWORD[$L$mask52x4] + vmovmskpd r13d,ymm13 + vpcmpgtq ymm13,ymm6,YMMWORD[$L$mask52x4] + vmovmskpd r12d,ymm13 + shl r12b,4 + or r13b,r12b + + vpcmpgtq ymm13,ymm7,YMMWORD[$L$mask52x4] + vmovmskpd r12d,ymm13 + vpcmpgtq ymm13,ymm8,YMMWORD[$L$mask52x4] + vmovmskpd r11d,ymm13 + shl r11b,4 + or r12b,r11b + + vpcmpgtq ymm13,ymm9,YMMWORD[$L$mask52x4] + vmovmskpd r11d,ymm13 + vpcmpgtq ymm13,ymm10,YMMWORD[$L$mask52x4] + vmovmskpd r10d,ymm13 + shl r10b,4 + or r11b,r10b + + vpcmpgtq ymm13,ymm11,YMMWORD[$L$mask52x4] + vmovmskpd r10d,ymm13 + vpcmpgtq ymm13,ymm12,YMMWORD[$L$mask52x4] + vmovmskpd r9d,ymm13 + shl r9b,4 + or r10b,r9b + + add r14b,r14b + adc r13b,r13b + adc r12b,r12b + adc r11b,r11b + adc r10b,r10b + + + vpcmpeqq ymm13,ymm3,YMMWORD[$L$mask52x4] + vmovmskpd r9d,ymm13 + vpcmpeqq ymm13,ymm4,YMMWORD[$L$mask52x4] + vmovmskpd r8d,ymm13 + shl r8b,4 + or r9b,r8b + + vpcmpeqq ymm13,ymm5,YMMWORD[$L$mask52x4] + vmovmskpd r8d,ymm13 + vpcmpeqq ymm13,ymm6,YMMWORD[$L$mask52x4] + vmovmskpd edx,ymm13 + shl dl,4 + or r8b,dl + + vpcmpeqq ymm13,ymm7,YMMWORD[$L$mask52x4] + vmovmskpd edx,ymm13 + vpcmpeqq ymm13,ymm8,YMMWORD[$L$mask52x4] + vmovmskpd ecx,ymm13 + shl cl,4 + or dl,cl + + vpcmpeqq ymm13,ymm9,YMMWORD[$L$mask52x4] + vmovmskpd ecx,ymm13 + vpcmpeqq ymm13,ymm10,YMMWORD[$L$mask52x4] + vmovmskpd ebx,ymm13 + shl bl,4 + or cl,bl + + vpcmpeqq ymm13,ymm11,YMMWORD[$L$mask52x4] + vmovmskpd ebx,ymm13 + vpcmpeqq ymm13,ymm12,YMMWORD[$L$mask52x4] + vmovmskpd eax,ymm13 + shl al,4 + or bl,al + + add r14b,r9b + adc r13b,r8b + adc r12b,dl + adc r11b,cl + adc r10b,bl + + xor r14b,r9b + xor r13b,r8b + xor r12b,dl + xor r11b,cl + xor r10b,bl + + push r9 + push r8 + + lea r8,[$L$kmasklut] + + mov r9b,r14b + and r14,0xf + vpsubq ymm13,ymm3,YMMWORD[$L$mask52x4] + shl r14,5 + vmovapd ymm14,YMMWORD[r14*1+r8] + vblendvpd ymm3,ymm3,ymm13,ymm14 + + shr r9b,4 + and r9,0xf + vpsubq ymm13,ymm4,YMMWORD[$L$mask52x4] + shl r9,5 + vmovapd ymm14,YMMWORD[r9*1+r8] + vblendvpd ymm4,ymm4,ymm13,ymm14 + + mov r9b,r13b + and r13,0xf + vpsubq ymm13,ymm5,YMMWORD[$L$mask52x4] + shl r13,5 + vmovapd ymm14,YMMWORD[r13*1+r8] + vblendvpd ymm5,ymm5,ymm13,ymm14 + + shr r9b,4 + and r9,0xf + vpsubq ymm13,ymm6,YMMWORD[$L$mask52x4] + shl r9,5 + vmovapd ymm14,YMMWORD[r9*1+r8] + vblendvpd ymm6,ymm6,ymm13,ymm14 + + mov r9b,r12b + and r12,0xf + vpsubq ymm13,ymm7,YMMWORD[$L$mask52x4] + shl r12,5 + vmovapd ymm14,YMMWORD[r12*1+r8] + vblendvpd ymm7,ymm7,ymm13,ymm14 + + shr r9b,4 + and r9,0xf + vpsubq ymm13,ymm8,YMMWORD[$L$mask52x4] + shl r9,5 + vmovapd ymm14,YMMWORD[r9*1+r8] + vblendvpd ymm8,ymm8,ymm13,ymm14 + + mov r9b,r11b + and r11,0xf + vpsubq ymm13,ymm9,YMMWORD[$L$mask52x4] + shl r11,5 + vmovapd ymm14,YMMWORD[r11*1+r8] + vblendvpd ymm9,ymm9,ymm13,ymm14 + + shr r9b,4 + and r9,0xf + vpsubq ymm13,ymm10,YMMWORD[$L$mask52x4] + shl r9,5 + vmovapd ymm14,YMMWORD[r9*1+r8] + vblendvpd ymm10,ymm10,ymm13,ymm14 + + mov r9b,r10b + and r10,0xf + vpsubq ymm13,ymm11,YMMWORD[$L$mask52x4] + shl r10,5 + vmovapd ymm14,YMMWORD[r10*1+r8] + vblendvpd ymm11,ymm11,ymm13,ymm14 + + shr r9b,4 + and r9,0xf + vpsubq ymm13,ymm12,YMMWORD[$L$mask52x4] + shl r9,5 + vmovapd ymm14,YMMWORD[r9*1+r8] + vblendvpd ymm12,ymm12,ymm13,ymm14 + + pop r8 + pop r9 + + vpand ymm3,ymm3,YMMWORD[$L$mask52x4] + vpand ymm4,ymm4,YMMWORD[$L$mask52x4] + vpand ymm5,ymm5,YMMWORD[$L$mask52x4] + vpand ymm6,ymm6,YMMWORD[$L$mask52x4] + vpand ymm7,ymm7,YMMWORD[$L$mask52x4] + vpand ymm8,ymm8,YMMWORD[$L$mask52x4] + 
vpand ymm9,ymm9,YMMWORD[$L$mask52x4] + + vpand ymm10,ymm10,YMMWORD[$L$mask52x4] + vpand ymm11,ymm11,YMMWORD[$L$mask52x4] + vpand ymm12,ymm12,YMMWORD[$L$mask52x4] + + vmovdqu YMMWORD[320+rdi],ymm3 + vmovdqu YMMWORD[352+rdi],ymm4 + vmovdqu YMMWORD[384+rdi],ymm5 + vmovdqu YMMWORD[416+rdi],ymm6 + vmovdqu YMMWORD[448+rdi],ymm7 + vmovdqu YMMWORD[480+rdi],ymm8 + vmovdqu YMMWORD[512+rdi],ymm9 + vmovdqu YMMWORD[544+rdi],ymm10 + vmovdqu YMMWORD[576+rdi],ymm11 + vmovdqu YMMWORD[608+rdi],ymm12 + + vzeroupper + lea rax,[rsp] + + vmovapd xmm6,XMMWORD[rax] + vmovapd xmm7,XMMWORD[16+rax] + vmovapd xmm8,XMMWORD[32+rax] + vmovapd xmm9,XMMWORD[48+rax] + vmovapd xmm10,XMMWORD[64+rax] + vmovapd xmm11,XMMWORD[80+rax] + vmovapd xmm12,XMMWORD[96+rax] + vmovapd xmm13,XMMWORD[112+rax] + vmovapd xmm14,XMMWORD[128+rax] + vmovapd xmm15,XMMWORD[144+rax] + lea rax,[168+rsp] + mov r15,QWORD[rax] + + mov r14,QWORD[8+rax] + + mov r13,QWORD[16+rax] + + mov r12,QWORD[24+rax] + + mov rbp,QWORD[32+rax] + + mov rbx,QWORD[40+rax] + + lea rsp,[48+rax] + +$L$ossl_rsaz_amm52x40_x2_avxifma256_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_ossl_rsaz_amm52x40_x2_avxifma256: +section .text code align=64 + + +ALIGN 32 +global ossl_extract_multiplier_2x40_win5_avx + +ossl_extract_multiplier_2x40_win5_avx: + +DB 243,15,30,250 + vmovapd ymm14,YMMWORD[$L$ones] + vmovq xmm10,r8 + vpbroadcastq ymm12,xmm10 + vmovq xmm10,r9 + vpbroadcastq ymm13,xmm10 + lea rax,[20480+rdx] + + + mov r10,rdx + + + vpxor xmm0,xmm0,xmm0 + vmovapd ymm1,ymm0 + vmovapd ymm2,ymm0 + vmovapd ymm3,ymm0 + vmovapd ymm4,ymm0 + vmovapd ymm5,ymm0 + vmovapd ymm6,ymm0 + vmovapd ymm7,ymm0 + vmovapd ymm8,ymm0 + vmovapd ymm9,ymm0 + vpxor ymm11,ymm11,ymm11 +ALIGN 32 +$L$loop_0: + vpcmpeqq ymm15,ymm12,ymm11 + vmovdqu ymm10,YMMWORD[rdx] + + vblendvpd ymm0,ymm0,ymm10,ymm15 + vmovdqu ymm10,YMMWORD[32+rdx] + + vblendvpd ymm1,ymm1,ymm10,ymm15 + vmovdqu ymm10,YMMWORD[64+rdx] + + vblendvpd ymm2,ymm2,ymm10,ymm15 + vmovdqu ymm10,YMMWORD[96+rdx] + + vblendvpd ymm3,ymm3,ymm10,ymm15 + vmovdqu ymm10,YMMWORD[128+rdx] + + vblendvpd ymm4,ymm4,ymm10,ymm15 + vmovdqu ymm10,YMMWORD[160+rdx] + + vblendvpd ymm5,ymm5,ymm10,ymm15 + vmovdqu ymm10,YMMWORD[192+rdx] + + vblendvpd ymm6,ymm6,ymm10,ymm15 + vmovdqu ymm10,YMMWORD[224+rdx] + + vblendvpd ymm7,ymm7,ymm10,ymm15 + vmovdqu ymm10,YMMWORD[256+rdx] + + vblendvpd ymm8,ymm8,ymm10,ymm15 + vmovdqu ymm10,YMMWORD[288+rdx] + + vblendvpd ymm9,ymm9,ymm10,ymm15 + vpaddq ymm11,ymm11,ymm14 + add rdx,640 + cmp rax,rdx + jne NEAR $L$loop_0 + vmovdqu YMMWORD[rcx],ymm0 + vmovdqu YMMWORD[32+rcx],ymm1 + vmovdqu YMMWORD[64+rcx],ymm2 + vmovdqu YMMWORD[96+rcx],ymm3 + vmovdqu YMMWORD[128+rcx],ymm4 + vmovdqu YMMWORD[160+rcx],ymm5 + vmovdqu YMMWORD[192+rcx],ymm6 + vmovdqu YMMWORD[224+rcx],ymm7 + vmovdqu YMMWORD[256+rcx],ymm8 + vmovdqu YMMWORD[288+rcx],ymm9 + mov rdx,r10 + vpxor ymm11,ymm11,ymm11 +ALIGN 32 +$L$loop_320: + vpcmpeqq ymm15,ymm13,ymm11 + vmovdqu ymm10,YMMWORD[320+rdx] + + vblendvpd ymm0,ymm0,ymm10,ymm15 + vmovdqu ymm10,YMMWORD[352+rdx] + + vblendvpd ymm1,ymm1,ymm10,ymm15 + vmovdqu ymm10,YMMWORD[384+rdx] + + vblendvpd ymm2,ymm2,ymm10,ymm15 + vmovdqu ymm10,YMMWORD[416+rdx] + + vblendvpd ymm3,ymm3,ymm10,ymm15 + vmovdqu ymm10,YMMWORD[448+rdx] + + vblendvpd ymm4,ymm4,ymm10,ymm15 + vmovdqu ymm10,YMMWORD[480+rdx] + + vblendvpd ymm5,ymm5,ymm10,ymm15 + vmovdqu ymm10,YMMWORD[512+rdx] + + vblendvpd ymm6,ymm6,ymm10,ymm15 + vmovdqu ymm10,YMMWORD[544+rdx] + + vblendvpd ymm7,ymm7,ymm10,ymm15 + vmovdqu 
ymm10,YMMWORD[576+rdx] + + vblendvpd ymm8,ymm8,ymm10,ymm15 + vmovdqu ymm10,YMMWORD[608+rdx] + + vblendvpd ymm9,ymm9,ymm10,ymm15 + vpaddq ymm11,ymm11,ymm14 + add rdx,640 + cmp rax,rdx + jne NEAR $L$loop_320 + vmovdqu YMMWORD[320+rcx],ymm0 + vmovdqu YMMWORD[352+rcx],ymm1 + vmovdqu YMMWORD[384+rcx],ymm2 + vmovdqu YMMWORD[416+rcx],ymm3 + vmovdqu YMMWORD[448+rcx],ymm4 + vmovdqu YMMWORD[480+rcx],ymm5 + vmovdqu YMMWORD[512+rcx],ymm6 + vmovdqu YMMWORD[544+rcx],ymm7 + vmovdqu YMMWORD[576+rcx],ymm8 + vmovdqu YMMWORD[608+rcx],ymm9 + + DB 0F3h,0C3h ;repret + + +section .rdata rdata align=32 +ALIGN 32 +$L$ones: + DQ 1,1,1,1 +$L$zeros: + DQ 0,0,0,0 +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +rsaz_avx_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + mov rax,QWORD[152+r8] + + lea rsi,[rax] + lea rdi,[512+r8] + mov ecx,20 + DD 0xa548f3fc + + lea rax,[216+rax] + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov r15,QWORD[((-48))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + +$L$common_seh_tail: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_ossl_rsaz_amm52x40_x1_avxifma256 wrt ..imagebase + DD $L$SEH_end_ossl_rsaz_amm52x40_x1_avxifma256 wrt ..imagebase + DD $L$SEH_info_ossl_rsaz_amm52x40_x1_avxifma256 wrt ..imagebase + + DD $L$SEH_begin_ossl_rsaz_amm52x40_x2_avxifma256 wrt ..imagebase + DD $L$SEH_end_ossl_rsaz_amm52x40_x2_avxifma256 wrt ..imagebase + DD $L$SEH_info_ossl_rsaz_amm52x40_x2_avxifma256 wrt ..imagebase + +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_ossl_rsaz_amm52x40_x1_avxifma256: +DB 9,0,0,0 + DD rsaz_avx_handler wrt ..imagebase + DD $L$ossl_rsaz_amm52x40_x1_avxifma256_body wrt ..imagebase,$L$ossl_rsaz_amm52x40_x1_avxifma256_epilogue wrt ..imagebase +$L$SEH_info_ossl_rsaz_amm52x40_x2_avxifma256: +DB 9,0,0,0 + DD rsaz_avx_handler wrt ..imagebase + DD $L$ossl_rsaz_amm52x40_x2_avxifma256_body wrt ..imagebase,$L$ossl_rsaz_amm52x40_x2_avxifma256_epilogue wrt ..imagebase diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/md5/md5-x86_64.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/md5/md5-x86_64.nasm index 9139d4c44a..f71708242f 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/md5/md5-x86_64.nasm +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/md5/md5-x86_64.nasm @@ -210,7 +210,7 @@ $L$loop: lea eax,[((-165796510))+r10*1+rax] and r11d,ecx mov r10d,DWORD[24+rsi] - 
or r12d,r11d + add eax,r11d mov r11d,ecx add eax,r12d mov r12d,ecx @@ -221,7 +221,7 @@ $L$loop: lea edx,[((-1069501632))+r10*1+rdx] and r11d,ebx mov r10d,DWORD[44+rsi] - or r12d,r11d + add edx,r11d mov r11d,ebx add edx,r12d mov r12d,ebx @@ -232,7 +232,7 @@ $L$loop: lea ecx,[643717713+r10*1+rcx] and r11d,eax mov r10d,DWORD[rsi] - or r12d,r11d + add ecx,r11d mov r11d,eax add ecx,r12d mov r12d,eax @@ -243,7 +243,7 @@ $L$loop: lea ebx,[((-373897302))+r10*1+rbx] and r11d,edx mov r10d,DWORD[20+rsi] - or r12d,r11d + add ebx,r11d mov r11d,edx add ebx,r12d mov r12d,edx @@ -254,7 +254,7 @@ $L$loop: lea eax,[((-701558691))+r10*1+rax] and r11d,ecx mov r10d,DWORD[40+rsi] - or r12d,r11d + add eax,r11d mov r11d,ecx add eax,r12d mov r12d,ecx @@ -265,7 +265,7 @@ $L$loop: lea edx,[38016083+r10*1+rdx] and r11d,ebx mov r10d,DWORD[60+rsi] - or r12d,r11d + add edx,r11d mov r11d,ebx add edx,r12d mov r12d,ebx @@ -276,7 +276,7 @@ $L$loop: lea ecx,[((-660478335))+r10*1+rcx] and r11d,eax mov r10d,DWORD[16+rsi] - or r12d,r11d + add ecx,r11d mov r11d,eax add ecx,r12d mov r12d,eax @@ -287,7 +287,7 @@ $L$loop: lea ebx,[((-405537848))+r10*1+rbx] and r11d,edx mov r10d,DWORD[36+rsi] - or r12d,r11d + add ebx,r11d mov r11d,edx add ebx,r12d mov r12d,edx @@ -298,7 +298,7 @@ $L$loop: lea eax,[568446438+r10*1+rax] and r11d,ecx mov r10d,DWORD[56+rsi] - or r12d,r11d + add eax,r11d mov r11d,ecx add eax,r12d mov r12d,ecx @@ -309,7 +309,7 @@ $L$loop: lea edx,[((-1019803690))+r10*1+rdx] and r11d,ebx mov r10d,DWORD[12+rsi] - or r12d,r11d + add edx,r11d mov r11d,ebx add edx,r12d mov r12d,ebx @@ -320,7 +320,7 @@ $L$loop: lea ecx,[((-187363961))+r10*1+rcx] and r11d,eax mov r10d,DWORD[32+rsi] - or r12d,r11d + add ecx,r11d mov r11d,eax add ecx,r12d mov r12d,eax @@ -331,7 +331,7 @@ $L$loop: lea ebx,[1163531501+r10*1+rbx] and r11d,edx mov r10d,DWORD[52+rsi] - or r12d,r11d + add ebx,r11d mov r11d,edx add ebx,r12d mov r12d,edx @@ -342,7 +342,7 @@ $L$loop: lea eax,[((-1444681467))+r10*1+rax] and r11d,ecx mov r10d,DWORD[8+rsi] - or r12d,r11d + add eax,r11d mov r11d,ecx add eax,r12d mov r12d,ecx @@ -353,7 +353,7 @@ $L$loop: lea edx,[((-51403784))+r10*1+rdx] and r11d,ebx mov r10d,DWORD[28+rsi] - or r12d,r11d + add edx,r11d mov r11d,ebx add edx,r12d mov r12d,ebx @@ -364,7 +364,7 @@ $L$loop: lea ecx,[1735328473+r10*1+rcx] and r11d,eax mov r10d,DWORD[48+rsi] - or r12d,r11d + add ecx,r11d mov r11d,eax add ecx,r12d mov r12d,eax @@ -375,7 +375,7 @@ $L$loop: lea ebx,[((-1926607734))+r10*1+rbx] and r11d,edx mov r10d,DWORD[20+rsi] - or r12d,r11d + add ebx,r11d mov r11d,edx add ebx,r12d mov r12d,edx diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/x86_64cpuid.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/x86_64cpuid.nasm index 2ce22321d7..b58086af0a 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/x86_64cpuid.nasm +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/x86_64cpuid.nasm @@ -2,14 +2,14 @@ default rel %define XMMWORD %define YMMWORD %define ZMMWORD + EXTERN OPENSSL_cpuid_setup section .CRT$XCU rdata align=8 DQ OPENSSL_cpuid_setup -common OPENSSL_ia32cap_P 16 - +common OPENSSL_ia32cap_P 40 section .text code align=64 @@ -175,6 +175,7 @@ $L$generic: mov eax,7 xor ecx,ecx cpuid + movd xmm1,eax bt r9d,26 jc NEAR $L$notknights and ebx,0xfff7ffff @@ -185,9 +186,31 @@ $L$notknights: jne NEAR $L$notskylakex and ebx,0xfffeffff + $L$notskylakex: mov DWORD[8+rdi],ebx mov DWORD[12+rdi],ecx + mov DWORD[16+rdi],edx + + movd eax,xmm1 + cmp eax,0x1 + jb NEAR $L$no_extended_info + mov eax,0x7 + mov 
ecx,0x1 + cpuid + mov DWORD[20+rdi],eax + mov DWORD[24+rdi],edx + mov DWORD[28+rdi],ebx + mov DWORD[32+rdi],ecx + + and edx,0x80000 + cmp edx,0x0 + je NEAR $L$no_extended_info + mov eax,0x24 + mov ecx,0x0 + cpuid + mov DWORD[36+rdi],ebx + $L$no_extended_info: bt r9d,27 @@ -206,6 +229,9 @@ DB 0x0f,0x01,0xd0 cmp eax,6 je NEAR $L$done $L$clear_avx: + and DWORD[20+rdi],0xff7fffff + + mov eax,0xefffe7ff and r9d,eax mov eax,0x3fdeffdf diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/crypto/params_idx.c b/CryptoPkg/Library/OpensslLib/OpensslGen/crypto/params_idx.c index fd9b9ae658..e77a242602 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/crypto/params_idx.c +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/crypto/params_idx.c @@ -33,6 +33,10 @@ int ossl_param_find_pidx(const char *s) switch(s[2]) { default: break; + case 'd': + if (strcmp("itional-random", s + 3) == 0) + return PIDX_SIGNATURE_PARAM_ADD_RANDOM; + break; case '\0': return PIDX_KDF_PARAM_ARGON2_AD; } @@ -322,6 +326,10 @@ int ossl_param_find_pidx(const char *s) case 's': if (strcmp("c", s + 3) == 0) return PIDX_OBJECT_PARAM_DESC; + break; + case 't': + if (strcmp("erministic", s + 3) == 0) + return PIDX_SIGNATURE_PARAM_DETERMINISTIC; } break; case 'h': @@ -936,8 +944,17 @@ int ossl_param_find_pidx(const char *s) default: break; case '-': - if (strcmp("check", s + 4) == 0) - return PIDX_PKEY_PARAM_FIPS_KEY_CHECK; + switch(s[4]) { + default: + break; + case 'c': + if (strcmp("heck", s + 5) == 0) + return PIDX_PKEY_PARAM_FIPS_KEY_CHECK; + break; + case 'l': + if (strcmp("ength", s + 5) == 0) + return PIDX_SKEY_PARAM_KEY_LENGTH; + } break; case 'b': if (strcmp("its", s + 4) == 0) @@ -1060,8 +1077,17 @@ int ossl_param_find_pidx(const char *s) } break; case 'e': - if (strcmp("mcost", s + 2) == 0) - return PIDX_KDF_PARAM_ARGON2_MEMCOST; + switch(s[2]) { + default: + break; + case 'm': + if (strcmp("cost", s + 3) == 0) + return PIDX_KDF_PARAM_ARGON2_MEMCOST; + break; + case 's': + if (strcmp("sage-encoding", s + 3) == 0) + return PIDX_SIGNATURE_PARAM_MESSAGE_ENCODING; + } break; case 'g': switch(s[2]) { @@ -1125,6 +1151,97 @@ int ossl_param_find_pidx(const char *s) } } break; + case 'l': + switch(s[2]) { + default: + break; + case '-': + switch(s[3]) { + default: + break; + case 'd': + switch(s[4]) { + default: + break; + case 's': + switch(s[5]) { + default: + break; + case 'a': + switch(s[6]) { + default: + break; + case '.': + switch(s[7]) { + default: + break; + case 'i': + if (strcmp("nput_formats", s + 8) == 0) + return PIDX_PKEY_PARAM_ML_DSA_INPUT_FORMATS; + break; + case 'o': + if (strcmp("utput_formats", s + 8) == 0) + return PIDX_PKEY_PARAM_ML_DSA_OUTPUT_FORMATS; + break; + case 'p': + if (strcmp("refer_seed", s + 8) == 0) + return PIDX_PKEY_PARAM_ML_DSA_PREFER_SEED; + break; + case 'r': + if (strcmp("etain_seed", s + 8) == 0) + return PIDX_PKEY_PARAM_ML_DSA_RETAIN_SEED; + } + } + } + } + break; + case 'k': + switch(s[4]) { + default: + break; + case 'e': + switch(s[5]) { + default: + break; + case 'm': + switch(s[6]) { + default: + break; + case '.': + switch(s[7]) { + default: + break; + case 'i': + switch(s[8]) { + default: + break; + case 'm': + if (strcmp("port_pct_type", s + 9) == 0) + return PIDX_PKEY_PARAM_ML_KEM_IMPORT_PCT_TYPE; + break; + case 'n': + if (strcmp("put_formats", s + 9) == 0) + return PIDX_PKEY_PARAM_ML_KEM_INPUT_FORMATS; + } + break; + case 'o': + if (strcmp("utput_formats", s + 8) == 0) + return PIDX_PKEY_PARAM_ML_KEM_OUTPUT_FORMATS; + break; + case 'p': + if (strcmp("refer_seed", s + 8) == 0) + 
return PIDX_PKEY_PARAM_ML_KEM_PREFER_SEED; + break; + case 'r': + if (strcmp("etain_seed", s + 8) == 0) + return PIDX_PKEY_PARAM_ML_KEM_RETAIN_SEED; + } + } + } + } + } + } + break; case 'o': switch(s[2]) { default: @@ -1147,6 +1264,14 @@ int ossl_param_find_pidx(const char *s) } } break; + case 'u': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_SIGNATURE_PARAM_MU; + } + break; case '\0': return PIDX_PKEY_PARAM_EC_CHAR2_M; } @@ -1327,6 +1452,10 @@ int ossl_param_find_pidx(const char *s) if (strcmp("ounter", s + 2) == 0) return PIDX_PKEY_PARAM_FFC_PCOUNTER; break; + case 'i': + if (strcmp("peline-tag", s + 2) == 0) + return PIDX_CIPHER_PARAM_PIPELINE_AEAD_TAG; + break; case 'k': if (strcmp("cs5", s + 2) == 0) return PIDX_KDF_PARAM_PKCS5; @@ -1473,6 +1602,10 @@ int ossl_param_find_pidx(const char *s) return PIDX_DRBG_PARAM_RANDOM_DATA; } } + break; + case 'w': + if (strcmp("-bytes", s + 3) == 0) + return PIDX_SKEY_PARAM_RAW_BYTES; } break; case 'e': @@ -2064,7 +2197,7 @@ int ossl_param_find_pidx(const char *s) break; case 'e': if (strcmp("d", s + 3) == 0) - return PIDX_PKEY_PARAM_FFC_SEED; + return PIDX_PKEY_PARAM_SLH_DSA_SEED; break; case 'r': if (strcmp("ial", s + 3) == 0) @@ -2350,6 +2483,10 @@ int ossl_param_find_pidx(const char *s) switch(s[4]) { default: break; + case '-': + if (strcmp("entropy", s + 5) == 0) + return PIDX_SIGNATURE_PARAM_TEST_ENTROPY; + break; case '_': switch(s[5]) { default: @@ -2502,7 +2639,7 @@ int ossl_param_find_pidx(const char *s) break; case 'd': if (strcmp("tls", s + 9) == 0) - return PIDX_CAPABILITY_TLS_GROUP_MAX_DTLS; + return PIDX_CAPABILITY_TLS_SIGALG_MAX_DTLS; break; case 't': if (strcmp("ls", s + 9) == 0) @@ -2525,7 +2662,7 @@ int ossl_param_find_pidx(const char *s) break; case 'd': if (strcmp("tls", s + 9) == 0) - return PIDX_CAPABILITY_TLS_GROUP_MIN_DTLS; + return PIDX_CAPABILITY_TLS_SIGALG_MIN_DTLS; break; case 't': if (strcmp("ls", s + 9) == 0) diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/internal/param_names.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/internal/param_names.h index 27bcea8137..10e995f20c 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/internal/param_names.h +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/internal/param_names.h @@ -14,7 +14,7 @@ int ossl_param_find_pidx(const char *s); /* Parameter name definitions - generated by util/perl/OpenSSL/paramnames.pm */ -#define NUM_PIDX 329 +#define NUM_PIDX 346 #define PIDX_ALG_PARAM_ALGORITHM_ID 0 #define PIDX_ALG_PARAM_ALGORITHM_ID_PARAMS 1 @@ -55,7 +55,9 @@ int ossl_param_find_pidx(const char *s); #define PIDX_CAPABILITY_TLS_SIGALG_IANA_NAME 26 #define PIDX_CAPABILITY_TLS_SIGALG_KEYTYPE 27 #define PIDX_CAPABILITY_TLS_SIGALG_KEYTYPE_OID 28 +#define PIDX_CAPABILITY_TLS_SIGALG_MAX_DTLS 16 #define PIDX_CAPABILITY_TLS_SIGALG_MAX_TLS 17 +#define PIDX_CAPABILITY_TLS_SIGALG_MIN_DTLS 18 #define PIDX_CAPABILITY_TLS_SIGALG_MIN_TLS 19 #define PIDX_CAPABILITY_TLS_SIGALG_NAME 29 #define PIDX_CAPABILITY_TLS_SIGALG_OID 30 @@ -90,357 +92,378 @@ int ossl_param_find_pidx(const char *s); #define PIDX_CIPHER_PARAM_MODE 55 #define PIDX_CIPHER_PARAM_NUM 56 #define PIDX_CIPHER_PARAM_PADDING 57 -#define PIDX_CIPHER_PARAM_RANDOM_KEY 58 -#define PIDX_CIPHER_PARAM_RC2_KEYBITS 59 -#define PIDX_CIPHER_PARAM_ROUNDS 60 -#define PIDX_CIPHER_PARAM_SPEED 61 -#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK 62 -#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD 63 -#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD_PACKLEN 64 -#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC 65 
-#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_IN 66 -#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_LEN 67 -#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_INTERLEAVE 68 -#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_BUFSIZE 69 -#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_SEND_FRAGMENT 70 -#define PIDX_CIPHER_PARAM_TLS_MAC 71 -#define PIDX_CIPHER_PARAM_TLS_MAC_SIZE 72 -#define PIDX_CIPHER_PARAM_TLS_VERSION 73 -#define PIDX_CIPHER_PARAM_UPDATED_IV 74 -#define PIDX_CIPHER_PARAM_USE_BITS 75 -#define PIDX_CIPHER_PARAM_XTS_STANDARD 76 +#define PIDX_CIPHER_PARAM_PIPELINE_AEAD_TAG 58 +#define PIDX_CIPHER_PARAM_RANDOM_KEY 59 +#define PIDX_CIPHER_PARAM_RC2_KEYBITS 60 +#define PIDX_CIPHER_PARAM_ROUNDS 61 +#define PIDX_CIPHER_PARAM_SPEED 62 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK 63 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD 64 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD_PACKLEN 65 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC 66 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_IN 67 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_LEN 68 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_INTERLEAVE 69 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_BUFSIZE 70 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_SEND_FRAGMENT 71 +#define PIDX_CIPHER_PARAM_TLS_MAC 72 +#define PIDX_CIPHER_PARAM_TLS_MAC_SIZE 73 +#define PIDX_CIPHER_PARAM_TLS_VERSION 74 +#define PIDX_CIPHER_PARAM_UPDATED_IV 75 +#define PIDX_CIPHER_PARAM_USE_BITS 76 +#define PIDX_CIPHER_PARAM_XTS_STANDARD 77 #define PIDX_DECODER_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES -#define PIDX_DIGEST_PARAM_ALGID_ABSENT 77 +#define PIDX_DIGEST_PARAM_ALGID_ABSENT 78 #define PIDX_DIGEST_PARAM_BLOCK_SIZE 45 -#define PIDX_DIGEST_PARAM_MICALG 78 -#define PIDX_DIGEST_PARAM_PAD_TYPE 79 -#define PIDX_DIGEST_PARAM_SIZE 80 -#define PIDX_DIGEST_PARAM_SSL3_MS 81 -#define PIDX_DIGEST_PARAM_XOF 82 -#define PIDX_DIGEST_PARAM_XOFLEN 83 +#define PIDX_DIGEST_PARAM_MICALG 79 +#define PIDX_DIGEST_PARAM_PAD_TYPE 80 +#define PIDX_DIGEST_PARAM_SIZE 81 +#define PIDX_DIGEST_PARAM_SSL3_MS 82 +#define PIDX_DIGEST_PARAM_XOF 83 +#define PIDX_DIGEST_PARAM_XOFLEN 84 #define PIDX_DRBG_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER #define PIDX_DRBG_PARAM_DIGEST PIDX_ALG_PARAM_DIGEST -#define PIDX_DRBG_PARAM_ENTROPY_REQUIRED 84 +#define PIDX_DRBG_PARAM_ENTROPY_REQUIRED 85 #define PIDX_DRBG_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR #define PIDX_DRBG_PARAM_FIPS_DIGEST_CHECK PIDX_PKEY_PARAM_FIPS_DIGEST_CHECK #define PIDX_DRBG_PARAM_MAC PIDX_ALG_PARAM_MAC -#define PIDX_DRBG_PARAM_MAX_ADINLEN 85 -#define PIDX_DRBG_PARAM_MAX_ENTROPYLEN 86 -#define PIDX_DRBG_PARAM_MAX_LENGTH 87 -#define PIDX_DRBG_PARAM_MAX_NONCELEN 88 -#define PIDX_DRBG_PARAM_MAX_PERSLEN 89 -#define PIDX_DRBG_PARAM_MIN_ENTROPYLEN 90 -#define PIDX_DRBG_PARAM_MIN_LENGTH 91 -#define PIDX_DRBG_PARAM_MIN_NONCELEN 92 -#define PIDX_DRBG_PARAM_PREDICTION_RESISTANCE 93 +#define PIDX_DRBG_PARAM_MAX_ADINLEN 86 +#define PIDX_DRBG_PARAM_MAX_ENTROPYLEN 87 +#define PIDX_DRBG_PARAM_MAX_LENGTH 88 +#define PIDX_DRBG_PARAM_MAX_NONCELEN 89 +#define PIDX_DRBG_PARAM_MAX_PERSLEN 90 +#define PIDX_DRBG_PARAM_MIN_ENTROPYLEN 91 +#define PIDX_DRBG_PARAM_MIN_LENGTH 92 +#define PIDX_DRBG_PARAM_MIN_NONCELEN 93 +#define PIDX_DRBG_PARAM_PREDICTION_RESISTANCE 94 #define PIDX_DRBG_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES -#define PIDX_DRBG_PARAM_RANDOM_DATA 94 -#define PIDX_DRBG_PARAM_RESEED_COUNTER 95 -#define PIDX_DRBG_PARAM_RESEED_REQUESTS 96 -#define PIDX_DRBG_PARAM_RESEED_TIME 97 -#define PIDX_DRBG_PARAM_RESEED_TIME_INTERVAL 98 -#define 
PIDX_DRBG_PARAM_SIZE 80 -#define PIDX_DRBG_PARAM_USE_DF 99 +#define PIDX_DRBG_PARAM_RANDOM_DATA 95 +#define PIDX_DRBG_PARAM_RESEED_COUNTER 96 +#define PIDX_DRBG_PARAM_RESEED_REQUESTS 97 +#define PIDX_DRBG_PARAM_RESEED_TIME 98 +#define PIDX_DRBG_PARAM_RESEED_TIME_INTERVAL 99 +#define PIDX_DRBG_PARAM_SIZE 81 +#define PIDX_DRBG_PARAM_USE_DF 100 #define PIDX_ENCODER_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER -#define PIDX_ENCODER_PARAM_ENCRYPT_LEVEL 100 +#define PIDX_ENCODER_PARAM_ENCRYPT_LEVEL 101 #define PIDX_ENCODER_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES -#define PIDX_ENCODER_PARAM_SAVE_PARAMETERS 101 -#define PIDX_EXCHANGE_PARAM_EC_ECDH_COFACTOR_MODE 102 +#define PIDX_ENCODER_PARAM_SAVE_PARAMETERS 102 +#define PIDX_EXCHANGE_PARAM_EC_ECDH_COFACTOR_MODE 103 #define PIDX_EXCHANGE_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR #define PIDX_EXCHANGE_PARAM_FIPS_DIGEST_CHECK PIDX_PKEY_PARAM_FIPS_DIGEST_CHECK #define PIDX_EXCHANGE_PARAM_FIPS_ECDH_COFACTOR_CHECK PIDX_PROV_PARAM_ECDH_COFACTOR_CHECK #define PIDX_EXCHANGE_PARAM_FIPS_KEY_CHECK PIDX_PKEY_PARAM_FIPS_KEY_CHECK -#define PIDX_EXCHANGE_PARAM_KDF_DIGEST 103 -#define PIDX_EXCHANGE_PARAM_KDF_DIGEST_PROPS 104 -#define PIDX_EXCHANGE_PARAM_KDF_OUTLEN 105 -#define PIDX_EXCHANGE_PARAM_KDF_TYPE 106 -#define PIDX_EXCHANGE_PARAM_KDF_UKM 107 -#define PIDX_EXCHANGE_PARAM_PAD 108 -#define PIDX_GEN_PARAM_ITERATION 109 -#define PIDX_GEN_PARAM_POTENTIAL 110 -#define PIDX_KDF_PARAM_ARGON2_AD 111 -#define PIDX_KDF_PARAM_ARGON2_LANES 112 -#define PIDX_KDF_PARAM_ARGON2_MEMCOST 113 -#define PIDX_KDF_PARAM_ARGON2_VERSION 114 -#define PIDX_KDF_PARAM_CEK_ALG 115 +#define PIDX_EXCHANGE_PARAM_KDF_DIGEST 104 +#define PIDX_EXCHANGE_PARAM_KDF_DIGEST_PROPS 105 +#define PIDX_EXCHANGE_PARAM_KDF_OUTLEN 106 +#define PIDX_EXCHANGE_PARAM_KDF_TYPE 107 +#define PIDX_EXCHANGE_PARAM_KDF_UKM 108 +#define PIDX_EXCHANGE_PARAM_PAD 109 +#define PIDX_GEN_PARAM_ITERATION 110 +#define PIDX_GEN_PARAM_POTENTIAL 111 +#define PIDX_KDF_PARAM_ARGON2_AD 112 +#define PIDX_KDF_PARAM_ARGON2_LANES 113 +#define PIDX_KDF_PARAM_ARGON2_MEMCOST 114 +#define PIDX_KDF_PARAM_ARGON2_VERSION 115 +#define PIDX_KDF_PARAM_CEK_ALG 116 #define PIDX_KDF_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER -#define PIDX_KDF_PARAM_CONSTANT 116 -#define PIDX_KDF_PARAM_DATA 117 +#define PIDX_KDF_PARAM_CONSTANT 117 +#define PIDX_KDF_PARAM_DATA 118 #define PIDX_KDF_PARAM_DIGEST PIDX_ALG_PARAM_DIGEST -#define PIDX_KDF_PARAM_EARLY_CLEAN 118 +#define PIDX_KDF_PARAM_EARLY_CLEAN 119 #define PIDX_KDF_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR #define PIDX_KDF_PARAM_FIPS_DIGEST_CHECK PIDX_PKEY_PARAM_FIPS_DIGEST_CHECK -#define PIDX_KDF_PARAM_FIPS_EMS_CHECK 119 +#define PIDX_KDF_PARAM_FIPS_EMS_CHECK 120 #define PIDX_KDF_PARAM_FIPS_KEY_CHECK PIDX_PKEY_PARAM_FIPS_KEY_CHECK -#define PIDX_KDF_PARAM_HMACDRBG_ENTROPY 120 -#define PIDX_KDF_PARAM_HMACDRBG_NONCE 121 -#define PIDX_KDF_PARAM_INFO 122 -#define PIDX_KDF_PARAM_ITER 123 -#define PIDX_KDF_PARAM_KBKDF_R 124 -#define PIDX_KDF_PARAM_KBKDF_USE_L 125 -#define PIDX_KDF_PARAM_KBKDF_USE_SEPARATOR 126 -#define PIDX_KDF_PARAM_KEY 127 -#define PIDX_KDF_PARAM_LABEL 128 +#define PIDX_KDF_PARAM_HMACDRBG_ENTROPY 121 +#define PIDX_KDF_PARAM_HMACDRBG_NONCE 122 +#define PIDX_KDF_PARAM_INFO 123 +#define PIDX_KDF_PARAM_ITER 124 +#define PIDX_KDF_PARAM_KBKDF_R 125 +#define PIDX_KDF_PARAM_KBKDF_USE_L 126 +#define PIDX_KDF_PARAM_KBKDF_USE_SEPARATOR 127 +#define PIDX_KDF_PARAM_KEY 128 +#define PIDX_KDF_PARAM_LABEL 129 #define PIDX_KDF_PARAM_MAC PIDX_ALG_PARAM_MAC -#define 
PIDX_KDF_PARAM_MAC_SIZE 129 +#define PIDX_KDF_PARAM_MAC_SIZE 130 #define PIDX_KDF_PARAM_MODE 55 -#define PIDX_KDF_PARAM_PASSWORD 130 -#define PIDX_KDF_PARAM_PKCS12_ID 131 -#define PIDX_KDF_PARAM_PKCS5 132 -#define PIDX_KDF_PARAM_PREFIX 133 +#define PIDX_KDF_PARAM_PASSWORD 131 +#define PIDX_KDF_PARAM_PKCS12_ID 132 +#define PIDX_KDF_PARAM_PKCS5 133 +#define PIDX_KDF_PARAM_PREFIX 134 #define PIDX_KDF_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES -#define PIDX_KDF_PARAM_SALT 134 -#define PIDX_KDF_PARAM_SCRYPT_MAXMEM 135 -#define PIDX_KDF_PARAM_SCRYPT_N 136 -#define PIDX_KDF_PARAM_SCRYPT_P 137 -#define PIDX_KDF_PARAM_SCRYPT_R 124 -#define PIDX_KDF_PARAM_SECRET 138 -#define PIDX_KDF_PARAM_SEED 139 -#define PIDX_KDF_PARAM_SIZE 80 -#define PIDX_KDF_PARAM_SSHKDF_SESSION_ID 140 -#define PIDX_KDF_PARAM_SSHKDF_TYPE 141 -#define PIDX_KDF_PARAM_SSHKDF_XCGHASH 142 -#define PIDX_KDF_PARAM_THREADS 143 -#define PIDX_KDF_PARAM_UKM 144 -#define PIDX_KDF_PARAM_X942_ACVPINFO 145 -#define PIDX_KDF_PARAM_X942_PARTYUINFO 146 -#define PIDX_KDF_PARAM_X942_PARTYVINFO 147 -#define PIDX_KDF_PARAM_X942_SUPP_PRIVINFO 148 -#define PIDX_KDF_PARAM_X942_SUPP_PUBINFO 149 -#define PIDX_KDF_PARAM_X942_USE_KEYBITS 150 +#define PIDX_KDF_PARAM_SALT 135 +#define PIDX_KDF_PARAM_SCRYPT_MAXMEM 136 +#define PIDX_KDF_PARAM_SCRYPT_N 137 +#define PIDX_KDF_PARAM_SCRYPT_P 138 +#define PIDX_KDF_PARAM_SCRYPT_R 125 +#define PIDX_KDF_PARAM_SECRET 139 +#define PIDX_KDF_PARAM_SEED 140 +#define PIDX_KDF_PARAM_SIZE 81 +#define PIDX_KDF_PARAM_SSHKDF_SESSION_ID 141 +#define PIDX_KDF_PARAM_SSHKDF_TYPE 142 +#define PIDX_KDF_PARAM_SSHKDF_XCGHASH 143 +#define PIDX_KDF_PARAM_THREADS 144 +#define PIDX_KDF_PARAM_UKM 145 +#define PIDX_KDF_PARAM_X942_ACVPINFO 146 +#define PIDX_KDF_PARAM_X942_PARTYUINFO 147 +#define PIDX_KDF_PARAM_X942_PARTYVINFO 148 +#define PIDX_KDF_PARAM_X942_SUPP_PRIVINFO 149 +#define PIDX_KDF_PARAM_X942_SUPP_PUBINFO 150 +#define PIDX_KDF_PARAM_X942_USE_KEYBITS 151 #define PIDX_KEM_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR #define PIDX_KEM_PARAM_FIPS_KEY_CHECK PIDX_PKEY_PARAM_FIPS_KEY_CHECK -#define PIDX_KEM_PARAM_IKME 151 -#define PIDX_KEM_PARAM_OPERATION 152 -#define PIDX_LIBSSL_RECORD_LAYER_PARAM_BLOCK_PADDING 153 -#define PIDX_LIBSSL_RECORD_LAYER_PARAM_HS_PADDING 154 -#define PIDX_LIBSSL_RECORD_LAYER_PARAM_MAX_EARLY_DATA 155 -#define PIDX_LIBSSL_RECORD_LAYER_PARAM_MAX_FRAG_LEN 156 +#define PIDX_KEM_PARAM_IKME 152 +#define PIDX_KEM_PARAM_OPERATION 153 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_BLOCK_PADDING 154 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_HS_PADDING 155 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_MAX_EARLY_DATA 156 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_MAX_FRAG_LEN 157 #define PIDX_LIBSSL_RECORD_LAYER_PARAM_MODE 55 -#define PIDX_LIBSSL_RECORD_LAYER_PARAM_OPTIONS 157 -#define PIDX_LIBSSL_RECORD_LAYER_PARAM_READ_AHEAD 158 -#define PIDX_LIBSSL_RECORD_LAYER_PARAM_STREAM_MAC 159 -#define PIDX_LIBSSL_RECORD_LAYER_PARAM_TLSTREE 160 -#define PIDX_LIBSSL_RECORD_LAYER_PARAM_USE_ETM 161 -#define PIDX_LIBSSL_RECORD_LAYER_READ_BUFFER_LEN 162 -#define PIDX_MAC_PARAM_BLOCK_SIZE 163 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_OPTIONS 158 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_READ_AHEAD 159 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_STREAM_MAC 160 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_TLSTREE 161 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_USE_ETM 162 +#define PIDX_LIBSSL_RECORD_LAYER_READ_BUFFER_LEN 163 +#define PIDX_MAC_PARAM_BLOCK_SIZE 164 #define PIDX_MAC_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER -#define PIDX_MAC_PARAM_CUSTOM 164 
-#define PIDX_MAC_PARAM_C_ROUNDS 165 +#define PIDX_MAC_PARAM_CUSTOM 165 +#define PIDX_MAC_PARAM_C_ROUNDS 166 #define PIDX_MAC_PARAM_DIGEST PIDX_ALG_PARAM_DIGEST -#define PIDX_MAC_PARAM_DIGEST_NOINIT 166 -#define PIDX_MAC_PARAM_DIGEST_ONESHOT 167 -#define PIDX_MAC_PARAM_D_ROUNDS 168 +#define PIDX_MAC_PARAM_DIGEST_NOINIT 167 +#define PIDX_MAC_PARAM_DIGEST_ONESHOT 168 +#define PIDX_MAC_PARAM_D_ROUNDS 169 #define PIDX_MAC_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR #define PIDX_MAC_PARAM_FIPS_KEY_CHECK PIDX_PKEY_PARAM_FIPS_KEY_CHECK #define PIDX_MAC_PARAM_FIPS_NO_SHORT_MAC PIDX_PROV_PARAM_NO_SHORT_MAC #define PIDX_MAC_PARAM_IV 52 -#define PIDX_MAC_PARAM_KEY 127 +#define PIDX_MAC_PARAM_KEY 128 #define PIDX_MAC_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES -#define PIDX_MAC_PARAM_SALT 134 -#define PIDX_MAC_PARAM_SIZE 80 -#define PIDX_MAC_PARAM_TLS_DATA_SIZE 169 -#define PIDX_MAC_PARAM_XOF 82 -#define PIDX_OBJECT_PARAM_DATA 117 -#define PIDX_OBJECT_PARAM_DATA_STRUCTURE 170 -#define PIDX_OBJECT_PARAM_DATA_TYPE 171 -#define PIDX_OBJECT_PARAM_DESC 172 -#define PIDX_OBJECT_PARAM_REFERENCE 173 -#define PIDX_OBJECT_PARAM_TYPE 141 -#define PIDX_PASSPHRASE_PARAM_INFO 122 +#define PIDX_MAC_PARAM_SALT 135 +#define PIDX_MAC_PARAM_SIZE 81 +#define PIDX_MAC_PARAM_TLS_DATA_SIZE 170 +#define PIDX_MAC_PARAM_XOF 83 +#define PIDX_OBJECT_PARAM_DATA 118 +#define PIDX_OBJECT_PARAM_DATA_STRUCTURE 171 +#define PIDX_OBJECT_PARAM_DATA_TYPE 172 +#define PIDX_OBJECT_PARAM_DESC 173 +#define PIDX_OBJECT_PARAM_INPUT_TYPE 174 +#define PIDX_OBJECT_PARAM_REFERENCE 175 +#define PIDX_OBJECT_PARAM_TYPE 142 +#define PIDX_PASSPHRASE_PARAM_INFO 123 #define PIDX_PKEY_PARAM_ALGORITHM_ID PIDX_ALG_PARAM_ALGORITHM_ID #define PIDX_PKEY_PARAM_ALGORITHM_ID_PARAMS PIDX_ALG_PARAM_ALGORITHM_ID_PARAMS -#define PIDX_PKEY_PARAM_BITS 174 +#define PIDX_PKEY_PARAM_BITS 176 #define PIDX_PKEY_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER -#define PIDX_PKEY_PARAM_DEFAULT_DIGEST 175 -#define PIDX_PKEY_PARAM_DHKEM_IKM 176 -#define PIDX_PKEY_PARAM_DH_GENERATOR 177 -#define PIDX_PKEY_PARAM_DH_PRIV_LEN 178 +#define PIDX_PKEY_PARAM_DEFAULT_DIGEST 177 +#define PIDX_PKEY_PARAM_DHKEM_IKM 178 +#define PIDX_PKEY_PARAM_DH_GENERATOR 179 +#define PIDX_PKEY_PARAM_DH_PRIV_LEN 180 #define PIDX_PKEY_PARAM_DIGEST PIDX_ALG_PARAM_DIGEST -#define PIDX_PKEY_PARAM_DIGEST_SIZE 179 -#define PIDX_PKEY_PARAM_DIST_ID 180 -#define PIDX_PKEY_PARAM_EC_A 181 -#define PIDX_PKEY_PARAM_EC_B 182 -#define PIDX_PKEY_PARAM_EC_CHAR2_M 183 -#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K1 184 -#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K2 185 -#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K3 186 -#define PIDX_PKEY_PARAM_EC_CHAR2_TP_BASIS 187 -#define PIDX_PKEY_PARAM_EC_CHAR2_TYPE 188 -#define PIDX_PKEY_PARAM_EC_COFACTOR 189 -#define PIDX_PKEY_PARAM_EC_DECODED_FROM_EXPLICIT_PARAMS 190 -#define PIDX_PKEY_PARAM_EC_ENCODING 191 -#define PIDX_PKEY_PARAM_EC_FIELD_TYPE 192 -#define PIDX_PKEY_PARAM_EC_GENERATOR 193 -#define PIDX_PKEY_PARAM_EC_GROUP_CHECK_TYPE 194 -#define PIDX_PKEY_PARAM_EC_INCLUDE_PUBLIC 195 -#define PIDX_PKEY_PARAM_EC_ORDER 196 -#define PIDX_PKEY_PARAM_EC_P 137 -#define PIDX_PKEY_PARAM_EC_POINT_CONVERSION_FORMAT 197 -#define PIDX_PKEY_PARAM_EC_PUB_X 198 -#define PIDX_PKEY_PARAM_EC_PUB_Y 199 -#define PIDX_PKEY_PARAM_EC_SEED 139 -#define PIDX_PKEY_PARAM_ENCODED_PUBLIC_KEY 200 +#define PIDX_PKEY_PARAM_DIGEST_SIZE 181 +#define PIDX_PKEY_PARAM_DIST_ID 182 +#define PIDX_PKEY_PARAM_EC_A 183 +#define PIDX_PKEY_PARAM_EC_B 184 +#define PIDX_PKEY_PARAM_EC_CHAR2_M 185 +#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K1 186 
+#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K2 187 +#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K3 188 +#define PIDX_PKEY_PARAM_EC_CHAR2_TP_BASIS 189 +#define PIDX_PKEY_PARAM_EC_CHAR2_TYPE 190 +#define PIDX_PKEY_PARAM_EC_COFACTOR 191 +#define PIDX_PKEY_PARAM_EC_DECODED_FROM_EXPLICIT_PARAMS 192 +#define PIDX_PKEY_PARAM_EC_ENCODING 193 +#define PIDX_PKEY_PARAM_EC_FIELD_TYPE 194 +#define PIDX_PKEY_PARAM_EC_GENERATOR 195 +#define PIDX_PKEY_PARAM_EC_GROUP_CHECK_TYPE 196 +#define PIDX_PKEY_PARAM_EC_INCLUDE_PUBLIC 197 +#define PIDX_PKEY_PARAM_EC_ORDER 198 +#define PIDX_PKEY_PARAM_EC_P 138 +#define PIDX_PKEY_PARAM_EC_POINT_CONVERSION_FORMAT 199 +#define PIDX_PKEY_PARAM_EC_PUB_X 200 +#define PIDX_PKEY_PARAM_EC_PUB_Y 201 +#define PIDX_PKEY_PARAM_EC_SEED 140 +#define PIDX_PKEY_PARAM_ENCODED_PUBLIC_KEY 202 #define PIDX_PKEY_PARAM_ENGINE PIDX_ALG_PARAM_ENGINE -#define PIDX_PKEY_PARAM_FFC_COFACTOR 201 +#define PIDX_PKEY_PARAM_FFC_COFACTOR 203 #define PIDX_PKEY_PARAM_FFC_DIGEST PIDX_PKEY_PARAM_DIGEST #define PIDX_PKEY_PARAM_FFC_DIGEST_PROPS PIDX_PKEY_PARAM_PROPERTIES -#define PIDX_PKEY_PARAM_FFC_G 202 -#define PIDX_PKEY_PARAM_FFC_GINDEX 203 -#define PIDX_PKEY_PARAM_FFC_H 204 -#define PIDX_PKEY_PARAM_FFC_P 137 -#define PIDX_PKEY_PARAM_FFC_PBITS 205 -#define PIDX_PKEY_PARAM_FFC_PCOUNTER 206 -#define PIDX_PKEY_PARAM_FFC_Q 207 -#define PIDX_PKEY_PARAM_FFC_QBITS 208 -#define PIDX_PKEY_PARAM_FFC_SEED 139 -#define PIDX_PKEY_PARAM_FFC_TYPE 141 -#define PIDX_PKEY_PARAM_FFC_VALIDATE_G 209 -#define PIDX_PKEY_PARAM_FFC_VALIDATE_LEGACY 210 -#define PIDX_PKEY_PARAM_FFC_VALIDATE_PQ 211 +#define PIDX_PKEY_PARAM_FFC_G 204 +#define PIDX_PKEY_PARAM_FFC_GINDEX 205 +#define PIDX_PKEY_PARAM_FFC_H 206 +#define PIDX_PKEY_PARAM_FFC_P 138 +#define PIDX_PKEY_PARAM_FFC_PBITS 207 +#define PIDX_PKEY_PARAM_FFC_PCOUNTER 208 +#define PIDX_PKEY_PARAM_FFC_Q 209 +#define PIDX_PKEY_PARAM_FFC_QBITS 210 +#define PIDX_PKEY_PARAM_FFC_SEED 140 +#define PIDX_PKEY_PARAM_FFC_TYPE 142 +#define PIDX_PKEY_PARAM_FFC_VALIDATE_G 211 +#define PIDX_PKEY_PARAM_FFC_VALIDATE_LEGACY 212 +#define PIDX_PKEY_PARAM_FFC_VALIDATE_PQ 213 #define PIDX_PKEY_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR -#define PIDX_PKEY_PARAM_FIPS_DIGEST_CHECK 212 -#define PIDX_PKEY_PARAM_FIPS_KEY_CHECK 213 -#define PIDX_PKEY_PARAM_FIPS_SIGN_CHECK 214 -#define PIDX_PKEY_PARAM_GROUP_NAME 215 +#define PIDX_PKEY_PARAM_FIPS_DIGEST_CHECK 214 +#define PIDX_PKEY_PARAM_FIPS_KEY_CHECK 215 +#define PIDX_PKEY_PARAM_FIPS_SIGN_CHECK 216 +#define PIDX_PKEY_PARAM_GROUP_NAME 217 #define PIDX_PKEY_PARAM_IMPLICIT_REJECTION 8 -#define PIDX_PKEY_PARAM_MANDATORY_DIGEST 216 -#define PIDX_PKEY_PARAM_MASKGENFUNC 217 -#define PIDX_PKEY_PARAM_MAX_SIZE 218 -#define PIDX_PKEY_PARAM_MGF1_DIGEST 219 -#define PIDX_PKEY_PARAM_MGF1_PROPERTIES 220 -#define PIDX_PKEY_PARAM_PAD_MODE 221 -#define PIDX_PKEY_PARAM_PRIV_KEY 222 +#define PIDX_PKEY_PARAM_MANDATORY_DIGEST 218 +#define PIDX_PKEY_PARAM_MASKGENFUNC 219 +#define PIDX_PKEY_PARAM_MAX_SIZE 220 +#define PIDX_PKEY_PARAM_MGF1_DIGEST 221 +#define PIDX_PKEY_PARAM_MGF1_PROPERTIES 222 +#define PIDX_PKEY_PARAM_ML_DSA_INPUT_FORMATS 223 +#define PIDX_PKEY_PARAM_ML_DSA_OUTPUT_FORMATS 224 +#define PIDX_PKEY_PARAM_ML_DSA_PREFER_SEED 225 +#define PIDX_PKEY_PARAM_ML_DSA_RETAIN_SEED 226 +#define PIDX_PKEY_PARAM_ML_DSA_SEED 140 +#define PIDX_PKEY_PARAM_ML_KEM_IMPORT_PCT_TYPE 227 +#define PIDX_PKEY_PARAM_ML_KEM_INPUT_FORMATS 228 +#define PIDX_PKEY_PARAM_ML_KEM_OUTPUT_FORMATS 229 +#define PIDX_PKEY_PARAM_ML_KEM_PREFER_SEED 230 +#define PIDX_PKEY_PARAM_ML_KEM_RETAIN_SEED 231 
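The generator assigns one index per distinct parameter string, which is why the new ML-DSA, ML-KEM, and SLH-DSA seed entries in this region all reuse slot 140 — the same "seed" entry as PIDX_PKEY_PARAM_FFC_SEED — while every later index shifts to make room for the genuinely new names. A hedged sketch of the ML-KEM parameter behind that shared slot (assuming the 3.5 ML-KEM provider exposes "seed" as a gettable octet string, which this patch only implies):

    #include <openssl/evp.h>
    #include <openssl/core_names.h>

    /* Sketch: generate an ML-KEM-768 key, then read back its retained
     * "seed" (OSSL_PKEY_PARAM_ML_KEM_SEED, defined in core_names.h below). */
    static int get_mlkem_seed(unsigned char *seed /* >= 64 bytes */,
                              size_t *seedlen)
    {
        EVP_PKEY *pkey = NULL;
        EVP_PKEY_CTX *gctx = EVP_PKEY_CTX_new_from_name(NULL, "ML-KEM-768", NULL);
        int ok = gctx != NULL
            && EVP_PKEY_keygen_init(gctx) > 0
            && EVP_PKEY_generate(gctx, &pkey) > 0
            && EVP_PKEY_get_octet_string_param(pkey, OSSL_PKEY_PARAM_ML_KEM_SEED,
                                               seed, 64, seedlen);
        EVP_PKEY_free(pkey);
        EVP_PKEY_CTX_free(gctx);
        return ok;
    }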
+#define PIDX_PKEY_PARAM_ML_KEM_SEED 140 +#define PIDX_PKEY_PARAM_PAD_MODE 232 +#define PIDX_PKEY_PARAM_PRIV_KEY 233 #define PIDX_PKEY_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES -#define PIDX_PKEY_PARAM_PUB_KEY 223 +#define PIDX_PKEY_PARAM_PUB_KEY 234 #define PIDX_PKEY_PARAM_RSA_BITS PIDX_PKEY_PARAM_BITS -#define PIDX_PKEY_PARAM_RSA_COEFFICIENT 224 -#define PIDX_PKEY_PARAM_RSA_COEFFICIENT1 225 -#define PIDX_PKEY_PARAM_RSA_COEFFICIENT2 226 -#define PIDX_PKEY_PARAM_RSA_COEFFICIENT3 227 -#define PIDX_PKEY_PARAM_RSA_COEFFICIENT4 228 -#define PIDX_PKEY_PARAM_RSA_COEFFICIENT5 229 -#define PIDX_PKEY_PARAM_RSA_COEFFICIENT6 230 -#define PIDX_PKEY_PARAM_RSA_COEFFICIENT7 231 -#define PIDX_PKEY_PARAM_RSA_COEFFICIENT8 232 -#define PIDX_PKEY_PARAM_RSA_COEFFICIENT9 233 -#define PIDX_PKEY_PARAM_RSA_D 234 -#define PIDX_PKEY_PARAM_RSA_DERIVE_FROM_PQ 235 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT 235 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT1 236 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT2 237 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT3 238 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT4 239 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT5 240 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT6 241 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT7 242 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT8 243 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT9 244 +#define PIDX_PKEY_PARAM_RSA_D 245 +#define PIDX_PKEY_PARAM_RSA_DERIVE_FROM_PQ 246 #define PIDX_PKEY_PARAM_RSA_DIGEST PIDX_PKEY_PARAM_DIGEST #define PIDX_PKEY_PARAM_RSA_DIGEST_PROPS PIDX_PKEY_PARAM_PROPERTIES -#define PIDX_PKEY_PARAM_RSA_E 236 -#define PIDX_PKEY_PARAM_RSA_EXPONENT 237 -#define PIDX_PKEY_PARAM_RSA_EXPONENT1 238 -#define PIDX_PKEY_PARAM_RSA_EXPONENT10 239 -#define PIDX_PKEY_PARAM_RSA_EXPONENT2 240 -#define PIDX_PKEY_PARAM_RSA_EXPONENT3 241 -#define PIDX_PKEY_PARAM_RSA_EXPONENT4 242 -#define PIDX_PKEY_PARAM_RSA_EXPONENT5 243 -#define PIDX_PKEY_PARAM_RSA_EXPONENT6 244 -#define PIDX_PKEY_PARAM_RSA_EXPONENT7 245 -#define PIDX_PKEY_PARAM_RSA_EXPONENT8 246 -#define PIDX_PKEY_PARAM_RSA_EXPONENT9 247 -#define PIDX_PKEY_PARAM_RSA_FACTOR 248 -#define PIDX_PKEY_PARAM_RSA_FACTOR1 249 -#define PIDX_PKEY_PARAM_RSA_FACTOR10 250 -#define PIDX_PKEY_PARAM_RSA_FACTOR2 251 -#define PIDX_PKEY_PARAM_RSA_FACTOR3 252 -#define PIDX_PKEY_PARAM_RSA_FACTOR4 253 -#define PIDX_PKEY_PARAM_RSA_FACTOR5 254 -#define PIDX_PKEY_PARAM_RSA_FACTOR6 255 -#define PIDX_PKEY_PARAM_RSA_FACTOR7 256 -#define PIDX_PKEY_PARAM_RSA_FACTOR8 257 -#define PIDX_PKEY_PARAM_RSA_FACTOR9 258 +#define PIDX_PKEY_PARAM_RSA_E 247 +#define PIDX_PKEY_PARAM_RSA_EXPONENT 248 +#define PIDX_PKEY_PARAM_RSA_EXPONENT1 249 +#define PIDX_PKEY_PARAM_RSA_EXPONENT10 250 +#define PIDX_PKEY_PARAM_RSA_EXPONENT2 251 +#define PIDX_PKEY_PARAM_RSA_EXPONENT3 252 +#define PIDX_PKEY_PARAM_RSA_EXPONENT4 253 +#define PIDX_PKEY_PARAM_RSA_EXPONENT5 254 +#define PIDX_PKEY_PARAM_RSA_EXPONENT6 255 +#define PIDX_PKEY_PARAM_RSA_EXPONENT7 256 +#define PIDX_PKEY_PARAM_RSA_EXPONENT8 257 +#define PIDX_PKEY_PARAM_RSA_EXPONENT9 258 +#define PIDX_PKEY_PARAM_RSA_FACTOR 259 +#define PIDX_PKEY_PARAM_RSA_FACTOR1 260 +#define PIDX_PKEY_PARAM_RSA_FACTOR10 261 +#define PIDX_PKEY_PARAM_RSA_FACTOR2 262 +#define PIDX_PKEY_PARAM_RSA_FACTOR3 263 +#define PIDX_PKEY_PARAM_RSA_FACTOR4 264 +#define PIDX_PKEY_PARAM_RSA_FACTOR5 265 +#define PIDX_PKEY_PARAM_RSA_FACTOR6 266 +#define PIDX_PKEY_PARAM_RSA_FACTOR7 267 +#define PIDX_PKEY_PARAM_RSA_FACTOR8 268 +#define PIDX_PKEY_PARAM_RSA_FACTOR9 269 #define PIDX_PKEY_PARAM_RSA_MASKGENFUNC PIDX_PKEY_PARAM_MASKGENFUNC #define PIDX_PKEY_PARAM_RSA_MGF1_DIGEST 
PIDX_PKEY_PARAM_MGF1_DIGEST -#define PIDX_PKEY_PARAM_RSA_N 136 -#define PIDX_PKEY_PARAM_RSA_PRIMES 259 -#define PIDX_PKEY_PARAM_RSA_PSS_SALTLEN 260 -#define PIDX_PKEY_PARAM_RSA_TEST_P1 261 -#define PIDX_PKEY_PARAM_RSA_TEST_P2 262 -#define PIDX_PKEY_PARAM_RSA_TEST_Q1 263 -#define PIDX_PKEY_PARAM_RSA_TEST_Q2 264 -#define PIDX_PKEY_PARAM_RSA_TEST_XP 265 -#define PIDX_PKEY_PARAM_RSA_TEST_XP1 266 -#define PIDX_PKEY_PARAM_RSA_TEST_XP2 267 -#define PIDX_PKEY_PARAM_RSA_TEST_XQ 268 -#define PIDX_PKEY_PARAM_RSA_TEST_XQ1 269 -#define PIDX_PKEY_PARAM_RSA_TEST_XQ2 270 -#define PIDX_PKEY_PARAM_SECURITY_BITS 271 +#define PIDX_PKEY_PARAM_RSA_N 137 +#define PIDX_PKEY_PARAM_RSA_PRIMES 270 +#define PIDX_PKEY_PARAM_RSA_PSS_SALTLEN 271 +#define PIDX_PKEY_PARAM_RSA_TEST_P1 272 +#define PIDX_PKEY_PARAM_RSA_TEST_P2 273 +#define PIDX_PKEY_PARAM_RSA_TEST_Q1 274 +#define PIDX_PKEY_PARAM_RSA_TEST_Q2 275 +#define PIDX_PKEY_PARAM_RSA_TEST_XP 276 +#define PIDX_PKEY_PARAM_RSA_TEST_XP1 277 +#define PIDX_PKEY_PARAM_RSA_TEST_XP2 278 +#define PIDX_PKEY_PARAM_RSA_TEST_XQ 279 +#define PIDX_PKEY_PARAM_RSA_TEST_XQ1 280 +#define PIDX_PKEY_PARAM_RSA_TEST_XQ2 281 +#define PIDX_PKEY_PARAM_SECURITY_BITS 282 +#define PIDX_PKEY_PARAM_SLH_DSA_SEED 140 #define PIDX_PKEY_PARAM_USE_COFACTOR_ECDH PIDX_PKEY_PARAM_USE_COFACTOR_FLAG -#define PIDX_PKEY_PARAM_USE_COFACTOR_FLAG 272 -#define PIDX_PROV_PARAM_BUILDINFO 273 -#define PIDX_PROV_PARAM_CORE_MODULE_FILENAME 274 -#define PIDX_PROV_PARAM_CORE_PROV_NAME 275 -#define PIDX_PROV_PARAM_CORE_VERSION 276 -#define PIDX_PROV_PARAM_DRBG_TRUNC_DIGEST 277 -#define PIDX_PROV_PARAM_DSA_SIGN_DISABLED 278 -#define PIDX_PROV_PARAM_ECDH_COFACTOR_CHECK 279 -#define PIDX_PROV_PARAM_HKDF_DIGEST_CHECK 280 -#define PIDX_PROV_PARAM_HKDF_KEY_CHECK 281 -#define PIDX_PROV_PARAM_HMAC_KEY_CHECK 282 -#define PIDX_PROV_PARAM_KBKDF_KEY_CHECK 283 -#define PIDX_PROV_PARAM_KMAC_KEY_CHECK 284 -#define PIDX_PROV_PARAM_NAME 285 -#define PIDX_PROV_PARAM_NO_SHORT_MAC 286 -#define PIDX_PROV_PARAM_PBKDF2_LOWER_BOUND_CHECK 287 -#define PIDX_PROV_PARAM_RSA_PKCS15_PAD_DISABLED 288 -#define PIDX_PROV_PARAM_RSA_PSS_SALTLEN_CHECK 289 -#define PIDX_PROV_PARAM_RSA_SIGN_X931_PAD_DISABLED 290 -#define PIDX_PROV_PARAM_SECURITY_CHECKS 291 -#define PIDX_PROV_PARAM_SELF_TEST_DESC 292 -#define PIDX_PROV_PARAM_SELF_TEST_PHASE 293 -#define PIDX_PROV_PARAM_SELF_TEST_TYPE 294 -#define PIDX_PROV_PARAM_SIGNATURE_DIGEST_CHECK 295 -#define PIDX_PROV_PARAM_SSHKDF_DIGEST_CHECK 296 -#define PIDX_PROV_PARAM_SSHKDF_KEY_CHECK 297 -#define PIDX_PROV_PARAM_SSKDF_DIGEST_CHECK 298 -#define PIDX_PROV_PARAM_SSKDF_KEY_CHECK 299 -#define PIDX_PROV_PARAM_STATUS 300 -#define PIDX_PROV_PARAM_TDES_ENCRYPT_DISABLED 301 -#define PIDX_PROV_PARAM_TLS13_KDF_DIGEST_CHECK 302 -#define PIDX_PROV_PARAM_TLS13_KDF_KEY_CHECK 303 -#define PIDX_PROV_PARAM_TLS1_PRF_DIGEST_CHECK 304 -#define PIDX_PROV_PARAM_TLS1_PRF_EMS_CHECK 305 -#define PIDX_PROV_PARAM_TLS1_PRF_KEY_CHECK 306 -#define PIDX_PROV_PARAM_VERSION 114 -#define PIDX_PROV_PARAM_X942KDF_KEY_CHECK 307 -#define PIDX_PROV_PARAM_X963KDF_DIGEST_CHECK 308 -#define PIDX_PROV_PARAM_X963KDF_KEY_CHECK 309 +#define PIDX_PKEY_PARAM_USE_COFACTOR_FLAG 283 +#define PIDX_PROV_PARAM_BUILDINFO 284 +#define PIDX_PROV_PARAM_CORE_MODULE_FILENAME 285 +#define PIDX_PROV_PARAM_CORE_PROV_NAME 286 +#define PIDX_PROV_PARAM_CORE_VERSION 287 +#define PIDX_PROV_PARAM_DRBG_TRUNC_DIGEST 288 +#define PIDX_PROV_PARAM_DSA_SIGN_DISABLED 289 +#define PIDX_PROV_PARAM_ECDH_COFACTOR_CHECK 290 +#define PIDX_PROV_PARAM_HKDF_DIGEST_CHECK 291 +#define 
PIDX_PROV_PARAM_HKDF_KEY_CHECK 292 +#define PIDX_PROV_PARAM_HMAC_KEY_CHECK 293 +#define PIDX_PROV_PARAM_KBKDF_KEY_CHECK 294 +#define PIDX_PROV_PARAM_KMAC_KEY_CHECK 295 +#define PIDX_PROV_PARAM_NAME 296 +#define PIDX_PROV_PARAM_NO_SHORT_MAC 297 +#define PIDX_PROV_PARAM_PBKDF2_LOWER_BOUND_CHECK 298 +#define PIDX_PROV_PARAM_RSA_PKCS15_PAD_DISABLED 299 +#define PIDX_PROV_PARAM_RSA_PSS_SALTLEN_CHECK 300 +#define PIDX_PROV_PARAM_RSA_SIGN_X931_PAD_DISABLED 301 +#define PIDX_PROV_PARAM_SECURITY_CHECKS 302 +#define PIDX_PROV_PARAM_SELF_TEST_DESC 303 +#define PIDX_PROV_PARAM_SELF_TEST_PHASE 304 +#define PIDX_PROV_PARAM_SELF_TEST_TYPE 305 +#define PIDX_PROV_PARAM_SIGNATURE_DIGEST_CHECK 306 +#define PIDX_PROV_PARAM_SSHKDF_DIGEST_CHECK 307 +#define PIDX_PROV_PARAM_SSHKDF_KEY_CHECK 308 +#define PIDX_PROV_PARAM_SSKDF_DIGEST_CHECK 309 +#define PIDX_PROV_PARAM_SSKDF_KEY_CHECK 310 +#define PIDX_PROV_PARAM_STATUS 311 +#define PIDX_PROV_PARAM_TDES_ENCRYPT_DISABLED 312 +#define PIDX_PROV_PARAM_TLS13_KDF_DIGEST_CHECK 313 +#define PIDX_PROV_PARAM_TLS13_KDF_KEY_CHECK 314 +#define PIDX_PROV_PARAM_TLS1_PRF_DIGEST_CHECK 315 +#define PIDX_PROV_PARAM_TLS1_PRF_EMS_CHECK 316 +#define PIDX_PROV_PARAM_TLS1_PRF_KEY_CHECK 317 +#define PIDX_PROV_PARAM_VERSION 115 +#define PIDX_PROV_PARAM_X942KDF_KEY_CHECK 318 +#define PIDX_PROV_PARAM_X963KDF_DIGEST_CHECK 319 +#define PIDX_PROV_PARAM_X963KDF_KEY_CHECK 320 #define PIDX_RAND_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR -#define PIDX_RAND_PARAM_GENERATE 310 -#define PIDX_RAND_PARAM_MAX_REQUEST 311 -#define PIDX_RAND_PARAM_STATE 312 -#define PIDX_RAND_PARAM_STRENGTH 313 -#define PIDX_RAND_PARAM_TEST_ENTROPY 314 -#define PIDX_RAND_PARAM_TEST_NONCE 315 +#define PIDX_RAND_PARAM_GENERATE 321 +#define PIDX_RAND_PARAM_MAX_REQUEST 322 +#define PIDX_RAND_PARAM_STATE 323 +#define PIDX_RAND_PARAM_STRENGTH 324 +#define PIDX_RAND_PARAM_TEST_ENTROPY 325 +#define PIDX_RAND_PARAM_TEST_NONCE 326 +#define PIDX_SIGNATURE_PARAM_ADD_RANDOM 327 #define PIDX_SIGNATURE_PARAM_ALGORITHM_ID PIDX_PKEY_PARAM_ALGORITHM_ID #define PIDX_SIGNATURE_PARAM_ALGORITHM_ID_PARAMS PIDX_PKEY_PARAM_ALGORITHM_ID_PARAMS -#define PIDX_SIGNATURE_PARAM_CONTEXT_STRING 316 +#define PIDX_SIGNATURE_PARAM_CONTEXT_STRING 328 +#define PIDX_SIGNATURE_PARAM_DETERMINISTIC 329 #define PIDX_SIGNATURE_PARAM_DIGEST PIDX_PKEY_PARAM_DIGEST #define PIDX_SIGNATURE_PARAM_DIGEST_SIZE PIDX_PKEY_PARAM_DIGEST_SIZE #define PIDX_SIGNATURE_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR #define PIDX_SIGNATURE_PARAM_FIPS_DIGEST_CHECK PIDX_PKEY_PARAM_FIPS_DIGEST_CHECK #define PIDX_SIGNATURE_PARAM_FIPS_KEY_CHECK PIDX_PKEY_PARAM_FIPS_KEY_CHECK -#define PIDX_SIGNATURE_PARAM_FIPS_RSA_PSS_SALTLEN_CHECK 289 +#define PIDX_SIGNATURE_PARAM_FIPS_RSA_PSS_SALTLEN_CHECK 300 #define PIDX_SIGNATURE_PARAM_FIPS_SIGN_CHECK PIDX_PKEY_PARAM_FIPS_SIGN_CHECK -#define PIDX_SIGNATURE_PARAM_FIPS_SIGN_X931_PAD_CHECK 317 -#define PIDX_SIGNATURE_PARAM_FIPS_VERIFY_MESSAGE 318 -#define PIDX_SIGNATURE_PARAM_INSTANCE 319 -#define PIDX_SIGNATURE_PARAM_KAT 320 +#define PIDX_SIGNATURE_PARAM_FIPS_SIGN_X931_PAD_CHECK 330 +#define PIDX_SIGNATURE_PARAM_FIPS_VERIFY_MESSAGE 331 +#define PIDX_SIGNATURE_PARAM_INSTANCE 332 +#define PIDX_SIGNATURE_PARAM_KAT 333 +#define PIDX_SIGNATURE_PARAM_MESSAGE_ENCODING 334 #define PIDX_SIGNATURE_PARAM_MGF1_DIGEST PIDX_PKEY_PARAM_MGF1_DIGEST #define PIDX_SIGNATURE_PARAM_MGF1_PROPERTIES PIDX_PKEY_PARAM_MGF1_PROPERTIES -#define PIDX_SIGNATURE_PARAM_NONCE_TYPE 321 +#define PIDX_SIGNATURE_PARAM_MU 335 +#define 
PIDX_SIGNATURE_PARAM_NONCE_TYPE 336 #define PIDX_SIGNATURE_PARAM_PAD_MODE PIDX_PKEY_PARAM_PAD_MODE #define PIDX_SIGNATURE_PARAM_PROPERTIES PIDX_PKEY_PARAM_PROPERTIES -#define PIDX_SIGNATURE_PARAM_PSS_SALTLEN 260 -#define PIDX_SIGNATURE_PARAM_SIGNATURE 322 -#define PIDX_STORE_PARAM_ALIAS 323 +#define PIDX_SIGNATURE_PARAM_PSS_SALTLEN 271 +#define PIDX_SIGNATURE_PARAM_SIGNATURE 337 +#define PIDX_SIGNATURE_PARAM_TEST_ENTROPY 338 +#define PIDX_SKEY_PARAM_KEY_LENGTH 339 +#define PIDX_SKEY_PARAM_RAW_BYTES 340 +#define PIDX_STORE_PARAM_ALIAS 341 #define PIDX_STORE_PARAM_DIGEST 3 -#define PIDX_STORE_PARAM_EXPECT 324 -#define PIDX_STORE_PARAM_FINGERPRINT 325 -#define PIDX_STORE_PARAM_INPUT_TYPE 326 -#define PIDX_STORE_PARAM_ISSUER 285 +#define PIDX_STORE_PARAM_EXPECT 342 +#define PIDX_STORE_PARAM_FINGERPRINT 343 +#define PIDX_STORE_PARAM_INPUT_TYPE 174 +#define PIDX_STORE_PARAM_ISSUER 296 #define PIDX_STORE_PARAM_PROPERTIES 7 -#define PIDX_STORE_PARAM_SERIAL 327 -#define PIDX_STORE_PARAM_SUBJECT 328 +#define PIDX_STORE_PARAM_SERIAL 344 +#define PIDX_STORE_PARAM_SUBJECT 345 diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/asn1.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/asn1.h index 2425fa10cf..d6c943ac69 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/asn1.h +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/asn1.h @@ -2,7 +2,7 @@ * WARNING: do not edit! * Generated by Makefile from include/openssl/asn1.h.in * - * Copyright 1995-2023 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -53,14 +53,14 @@ extern "C" { # define V_ASN1_PRIMITIVE_TAG 0x1f # define V_ASN1_PRIMATIVE_TAG /*compat*/ V_ASN1_PRIMITIVE_TAG -# define V_ASN1_APP_CHOOSE -2/* let the recipient choose */ -# define V_ASN1_OTHER -3/* used in ASN1_TYPE */ -# define V_ASN1_ANY -4/* used in ASN1 template code */ +# define V_ASN1_APP_CHOOSE -2 /* let the recipient choose */ +# define V_ASN1_OTHER -3 /* used in ASN1_TYPE */ +# define V_ASN1_ANY -4 /* used in ASN1 template code */ # define V_ASN1_UNDEF -1 /* ASN.1 tag values */ # define V_ASN1_EOC 0 -# define V_ASN1_BOOLEAN 1 /**/ +# define V_ASN1_BOOLEAN 1 # define V_ASN1_INTEGER 2 # define V_ASN1_BIT_STRING 3 # define V_ASN1_OCTET_STRING 4 @@ -73,19 +73,19 @@ extern "C" { # define V_ASN1_UTF8STRING 12 # define V_ASN1_SEQUENCE 16 # define V_ASN1_SET 17 -# define V_ASN1_NUMERICSTRING 18 /**/ +# define V_ASN1_NUMERICSTRING 18 # define V_ASN1_PRINTABLESTRING 19 # define V_ASN1_T61STRING 20 -# define V_ASN1_TELETEXSTRING 20/* alias */ -# define V_ASN1_VIDEOTEXSTRING 21 /**/ +# define V_ASN1_TELETEXSTRING 20 /* alias */ +# define V_ASN1_VIDEOTEXSTRING 21 # define V_ASN1_IA5STRING 22 # define V_ASN1_UTCTIME 23 -# define V_ASN1_GENERALIZEDTIME 24 /**/ -# define V_ASN1_GRAPHICSTRING 25 /**/ -# define V_ASN1_ISO64STRING 26 /**/ -# define V_ASN1_VISIBLESTRING 26/* alias */ -# define V_ASN1_GENERALSTRING 27 /**/ -# define V_ASN1_UNIVERSALSTRING 28 /**/ +# define V_ASN1_GENERALIZEDTIME 24 +# define V_ASN1_GRAPHICSTRING 25 +# define V_ASN1_ISO64STRING 26 +# define V_ASN1_VISIBLESTRING 26 /* alias */ +# define V_ASN1_GENERALSTRING 27 +# define V_ASN1_UNIVERSALSTRING 28 # define V_ASN1_BMPSTRING 30 /* @@ -278,7 +278,7 @@ typedef struct ASN1_TLC_st ASN1_TLC; /* This is just an opaque pointer */ typedef struct 
ASN1_VALUE_st ASN1_VALUE; -/* Declare ASN1 functions: the implement macro in in asn1t.h */ +/* Declare ASN1 functions: the implement macro is in asn1t.h */ /* * The mysterious 'extern' that's passed to some macros is innocuous, @@ -371,6 +371,7 @@ typedef struct ASN1_VALUE_st ASN1_VALUE; typedef void *d2i_of_void(void **, const unsigned char **, long); typedef int i2d_of_void(const void *, unsigned char **); +typedef int OSSL_i2d_of_void_ctx(const void *, unsigned char **, void *vctx); /*- * The following macros and typedefs allow an ASN1_ITEM diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/bio.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/bio.h index 89ed6c060d..8a1f9f039b 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/bio.h +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/bio.h @@ -2,7 +2,7 @@ * WARNING: do not edit! * Generated by Makefile from include/openssl/bio.h.in * - * Copyright 1995-2024 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -193,6 +193,7 @@ extern "C" { # define BIO_CTRL_GET_RPOLL_DESCRIPTOR 91 # define BIO_CTRL_GET_WPOLL_DESCRIPTOR 92 # define BIO_CTRL_DGRAM_DETECT_PEER_ADDR 93 +# define BIO_CTRL_DGRAM_SET0_LOCAL_ADDR 94 # define BIO_DGRAM_CAP_NONE 0U # define BIO_DGRAM_CAP_HANDLES_SRC_ADDR (1U << 0) @@ -693,6 +694,8 @@ int BIO_ctrl_reset_read_request(BIO *b); (unsigned int)BIO_ctrl((b), BIO_CTRL_DGRAM_GET_MTU, 0, NULL) # define BIO_dgram_set_mtu(b, mtu) \ (int)BIO_ctrl((b), BIO_CTRL_DGRAM_SET_MTU, (mtu), NULL) +# define BIO_dgram_set0_local_addr(b, addr) \ + (int)BIO_ctrl((b), BIO_CTRL_DGRAM_SET0_LOCAL_ADDR, 0, (addr)) /* ctrl macros for BIO_f_prefix */ # define BIO_set_prefix(b,p) BIO_ctrl((b), BIO_CTRL_SET_PREFIX, 0, (void *)(p)) @@ -965,9 +968,6 @@ ossl_bio__attr__((__format__(ossl_bio__printf__, 3, 0))); BIO_METHOD *BIO_meth_new(int type, const char *name); void BIO_meth_free(BIO_METHOD *biom); -int (*BIO_meth_get_write(const BIO_METHOD *biom)) (BIO *, const char *, int); -int (*BIO_meth_get_write_ex(const BIO_METHOD *biom)) (BIO *, const char *, size_t, - size_t *); int BIO_meth_set_write(BIO_METHOD *biom, int (*write) (BIO *, const char *, int)); int BIO_meth_set_write_ex(BIO_METHOD *biom, @@ -975,11 +975,6 @@ int BIO_meth_set_write_ex(BIO_METHOD *biom, int BIO_meth_set_sendmmsg(BIO_METHOD *biom, int (*f) (BIO *, BIO_MSG *, size_t, size_t, uint64_t, size_t *)); -int (*BIO_meth_get_sendmmsg(const BIO_METHOD *biom))(BIO *, BIO_MSG *, - size_t, size_t, - uint64_t, size_t *); -int (*BIO_meth_get_read(const BIO_METHOD *biom)) (BIO *, char *, int); -int (*BIO_meth_get_read_ex(const BIO_METHOD *biom)) (BIO *, char *, size_t, size_t *); int BIO_meth_set_read(BIO_METHOD *biom, int (*read) (BIO *, char *, int)); int BIO_meth_set_read_ex(BIO_METHOD *biom, @@ -987,28 +982,40 @@ int BIO_meth_set_read_ex(BIO_METHOD *biom, int BIO_meth_set_recvmmsg(BIO_METHOD *biom, int (*f) (BIO *, BIO_MSG *, size_t, size_t, uint64_t, size_t *)); -int (*BIO_meth_get_recvmmsg(const BIO_METHOD *biom))(BIO *, BIO_MSG *, - size_t, size_t, - uint64_t, size_t *); -int (*BIO_meth_get_puts(const BIO_METHOD *biom)) (BIO *, const char *); int BIO_meth_set_puts(BIO_METHOD *biom, int (*puts) (BIO *, const char *)); -int (*BIO_meth_get_gets(const BIO_METHOD *biom)) (BIO *, char *, int); int 
BIO_meth_set_gets(BIO_METHOD *biom, int (*ossl_gets) (BIO *, char *, int)); -long (*BIO_meth_get_ctrl(const BIO_METHOD *biom)) (BIO *, int, long, void *); int BIO_meth_set_ctrl(BIO_METHOD *biom, long (*ctrl) (BIO *, int, long, void *)); -int (*BIO_meth_get_create(const BIO_METHOD *bion)) (BIO *); int BIO_meth_set_create(BIO_METHOD *biom, int (*create) (BIO *)); -int (*BIO_meth_get_destroy(const BIO_METHOD *biom)) (BIO *); int BIO_meth_set_destroy(BIO_METHOD *biom, int (*destroy) (BIO *)); -long (*BIO_meth_get_callback_ctrl(const BIO_METHOD *biom)) - (BIO *, int, BIO_info_cb *); int BIO_meth_set_callback_ctrl(BIO_METHOD *biom, long (*callback_ctrl) (BIO *, int, BIO_info_cb *)); - +# ifndef OPENSSL_NO_DEPRECATED_3_5 +OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_write(const BIO_METHOD *biom)) (BIO *, const char *, + int); +OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_write_ex(const BIO_METHOD *biom)) (BIO *, const char *, + size_t, size_t *); +OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_sendmmsg(const BIO_METHOD *biom))(BIO *, BIO_MSG *, + size_t, size_t, + uint64_t, size_t *); +OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_read(const BIO_METHOD *biom)) (BIO *, char *, int); +OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_read_ex(const BIO_METHOD *biom)) (BIO *, char *, + size_t, size_t *); +OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_recvmmsg(const BIO_METHOD *biom))(BIO *, BIO_MSG *, + size_t, size_t, + uint64_t, size_t *); +OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_puts(const BIO_METHOD *biom)) (BIO *, const char *); +OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_gets(const BIO_METHOD *biom)) (BIO *, char *, int); +OSSL_DEPRECATEDIN_3_5 long (*BIO_meth_get_ctrl(const BIO_METHOD *biom)) (BIO *, int, + long, void *); +OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_create(const BIO_METHOD *bion)) (BIO *); +OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_destroy(const BIO_METHOD *biom)) (BIO *); +OSSL_DEPRECATEDIN_3_5 long (*BIO_meth_get_callback_ctrl(const BIO_METHOD *biom)) (BIO *, int, + BIO_info_cb *); +# endif # ifdef __cplusplus } # endif diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/cms.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/cms.h index 0f21a51930..6713419cfc 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/cms.h +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/cms.h @@ -2,7 +2,7 @@ * WARNING: do not edit! * Generated by Makefile from include/openssl/cms.h.in * - * Copyright 2008-2022 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2008-2025 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. 
You can obtain a copy @@ -155,6 +155,8 @@ DECLARE_ASN1_FUNCTIONS(CMS_ContentInfo) DECLARE_ASN1_FUNCTIONS(CMS_ReceiptRequest) DECLARE_ASN1_PRINT_FUNCTION(CMS_ContentInfo) +DECLARE_ASN1_DUP_FUNCTION(CMS_EnvelopedData) + CMS_ContentInfo *CMS_ContentInfo_new_ex(OSSL_LIB_CTX *libctx, const char *propq); # define CMS_SIGNERINFO_ISSUER_SERIAL 0 @@ -194,6 +196,7 @@ CMS_ContentInfo *CMS_ContentInfo_new_ex(OSSL_LIB_CTX *libctx, const char *propq) # define CMS_ASCIICRLF 0x80000 # define CMS_CADES 0x100000 # define CMS_USE_ORIGINATOR_KEYID 0x200000 +# define CMS_NO_SIGNING_TIME 0x400000 const ASN1_OBJECT *CMS_get0_type(const CMS_ContentInfo *cms); diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-ec.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-ec.h index a292da4b5b..809941506e 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-ec.h +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-ec.h @@ -166,6 +166,9 @@ extern "C" { # ifndef OPENSSL_NO_FILENAMES # define OPENSSL_NO_FILENAMES # endif +# ifndef OPENSSL_NO_FIPS_JITTER +# define OPENSSL_NO_FIPS_JITTER +# endif # ifndef OPENSSL_NO_FIPS_POST # define OPENSSL_NO_FIPS_POST # endif @@ -184,6 +187,9 @@ extern "C" { # ifndef OPENSSL_NO_H3DEMO # define OPENSSL_NO_H3DEMO # endif +# ifndef OPENSSL_NO_HQINTEROP +# define OPENSSL_NO_HQINTEROP +# endif # ifndef OPENSSL_NO_IDEA # define OPENSSL_NO_IDEA # endif @@ -295,6 +301,9 @@ extern "C" { # ifndef OPENSSL_NO_SSL3_METHOD # define OPENSSL_NO_SSL3_METHOD # endif +# ifndef OPENSSL_NO_SSLKEYLOG +# define OPENSSL_NO_SSLKEYLOG +# endif # ifndef OPENSSL_NO_STDIO # define OPENSSL_NO_STDIO # endif @@ -307,6 +316,9 @@ extern "C" { # ifndef OPENSSL_NO_THREAD_POOL # define OPENSSL_NO_THREAD_POOL # endif +# ifndef OPENSSL_NO_TLS_DEPRECATED_EC +# define OPENSSL_NO_TLS_DEPRECATED_EC +# endif # ifndef OPENSSL_NO_TLS1_3 # define OPENSSL_NO_TLS1_3 # endif diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-noec.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-noec.h index 14bb6dca8b..7bf4eb30b4 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-noec.h +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/configuration-noec.h @@ -178,6 +178,9 @@ extern "C" { # ifndef OPENSSL_NO_FILENAMES # define OPENSSL_NO_FILENAMES # endif +# ifndef OPENSSL_NO_FIPS_JITTER +# define OPENSSL_NO_FIPS_JITTER +# endif # ifndef OPENSSL_NO_FIPS_POST # define OPENSSL_NO_FIPS_POST # endif @@ -196,6 +199,9 @@ extern "C" { # ifndef OPENSSL_NO_H3DEMO # define OPENSSL_NO_H3DEMO # endif +# ifndef OPENSSL_NO_HQINTEROP +# define OPENSSL_NO_HQINTEROP +# endif # ifndef OPENSSL_NO_IDEA # define OPENSSL_NO_IDEA # endif @@ -307,6 +313,9 @@ extern "C" { # ifndef OPENSSL_NO_SSL3_METHOD # define OPENSSL_NO_SSL3_METHOD # endif +# ifndef OPENSSL_NO_SSLKEYLOG +# define OPENSSL_NO_SSLKEYLOG +# endif # ifndef OPENSSL_NO_STDIO # define OPENSSL_NO_STDIO # endif @@ -319,6 +328,9 @@ extern "C" { # ifndef OPENSSL_NO_THREAD_POOL # define OPENSSL_NO_THREAD_POOL # endif +# ifndef OPENSSL_NO_TLS_DEPRECATED_EC +# define OPENSSL_NO_TLS_DEPRECATED_EC +# endif # ifndef OPENSSL_NO_TLS1_3 # define OPENSSL_NO_TLS1_3 # endif diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/core_names.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/core_names.h index 072a6b8a8c..3ed524600b 100644 --- 
a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/core_names.h +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/core_names.h @@ -2,7 +2,7 @@ * WARNING: do not edit! * Generated by Makefile from include/openssl/core_names.h.in * - * Copyright 2019-2023 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2019-2025 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -104,10 +104,17 @@ extern "C" { # define OSSL_PKEY_EC_GROUP_CHECK_NAMED "named" # define OSSL_PKEY_EC_GROUP_CHECK_NAMED_NIST "named-nist" +/* PROV_SKEY well known key types */ +# define OSSL_SKEY_TYPE_GENERIC "GENERIC-SECRET" +# define OSSL_SKEY_TYPE_AES "AES" + /* OSSL_KEM_PARAM_OPERATION values */ #define OSSL_KEM_PARAM_OPERATION_RSASVE "RSASVE" #define OSSL_KEM_PARAM_OPERATION_DHKEM "DHKEM" +/* Provider configuration variables */ +#define OSSL_PKEY_RETAIN_SEED "pkey_retain_seed" + /* Parameter name definitions - generated by util/perl/OpenSSL/paramnames.pm */ # define OSSL_ALG_PARAM_ALGORITHM_ID "algorithm-id" # define OSSL_ALG_PARAM_ALGORITHM_ID_PARAMS "algorithm-id-params" @@ -148,7 +155,9 @@ extern "C" { # define OSSL_CAPABILITY_TLS_SIGALG_IANA_NAME "tls-sigalg-iana-name" # define OSSL_CAPABILITY_TLS_SIGALG_KEYTYPE "tls-sigalg-keytype" # define OSSL_CAPABILITY_TLS_SIGALG_KEYTYPE_OID "tls-sigalg-keytype-oid" +# define OSSL_CAPABILITY_TLS_SIGALG_MAX_DTLS "tls-max-dtls" # define OSSL_CAPABILITY_TLS_SIGALG_MAX_TLS "tls-max-tls" +# define OSSL_CAPABILITY_TLS_SIGALG_MIN_DTLS "tls-min-dtls" # define OSSL_CAPABILITY_TLS_SIGALG_MIN_TLS "tls-min-tls" # define OSSL_CAPABILITY_TLS_SIGALG_NAME "tls-sigalg-name" # define OSSL_CAPABILITY_TLS_SIGALG_OID "tls-sigalg-oid" @@ -183,6 +192,7 @@ extern "C" { # define OSSL_CIPHER_PARAM_MODE "mode" # define OSSL_CIPHER_PARAM_NUM "num" # define OSSL_CIPHER_PARAM_PADDING "padding" +# define OSSL_CIPHER_PARAM_PIPELINE_AEAD_TAG "pipeline-tag" # define OSSL_CIPHER_PARAM_RANDOM_KEY "randkey" # define OSSL_CIPHER_PARAM_RC2_KEYBITS "keybits" # define OSSL_CIPHER_PARAM_ROUNDS "rounds" @@ -338,6 +348,7 @@ extern "C" { # define OSSL_OBJECT_PARAM_DATA_STRUCTURE "data-structure" # define OSSL_OBJECT_PARAM_DATA_TYPE "data-type" # define OSSL_OBJECT_PARAM_DESC "desc" +# define OSSL_OBJECT_PARAM_INPUT_TYPE "input-type" # define OSSL_OBJECT_PARAM_REFERENCE "reference" # define OSSL_OBJECT_PARAM_TYPE "type" # define OSSL_PASSPHRASE_PARAM_INFO "info" @@ -402,6 +413,17 @@ extern "C" { # define OSSL_PKEY_PARAM_MAX_SIZE "max-size" # define OSSL_PKEY_PARAM_MGF1_DIGEST "mgf1-digest" # define OSSL_PKEY_PARAM_MGF1_PROPERTIES "mgf1-properties" +# define OSSL_PKEY_PARAM_ML_DSA_INPUT_FORMATS "ml-dsa.input_formats" +# define OSSL_PKEY_PARAM_ML_DSA_OUTPUT_FORMATS "ml-dsa.output_formats" +# define OSSL_PKEY_PARAM_ML_DSA_PREFER_SEED "ml-dsa.prefer_seed" +# define OSSL_PKEY_PARAM_ML_DSA_RETAIN_SEED "ml-dsa.retain_seed" +# define OSSL_PKEY_PARAM_ML_DSA_SEED "seed" +# define OSSL_PKEY_PARAM_ML_KEM_IMPORT_PCT_TYPE "ml-kem.import_pct_type" +# define OSSL_PKEY_PARAM_ML_KEM_INPUT_FORMATS "ml-kem.input_formats" +# define OSSL_PKEY_PARAM_ML_KEM_OUTPUT_FORMATS "ml-kem.output_formats" +# define OSSL_PKEY_PARAM_ML_KEM_PREFER_SEED "ml-kem.prefer_seed" +# define OSSL_PKEY_PARAM_ML_KEM_RETAIN_SEED "ml-kem.retain_seed" +# define OSSL_PKEY_PARAM_ML_KEM_SEED "seed" # define OSSL_PKEY_PARAM_PAD_MODE "pad-mode" # define OSSL_PKEY_PARAM_PRIV_KEY "priv" # define 
OSSL_PKEY_PARAM_PROPERTIES OSSL_ALG_PARAM_PROPERTIES @@ -460,6 +482,7 @@ extern "C" { # define OSSL_PKEY_PARAM_RSA_TEST_XQ1 "xq1" # define OSSL_PKEY_PARAM_RSA_TEST_XQ2 "xq2" # define OSSL_PKEY_PARAM_SECURITY_BITS "security-bits" +# define OSSL_PKEY_PARAM_SLH_DSA_SEED "seed" # define OSSL_PKEY_PARAM_USE_COFACTOR_ECDH OSSL_PKEY_PARAM_USE_COFACTOR_FLAG # define OSSL_PKEY_PARAM_USE_COFACTOR_FLAG "use-cofactor-flag" # define OSSL_PROV_PARAM_BUILDINFO "buildinfo" @@ -507,9 +530,11 @@ extern "C" { # define OSSL_RAND_PARAM_STRENGTH "strength" # define OSSL_RAND_PARAM_TEST_ENTROPY "test_entropy" # define OSSL_RAND_PARAM_TEST_NONCE "test_nonce" +# define OSSL_SIGNATURE_PARAM_ADD_RANDOM "additional-random" # define OSSL_SIGNATURE_PARAM_ALGORITHM_ID OSSL_PKEY_PARAM_ALGORITHM_ID # define OSSL_SIGNATURE_PARAM_ALGORITHM_ID_PARAMS OSSL_PKEY_PARAM_ALGORITHM_ID_PARAMS # define OSSL_SIGNATURE_PARAM_CONTEXT_STRING "context-string" +# define OSSL_SIGNATURE_PARAM_DETERMINISTIC "deterministic" # define OSSL_SIGNATURE_PARAM_DIGEST OSSL_PKEY_PARAM_DIGEST # define OSSL_SIGNATURE_PARAM_DIGEST_SIZE OSSL_PKEY_PARAM_DIGEST_SIZE # define OSSL_SIGNATURE_PARAM_FIPS_APPROVED_INDICATOR OSSL_ALG_PARAM_FIPS_APPROVED_INDICATOR @@ -521,13 +546,18 @@ extern "C" { # define OSSL_SIGNATURE_PARAM_FIPS_VERIFY_MESSAGE "verify-message" # define OSSL_SIGNATURE_PARAM_INSTANCE "instance" # define OSSL_SIGNATURE_PARAM_KAT "kat" +# define OSSL_SIGNATURE_PARAM_MESSAGE_ENCODING "message-encoding" # define OSSL_SIGNATURE_PARAM_MGF1_DIGEST OSSL_PKEY_PARAM_MGF1_DIGEST # define OSSL_SIGNATURE_PARAM_MGF1_PROPERTIES OSSL_PKEY_PARAM_MGF1_PROPERTIES +# define OSSL_SIGNATURE_PARAM_MU "mu" # define OSSL_SIGNATURE_PARAM_NONCE_TYPE "nonce-type" # define OSSL_SIGNATURE_PARAM_PAD_MODE OSSL_PKEY_PARAM_PAD_MODE # define OSSL_SIGNATURE_PARAM_PROPERTIES OSSL_PKEY_PARAM_PROPERTIES # define OSSL_SIGNATURE_PARAM_PSS_SALTLEN "saltlen" # define OSSL_SIGNATURE_PARAM_SIGNATURE "signature" +# define OSSL_SIGNATURE_PARAM_TEST_ENTROPY "test-entropy" +# define OSSL_SKEY_PARAM_KEY_LENGTH "key-length" +# define OSSL_SKEY_PARAM_RAW_BYTES "raw-bytes" # define OSSL_STORE_PARAM_ALIAS "alias" # define OSSL_STORE_PARAM_DIGEST "digest" # define OSSL_STORE_PARAM_EXPECT "expect" diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crmf.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crmf.h index 9900edfdde..551394d314 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crmf.h +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crmf.h @@ -2,7 +2,7 @@ * WARNING: do not edit! * Generated by Makefile from include/openssl/crmf.h.in * - * Copyright 2007-2024 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2007-2025 The OpenSSL Project Authors. All Rights Reserved. * Copyright Nokia 2007-2019 * Copyright Siemens AG 2015-2019 * @@ -26,6 +26,7 @@ # include # include # include /* for GENERAL_NAME etc. 
*/ +# include /* explicit #includes not strictly needed since implied by the above: */ # include @@ -44,8 +45,11 @@ extern "C" { # define OSSL_CRMF_SUBSEQUENTMESSAGE_ENCRCERT 0 # define OSSL_CRMF_SUBSEQUENTMESSAGE_CHALLENGERESP 1 typedef struct ossl_crmf_encryptedvalue_st OSSL_CRMF_ENCRYPTEDVALUE; - DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_ENCRYPTEDVALUE) + +typedef struct ossl_crmf_encryptedkey_st OSSL_CRMF_ENCRYPTEDKEY; +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_ENCRYPTEDKEY) + typedef struct ossl_crmf_msg_st OSSL_CRMF_MSG; DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_MSG) DECLARE_ASN1_DUP_FUNCTION(OSSL_CRMF_MSG) @@ -248,10 +252,24 @@ int OSSL_CRMF_CERTTEMPLATE_fill(OSSL_CRMF_CERTTEMPLATE *tmpl, const X509_NAME *subject, const X509_NAME *issuer, const ASN1_INTEGER *serial); -X509 -*OSSL_CRMF_ENCRYPTEDVALUE_get1_encCert(const OSSL_CRMF_ENCRYPTEDVALUE *ecert, - OSSL_LIB_CTX *libctx, const char *propq, - EVP_PKEY *pkey); +X509 *OSSL_CRMF_ENCRYPTEDVALUE_get1_encCert(const OSSL_CRMF_ENCRYPTEDVALUE *ecert, + OSSL_LIB_CTX *libctx, const char *propq, + EVP_PKEY *pkey); +X509 *OSSL_CRMF_ENCRYPTEDKEY_get1_encCert(const OSSL_CRMF_ENCRYPTEDKEY *ecert, + OSSL_LIB_CTX *libctx, const char *propq, + EVP_PKEY *pkey, unsigned int flags); +unsigned char +*OSSL_CRMF_ENCRYPTEDVALUE_decrypt(const OSSL_CRMF_ENCRYPTEDVALUE *enc, + OSSL_LIB_CTX *libctx, const char *propq, + EVP_PKEY *pkey, int *outlen); +EVP_PKEY *OSSL_CRMF_ENCRYPTEDKEY_get1_pkey(const OSSL_CRMF_ENCRYPTEDKEY *encryptedKey, + X509_STORE *ts, STACK_OF(X509) *extra, EVP_PKEY *pkey, + X509 *cert, ASN1_OCTET_STRING *secret, + OSSL_LIB_CTX *libctx, const char *propq); +int OSSL_CRMF_MSG_centralkeygen_requested(const OSSL_CRMF_MSG *crm, const X509_REQ *p10cr); +# ifndef OPENSSL_NO_CMS +OSSL_CRMF_ENCRYPTEDKEY *OSSL_CRMF_ENCRYPTEDKEY_init_envdata(CMS_EnvelopedData *envdata); +# endif # ifdef __cplusplus } diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crypto.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crypto.h index ae7e30a26e..bba69ec2e1 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crypto.h +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/crypto.h @@ -391,6 +391,9 @@ void OPENSSL_cleanse(void *ptr, size_t len); # define CRYPTO_MEM_CHECK_ENABLE 0x2 /* Control and mode bit */ # define CRYPTO_MEM_CHECK_DISABLE 0x3 /* Control only */ +/* max allowed length for value of OPENSSL_MALLOC_FAILURES env var. 
*/ +# define CRYPTO_MEM_CHECK_MAX_FS 256 + void CRYPTO_get_alloc_counts(int *mcount, int *rcount, int *fcount); # ifndef OPENSSL_NO_DEPRECATED_3_0 # define OPENSSL_mem_debug_push(info) \ diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/opensslv.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/opensslv.h index cf4bdbcea4..03ed187898 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/opensslv.h +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/opensslv.h @@ -28,7 +28,7 @@ extern "C" { * These macros express version number MAJOR.MINOR.PATCH exactly */ # define OPENSSL_VERSION_MAJOR 3 -# define OPENSSL_VERSION_MINOR 4 +# define OPENSSL_VERSION_MINOR 5 # define OPENSSL_VERSION_PATCH 1 /* @@ -39,7 +39,7 @@ extern "C" { */ /* Could be: #define OPENSSL_VERSION_PRE_RELEASE "-alpha.1" */ -# define OPENSSL_VERSION_PRE_RELEASE "" +# define OPENSSL_VERSION_PRE_RELEASE "-dev" /* Could be: #define OPENSSL_VERSION_BUILD_METADATA "+fips" */ /* Could be: #define OPENSSL_VERSION_BUILD_METADATA "+vendor.1" */ # define OPENSSL_VERSION_BUILD_METADATA "" @@ -74,21 +74,21 @@ extern "C" { * longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and * OPENSSL_VERSION_BUILD_METADATA_STR appended. */ -# define OPENSSL_VERSION_STR "3.4.1" -# define OPENSSL_FULL_VERSION_STR "3.4.1" +# define OPENSSL_VERSION_STR "3.5.1" +# define OPENSSL_FULL_VERSION_STR "3.5.1-dev" /* * SECTION 3: ADDITIONAL METADATA * * These strings are defined separately to allow them to be parsable. */ -# define OPENSSL_RELEASE_DATE "11 Feb 2025" +# define OPENSSL_RELEASE_DATE "" /* * SECTION 4: BACKWARD COMPATIBILITY */ -# define OPENSSL_VERSION_TEXT "OpenSSL 3.4.1 11 Feb 2025" +# define OPENSSL_VERSION_TEXT "OpenSSL 3.5.1-dev " /* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */ # ifdef OPENSSL_VERSION_PRE_RELEASE diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/ssl.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/ssl.h index 9741f3a18a..4701ff69d5 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/ssl.h +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/ssl.h @@ -2,7 +2,7 @@ * WARNING: do not edit! * Generated by Makefile from include/openssl/ssl.h.in * - * Copyright 1995-2024 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved. * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved * Copyright 2005 Nokia. All rights reserved. * @@ -409,7 +409,7 @@ typedef int (*SSL_async_callback_fn)(SSL *s, void *arg); */ # define SSL_OP_CIPHER_SERVER_PREFERENCE SSL_OP_BIT(22) /* - * If set, a server will allow a client to issue a SSLv3.0 version + * If set, a server will allow a client to issue an SSLv3.0 version * number as latest version supported in the premaster secret, even when * TLSv1.0 (version 3.1) was announced in the client hello. Normally * this is forbidden to prevent version rollback attacks. 
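The opensslv.h hunk above moves the generated headers from the 3.4.1 release to a 3.5.1 development snapshot, hence the "-dev" pre-release tag and the cleared release date. Because OPENSSL_VERSION_NUMBER keeps its 0xMNN00PPSL layout, consumers of these headers can gate the 3.5-only additions at compile time; a minimal sketch, using only macros shown in this patch:

    #include <openssl/opensslv.h>

    /* 3.5.0: major 3 in bits 28+, minor 5 in bits 20+, patch/status zero. */
    #if OPENSSL_VERSION_NUMBER >= 0x30500000L
    # define HAVE_OPENSSL_35_APIS 1   /* e.g. the new SSL listener calls below */
    #else
    # define HAVE_OPENSSL_35_APIS 0
    #endif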
@@ -1383,6 +1383,9 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) # define SSL_CTRL_SET_RETRY_VERIFY 136 # define SSL_CTRL_GET_VERIFY_CERT_STORE 137 # define SSL_CTRL_GET_CHAIN_CERT_STORE 138 +# define SSL_CTRL_GET0_IMPLEMENTED_GROUPS 139 +# define SSL_CTRL_GET_SIGNATURE_NAME 140 +# define SSL_CTRL_GET_PEER_SIGNATURE_NAME 141 # define SSL_CERT_SET_FIRST 1 # define SSL_CERT_SET_NEXT 2 # define SSL_CERT_SET_SERVER 3 @@ -1491,6 +1494,9 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) SSL_CTX_ctrl(ctx,SSL_CTRL_SET_GROUPS,glistlen,(int *)(glist)) # define SSL_CTX_set1_groups_list(ctx, s) \ SSL_CTX_ctrl(ctx,SSL_CTRL_SET_GROUPS_LIST,0,(char *)(s)) +# define SSL_CTX_get0_implemented_groups(ctx, all, out) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET0_IMPLEMENTED_GROUPS, all, \ + (STACK_OF(OPENSSL_CSTRING) *)(out)) # define SSL_set1_groups(s, glist, glistlen) \ SSL_ctrl(s,SSL_CTRL_SET_GROUPS,glistlen,(char *)(glist)) # define SSL_set1_groups_list(s, str) \ @@ -1522,8 +1528,12 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) (char *)(clist)) # define SSL_set1_client_certificate_types(s, clist, clistlen) \ SSL_ctrl(s,SSL_CTRL_SET_CLIENT_CERT_TYPES,clistlen,(char *)(clist)) +# define SSL_get0_signature_name(s, str) \ + SSL_ctrl(s,SSL_CTRL_GET_SIGNATURE_NAME,0,(1?(str):(const char **)NULL)) # define SSL_get_signature_nid(s, pn) \ SSL_ctrl(s,SSL_CTRL_GET_SIGNATURE_NID,0,pn) +# define SSL_get0_peer_signature_name(s, str) \ + SSL_ctrl(s,SSL_CTRL_GET_PEER_SIGNATURE_NAME,0,(1?(str):(const char **)NULL)) # define SSL_get_peer_signature_nid(s, pn) \ SSL_ctrl(s,SSL_CTRL_GET_PEER_SIGNATURE_NID,0,pn) # define SSL_get_peer_tmp_key(s, pk) \ @@ -1944,6 +1954,11 @@ OSSL_DEPRECATEDIN_3_0 __owur char *SSL_get_srp_userinfo(SSL *s); typedef int (*SSL_client_hello_cb_fn) (SSL *s, int *al, void *arg); void SSL_CTX_set_client_hello_cb(SSL_CTX *c, SSL_client_hello_cb_fn cb, void *arg); +typedef int (*SSL_new_pending_conn_cb_fn) (SSL_CTX *ctx, SSL *new_ssl, + void *arg); +void SSL_CTX_set_new_pending_conn_cb(SSL_CTX *c, SSL_new_pending_conn_cb_fn cb, + void *arg); + int SSL_client_hello_isv2(SSL *s); unsigned int SSL_client_hello_get0_legacy_version(SSL *s); size_t SSL_client_hello_get0_random(SSL *s, const unsigned char **out); @@ -2339,6 +2354,31 @@ __owur int SSL_set1_initial_peer_addr(SSL *s, const BIO_ADDR *peer_addr); __owur SSL *SSL_get0_connection(SSL *s); __owur int SSL_is_connection(SSL *s); +__owur int SSL_is_listener(SSL *ssl); +__owur SSL *SSL_get0_listener(SSL *s); +#define SSL_LISTENER_FLAG_NO_VALIDATE (1UL << 1) +__owur SSL *SSL_new_listener(SSL_CTX *ctx, uint64_t flags); +__owur SSL *SSL_new_listener_from(SSL *ssl, uint64_t flags); +__owur SSL *SSL_new_from_listener(SSL *ssl, uint64_t flags); +#define SSL_ACCEPT_CONNECTION_NO_BLOCK (1UL << 0) +__owur SSL *SSL_accept_connection(SSL *ssl, uint64_t flags); +__owur size_t SSL_get_accept_connection_queue_len(SSL *ssl); +__owur int SSL_listen(SSL *ssl); + +__owur int SSL_is_domain(SSL *s); +__owur SSL *SSL_get0_domain(SSL *s); +__owur SSL *SSL_new_domain(SSL_CTX *ctx, uint64_t flags); + +#define SSL_DOMAIN_FLAG_SINGLE_THREAD (1U << 0) +#define SSL_DOMAIN_FLAG_MULTI_THREAD (1U << 1) +#define SSL_DOMAIN_FLAG_THREAD_ASSISTED (1U << 2) +#define SSL_DOMAIN_FLAG_BLOCKING (1U << 3) +#define SSL_DOMAIN_FLAG_LEGACY_BLOCKING (1U << 4) + +__owur int SSL_CTX_set_domain_flags(SSL_CTX *ctx, uint64_t domain_flags); +__owur int SSL_CTX_get_domain_flags(const SSL_CTX *ctx, uint64_t *domain_flags); +__owur int SSL_get_domain_flags(const SSL *ssl, uint64_t *domain_flags); + #define SSL_STREAM_TYPE_NONE 0 
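The declarations above introduce the server-side QUIC surface new in 3.5: listener SSL objects, an accept queue, and event-domain flags. A minimal sketch of how these entry points appear intended to compose (error paths elided; OSSL_QUIC_server_method() and the bound-UDP-BIO plumbing are assumptions from the wider 3.5 API, not shown in this patch):

    #include <openssl/ssl.h>
    #include <openssl/quic.h>

    static void serve(SSL_CTX *ctx /* assumed: OSSL_QUIC_server_method() */)
    {
        /* A listener is an SSL object that only accepts connections. */
        SSL *listener = SSL_new_listener(ctx, 0);

        /* ... attach a bound UDP BIO to the listener here (elided) ... */

        if (listener != NULL && SSL_listen(listener) == 1) {
            for (;;) {
                /* Blocks unless SSL_ACCEPT_CONNECTION_NO_BLOCK is passed. */
                SSL *conn = SSL_accept_connection(listener, 0);

                if (conn == NULL)
                    break;
                /* ... handshake and stream I/O on conn (elided) ... */
                SSL_free(conn);
            }
        }
        SSL_free(listener);
    }

The SSL_get0_signature_name()/SSL_get0_peer_signature_name() macros added in the same region are thin wrappers over the new ctrls, returning the negotiated signature algorithm names as strings.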
#define SSL_STREAM_TYPE_READ (1U << 0) #define SSL_STREAM_TYPE_WRITE (1U << 1) @@ -2872,6 +2912,21 @@ __owur int SSL_get0_server_cert_type(const SSL *s, unsigned char **t, size_t *le __owur int SSL_CTX_get0_client_cert_type(const SSL_CTX *ctx, unsigned char **t, size_t *len); __owur int SSL_CTX_get0_server_cert_type(const SSL_CTX *s, unsigned char **t, size_t *len); +/* + * Protection level. For <= TLSv1.2 only "NONE" and "APPLICATION" are used. + */ +# define OSSL_RECORD_PROTECTION_LEVEL_NONE 0 +# define OSSL_RECORD_PROTECTION_LEVEL_EARLY 1 +# define OSSL_RECORD_PROTECTION_LEVEL_HANDSHAKE 2 +# define OSSL_RECORD_PROTECTION_LEVEL_APPLICATION 3 + +int SSL_set_quic_tls_cbs(SSL *s, const OSSL_DISPATCH *qtdis, void *arg); +int SSL_set_quic_tls_transport_params(SSL *s, + const unsigned char *params, + size_t params_len); + +int SSL_set_quic_tls_early_data_enabled(SSL *s, int enabled); + # ifdef __cplusplus } # endif diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_acert.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_acert.h index 86babde0d8..4eaac6f955 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_acert.h +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_acert.h @@ -260,4 +260,35 @@ DECLARE_ASN1_FUNCTIONS(OSSL_TARGET) DECLARE_ASN1_FUNCTIONS(OSSL_TARGETS) DECLARE_ASN1_FUNCTIONS(OSSL_TARGETING_INFORMATION) +typedef STACK_OF(OSSL_ISSUER_SERIAL) OSSL_AUTHORITY_ATTRIBUTE_ID_SYNTAX; +DECLARE_ASN1_FUNCTIONS(OSSL_AUTHORITY_ATTRIBUTE_ID_SYNTAX) + +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_ISSUER_SERIAL, OSSL_ISSUER_SERIAL, OSSL_ISSUER_SERIAL) +#define sk_OSSL_ISSUER_SERIAL_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_ISSUER_SERIAL_sk_type(sk)) +#define sk_OSSL_ISSUER_SERIAL_value(sk, idx) ((OSSL_ISSUER_SERIAL *)OPENSSL_sk_value(ossl_check_const_OSSL_ISSUER_SERIAL_sk_type(sk), (idx))) +#define sk_OSSL_ISSUER_SERIAL_new(cmp) ((STACK_OF(OSSL_ISSUER_SERIAL) *)OPENSSL_sk_new(ossl_check_OSSL_ISSUER_SERIAL_compfunc_type(cmp))) +#define sk_OSSL_ISSUER_SERIAL_new_null() ((STACK_OF(OSSL_ISSUER_SERIAL) *)OPENSSL_sk_new_null()) +#define sk_OSSL_ISSUER_SERIAL_new_reserve(cmp, n) ((STACK_OF(OSSL_ISSUER_SERIAL) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_ISSUER_SERIAL_compfunc_type(cmp), (n))) +#define sk_OSSL_ISSUER_SERIAL_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), (n)) +#define sk_OSSL_ISSUER_SERIAL_free(sk) OPENSSL_sk_free(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk)) +#define sk_OSSL_ISSUER_SERIAL_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk)) +#define sk_OSSL_ISSUER_SERIAL_delete(sk, i) ((OSSL_ISSUER_SERIAL *)OPENSSL_sk_delete(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), (i))) +#define sk_OSSL_ISSUER_SERIAL_delete_ptr(sk, ptr) ((OSSL_ISSUER_SERIAL *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr))) +#define sk_OSSL_ISSUER_SERIAL_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr)) +#define sk_OSSL_ISSUER_SERIAL_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr)) +#define sk_OSSL_ISSUER_SERIAL_pop(sk) ((OSSL_ISSUER_SERIAL *)OPENSSL_sk_pop(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk))) +#define sk_OSSL_ISSUER_SERIAL_shift(sk) ((OSSL_ISSUER_SERIAL *)OPENSSL_sk_shift(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk))) +#define sk_OSSL_ISSUER_SERIAL_pop_free(sk, freefunc) 
OPENSSL_sk_pop_free(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk),ossl_check_OSSL_ISSUER_SERIAL_freefunc_type(freefunc)) +#define sk_OSSL_ISSUER_SERIAL_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr), (idx)) +#define sk_OSSL_ISSUER_SERIAL_set(sk, idx, ptr) ((OSSL_ISSUER_SERIAL *)OPENSSL_sk_set(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), (idx), ossl_check_OSSL_ISSUER_SERIAL_type(ptr))) +#define sk_OSSL_ISSUER_SERIAL_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr)) +#define sk_OSSL_ISSUER_SERIAL_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr)) +#define sk_OSSL_ISSUER_SERIAL_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr), pnum) +#define sk_OSSL_ISSUER_SERIAL_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk)) +#define sk_OSSL_ISSUER_SERIAL_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_ISSUER_SERIAL_sk_type(sk)) +#define sk_OSSL_ISSUER_SERIAL_dup(sk) ((STACK_OF(OSSL_ISSUER_SERIAL) *)OPENSSL_sk_dup(ossl_check_const_OSSL_ISSUER_SERIAL_sk_type(sk))) +#define sk_OSSL_ISSUER_SERIAL_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_ISSUER_SERIAL) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_copyfunc_type(copyfunc), ossl_check_OSSL_ISSUER_SERIAL_freefunc_type(freefunc))) +#define sk_OSSL_ISSUER_SERIAL_set_cmp_func(sk, cmp) ((sk_OSSL_ISSUER_SERIAL_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_compfunc_type(cmp))) + + #endif diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_vfy.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_vfy.h index 68b20ee5f2..de63bf0184 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_vfy.h +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509_vfy.h @@ -2,7 +2,7 @@ * WARNING: do not edit! * Generated by Makefile from include/openssl/x509_vfy.h.in * - * Copyright 1995-2024 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -812,6 +812,7 @@ int X509_VERIFY_PARAM_clear_flags(X509_VERIFY_PARAM *param, unsigned long flags); unsigned long X509_VERIFY_PARAM_get_flags(const X509_VERIFY_PARAM *param); int X509_VERIFY_PARAM_set_purpose(X509_VERIFY_PARAM *param, int purpose); +int X509_VERIFY_PARAM_get_purpose(const X509_VERIFY_PARAM *param); int X509_VERIFY_PARAM_set_trust(X509_VERIFY_PARAM *param, int trust); void X509_VERIFY_PARAM_set_depth(X509_VERIFY_PARAM *param, int depth); void X509_VERIFY_PARAM_set_auth_level(X509_VERIFY_PARAM *param, int auth_level); diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509v3.h b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509v3.h index 5fd66fbda3..718157ebfa 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509v3.h +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/include/openssl/x509v3.h @@ -2,7 +2,7 @@ * WARNING: do not edit! * Generated by Makefile from include/openssl/x509v3.h.in * - * Copyright 1999-2024 The OpenSSL Project Authors. All Rights Reserved. 
+ * Copyright 1999-2025 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -741,7 +741,7 @@ SKM_DEFINE_STACK_OF_INTERNAL(X509_PURPOSE, X509_PURPOSE, X509_PURPOSE) #define sk_X509_PURPOSE_set_cmp_func(sk, cmp) ((sk_X509_PURPOSE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_compfunc_type(cmp))) - +# define X509_PURPOSE_DEFAULT_ANY 0 # define X509_PURPOSE_SSL_CLIENT 1 # define X509_PURPOSE_SSL_SERVER 2 # define X509_PURPOSE_NS_SSL_SERVER 3 @@ -990,7 +990,6 @@ int X509V3_extensions_print(BIO *out, const char *title, int X509_check_ca(X509 *x); int X509_check_purpose(X509 *x, int id, int ca); int X509_supported_extension(X509_EXTENSION *ex); -int X509_PURPOSE_set(int *p, int purpose); int X509_check_issued(X509 *issuer, X509 *subject); int X509_check_akid(const X509 *issuer, const AUTHORITY_KEYID *akid); void X509_set_proxy_flag(X509 *x); @@ -1006,22 +1005,26 @@ const GENERAL_NAMES *X509_get0_authority_issuer(X509 *x); const ASN1_INTEGER *X509_get0_authority_serial(X509 *x); int X509_PURPOSE_get_count(void); -X509_PURPOSE *X509_PURPOSE_get0(int idx); +int X509_PURPOSE_get_unused_id(OSSL_LIB_CTX *libctx); int X509_PURPOSE_get_by_sname(const char *sname); int X509_PURPOSE_get_by_id(int id); int X509_PURPOSE_add(int id, int trust, int flags, int (*ck) (const X509_PURPOSE *, const X509 *, int), const char *name, const char *sname, void *arg); +void X509_PURPOSE_cleanup(void); + +X509_PURPOSE *X509_PURPOSE_get0(int idx); +int X509_PURPOSE_get_id(const X509_PURPOSE *); char *X509_PURPOSE_get0_name(const X509_PURPOSE *xp); char *X509_PURPOSE_get0_sname(const X509_PURPOSE *xp); int X509_PURPOSE_get_trust(const X509_PURPOSE *xp); -void X509_PURPOSE_cleanup(void); -int X509_PURPOSE_get_id(const X509_PURPOSE *); +int X509_PURPOSE_set(int *p, int purpose); STACK_OF(OPENSSL_STRING) *X509_get1_email(X509 *x); STACK_OF(OPENSSL_STRING) *X509_REQ_get1_email(X509_REQ *x); void X509_email_free(STACK_OF(OPENSSL_STRING) *sk); STACK_OF(OPENSSL_STRING) *X509_get1_ocsp(X509 *x); + /* Flags for X509_check_* functions */ /* @@ -1494,6 +1497,471 @@ SKM_DEFINE_STACK_OF_INTERNAL(USERNOTICE, USERNOTICE, USERNOTICE) #define sk_USERNOTICE_set_cmp_func(sk, cmp) ((sk_USERNOTICE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_USERNOTICE_sk_type(sk), ossl_check_USERNOTICE_compfunc_type(cmp))) +typedef struct OSSL_ROLE_SPEC_CERT_ID_st { + GENERAL_NAME *roleName; + GENERAL_NAME *roleCertIssuer; + ASN1_INTEGER *roleCertSerialNumber; + GENERAL_NAMES *roleCertLocator; +} OSSL_ROLE_SPEC_CERT_ID; + +DECLARE_ASN1_FUNCTIONS(OSSL_ROLE_SPEC_CERT_ID) + +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_ROLE_SPEC_CERT_ID, OSSL_ROLE_SPEC_CERT_ID, OSSL_ROLE_SPEC_CERT_ID) +#define sk_OSSL_ROLE_SPEC_CERT_ID_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk)) +#define sk_OSSL_ROLE_SPEC_CERT_ID_value(sk, idx) ((OSSL_ROLE_SPEC_CERT_ID *)OPENSSL_sk_value(ossl_check_const_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), (idx))) +#define sk_OSSL_ROLE_SPEC_CERT_ID_new(cmp) ((STACK_OF(OSSL_ROLE_SPEC_CERT_ID) *)OPENSSL_sk_new(ossl_check_OSSL_ROLE_SPEC_CERT_ID_compfunc_type(cmp))) +#define sk_OSSL_ROLE_SPEC_CERT_ID_new_null() ((STACK_OF(OSSL_ROLE_SPEC_CERT_ID) *)OPENSSL_sk_new_null()) +#define sk_OSSL_ROLE_SPEC_CERT_ID_new_reserve(cmp, n) ((STACK_OF(OSSL_ROLE_SPEC_CERT_ID) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_ROLE_SPEC_CERT_ID_compfunc_type(cmp), (n))) +#define 
sk_OSSL_ROLE_SPEC_CERT_ID_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), (n)) +#define sk_OSSL_ROLE_SPEC_CERT_ID_free(sk) OPENSSL_sk_free(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk)) +#define sk_OSSL_ROLE_SPEC_CERT_ID_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk)) +#define sk_OSSL_ROLE_SPEC_CERT_ID_delete(sk, i) ((OSSL_ROLE_SPEC_CERT_ID *)OPENSSL_sk_delete(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), (i))) +#define sk_OSSL_ROLE_SPEC_CERT_ID_delete_ptr(sk, ptr) ((OSSL_ROLE_SPEC_CERT_ID *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr))) +#define sk_OSSL_ROLE_SPEC_CERT_ID_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr)) +#define sk_OSSL_ROLE_SPEC_CERT_ID_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr)) +#define sk_OSSL_ROLE_SPEC_CERT_ID_pop(sk) ((OSSL_ROLE_SPEC_CERT_ID *)OPENSSL_sk_pop(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk))) +#define sk_OSSL_ROLE_SPEC_CERT_ID_shift(sk) ((OSSL_ROLE_SPEC_CERT_ID *)OPENSSL_sk_shift(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk))) +#define sk_OSSL_ROLE_SPEC_CERT_ID_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk),ossl_check_OSSL_ROLE_SPEC_CERT_ID_freefunc_type(freefunc)) +#define sk_OSSL_ROLE_SPEC_CERT_ID_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr), (idx)) +#define sk_OSSL_ROLE_SPEC_CERT_ID_set(sk, idx, ptr) ((OSSL_ROLE_SPEC_CERT_ID *)OPENSSL_sk_set(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), (idx), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr))) +#define sk_OSSL_ROLE_SPEC_CERT_ID_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr)) +#define sk_OSSL_ROLE_SPEC_CERT_ID_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr)) +#define sk_OSSL_ROLE_SPEC_CERT_ID_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr), pnum) +#define sk_OSSL_ROLE_SPEC_CERT_ID_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk)) +#define sk_OSSL_ROLE_SPEC_CERT_ID_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk)) +#define sk_OSSL_ROLE_SPEC_CERT_ID_dup(sk) ((STACK_OF(OSSL_ROLE_SPEC_CERT_ID) *)OPENSSL_sk_dup(ossl_check_const_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk))) +#define sk_OSSL_ROLE_SPEC_CERT_ID_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_ROLE_SPEC_CERT_ID) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_copyfunc_type(copyfunc), ossl_check_OSSL_ROLE_SPEC_CERT_ID_freefunc_type(freefunc))) +#define sk_OSSL_ROLE_SPEC_CERT_ID_set_cmp_func(sk, cmp) ((sk_OSSL_ROLE_SPEC_CERT_ID_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_compfunc_type(cmp))) + + +typedef STACK_OF(OSSL_ROLE_SPEC_CERT_ID) OSSL_ROLE_SPEC_CERT_ID_SYNTAX; + +DECLARE_ASN1_FUNCTIONS(OSSL_ROLE_SPEC_CERT_ID_SYNTAX) +typedef struct OSSL_HASH_st { + X509_ALGOR *algorithmIdentifier; + ASN1_BIT_STRING *hashValue; +} OSSL_HASH; + +typedef struct OSSL_INFO_SYNTAX_POINTER_st { + GENERAL_NAMES *name; + OSSL_HASH *hash; +} 
OSSL_INFO_SYNTAX_POINTER; + +# define OSSL_INFO_SYNTAX_TYPE_CONTENT 0 +# define OSSL_INFO_SYNTAX_TYPE_POINTER 1 + +typedef struct OSSL_INFO_SYNTAX_st { + int type; + union { + ASN1_STRING *content; + OSSL_INFO_SYNTAX_POINTER *pointer; + } choice; +} OSSL_INFO_SYNTAX; + +typedef struct OSSL_PRIVILEGE_POLICY_ID_st { + ASN1_OBJECT *privilegePolicy; + OSSL_INFO_SYNTAX *privPolSyntax; +} OSSL_PRIVILEGE_POLICY_ID; + +typedef struct OSSL_ATTRIBUTE_DESCRIPTOR_st { + ASN1_OBJECT *identifier; + ASN1_STRING *attributeSyntax; + ASN1_UTF8STRING *name; + ASN1_UTF8STRING *description; + OSSL_PRIVILEGE_POLICY_ID *dominationRule; +} OSSL_ATTRIBUTE_DESCRIPTOR; + +DECLARE_ASN1_FUNCTIONS(OSSL_HASH) +DECLARE_ASN1_FUNCTIONS(OSSL_INFO_SYNTAX) +DECLARE_ASN1_FUNCTIONS(OSSL_INFO_SYNTAX_POINTER) +DECLARE_ASN1_FUNCTIONS(OSSL_PRIVILEGE_POLICY_ID) +DECLARE_ASN1_FUNCTIONS(OSSL_ATTRIBUTE_DESCRIPTOR) + +typedef struct OSSL_TIME_SPEC_ABSOLUTE_st { + ASN1_GENERALIZEDTIME *startTime; + ASN1_GENERALIZEDTIME *endTime; +} OSSL_TIME_SPEC_ABSOLUTE; + +typedef struct OSSL_DAY_TIME_st { + ASN1_INTEGER *hour; + ASN1_INTEGER *minute; + ASN1_INTEGER *second; +} OSSL_DAY_TIME; + +typedef struct OSSL_DAY_TIME_BAND_st { + OSSL_DAY_TIME *startDayTime; + OSSL_DAY_TIME *endDayTime; +} OSSL_DAY_TIME_BAND; + +# define OSSL_NAMED_DAY_TYPE_INT 0 +# define OSSL_NAMED_DAY_TYPE_BIT 1 +# define OSSL_NAMED_DAY_INT_SUN 1 +# define OSSL_NAMED_DAY_INT_MON 2 +# define OSSL_NAMED_DAY_INT_TUE 3 +# define OSSL_NAMED_DAY_INT_WED 4 +# define OSSL_NAMED_DAY_INT_THU 5 +# define OSSL_NAMED_DAY_INT_FRI 6 +# define OSSL_NAMED_DAY_INT_SAT 7 +# define OSSL_NAMED_DAY_BIT_SUN 0 +# define OSSL_NAMED_DAY_BIT_MON 1 +# define OSSL_NAMED_DAY_BIT_TUE 2 +# define OSSL_NAMED_DAY_BIT_WED 3 +# define OSSL_NAMED_DAY_BIT_THU 4 +# define OSSL_NAMED_DAY_BIT_FRI 5 +# define OSSL_NAMED_DAY_BIT_SAT 6 + +typedef struct OSSL_NAMED_DAY_st { + int type; + union { + ASN1_INTEGER *intNamedDays; + ASN1_BIT_STRING *bitNamedDays; + } choice; +} OSSL_NAMED_DAY; + +# define OSSL_TIME_SPEC_X_DAY_OF_FIRST 0 +# define OSSL_TIME_SPEC_X_DAY_OF_SECOND 1 +# define OSSL_TIME_SPEC_X_DAY_OF_THIRD 2 +# define OSSL_TIME_SPEC_X_DAY_OF_FOURTH 3 +# define OSSL_TIME_SPEC_X_DAY_OF_FIFTH 4 + +typedef struct OSSL_TIME_SPEC_X_DAY_OF_st { + int type; + union { + OSSL_NAMED_DAY *first; + OSSL_NAMED_DAY *second; + OSSL_NAMED_DAY *third; + OSSL_NAMED_DAY *fourth; + OSSL_NAMED_DAY *fifth; + } choice; +} OSSL_TIME_SPEC_X_DAY_OF; + +# define OSSL_TIME_SPEC_DAY_TYPE_INT 0 +# define OSSL_TIME_SPEC_DAY_TYPE_BIT 1 +# define OSSL_TIME_SPEC_DAY_TYPE_DAY_OF 2 +# define OSSL_TIME_SPEC_DAY_BIT_SUN 0 +# define OSSL_TIME_SPEC_DAY_BIT_MON 1 +# define OSSL_TIME_SPEC_DAY_BIT_TUE 2 +# define OSSL_TIME_SPEC_DAY_BIT_WED 3 +# define OSSL_TIME_SPEC_DAY_BIT_THU 4 +# define OSSL_TIME_SPEC_DAY_BIT_FRI 5 +# define OSSL_TIME_SPEC_DAY_BIT_SAT 6 +# define OSSL_TIME_SPEC_DAY_INT_SUN 1 +# define OSSL_TIME_SPEC_DAY_INT_MON 2 +# define OSSL_TIME_SPEC_DAY_INT_TUE 3 +# define OSSL_TIME_SPEC_DAY_INT_WED 4 +# define OSSL_TIME_SPEC_DAY_INT_THU 5 +# define OSSL_TIME_SPEC_DAY_INT_FRI 6 +# define OSSL_TIME_SPEC_DAY_INT_SAT 7 + +typedef struct OSSL_TIME_SPEC_DAY_st { + int type; + union { + STACK_OF(ASN1_INTEGER) *intDay; + ASN1_BIT_STRING *bitDay; + OSSL_TIME_SPEC_X_DAY_OF *dayOf; + } choice; +} OSSL_TIME_SPEC_DAY; + +# define OSSL_TIME_SPEC_WEEKS_TYPE_ALL 0 +# define OSSL_TIME_SPEC_WEEKS_TYPE_INT 1 +# define OSSL_TIME_SPEC_WEEKS_TYPE_BIT 2 +# define OSSL_TIME_SPEC_BIT_WEEKS_1 0 +# define OSSL_TIME_SPEC_BIT_WEEKS_2 1 +# define OSSL_TIME_SPEC_BIT_WEEKS_3 2 +# 
define OSSL_TIME_SPEC_BIT_WEEKS_4 3 +# define OSSL_TIME_SPEC_BIT_WEEKS_5 4 + +typedef struct OSSL_TIME_SPEC_WEEKS_st { + int type; + union { + ASN1_NULL *allWeeks; + STACK_OF(ASN1_INTEGER) *intWeek; + ASN1_BIT_STRING *bitWeek; + } choice; +} OSSL_TIME_SPEC_WEEKS; + +# define OSSL_TIME_SPEC_MONTH_TYPE_ALL 0 +# define OSSL_TIME_SPEC_MONTH_TYPE_INT 1 +# define OSSL_TIME_SPEC_MONTH_TYPE_BIT 2 +# define OSSL_TIME_SPEC_INT_MONTH_JAN 1 +# define OSSL_TIME_SPEC_INT_MONTH_FEB 2 +# define OSSL_TIME_SPEC_INT_MONTH_MAR 3 +# define OSSL_TIME_SPEC_INT_MONTH_APR 4 +# define OSSL_TIME_SPEC_INT_MONTH_MAY 5 +# define OSSL_TIME_SPEC_INT_MONTH_JUN 6 +# define OSSL_TIME_SPEC_INT_MONTH_JUL 7 +# define OSSL_TIME_SPEC_INT_MONTH_AUG 8 +# define OSSL_TIME_SPEC_INT_MONTH_SEP 9 +# define OSSL_TIME_SPEC_INT_MONTH_OCT 10 +# define OSSL_TIME_SPEC_INT_MONTH_NOV 11 +# define OSSL_TIME_SPEC_INT_MONTH_DEC 12 +# define OSSL_TIME_SPEC_BIT_MONTH_JAN 0 +# define OSSL_TIME_SPEC_BIT_MONTH_FEB 1 +# define OSSL_TIME_SPEC_BIT_MONTH_MAR 2 +# define OSSL_TIME_SPEC_BIT_MONTH_APR 3 +# define OSSL_TIME_SPEC_BIT_MONTH_MAY 4 +# define OSSL_TIME_SPEC_BIT_MONTH_JUN 5 +# define OSSL_TIME_SPEC_BIT_MONTH_JUL 6 +# define OSSL_TIME_SPEC_BIT_MONTH_AUG 7 +# define OSSL_TIME_SPEC_BIT_MONTH_SEP 8 +# define OSSL_TIME_SPEC_BIT_MONTH_OCT 9 +# define OSSL_TIME_SPEC_BIT_MONTH_NOV 10 +# define OSSL_TIME_SPEC_BIT_MONTH_DEC 11 + +typedef struct OSSL_TIME_SPEC_MONTH_st { + int type; + union { + ASN1_NULL *allMonths; + STACK_OF(ASN1_INTEGER) *intMonth; + ASN1_BIT_STRING *bitMonth; + } choice; +} OSSL_TIME_SPEC_MONTH; + +typedef struct OSSL_TIME_PERIOD_st { + STACK_OF(OSSL_DAY_TIME_BAND) *timesOfDay; + OSSL_TIME_SPEC_DAY *days; + OSSL_TIME_SPEC_WEEKS *weeks; + OSSL_TIME_SPEC_MONTH *months; + STACK_OF(ASN1_INTEGER) *years; +} OSSL_TIME_PERIOD; + +# define OSSL_TIME_SPEC_TIME_TYPE_ABSOLUTE 0 +# define OSSL_TIME_SPEC_TIME_TYPE_PERIODIC 1 + +typedef struct OSSL_TIME_SPEC_TIME_st { + int type; + union { + OSSL_TIME_SPEC_ABSOLUTE *absolute; + STACK_OF(OSSL_TIME_PERIOD) *periodic; + } choice; +} OSSL_TIME_SPEC_TIME; + +typedef struct OSSL_TIME_SPEC_st { + OSSL_TIME_SPEC_TIME *time; + ASN1_BOOLEAN notThisTime; + ASN1_INTEGER *timeZone; +} OSSL_TIME_SPEC; + +DECLARE_ASN1_FUNCTIONS(OSSL_DAY_TIME) +DECLARE_ASN1_FUNCTIONS(OSSL_DAY_TIME_BAND) +DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC_DAY) +DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC_WEEKS) +DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC_MONTH) +DECLARE_ASN1_FUNCTIONS(OSSL_NAMED_DAY) +DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC_X_DAY_OF) +DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC_ABSOLUTE) +DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC_TIME) +DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC) +DECLARE_ASN1_FUNCTIONS(OSSL_TIME_PERIOD) + +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_TIME_PERIOD, OSSL_TIME_PERIOD, OSSL_TIME_PERIOD) +#define sk_OSSL_TIME_PERIOD_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_TIME_PERIOD_sk_type(sk)) +#define sk_OSSL_TIME_PERIOD_value(sk, idx) ((OSSL_TIME_PERIOD *)OPENSSL_sk_value(ossl_check_const_OSSL_TIME_PERIOD_sk_type(sk), (idx))) +#define sk_OSSL_TIME_PERIOD_new(cmp) ((STACK_OF(OSSL_TIME_PERIOD) *)OPENSSL_sk_new(ossl_check_OSSL_TIME_PERIOD_compfunc_type(cmp))) +#define sk_OSSL_TIME_PERIOD_new_null() ((STACK_OF(OSSL_TIME_PERIOD) *)OPENSSL_sk_new_null()) +#define sk_OSSL_TIME_PERIOD_new_reserve(cmp, n) ((STACK_OF(OSSL_TIME_PERIOD) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_TIME_PERIOD_compfunc_type(cmp), (n))) +#define sk_OSSL_TIME_PERIOD_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), (n)) +#define sk_OSSL_TIME_PERIOD_free(sk) 
OPENSSL_sk_free(ossl_check_OSSL_TIME_PERIOD_sk_type(sk)) +#define sk_OSSL_TIME_PERIOD_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_TIME_PERIOD_sk_type(sk)) +#define sk_OSSL_TIME_PERIOD_delete(sk, i) ((OSSL_TIME_PERIOD *)OPENSSL_sk_delete(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), (i))) +#define sk_OSSL_TIME_PERIOD_delete_ptr(sk, ptr) ((OSSL_TIME_PERIOD *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr))) +#define sk_OSSL_TIME_PERIOD_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr)) +#define sk_OSSL_TIME_PERIOD_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr)) +#define sk_OSSL_TIME_PERIOD_pop(sk) ((OSSL_TIME_PERIOD *)OPENSSL_sk_pop(ossl_check_OSSL_TIME_PERIOD_sk_type(sk))) +#define sk_OSSL_TIME_PERIOD_shift(sk) ((OSSL_TIME_PERIOD *)OPENSSL_sk_shift(ossl_check_OSSL_TIME_PERIOD_sk_type(sk))) +#define sk_OSSL_TIME_PERIOD_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_TIME_PERIOD_sk_type(sk),ossl_check_OSSL_TIME_PERIOD_freefunc_type(freefunc)) +#define sk_OSSL_TIME_PERIOD_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr), (idx)) +#define sk_OSSL_TIME_PERIOD_set(sk, idx, ptr) ((OSSL_TIME_PERIOD *)OPENSSL_sk_set(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), (idx), ossl_check_OSSL_TIME_PERIOD_type(ptr))) +#define sk_OSSL_TIME_PERIOD_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr)) +#define sk_OSSL_TIME_PERIOD_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr)) +#define sk_OSSL_TIME_PERIOD_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr), pnum) +#define sk_OSSL_TIME_PERIOD_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_TIME_PERIOD_sk_type(sk)) +#define sk_OSSL_TIME_PERIOD_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_TIME_PERIOD_sk_type(sk)) +#define sk_OSSL_TIME_PERIOD_dup(sk) ((STACK_OF(OSSL_TIME_PERIOD) *)OPENSSL_sk_dup(ossl_check_const_OSSL_TIME_PERIOD_sk_type(sk))) +#define sk_OSSL_TIME_PERIOD_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_TIME_PERIOD) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_copyfunc_type(copyfunc), ossl_check_OSSL_TIME_PERIOD_freefunc_type(freefunc))) +#define sk_OSSL_TIME_PERIOD_set_cmp_func(sk, cmp) ((sk_OSSL_TIME_PERIOD_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_compfunc_type(cmp))) + + +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_DAY_TIME_BAND, OSSL_DAY_TIME_BAND, OSSL_DAY_TIME_BAND) +#define sk_OSSL_DAY_TIME_BAND_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_DAY_TIME_BAND_sk_type(sk)) +#define sk_OSSL_DAY_TIME_BAND_value(sk, idx) ((OSSL_DAY_TIME_BAND *)OPENSSL_sk_value(ossl_check_const_OSSL_DAY_TIME_BAND_sk_type(sk), (idx))) +#define sk_OSSL_DAY_TIME_BAND_new(cmp) ((STACK_OF(OSSL_DAY_TIME_BAND) *)OPENSSL_sk_new(ossl_check_OSSL_DAY_TIME_BAND_compfunc_type(cmp))) +#define sk_OSSL_DAY_TIME_BAND_new_null() ((STACK_OF(OSSL_DAY_TIME_BAND) *)OPENSSL_sk_new_null()) +#define sk_OSSL_DAY_TIME_BAND_new_reserve(cmp, n) ((STACK_OF(OSSL_DAY_TIME_BAND) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_DAY_TIME_BAND_compfunc_type(cmp), (n))) +#define sk_OSSL_DAY_TIME_BAND_reserve(sk, n) 
OPENSSL_sk_reserve(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), (n)) +#define sk_OSSL_DAY_TIME_BAND_free(sk) OPENSSL_sk_free(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk)) +#define sk_OSSL_DAY_TIME_BAND_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk)) +#define sk_OSSL_DAY_TIME_BAND_delete(sk, i) ((OSSL_DAY_TIME_BAND *)OPENSSL_sk_delete(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), (i))) +#define sk_OSSL_DAY_TIME_BAND_delete_ptr(sk, ptr) ((OSSL_DAY_TIME_BAND *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr))) +#define sk_OSSL_DAY_TIME_BAND_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr)) +#define sk_OSSL_DAY_TIME_BAND_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr)) +#define sk_OSSL_DAY_TIME_BAND_pop(sk) ((OSSL_DAY_TIME_BAND *)OPENSSL_sk_pop(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk))) +#define sk_OSSL_DAY_TIME_BAND_shift(sk) ((OSSL_DAY_TIME_BAND *)OPENSSL_sk_shift(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk))) +#define sk_OSSL_DAY_TIME_BAND_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk),ossl_check_OSSL_DAY_TIME_BAND_freefunc_type(freefunc)) +#define sk_OSSL_DAY_TIME_BAND_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr), (idx)) +#define sk_OSSL_DAY_TIME_BAND_set(sk, idx, ptr) ((OSSL_DAY_TIME_BAND *)OPENSSL_sk_set(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), (idx), ossl_check_OSSL_DAY_TIME_BAND_type(ptr))) +#define sk_OSSL_DAY_TIME_BAND_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr)) +#define sk_OSSL_DAY_TIME_BAND_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr)) +#define sk_OSSL_DAY_TIME_BAND_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr), pnum) +#define sk_OSSL_DAY_TIME_BAND_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk)) +#define sk_OSSL_DAY_TIME_BAND_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_DAY_TIME_BAND_sk_type(sk)) +#define sk_OSSL_DAY_TIME_BAND_dup(sk) ((STACK_OF(OSSL_DAY_TIME_BAND) *)OPENSSL_sk_dup(ossl_check_const_OSSL_DAY_TIME_BAND_sk_type(sk))) +#define sk_OSSL_DAY_TIME_BAND_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_DAY_TIME_BAND) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_copyfunc_type(copyfunc), ossl_check_OSSL_DAY_TIME_BAND_freefunc_type(freefunc))) +#define sk_OSSL_DAY_TIME_BAND_set_cmp_func(sk, cmp) ((sk_OSSL_DAY_TIME_BAND_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_compfunc_type(cmp))) + + +/* Attribute Type and Value */ +typedef struct atav_st { + ASN1_OBJECT *type; + ASN1_TYPE *value; +} OSSL_ATAV; + +typedef struct ATTRIBUTE_TYPE_MAPPING_st { + ASN1_OBJECT *local; + ASN1_OBJECT *remote; +} OSSL_ATTRIBUTE_TYPE_MAPPING; + +typedef struct ATTRIBUTE_VALUE_MAPPING_st { + OSSL_ATAV *local; + OSSL_ATAV *remote; +} OSSL_ATTRIBUTE_VALUE_MAPPING; + +# define OSSL_ATTR_MAP_TYPE 0 +# define OSSL_ATTR_MAP_VALUE 1 + +typedef struct ATTRIBUTE_MAPPING_st { + int type; + union { + OSSL_ATTRIBUTE_TYPE_MAPPING *typeMappings; + OSSL_ATTRIBUTE_VALUE_MAPPING *typeValueMappings; + } choice; +} OSSL_ATTRIBUTE_MAPPING; + 
+typedef STACK_OF(OSSL_ATTRIBUTE_MAPPING) OSSL_ATTRIBUTE_MAPPINGS; +DECLARE_ASN1_FUNCTIONS(OSSL_ATAV) +DECLARE_ASN1_FUNCTIONS(OSSL_ATTRIBUTE_TYPE_MAPPING) +DECLARE_ASN1_FUNCTIONS(OSSL_ATTRIBUTE_VALUE_MAPPING) +DECLARE_ASN1_FUNCTIONS(OSSL_ATTRIBUTE_MAPPING) +DECLARE_ASN1_FUNCTIONS(OSSL_ATTRIBUTE_MAPPINGS) + +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_ATTRIBUTE_MAPPING, OSSL_ATTRIBUTE_MAPPING, OSSL_ATTRIBUTE_MAPPING) +#define sk_OSSL_ATTRIBUTE_MAPPING_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_ATTRIBUTE_MAPPING_sk_type(sk)) +#define sk_OSSL_ATTRIBUTE_MAPPING_value(sk, idx) ((OSSL_ATTRIBUTE_MAPPING *)OPENSSL_sk_value(ossl_check_const_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), (idx))) +#define sk_OSSL_ATTRIBUTE_MAPPING_new(cmp) ((STACK_OF(OSSL_ATTRIBUTE_MAPPING) *)OPENSSL_sk_new(ossl_check_OSSL_ATTRIBUTE_MAPPING_compfunc_type(cmp))) +#define sk_OSSL_ATTRIBUTE_MAPPING_new_null() ((STACK_OF(OSSL_ATTRIBUTE_MAPPING) *)OPENSSL_sk_new_null()) +#define sk_OSSL_ATTRIBUTE_MAPPING_new_reserve(cmp, n) ((STACK_OF(OSSL_ATTRIBUTE_MAPPING) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_ATTRIBUTE_MAPPING_compfunc_type(cmp), (n))) +#define sk_OSSL_ATTRIBUTE_MAPPING_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), (n)) +#define sk_OSSL_ATTRIBUTE_MAPPING_free(sk) OPENSSL_sk_free(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk)) +#define sk_OSSL_ATTRIBUTE_MAPPING_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk)) +#define sk_OSSL_ATTRIBUTE_MAPPING_delete(sk, i) ((OSSL_ATTRIBUTE_MAPPING *)OPENSSL_sk_delete(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), (i))) +#define sk_OSSL_ATTRIBUTE_MAPPING_delete_ptr(sk, ptr) ((OSSL_ATTRIBUTE_MAPPING *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr))) +#define sk_OSSL_ATTRIBUTE_MAPPING_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr)) +#define sk_OSSL_ATTRIBUTE_MAPPING_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr)) +#define sk_OSSL_ATTRIBUTE_MAPPING_pop(sk) ((OSSL_ATTRIBUTE_MAPPING *)OPENSSL_sk_pop(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk))) +#define sk_OSSL_ATTRIBUTE_MAPPING_shift(sk) ((OSSL_ATTRIBUTE_MAPPING *)OPENSSL_sk_shift(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk))) +#define sk_OSSL_ATTRIBUTE_MAPPING_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk),ossl_check_OSSL_ATTRIBUTE_MAPPING_freefunc_type(freefunc)) +#define sk_OSSL_ATTRIBUTE_MAPPING_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr), (idx)) +#define sk_OSSL_ATTRIBUTE_MAPPING_set(sk, idx, ptr) ((OSSL_ATTRIBUTE_MAPPING *)OPENSSL_sk_set(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), (idx), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr))) +#define sk_OSSL_ATTRIBUTE_MAPPING_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr)) +#define sk_OSSL_ATTRIBUTE_MAPPING_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr)) +#define sk_OSSL_ATTRIBUTE_MAPPING_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr), pnum) +#define sk_OSSL_ATTRIBUTE_MAPPING_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk)) +#define 
sk_OSSL_ATTRIBUTE_MAPPING_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_ATTRIBUTE_MAPPING_sk_type(sk)) +#define sk_OSSL_ATTRIBUTE_MAPPING_dup(sk) ((STACK_OF(OSSL_ATTRIBUTE_MAPPING) *)OPENSSL_sk_dup(ossl_check_const_OSSL_ATTRIBUTE_MAPPING_sk_type(sk))) +#define sk_OSSL_ATTRIBUTE_MAPPING_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_ATTRIBUTE_MAPPING) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_copyfunc_type(copyfunc), ossl_check_OSSL_ATTRIBUTE_MAPPING_freefunc_type(freefunc))) +#define sk_OSSL_ATTRIBUTE_MAPPING_set_cmp_func(sk, cmp) ((sk_OSSL_ATTRIBUTE_MAPPING_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_compfunc_type(cmp))) + + +# define OSSL_AAA_ATTRIBUTE_TYPE 0 +# define OSSL_AAA_ATTRIBUTE_VALUES 1 + +typedef struct ALLOWED_ATTRIBUTES_CHOICE_st { + int type; + union { + ASN1_OBJECT *attributeType; + X509_ATTRIBUTE *attributeTypeandValues; + } choice; +} OSSL_ALLOWED_ATTRIBUTES_CHOICE; + +typedef struct ALLOWED_ATTRIBUTES_ITEM_st { + STACK_OF(OSSL_ALLOWED_ATTRIBUTES_CHOICE) *attributes; + GENERAL_NAME *holderDomain; +} OSSL_ALLOWED_ATTRIBUTES_ITEM; + +typedef STACK_OF(OSSL_ALLOWED_ATTRIBUTES_ITEM) OSSL_ALLOWED_ATTRIBUTES_SYNTAX; + +DECLARE_ASN1_FUNCTIONS(OSSL_ALLOWED_ATTRIBUTES_CHOICE) +DECLARE_ASN1_FUNCTIONS(OSSL_ALLOWED_ATTRIBUTES_ITEM) +DECLARE_ASN1_FUNCTIONS(OSSL_ALLOWED_ATTRIBUTES_SYNTAX) + +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_ALLOWED_ATTRIBUTES_CHOICE, OSSL_ALLOWED_ATTRIBUTES_CHOICE, OSSL_ALLOWED_ATTRIBUTES_CHOICE) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_value(sk, idx) ((OSSL_ALLOWED_ATTRIBUTES_CHOICE *)OPENSSL_sk_value(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), (idx))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_new(cmp) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_CHOICE) *)OPENSSL_sk_new(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_compfunc_type(cmp))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_new_null() ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_CHOICE) *)OPENSSL_sk_new_null()) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_new_reserve(cmp, n) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_CHOICE) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_compfunc_type(cmp), (n))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), (n)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_free(sk) OPENSSL_sk_free(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_delete(sk, i) ((OSSL_ALLOWED_ATTRIBUTES_CHOICE *)OPENSSL_sk_delete(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), (i))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_delete_ptr(sk, ptr) ((OSSL_ALLOWED_ATTRIBUTES_CHOICE *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr)) 
+#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_pop(sk) ((OSSL_ALLOWED_ATTRIBUTES_CHOICE *)OPENSSL_sk_pop(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_shift(sk) ((OSSL_ALLOWED_ATTRIBUTES_CHOICE *)OPENSSL_sk_shift(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk),ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_freefunc_type(freefunc)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr), (idx)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_set(sk, idx, ptr) ((OSSL_ALLOWED_ATTRIBUTES_CHOICE *)OPENSSL_sk_set(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), (idx), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr), pnum) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_dup(sk) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_CHOICE) *)OPENSSL_sk_dup(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_CHOICE) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_copyfunc_type(copyfunc), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_freefunc_type(freefunc))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_set_cmp_func(sk, cmp) ((sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_compfunc_type(cmp))) + + +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_ALLOWED_ATTRIBUTES_ITEM, OSSL_ALLOWED_ATTRIBUTES_ITEM, OSSL_ALLOWED_ATTRIBUTES_ITEM) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_value(sk, idx) ((OSSL_ALLOWED_ATTRIBUTES_ITEM *)OPENSSL_sk_value(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), (idx))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_new(cmp) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_ITEM) *)OPENSSL_sk_new(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_compfunc_type(cmp))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_new_null() ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_ITEM) *)OPENSSL_sk_new_null()) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_new_reserve(cmp, n) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_ITEM) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_compfunc_type(cmp), (n))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_reserve(sk, n) 
OPENSSL_sk_reserve(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), (n)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_free(sk) OPENSSL_sk_free(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_delete(sk, i) ((OSSL_ALLOWED_ATTRIBUTES_ITEM *)OPENSSL_sk_delete(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), (i))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_delete_ptr(sk, ptr) ((OSSL_ALLOWED_ATTRIBUTES_ITEM *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_pop(sk) ((OSSL_ALLOWED_ATTRIBUTES_ITEM *)OPENSSL_sk_pop(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_shift(sk) ((OSSL_ALLOWED_ATTRIBUTES_ITEM *)OPENSSL_sk_shift(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk),ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_freefunc_type(freefunc)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr), (idx)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_set(sk, idx, ptr) ((OSSL_ALLOWED_ATTRIBUTES_ITEM *)OPENSSL_sk_set(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), (idx), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr), pnum) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_dup(sk) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_ITEM) *)OPENSSL_sk_dup(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_ITEM) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_copyfunc_type(copyfunc), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_freefunc_type(freefunc))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_set_cmp_func(sk, cmp) ((sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_compfunc_type(cmp))) + + +typedef struct 
AA_DIST_POINT_st { + DIST_POINT_NAME *distpoint; + ASN1_BIT_STRING *reasons; + int dp_reasons; + ASN1_BOOLEAN indirectCRL; + ASN1_BOOLEAN containsUserAttributeCerts; + ASN1_BOOLEAN containsAACerts; + ASN1_BOOLEAN containsSOAPublicKeyCerts; +} OSSL_AA_DIST_POINT; + +DECLARE_ASN1_FUNCTIONS(OSSL_AA_DIST_POINT) + # ifdef __cplusplus } # endif diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/der/der_ml_dsa_gen.c b/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/der/der_ml_dsa_gen.c new file mode 100644 index 0000000000..d4c6cfc61e --- /dev/null +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/der/der_ml_dsa_gen.c @@ -0,0 +1,37 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/der/der_ml_dsa_gen.c.in + * + * Copyright 2025 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "prov/der_ml_dsa.h" + +/* Well known OIDs precompiled */ + +/* + * id-ml-dsa-44 OBJECT IDENTIFIER ::= { sigAlgs 17 } + */ +const unsigned char ossl_der_oid_id_ml_dsa_44[DER_OID_SZ_id_ml_dsa_44] = { + DER_OID_V_id_ml_dsa_44 +}; + +/* + * id-ml-dsa-65 OBJECT IDENTIFIER ::= { sigAlgs 18 } + */ +const unsigned char ossl_der_oid_id_ml_dsa_65[DER_OID_SZ_id_ml_dsa_65] = { + DER_OID_V_id_ml_dsa_65 +}; + +/* + * id-ml-dsa-87 OBJECT IDENTIFIER ::= { sigAlgs 19 } + */ +const unsigned char ossl_der_oid_id_ml_dsa_87[DER_OID_SZ_id_ml_dsa_87] = { + DER_OID_V_id_ml_dsa_87 +}; + diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/der/der_slh_dsa_gen.c b/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/der/der_slh_dsa_gen.c new file mode 100644 index 0000000000..f9fb0bdc51 --- /dev/null +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/der/der_slh_dsa_gen.c @@ -0,0 +1,100 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/der/der_slh_dsa_gen.c.in + * + * Copyright 2025 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "prov/der_slh_dsa.h" + +/* Well known OIDs precompiled */ + +/* + * id-slh-dsa-sha2-128s OBJECT IDENTIFIER ::= { sigAlgs 20 } + */ +const unsigned char ossl_der_oid_id_slh_dsa_sha2_128s[DER_OID_SZ_id_slh_dsa_sha2_128s] = { + DER_OID_V_id_slh_dsa_sha2_128s +}; + +/* + * id-slh-dsa-sha2-128f OBJECT IDENTIFIER ::= { sigAlgs 21 } + */ +const unsigned char ossl_der_oid_id_slh_dsa_sha2_128f[DER_OID_SZ_id_slh_dsa_sha2_128f] = { + DER_OID_V_id_slh_dsa_sha2_128f +}; + +/* + * id-slh-dsa-sha2-192s OBJECT IDENTIFIER ::= { sigAlgs 22 } + */ +const unsigned char ossl_der_oid_id_slh_dsa_sha2_192s[DER_OID_SZ_id_slh_dsa_sha2_192s] = { + DER_OID_V_id_slh_dsa_sha2_192s +}; + +/* + * id-slh-dsa-sha2-192f OBJECT IDENTIFIER ::= { sigAlgs 23 } + */ +const unsigned char ossl_der_oid_id_slh_dsa_sha2_192f[DER_OID_SZ_id_slh_dsa_sha2_192f] = { + DER_OID_V_id_slh_dsa_sha2_192f +}; + +/* + * id-slh-dsa-sha2-256s OBJECT IDENTIFIER ::= { sigAlgs 24 } + */ +const unsigned char ossl_der_oid_id_slh_dsa_sha2_256s[DER_OID_SZ_id_slh_dsa_sha2_256s] = { + DER_OID_V_id_slh_dsa_sha2_256s +}; + +/* + * id-slh-dsa-sha2-256f OBJECT IDENTIFIER ::= { sigAlgs 25 } + */ +const unsigned char ossl_der_oid_id_slh_dsa_sha2_256f[DER_OID_SZ_id_slh_dsa_sha2_256f] = { + DER_OID_V_id_slh_dsa_sha2_256f +}; + +/* + * id-slh-dsa-shake-128s OBJECT IDENTIFIER ::= { sigAlgs 26 } + */ +const unsigned char ossl_der_oid_id_slh_dsa_shake_128s[DER_OID_SZ_id_slh_dsa_shake_128s] = { + DER_OID_V_id_slh_dsa_shake_128s +}; + +/* + * id-slh-dsa-shake-128f OBJECT IDENTIFIER ::= { sigAlgs 27 } + */ +const unsigned char ossl_der_oid_id_slh_dsa_shake_128f[DER_OID_SZ_id_slh_dsa_shake_128f] = { + DER_OID_V_id_slh_dsa_shake_128f +}; + +/* + * id-slh-dsa-shake-192s OBJECT IDENTIFIER ::= { sigAlgs 28 } + */ +const unsigned char ossl_der_oid_id_slh_dsa_shake_192s[DER_OID_SZ_id_slh_dsa_shake_192s] = { + DER_OID_V_id_slh_dsa_shake_192s +}; + +/* + * id-slh-dsa-shake-192f OBJECT IDENTIFIER ::= { sigAlgs 29 } + */ +const unsigned char ossl_der_oid_id_slh_dsa_shake_192f[DER_OID_SZ_id_slh_dsa_shake_192f] = { + DER_OID_V_id_slh_dsa_shake_192f +}; + +/* + * id-slh-dsa-shake-256s OBJECT IDENTIFIER ::= { sigAlgs 30 } + */ +const unsigned char ossl_der_oid_id_slh_dsa_shake_256s[DER_OID_SZ_id_slh_dsa_shake_256s] = { + DER_OID_V_id_slh_dsa_shake_256s +}; + +/* + * id-slh-dsa-shake-256f OBJECT IDENTIFIER ::= { sigAlgs 31 } + */ +const unsigned char ossl_der_oid_id_slh_dsa_shake_256f[DER_OID_SZ_id_slh_dsa_shake_256f] = { + DER_OID_V_id_slh_dsa_shake_256f +}; + diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/include/prov/der_ml_dsa.h b/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/include/prov/der_ml_dsa.h new file mode 100644 index 0000000000..636054f781 --- /dev/null +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/include/prov/der_ml_dsa.h @@ -0,0 +1,40 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/include/prov/der_ml_dsa.h.in + * + * Copyright 2025 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "internal/der.h" +#include "crypto/ml_dsa.h" + +/* Well known OIDs precompiled */ + +/* + * id-ml-dsa-44 OBJECT IDENTIFIER ::= { sigAlgs 17 } + */ +#define DER_OID_V_id_ml_dsa_44 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x11 +#define DER_OID_SZ_id_ml_dsa_44 11 +extern const unsigned char ossl_der_oid_id_ml_dsa_44[DER_OID_SZ_id_ml_dsa_44]; + +/* + * id-ml-dsa-65 OBJECT IDENTIFIER ::= { sigAlgs 18 } + */ +#define DER_OID_V_id_ml_dsa_65 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x12 +#define DER_OID_SZ_id_ml_dsa_65 11 +extern const unsigned char ossl_der_oid_id_ml_dsa_65[DER_OID_SZ_id_ml_dsa_65]; + +/* + * id-ml-dsa-87 OBJECT IDENTIFIER ::= { sigAlgs 19 } + */ +#define DER_OID_V_id_ml_dsa_87 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x13 +#define DER_OID_SZ_id_ml_dsa_87 11 +extern const unsigned char ossl_der_oid_id_ml_dsa_87[DER_OID_SZ_id_ml_dsa_87]; + + +int ossl_DER_w_algorithmIdentifier_ML_DSA(WPACKET *pkt, int tag, ML_DSA_KEY *key); diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/include/prov/der_slh_dsa.h b/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/include/prov/der_slh_dsa.h new file mode 100644 index 0000000000..0da6cdd7b1 --- /dev/null +++ b/CryptoPkg/Library/OpensslLib/OpensslGen/providers/common/include/prov/der_slh_dsa.h @@ -0,0 +1,103 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/include/prov/der_slh_dsa.h.in + * + * Copyright 2025 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "internal/der.h" +#include "crypto/slh_dsa.h" + +/* Well known OIDs precompiled */ + +/* + * id-slh-dsa-sha2-128s OBJECT IDENTIFIER ::= { sigAlgs 20 } + */ +#define DER_OID_V_id_slh_dsa_sha2_128s DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x14 +#define DER_OID_SZ_id_slh_dsa_sha2_128s 11 +extern const unsigned char ossl_der_oid_id_slh_dsa_sha2_128s[DER_OID_SZ_id_slh_dsa_sha2_128s]; + +/* + * id-slh-dsa-sha2-128f OBJECT IDENTIFIER ::= { sigAlgs 21 } + */ +#define DER_OID_V_id_slh_dsa_sha2_128f DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x15 +#define DER_OID_SZ_id_slh_dsa_sha2_128f 11 +extern const unsigned char ossl_der_oid_id_slh_dsa_sha2_128f[DER_OID_SZ_id_slh_dsa_sha2_128f]; + +/* + * id-slh-dsa-sha2-192s OBJECT IDENTIFIER ::= { sigAlgs 22 } + */ +#define DER_OID_V_id_slh_dsa_sha2_192s DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x16 +#define DER_OID_SZ_id_slh_dsa_sha2_192s 11 +extern const unsigned char ossl_der_oid_id_slh_dsa_sha2_192s[DER_OID_SZ_id_slh_dsa_sha2_192s]; + +/* + * id-slh-dsa-sha2-192f OBJECT IDENTIFIER ::= { sigAlgs 23 } + */ +#define DER_OID_V_id_slh_dsa_sha2_192f DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x17 +#define DER_OID_SZ_id_slh_dsa_sha2_192f 11 +extern const unsigned char ossl_der_oid_id_slh_dsa_sha2_192f[DER_OID_SZ_id_slh_dsa_sha2_192f]; + +/* + * id-slh-dsa-sha2-256s OBJECT IDENTIFIER ::= { sigAlgs 24 } + */ +#define DER_OID_V_id_slh_dsa_sha2_256s DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x18 +#define DER_OID_SZ_id_slh_dsa_sha2_256s 11 +extern const unsigned char ossl_der_oid_id_slh_dsa_sha2_256s[DER_OID_SZ_id_slh_dsa_sha2_256s]; + +/* + * id-slh-dsa-sha2-256f OBJECT IDENTIFIER ::= { sigAlgs 25 } + */ +#define DER_OID_V_id_slh_dsa_sha2_256f DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x19 +#define DER_OID_SZ_id_slh_dsa_sha2_256f 11 +extern const unsigned char ossl_der_oid_id_slh_dsa_sha2_256f[DER_OID_SZ_id_slh_dsa_sha2_256f]; + +/* + * id-slh-dsa-shake-128s OBJECT IDENTIFIER ::= { sigAlgs 26 } + */ +#define DER_OID_V_id_slh_dsa_shake_128s DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x1A +#define DER_OID_SZ_id_slh_dsa_shake_128s 11 +extern const unsigned char ossl_der_oid_id_slh_dsa_shake_128s[DER_OID_SZ_id_slh_dsa_shake_128s]; + +/* + * id-slh-dsa-shake-128f OBJECT IDENTIFIER ::= { sigAlgs 27 } + */ +#define DER_OID_V_id_slh_dsa_shake_128f DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x1B +#define DER_OID_SZ_id_slh_dsa_shake_128f 11 +extern const unsigned char ossl_der_oid_id_slh_dsa_shake_128f[DER_OID_SZ_id_slh_dsa_shake_128f]; + +/* + * id-slh-dsa-shake-192s OBJECT IDENTIFIER ::= { sigAlgs 28 } + */ +#define DER_OID_V_id_slh_dsa_shake_192s DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x1C +#define DER_OID_SZ_id_slh_dsa_shake_192s 11 +extern const unsigned char ossl_der_oid_id_slh_dsa_shake_192s[DER_OID_SZ_id_slh_dsa_shake_192s]; + +/* + * id-slh-dsa-shake-192f OBJECT IDENTIFIER ::= { sigAlgs 29 } + */ +#define DER_OID_V_id_slh_dsa_shake_192f DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x1D +#define DER_OID_SZ_id_slh_dsa_shake_192f 11 +extern const unsigned char ossl_der_oid_id_slh_dsa_shake_192f[DER_OID_SZ_id_slh_dsa_shake_192f]; + +/* + * id-slh-dsa-shake-256s OBJECT IDENTIFIER ::= { sigAlgs 30 } + */ 
+#define DER_OID_V_id_slh_dsa_shake_256s DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x1E +#define DER_OID_SZ_id_slh_dsa_shake_256s 11 +extern const unsigned char ossl_der_oid_id_slh_dsa_shake_256s[DER_OID_SZ_id_slh_dsa_shake_256s]; + +/* + * id-slh-dsa-shake-256f OBJECT IDENTIFIER ::= { sigAlgs 31 } + */ +#define DER_OID_V_id_slh_dsa_shake_256f DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x1F +#define DER_OID_SZ_id_slh_dsa_shake_256f 11 +extern const unsigned char ossl_der_oid_id_slh_dsa_shake_256f[DER_OID_SZ_id_slh_dsa_shake_256f]; + + +int ossl_DER_w_algorithmIdentifier_SLH_DSA(WPACKET *pkt, int tag, SLH_DSA_KEY *key); diff --git a/CryptoPkg/Library/OpensslLib/OpensslLib.inf b/CryptoPkg/Library/OpensslLib/OpensslLib.inf index 1aa22f9905..1e1b732b3e 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslLib.inf +++ b/CryptoPkg/Library/OpensslLib/OpensslLib.inf @@ -289,7 +289,9 @@ $(OPENSSL_PATH)/crypto/evp/pmeth_check.c $(OPENSSL_PATH)/crypto/evp/pmeth_gn.c $(OPENSSL_PATH)/crypto/evp/pmeth_lib.c + $(OPENSSL_PATH)/crypto/evp/s_lib.c $(OPENSSL_PATH)/crypto/evp/signature.c + $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c $(OPENSSL_PATH)/crypto/ffc/ffc_backend.c $(OPENSSL_PATH)/crypto/ffc/ffc_dh.c $(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c @@ -297,6 +299,7 @@ $(OPENSSL_PATH)/crypto/ffc/ffc_params.c $(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c $(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c + $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c $(OPENSSL_PATH)/crypto/hashtable/hashtable.c $(OPENSSL_PATH)/crypto/hmac/hmac.c $(OPENSSL_PATH)/crypto/hpke/hpke.c @@ -353,6 +356,7 @@ $(OPENSSL_PATH)/crypto/self_test_core.c $(OPENSSL_PATH)/crypto/sleep.c $(OPENSSL_PATH)/crypto/sparse_array.c + $(OPENSSL_PATH)/crypto/ssl_err.c $(OPENSSL_PATH)/crypto/threads_lib.c $(OPENSSL_PATH)/crypto/threads_none.c $(OPENSSL_PATH)/crypto/threads_pthread.c @@ -363,6 +367,15 @@ $(OPENSSL_PATH)/crypto/md5/md5_dgst.c $(OPENSSL_PATH)/crypto/md5/md5_one.c $(OPENSSL_PATH)/crypto/md5/md5_sha1.c + $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_encoders.c + $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key.c + $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key_compress.c + $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_matrix.c + $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_ntt.c + $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_params.c + $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sample.c + $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sign.c + $(OPENSSL_PATH)/crypto/ml_kem/ml_kem.c $(OPENSSL_PATH)/crypto/modes/cbc128.c $(OPENSSL_PATH)/crypto/modes/ccm128.c $(OPENSSL_PATH)/crypto/modes/cfb128.c @@ -443,6 +456,16 @@ $(OPENSSL_PATH)/crypto/sha/sha256.c $(OPENSSL_PATH)/crypto/sha/sha3.c $(OPENSSL_PATH)/crypto/sha/sha512.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_adrs.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_hash_ctx.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_key.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_fors.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_hash.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_hypertree.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_params.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_wots.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_xmss.c $(OPENSSL_PATH)/crypto/sm3/legacy_sm3.c $(OPENSSL_PATH)/crypto/sm3/sm3.c $(OPENSSL_PATH)/crypto/stack/stack.c @@ -467,13 +490,17 @@ $(OPENSSL_PATH)/crypto/x509/t_crl.c $(OPENSSL_PATH)/crypto/x509/t_req.c $(OPENSSL_PATH)/crypto/x509/t_x509.c + $(OPENSSL_PATH)/crypto/x509/v3_aaa.c $(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c $(OPENSSL_PATH)/crypto/x509/v3_addr.c $(OPENSSL_PATH)/crypto/x509/v3_admis.c 
$(OPENSSL_PATH)/crypto/x509/v3_akeya.c $(OPENSSL_PATH)/crypto/x509/v3_akid.c $(OPENSSL_PATH)/crypto/x509/v3_asid.c + $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c + $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c $(OPENSSL_PATH)/crypto/x509/v3_audit_id.c + $(OPENSSL_PATH)/crypto/x509/v3_authattid.c $(OPENSSL_PATH)/crypto/x509/v3_battcons.c $(OPENSSL_PATH)/crypto/x509/v3_bcons.c $(OPENSSL_PATH)/crypto/x509/v3_bitst.c @@ -501,12 +528,14 @@ $(OPENSSL_PATH)/crypto/x509/v3_pmaps.c $(OPENSSL_PATH)/crypto/x509/v3_prn.c $(OPENSSL_PATH)/crypto/x509/v3_purp.c + $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c $(OPENSSL_PATH)/crypto/x509/v3_san.c $(OPENSSL_PATH)/crypto/x509/v3_sda.c $(OPENSSL_PATH)/crypto/x509/v3_single_use.c $(OPENSSL_PATH)/crypto/x509/v3_skid.c $(OPENSSL_PATH)/crypto/x509/v3_soa_id.c $(OPENSSL_PATH)/crypto/x509/v3_sxnet.c + $(OPENSSL_PATH)/crypto/x509/v3_timespec.c $(OPENSSL_PATH)/crypto/x509/v3_tlsf.c $(OPENSSL_PATH)/crypto/x509/v3_usernotice.c $(OPENSSL_PATH)/crypto/x509/v3_utf8.c @@ -585,6 +614,9 @@ $(OPENSSL_PATH)/providers/implementations/encode_decode/decode_pvk2key.c $(OPENSSL_PATH)/providers/implementations/encode_decode/decode_spki2typespki.c $(OPENSSL_PATH)/providers/implementations/encode_decode/endecoder_common.c + $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_common_codecs.c + $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_dsa_codecs.c + $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_kem_codecs.c $(OPENSSL_PATH)/providers/implementations/exchange/dh_exch.c $(OPENSSL_PATH)/providers/implementations/exchange/kdf_exch.c $(OPENSSL_PATH)/providers/implementations/kdfs/argon2.c @@ -600,11 +632,15 @@ $(OPENSSL_PATH)/providers/implementations/kdfs/sskdf.c $(OPENSSL_PATH)/providers/implementations/kdfs/tls1_prf.c $(OPENSSL_PATH)/providers/implementations/kdfs/x942kdf.c + $(OPENSSL_PATH)/providers/implementations/kem/ml_kem_kem.c $(OPENSSL_PATH)/providers/implementations/kem/rsa_kem.c $(OPENSSL_PATH)/providers/implementations/keymgmt/dh_kmgmt.c $(OPENSSL_PATH)/providers/implementations/keymgmt/kdf_legacy_kmgmt.c $(OPENSSL_PATH)/providers/implementations/keymgmt/mac_legacy_kmgmt.c + $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_dsa_kmgmt.c + $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_kem_kmgmt.c $(OPENSSL_PATH)/providers/implementations/keymgmt/rsa_kmgmt.c + $(OPENSSL_PATH)/providers/implementations/keymgmt/slh_dsa_kmgmt.c $(OPENSSL_PATH)/providers/implementations/macs/gmac_prov.c $(OPENSSL_PATH)/providers/implementations/macs/hmac_prov.c $(OPENSSL_PATH)/providers/implementations/macs/kmac_prov.c @@ -620,10 +656,16 @@ $(OPENSSL_PATH)/providers/implementations/rands/seeding/rand_unix.c $(OPENSSL_PATH)/providers/implementations/rands/seeding/rand_win.c $(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c + $(OPENSSL_PATH)/providers/implementations/signature/ml_dsa_sig.c $(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c + $(OPENSSL_PATH)/providers/implementations/signature/slh_dsa_sig.c + $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c + $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c $(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c $(OPENSSL_GEN_PATH)/crypto/params_idx.c + $(OPENSSL_PATH)/providers/common/der/der_ml_dsa_key.c $(OPENSSL_PATH)/providers/common/der/der_rsa_key.c + $(OPENSSL_PATH)/providers/common/der/der_slh_dsa_key.c $(OPENSSL_PATH)/providers/common/provider_ctx.c $(OPENSSL_PATH)/providers/common/provider_err.c $(OPENSSL_PATH)/providers/implementations/ciphers/ciphercommon.c @@ 
-636,7 +678,9 @@ $(OPENSSL_PATH)/providers/implementations/digests/digestcommon.c $(OPENSSL_PATH)/ssl/record/methods/tls_pad.c $(OPENSSL_GEN_PATH)/providers/common/der/der_digests_gen.c + $(OPENSSL_GEN_PATH)/providers/common/der/der_ml_dsa_gen.c $(OPENSSL_GEN_PATH)/providers/common/der/der_rsa_gen.c + $(OPENSSL_GEN_PATH)/providers/common/der/der_slh_dsa_gen.c $(OPENSSL_GEN_PATH)/providers/common/der/der_wrap_gen.c $(OPENSSL_PATH)/ssl/bio_ssl.c $(OPENSSL_PATH)/ssl/d1_lib.c @@ -652,7 +696,6 @@ $(OPENSSL_PATH)/ssl/ssl_cert_comp.c $(OPENSSL_PATH)/ssl/ssl_ciph.c $(OPENSSL_PATH)/ssl/ssl_conf.c - $(OPENSSL_PATH)/ssl/ssl_err.c $(OPENSSL_PATH)/ssl/ssl_err_legacy.c $(OPENSSL_PATH)/ssl/ssl_init.c $(OPENSSL_PATH)/ssl/ssl_lib.c @@ -669,6 +712,8 @@ $(OPENSSL_PATH)/ssl/tls13_enc.c $(OPENSSL_PATH)/ssl/tls_depr.c $(OPENSSL_PATH)/ssl/tls_srp.c + $(OPENSSL_PATH)/ssl/quic/quic_tls.c + $(OPENSSL_PATH)/ssl/quic/quic_tls_api.c $(OPENSSL_PATH)/ssl/record/rec_layer_d1.c $(OPENSSL_PATH)/ssl/record/rec_layer_s3.c $(OPENSSL_PATH)/ssl/record/methods/dtls_meth.c diff --git a/CryptoPkg/Library/OpensslLib/OpensslLibAccel.inf b/CryptoPkg/Library/OpensslLib/OpensslLibAccel.inf index 1138211ff4..94eebf8dfd 100644 --- a/CryptoPkg/Library/OpensslLib/OpensslLibAccel.inf +++ b/CryptoPkg/Library/OpensslLib/OpensslLibAccel.inf @@ -306,7 +306,9 @@ $(OPENSSL_PATH)/crypto/evp/pmeth_check.c $(OPENSSL_PATH)/crypto/evp/pmeth_gn.c $(OPENSSL_PATH)/crypto/evp/pmeth_lib.c + $(OPENSSL_PATH)/crypto/evp/s_lib.c $(OPENSSL_PATH)/crypto/evp/signature.c + $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c $(OPENSSL_PATH)/crypto/ffc/ffc_backend.c $(OPENSSL_PATH)/crypto/ffc/ffc_dh.c $(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c @@ -314,6 +316,7 @@ $(OPENSSL_PATH)/crypto/ffc/ffc_params.c $(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c $(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c + $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c $(OPENSSL_PATH)/crypto/hashtable/hashtable.c $(OPENSSL_PATH)/crypto/hmac/hmac.c $(OPENSSL_PATH)/crypto/hpke/hpke.c @@ -369,6 +372,7 @@ $(OPENSSL_PATH)/crypto/self_test_core.c $(OPENSSL_PATH)/crypto/sleep.c $(OPENSSL_PATH)/crypto/sparse_array.c + $(OPENSSL_PATH)/crypto/ssl_err.c $(OPENSSL_PATH)/crypto/threads_lib.c $(OPENSSL_PATH)/crypto/threads_none.c $(OPENSSL_PATH)/crypto/threads_pthread.c @@ -379,6 +383,15 @@ $(OPENSSL_PATH)/crypto/md5/md5_dgst.c $(OPENSSL_PATH)/crypto/md5/md5_one.c $(OPENSSL_PATH)/crypto/md5/md5_sha1.c + $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_encoders.c + $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key.c + $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key_compress.c + $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_matrix.c + $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_ntt.c + $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_params.c + $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sample.c + $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sign.c + $(OPENSSL_PATH)/crypto/ml_kem/ml_kem.c $(OPENSSL_PATH)/crypto/modes/cbc128.c $(OPENSSL_PATH)/crypto/modes/ccm128.c $(OPENSSL_PATH)/crypto/modes/cfb128.c @@ -459,6 +472,16 @@ $(OPENSSL_PATH)/crypto/sha/sha256.c $(OPENSSL_PATH)/crypto/sha/sha3.c $(OPENSSL_PATH)/crypto/sha/sha512.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_adrs.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_hash_ctx.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_key.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_fors.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_hash.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_hypertree.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_params.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_wots.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_xmss.c 
$(OPENSSL_PATH)/crypto/sm3/legacy_sm3.c $(OPENSSL_PATH)/crypto/sm3/sm3.c $(OPENSSL_PATH)/crypto/stack/stack.c @@ -483,13 +506,17 @@ $(OPENSSL_PATH)/crypto/x509/t_crl.c $(OPENSSL_PATH)/crypto/x509/t_req.c $(OPENSSL_PATH)/crypto/x509/t_x509.c + $(OPENSSL_PATH)/crypto/x509/v3_aaa.c $(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c $(OPENSSL_PATH)/crypto/x509/v3_addr.c $(OPENSSL_PATH)/crypto/x509/v3_admis.c $(OPENSSL_PATH)/crypto/x509/v3_akeya.c $(OPENSSL_PATH)/crypto/x509/v3_akid.c $(OPENSSL_PATH)/crypto/x509/v3_asid.c + $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c + $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c $(OPENSSL_PATH)/crypto/x509/v3_audit_id.c + $(OPENSSL_PATH)/crypto/x509/v3_authattid.c $(OPENSSL_PATH)/crypto/x509/v3_battcons.c $(OPENSSL_PATH)/crypto/x509/v3_bcons.c $(OPENSSL_PATH)/crypto/x509/v3_bitst.c @@ -517,12 +544,14 @@ $(OPENSSL_PATH)/crypto/x509/v3_pmaps.c $(OPENSSL_PATH)/crypto/x509/v3_prn.c $(OPENSSL_PATH)/crypto/x509/v3_purp.c + $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c $(OPENSSL_PATH)/crypto/x509/v3_san.c $(OPENSSL_PATH)/crypto/x509/v3_sda.c $(OPENSSL_PATH)/crypto/x509/v3_single_use.c $(OPENSSL_PATH)/crypto/x509/v3_skid.c $(OPENSSL_PATH)/crypto/x509/v3_soa_id.c $(OPENSSL_PATH)/crypto/x509/v3_sxnet.c + $(OPENSSL_PATH)/crypto/x509/v3_timespec.c $(OPENSSL_PATH)/crypto/x509/v3_tlsf.c $(OPENSSL_PATH)/crypto/x509/v3_usernotice.c $(OPENSSL_PATH)/crypto/x509/v3_utf8.c @@ -601,6 +630,9 @@ $(OPENSSL_PATH)/providers/implementations/encode_decode/decode_pvk2key.c $(OPENSSL_PATH)/providers/implementations/encode_decode/decode_spki2typespki.c $(OPENSSL_PATH)/providers/implementations/encode_decode/endecoder_common.c + $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_common_codecs.c + $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_dsa_codecs.c + $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_kem_codecs.c $(OPENSSL_PATH)/providers/implementations/exchange/dh_exch.c $(OPENSSL_PATH)/providers/implementations/exchange/kdf_exch.c $(OPENSSL_PATH)/providers/implementations/kdfs/argon2.c @@ -616,11 +648,15 @@ $(OPENSSL_PATH)/providers/implementations/kdfs/sskdf.c $(OPENSSL_PATH)/providers/implementations/kdfs/tls1_prf.c $(OPENSSL_PATH)/providers/implementations/kdfs/x942kdf.c + $(OPENSSL_PATH)/providers/implementations/kem/ml_kem_kem.c $(OPENSSL_PATH)/providers/implementations/kem/rsa_kem.c $(OPENSSL_PATH)/providers/implementations/keymgmt/dh_kmgmt.c $(OPENSSL_PATH)/providers/implementations/keymgmt/kdf_legacy_kmgmt.c $(OPENSSL_PATH)/providers/implementations/keymgmt/mac_legacy_kmgmt.c + $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_dsa_kmgmt.c + $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_kem_kmgmt.c $(OPENSSL_PATH)/providers/implementations/keymgmt/rsa_kmgmt.c + $(OPENSSL_PATH)/providers/implementations/keymgmt/slh_dsa_kmgmt.c $(OPENSSL_PATH)/providers/implementations/macs/gmac_prov.c $(OPENSSL_PATH)/providers/implementations/macs/hmac_prov.c $(OPENSSL_PATH)/providers/implementations/macs/kmac_prov.c @@ -636,10 +672,16 @@ $(OPENSSL_PATH)/providers/implementations/rands/seeding/rand_unix.c $(OPENSSL_PATH)/providers/implementations/rands/seeding/rand_win.c $(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c + $(OPENSSL_PATH)/providers/implementations/signature/ml_dsa_sig.c $(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c + $(OPENSSL_PATH)/providers/implementations/signature/slh_dsa_sig.c + $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c + $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c 
$(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c $(OPENSSL_GEN_PATH)/crypto/params_idx.c + $(OPENSSL_PATH)/providers/common/der/der_ml_dsa_key.c $(OPENSSL_PATH)/providers/common/der/der_rsa_key.c + $(OPENSSL_PATH)/providers/common/der/der_slh_dsa_key.c $(OPENSSL_PATH)/providers/common/provider_ctx.c $(OPENSSL_PATH)/providers/common/provider_err.c $(OPENSSL_PATH)/providers/implementations/ciphers/ciphercommon.c @@ -652,7 +694,9 @@ $(OPENSSL_PATH)/providers/implementations/digests/digestcommon.c $(OPENSSL_PATH)/ssl/record/methods/tls_pad.c $(OPENSSL_GEN_PATH)/providers/common/der/der_digests_gen.c + $(OPENSSL_GEN_PATH)/providers/common/der/der_ml_dsa_gen.c $(OPENSSL_GEN_PATH)/providers/common/der/der_rsa_gen.c + $(OPENSSL_GEN_PATH)/providers/common/der/der_slh_dsa_gen.c $(OPENSSL_GEN_PATH)/providers/common/der/der_wrap_gen.c $(OPENSSL_PATH)/ssl/bio_ssl.c $(OPENSSL_PATH)/ssl/d1_lib.c @@ -668,7 +712,6 @@ $(OPENSSL_PATH)/ssl/ssl_cert_comp.c $(OPENSSL_PATH)/ssl/ssl_ciph.c $(OPENSSL_PATH)/ssl/ssl_conf.c - $(OPENSSL_PATH)/ssl/ssl_err.c $(OPENSSL_PATH)/ssl/ssl_err_legacy.c $(OPENSSL_PATH)/ssl/ssl_init.c $(OPENSSL_PATH)/ssl/ssl_lib.c @@ -685,6 +728,8 @@ $(OPENSSL_PATH)/ssl/tls13_enc.c $(OPENSSL_PATH)/ssl/tls_depr.c $(OPENSSL_PATH)/ssl/tls_srp.c + $(OPENSSL_PATH)/ssl/quic/quic_tls.c + $(OPENSSL_PATH)/ssl/quic/quic_tls_api.c $(OPENSSL_PATH)/ssl/record/rec_layer_d1.c $(OPENSSL_PATH)/ssl/record/rec_layer_s3.c $(OPENSSL_PATH)/ssl/record/methods/dtls_meth.c @@ -982,7 +1027,9 @@ $(OPENSSL_PATH)/crypto/evp/pmeth_check.c $(OPENSSL_PATH)/crypto/evp/pmeth_gn.c $(OPENSSL_PATH)/crypto/evp/pmeth_lib.c + $(OPENSSL_PATH)/crypto/evp/s_lib.c $(OPENSSL_PATH)/crypto/evp/signature.c + $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c $(OPENSSL_PATH)/crypto/ffc/ffc_backend.c $(OPENSSL_PATH)/crypto/ffc/ffc_dh.c $(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c @@ -990,6 +1037,7 @@ $(OPENSSL_PATH)/crypto/ffc/ffc_params.c $(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c $(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c + $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c $(OPENSSL_PATH)/crypto/hashtable/hashtable.c $(OPENSSL_PATH)/crypto/hmac/hmac.c $(OPENSSL_PATH)/crypto/hpke/hpke.c @@ -1045,6 +1093,7 @@ $(OPENSSL_PATH)/crypto/self_test_core.c $(OPENSSL_PATH)/crypto/sleep.c $(OPENSSL_PATH)/crypto/sparse_array.c + $(OPENSSL_PATH)/crypto/ssl_err.c $(OPENSSL_PATH)/crypto/threads_lib.c $(OPENSSL_PATH)/crypto/threads_none.c $(OPENSSL_PATH)/crypto/threads_pthread.c @@ -1055,6 +1104,15 @@ $(OPENSSL_PATH)/crypto/md5/md5_dgst.c $(OPENSSL_PATH)/crypto/md5/md5_one.c $(OPENSSL_PATH)/crypto/md5/md5_sha1.c + $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_encoders.c + $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key.c + $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key_compress.c + $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_matrix.c + $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_ntt.c + $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_params.c + $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sample.c + $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sign.c + $(OPENSSL_PATH)/crypto/ml_kem/ml_kem.c $(OPENSSL_PATH)/crypto/modes/cbc128.c $(OPENSSL_PATH)/crypto/modes/ccm128.c $(OPENSSL_PATH)/crypto/modes/cfb128.c @@ -1134,6 +1192,16 @@ $(OPENSSL_PATH)/crypto/sha/sha256.c $(OPENSSL_PATH)/crypto/sha/sha3.c $(OPENSSL_PATH)/crypto/sha/sha512.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_adrs.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_hash_ctx.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_key.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_fors.c + $(OPENSSL_PATH)/crypto/slh_dsa/slh_hash.c + 
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_hypertree.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_params.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_wots.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_xmss.c
   $(OPENSSL_PATH)/crypto/sm3/legacy_sm3.c
   $(OPENSSL_PATH)/crypto/sm3/sm3.c
   $(OPENSSL_PATH)/crypto/stack/stack.c
@@ -1158,13 +1226,17 @@
   $(OPENSSL_PATH)/crypto/x509/t_crl.c
   $(OPENSSL_PATH)/crypto/x509/t_req.c
   $(OPENSSL_PATH)/crypto/x509/t_x509.c
+  $(OPENSSL_PATH)/crypto/x509/v3_aaa.c
   $(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c
   $(OPENSSL_PATH)/crypto/x509/v3_addr.c
   $(OPENSSL_PATH)/crypto/x509/v3_admis.c
   $(OPENSSL_PATH)/crypto/x509/v3_akeya.c
   $(OPENSSL_PATH)/crypto/x509/v3_akid.c
   $(OPENSSL_PATH)/crypto/x509/v3_asid.c
+  $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c
+  $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c
   $(OPENSSL_PATH)/crypto/x509/v3_audit_id.c
+  $(OPENSSL_PATH)/crypto/x509/v3_authattid.c
   $(OPENSSL_PATH)/crypto/x509/v3_battcons.c
   $(OPENSSL_PATH)/crypto/x509/v3_bcons.c
   $(OPENSSL_PATH)/crypto/x509/v3_bitst.c
@@ -1192,12 +1264,14 @@
   $(OPENSSL_PATH)/crypto/x509/v3_pmaps.c
   $(OPENSSL_PATH)/crypto/x509/v3_prn.c
   $(OPENSSL_PATH)/crypto/x509/v3_purp.c
+  $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c
   $(OPENSSL_PATH)/crypto/x509/v3_san.c
   $(OPENSSL_PATH)/crypto/x509/v3_sda.c
   $(OPENSSL_PATH)/crypto/x509/v3_single_use.c
   $(OPENSSL_PATH)/crypto/x509/v3_skid.c
   $(OPENSSL_PATH)/crypto/x509/v3_soa_id.c
   $(OPENSSL_PATH)/crypto/x509/v3_sxnet.c
+  $(OPENSSL_PATH)/crypto/x509/v3_timespec.c
   $(OPENSSL_PATH)/crypto/x509/v3_tlsf.c
   $(OPENSSL_PATH)/crypto/x509/v3_usernotice.c
   $(OPENSSL_PATH)/crypto/x509/v3_utf8.c
@@ -1276,6 +1350,9 @@
   $(OPENSSL_PATH)/providers/implementations/encode_decode/decode_pvk2key.c
   $(OPENSSL_PATH)/providers/implementations/encode_decode/decode_spki2typespki.c
   $(OPENSSL_PATH)/providers/implementations/encode_decode/endecoder_common.c
+  $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_common_codecs.c
+  $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_dsa_codecs.c
+  $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_kem_codecs.c
   $(OPENSSL_PATH)/providers/implementations/exchange/dh_exch.c
   $(OPENSSL_PATH)/providers/implementations/exchange/kdf_exch.c
   $(OPENSSL_PATH)/providers/implementations/kdfs/argon2.c
@@ -1291,11 +1368,15 @@
   $(OPENSSL_PATH)/providers/implementations/kdfs/sskdf.c
   $(OPENSSL_PATH)/providers/implementations/kdfs/tls1_prf.c
   $(OPENSSL_PATH)/providers/implementations/kdfs/x942kdf.c
+  $(OPENSSL_PATH)/providers/implementations/kem/ml_kem_kem.c
   $(OPENSSL_PATH)/providers/implementations/kem/rsa_kem.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/dh_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/kdf_legacy_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/mac_legacy_kmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_dsa_kmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_kem_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/rsa_kmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/keymgmt/slh_dsa_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/macs/gmac_prov.c
   $(OPENSSL_PATH)/providers/implementations/macs/hmac_prov.c
   $(OPENSSL_PATH)/providers/implementations/macs/kmac_prov.c
@@ -1311,10 +1392,16 @@
   $(OPENSSL_PATH)/providers/implementations/rands/seeding/rand_unix.c
   $(OPENSSL_PATH)/providers/implementations/rands/seeding/rand_win.c
   $(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c
+  $(OPENSSL_PATH)/providers/implementations/signature/ml_dsa_sig.c
   $(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c
+  $(OPENSSL_PATH)/providers/implementations/signature/slh_dsa_sig.c
+  $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c
   $(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c
   $(OPENSSL_GEN_PATH)/crypto/params_idx.c
+  $(OPENSSL_PATH)/providers/common/der/der_ml_dsa_key.c
   $(OPENSSL_PATH)/providers/common/der/der_rsa_key.c
+  $(OPENSSL_PATH)/providers/common/der/der_slh_dsa_key.c
   $(OPENSSL_PATH)/providers/common/provider_ctx.c
   $(OPENSSL_PATH)/providers/common/provider_err.c
   $(OPENSSL_PATH)/providers/implementations/ciphers/ciphercommon.c
@@ -1327,7 +1414,9 @@
   $(OPENSSL_PATH)/providers/implementations/digests/digestcommon.c
   $(OPENSSL_PATH)/ssl/record/methods/tls_pad.c
   $(OPENSSL_GEN_PATH)/providers/common/der/der_digests_gen.c
+  $(OPENSSL_GEN_PATH)/providers/common/der/der_ml_dsa_gen.c
   $(OPENSSL_GEN_PATH)/providers/common/der/der_rsa_gen.c
+  $(OPENSSL_GEN_PATH)/providers/common/der/der_slh_dsa_gen.c
   $(OPENSSL_GEN_PATH)/providers/common/der/der_wrap_gen.c
   $(OPENSSL_PATH)/ssl/bio_ssl.c
   $(OPENSSL_PATH)/ssl/d1_lib.c
@@ -1343,7 +1432,6 @@
   $(OPENSSL_PATH)/ssl/ssl_cert_comp.c
   $(OPENSSL_PATH)/ssl/ssl_ciph.c
   $(OPENSSL_PATH)/ssl/ssl_conf.c
-  $(OPENSSL_PATH)/ssl/ssl_err.c
   $(OPENSSL_PATH)/ssl/ssl_err_legacy.c
   $(OPENSSL_PATH)/ssl/ssl_init.c
   $(OPENSSL_PATH)/ssl/ssl_lib.c
@@ -1360,6 +1448,8 @@
   $(OPENSSL_PATH)/ssl/tls13_enc.c
   $(OPENSSL_PATH)/ssl/tls_depr.c
   $(OPENSSL_PATH)/ssl/tls_srp.c
+  $(OPENSSL_PATH)/ssl/quic/quic_tls.c
+  $(OPENSSL_PATH)/ssl/quic/quic_tls_api.c
   $(OPENSSL_PATH)/ssl/record/rec_layer_d1.c
   $(OPENSSL_PATH)/ssl/record/rec_layer_s3.c
   $(OPENSSL_PATH)/ssl/record/methods/dtls_meth.c
@@ -1382,6 +1472,7 @@
   $(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/aesni-sha1-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
   $(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/aesni-sha256-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
   $(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/aesni-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
+  $(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/aesni-xts-avx512.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
   $(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/bsaes-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
   $(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/vpaes-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
   $(OPENSSL_GEN_PATH)/X64-MSFT/crypto/x86_64cpuid.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
@@ -1400,6 +1491,7 @@
   $(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/aesni-sha1-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
   $(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/aesni-sha256-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
   $(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/aesni-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
+  $(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/aesni-xts-avx512.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
   $(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/bsaes-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
   $(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/vpaes-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
   $(OPENSSL_GEN_PATH)/X64-GCC/crypto/x86_64cpuid.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
@@ -1676,7 +1768,9 @@
   $(OPENSSL_PATH)/crypto/evp/pmeth_check.c
   $(OPENSSL_PATH)/crypto/evp/pmeth_gn.c
   $(OPENSSL_PATH)/crypto/evp/pmeth_lib.c
+  $(OPENSSL_PATH)/crypto/evp/s_lib.c
   $(OPENSSL_PATH)/crypto/evp/signature.c
+  $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_backend.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_dh.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c
@@ -1684,6 +1778,7 @@
   $(OPENSSL_PATH)/crypto/ffc/ffc_params.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c
+  $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c
   $(OPENSSL_PATH)/crypto/hashtable/hashtable.c
   $(OPENSSL_PATH)/crypto/hmac/hmac.c
   $(OPENSSL_PATH)/crypto/hpke/hpke.c
@@ -1739,6 +1834,7 @@
   $(OPENSSL_PATH)/crypto/self_test_core.c
   $(OPENSSL_PATH)/crypto/sleep.c
   $(OPENSSL_PATH)/crypto/sparse_array.c
+  $(OPENSSL_PATH)/crypto/ssl_err.c
   $(OPENSSL_PATH)/crypto/threads_lib.c
   $(OPENSSL_PATH)/crypto/threads_none.c
   $(OPENSSL_PATH)/crypto/threads_pthread.c
@@ -1749,6 +1845,15 @@
   $(OPENSSL_PATH)/crypto/md5/md5_dgst.c
   $(OPENSSL_PATH)/crypto/md5/md5_one.c
   $(OPENSSL_PATH)/crypto/md5/md5_sha1.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_encoders.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key_compress.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_matrix.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_ntt.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_params.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sample.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sign.c
+  $(OPENSSL_PATH)/crypto/ml_kem/ml_kem.c
   $(OPENSSL_PATH)/crypto/modes/cbc128.c
   $(OPENSSL_PATH)/crypto/modes/ccm128.c
   $(OPENSSL_PATH)/crypto/modes/cfb128.c
@@ -1828,6 +1933,16 @@
   $(OPENSSL_PATH)/crypto/sha/sha256.c
   $(OPENSSL_PATH)/crypto/sha/sha3.c
   $(OPENSSL_PATH)/crypto/sha/sha512.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_adrs.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_hash_ctx.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_key.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_fors.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_hash.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_hypertree.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_params.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_wots.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_xmss.c
   $(OPENSSL_PATH)/crypto/sm3/legacy_sm3.c
   $(OPENSSL_PATH)/crypto/sm3/sm3.c
   $(OPENSSL_PATH)/crypto/stack/stack.c
@@ -1852,13 +1967,17 @@
   $(OPENSSL_PATH)/crypto/x509/t_crl.c
   $(OPENSSL_PATH)/crypto/x509/t_req.c
   $(OPENSSL_PATH)/crypto/x509/t_x509.c
+  $(OPENSSL_PATH)/crypto/x509/v3_aaa.c
   $(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c
   $(OPENSSL_PATH)/crypto/x509/v3_addr.c
   $(OPENSSL_PATH)/crypto/x509/v3_admis.c
   $(OPENSSL_PATH)/crypto/x509/v3_akeya.c
   $(OPENSSL_PATH)/crypto/x509/v3_akid.c
   $(OPENSSL_PATH)/crypto/x509/v3_asid.c
+  $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c
+  $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c
   $(OPENSSL_PATH)/crypto/x509/v3_audit_id.c
+  $(OPENSSL_PATH)/crypto/x509/v3_authattid.c
   $(OPENSSL_PATH)/crypto/x509/v3_battcons.c
   $(OPENSSL_PATH)/crypto/x509/v3_bcons.c
   $(OPENSSL_PATH)/crypto/x509/v3_bitst.c
@@ -1886,12 +2005,14 @@
   $(OPENSSL_PATH)/crypto/x509/v3_pmaps.c
   $(OPENSSL_PATH)/crypto/x509/v3_prn.c
   $(OPENSSL_PATH)/crypto/x509/v3_purp.c
+  $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c
   $(OPENSSL_PATH)/crypto/x509/v3_san.c
   $(OPENSSL_PATH)/crypto/x509/v3_sda.c
   $(OPENSSL_PATH)/crypto/x509/v3_single_use.c
   $(OPENSSL_PATH)/crypto/x509/v3_skid.c
   $(OPENSSL_PATH)/crypto/x509/v3_soa_id.c
   $(OPENSSL_PATH)/crypto/x509/v3_sxnet.c
+  $(OPENSSL_PATH)/crypto/x509/v3_timespec.c
   $(OPENSSL_PATH)/crypto/x509/v3_tlsf.c
   $(OPENSSL_PATH)/crypto/x509/v3_usernotice.c
   $(OPENSSL_PATH)/crypto/x509/v3_utf8.c
@@ -1970,6 +2091,9 @@
   $(OPENSSL_PATH)/providers/implementations/encode_decode/decode_pvk2key.c
   $(OPENSSL_PATH)/providers/implementations/encode_decode/decode_spki2typespki.c
   $(OPENSSL_PATH)/providers/implementations/encode_decode/endecoder_common.c
+  $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_common_codecs.c
+  $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_dsa_codecs.c
+  $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_kem_codecs.c
   $(OPENSSL_PATH)/providers/implementations/exchange/dh_exch.c
   $(OPENSSL_PATH)/providers/implementations/exchange/kdf_exch.c
   $(OPENSSL_PATH)/providers/implementations/kdfs/argon2.c
@@ -1985,11 +2109,15 @@
   $(OPENSSL_PATH)/providers/implementations/kdfs/sskdf.c
   $(OPENSSL_PATH)/providers/implementations/kdfs/tls1_prf.c
   $(OPENSSL_PATH)/providers/implementations/kdfs/x942kdf.c
+  $(OPENSSL_PATH)/providers/implementations/kem/ml_kem_kem.c
   $(OPENSSL_PATH)/providers/implementations/kem/rsa_kem.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/dh_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/kdf_legacy_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/mac_legacy_kmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_dsa_kmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_kem_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/rsa_kmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/keymgmt/slh_dsa_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/macs/gmac_prov.c
   $(OPENSSL_PATH)/providers/implementations/macs/hmac_prov.c
   $(OPENSSL_PATH)/providers/implementations/macs/kmac_prov.c
@@ -2005,10 +2133,16 @@
   $(OPENSSL_PATH)/providers/implementations/rands/seeding/rand_unix.c
   $(OPENSSL_PATH)/providers/implementations/rands/seeding/rand_win.c
   $(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c
+  $(OPENSSL_PATH)/providers/implementations/signature/ml_dsa_sig.c
   $(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c
+  $(OPENSSL_PATH)/providers/implementations/signature/slh_dsa_sig.c
+  $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c
   $(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c
   $(OPENSSL_GEN_PATH)/crypto/params_idx.c
+  $(OPENSSL_PATH)/providers/common/der/der_ml_dsa_key.c
   $(OPENSSL_PATH)/providers/common/der/der_rsa_key.c
+  $(OPENSSL_PATH)/providers/common/der/der_slh_dsa_key.c
   $(OPENSSL_PATH)/providers/common/provider_ctx.c
   $(OPENSSL_PATH)/providers/common/provider_err.c
   $(OPENSSL_PATH)/providers/implementations/ciphers/ciphercommon.c
@@ -2021,7 +2155,9 @@
   $(OPENSSL_PATH)/providers/implementations/digests/digestcommon.c
   $(OPENSSL_PATH)/ssl/record/methods/tls_pad.c
   $(OPENSSL_GEN_PATH)/providers/common/der/der_digests_gen.c
+  $(OPENSSL_GEN_PATH)/providers/common/der/der_ml_dsa_gen.c
   $(OPENSSL_GEN_PATH)/providers/common/der/der_rsa_gen.c
+  $(OPENSSL_GEN_PATH)/providers/common/der/der_slh_dsa_gen.c
   $(OPENSSL_GEN_PATH)/providers/common/der/der_wrap_gen.c
   $(OPENSSL_PATH)/ssl/bio_ssl.c
   $(OPENSSL_PATH)/ssl/d1_lib.c
@@ -2037,7 +2173,6 @@
   $(OPENSSL_PATH)/ssl/ssl_cert_comp.c
   $(OPENSSL_PATH)/ssl/ssl_ciph.c
   $(OPENSSL_PATH)/ssl/ssl_conf.c
-  $(OPENSSL_PATH)/ssl/ssl_err.c
   $(OPENSSL_PATH)/ssl/ssl_err_legacy.c
   $(OPENSSL_PATH)/ssl/ssl_init.c
   $(OPENSSL_PATH)/ssl/ssl_lib.c
@@ -2054,6 +2189,8 @@
   $(OPENSSL_PATH)/ssl/tls13_enc.c
   $(OPENSSL_PATH)/ssl/tls_depr.c
   $(OPENSSL_PATH)/ssl/tls_srp.c
+  $(OPENSSL_PATH)/ssl/quic/quic_tls.c
+  $(OPENSSL_PATH)/ssl/quic/quic_tls_api.c
   $(OPENSSL_PATH)/ssl/record/rec_layer_d1.c
   $(OPENSSL_PATH)/ssl/record/rec_layer_s3.c
   $(OPENSSL_PATH)/ssl/record/methods/dtls_meth.c
diff --git a/CryptoPkg/Library/OpensslLib/OpensslLibCrypto.inf b/CryptoPkg/Library/OpensslLib/OpensslLibCrypto.inf
index a5dd36432d..122b359c02 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslLibCrypto.inf
+++ b/CryptoPkg/Library/OpensslLib/OpensslLibCrypto.inf
@@ -290,7 +290,9 @@
   $(OPENSSL_PATH)/crypto/evp/pmeth_check.c
   $(OPENSSL_PATH)/crypto/evp/pmeth_gn.c
   $(OPENSSL_PATH)/crypto/evp/pmeth_lib.c
+  $(OPENSSL_PATH)/crypto/evp/s_lib.c
   $(OPENSSL_PATH)/crypto/evp/signature.c
+  $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_backend.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_dh.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c
@@ -298,6 +300,7 @@
   $(OPENSSL_PATH)/crypto/ffc/ffc_params.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c
+  $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c
   $(OPENSSL_PATH)/crypto/hashtable/hashtable.c
   $(OPENSSL_PATH)/crypto/hmac/hmac.c
   $(OPENSSL_PATH)/crypto/hpke/hpke.c
@@ -354,6 +357,7 @@
   $(OPENSSL_PATH)/crypto/self_test_core.c
   $(OPENSSL_PATH)/crypto/sleep.c
   $(OPENSSL_PATH)/crypto/sparse_array.c
+  $(OPENSSL_PATH)/crypto/ssl_err.c
   $(OPENSSL_PATH)/crypto/threads_lib.c
   $(OPENSSL_PATH)/crypto/threads_none.c
   $(OPENSSL_PATH)/crypto/threads_pthread.c
@@ -364,6 +368,15 @@
   $(OPENSSL_PATH)/crypto/md5/md5_dgst.c
   $(OPENSSL_PATH)/crypto/md5/md5_one.c
   $(OPENSSL_PATH)/crypto/md5/md5_sha1.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_encoders.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key_compress.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_matrix.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_ntt.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_params.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sample.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sign.c
+  $(OPENSSL_PATH)/crypto/ml_kem/ml_kem.c
   $(OPENSSL_PATH)/crypto/modes/cbc128.c
   $(OPENSSL_PATH)/crypto/modes/ccm128.c
   $(OPENSSL_PATH)/crypto/modes/cfb128.c
@@ -444,6 +457,16 @@
   $(OPENSSL_PATH)/crypto/sha/sha256.c
   $(OPENSSL_PATH)/crypto/sha/sha3.c
   $(OPENSSL_PATH)/crypto/sha/sha512.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_adrs.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_hash_ctx.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_key.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_fors.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_hash.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_hypertree.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_params.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_wots.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_xmss.c
   $(OPENSSL_PATH)/crypto/sm3/legacy_sm3.c
   $(OPENSSL_PATH)/crypto/sm3/sm3.c
   $(OPENSSL_PATH)/crypto/stack/stack.c
@@ -468,13 +491,17 @@
   $(OPENSSL_PATH)/crypto/x509/t_crl.c
   $(OPENSSL_PATH)/crypto/x509/t_req.c
   $(OPENSSL_PATH)/crypto/x509/t_x509.c
+  $(OPENSSL_PATH)/crypto/x509/v3_aaa.c
   $(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c
   $(OPENSSL_PATH)/crypto/x509/v3_addr.c
   $(OPENSSL_PATH)/crypto/x509/v3_admis.c
   $(OPENSSL_PATH)/crypto/x509/v3_akeya.c
   $(OPENSSL_PATH)/crypto/x509/v3_akid.c
   $(OPENSSL_PATH)/crypto/x509/v3_asid.c
+  $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c
+  $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c
   $(OPENSSL_PATH)/crypto/x509/v3_audit_id.c
+  $(OPENSSL_PATH)/crypto/x509/v3_authattid.c
   $(OPENSSL_PATH)/crypto/x509/v3_battcons.c
   $(OPENSSL_PATH)/crypto/x509/v3_bcons.c
   $(OPENSSL_PATH)/crypto/x509/v3_bitst.c
@@ -502,12 +529,14 @@
   $(OPENSSL_PATH)/crypto/x509/v3_pmaps.c
   $(OPENSSL_PATH)/crypto/x509/v3_prn.c
   $(OPENSSL_PATH)/crypto/x509/v3_purp.c
+  $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c
   $(OPENSSL_PATH)/crypto/x509/v3_san.c
   $(OPENSSL_PATH)/crypto/x509/v3_sda.c
   $(OPENSSL_PATH)/crypto/x509/v3_single_use.c
   $(OPENSSL_PATH)/crypto/x509/v3_skid.c
   $(OPENSSL_PATH)/crypto/x509/v3_soa_id.c
   $(OPENSSL_PATH)/crypto/x509/v3_sxnet.c
+  $(OPENSSL_PATH)/crypto/x509/v3_timespec.c
   $(OPENSSL_PATH)/crypto/x509/v3_tlsf.c
   $(OPENSSL_PATH)/crypto/x509/v3_usernotice.c
   $(OPENSSL_PATH)/crypto/x509/v3_utf8.c
@@ -586,6 +615,9 @@
   $(OPENSSL_PATH)/providers/implementations/encode_decode/decode_pvk2key.c
   $(OPENSSL_PATH)/providers/implementations/encode_decode/decode_spki2typespki.c
   $(OPENSSL_PATH)/providers/implementations/encode_decode/endecoder_common.c
+  $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_common_codecs.c
+  $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_dsa_codecs.c
+  $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_kem_codecs.c
   $(OPENSSL_PATH)/providers/implementations/exchange/dh_exch.c
   $(OPENSSL_PATH)/providers/implementations/exchange/kdf_exch.c
   $(OPENSSL_PATH)/providers/implementations/kdfs/argon2.c
@@ -601,11 +633,15 @@
   $(OPENSSL_PATH)/providers/implementations/kdfs/sskdf.c
   $(OPENSSL_PATH)/providers/implementations/kdfs/tls1_prf.c
   $(OPENSSL_PATH)/providers/implementations/kdfs/x942kdf.c
+  $(OPENSSL_PATH)/providers/implementations/kem/ml_kem_kem.c
   $(OPENSSL_PATH)/providers/implementations/kem/rsa_kem.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/dh_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/kdf_legacy_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/mac_legacy_kmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_dsa_kmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_kem_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/rsa_kmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/keymgmt/slh_dsa_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/macs/gmac_prov.c
   $(OPENSSL_PATH)/providers/implementations/macs/hmac_prov.c
   $(OPENSSL_PATH)/providers/implementations/macs/kmac_prov.c
@@ -621,10 +657,16 @@
   $(OPENSSL_PATH)/providers/implementations/rands/seeding/rand_unix.c
   $(OPENSSL_PATH)/providers/implementations/rands/seeding/rand_win.c
   $(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c
+  $(OPENSSL_PATH)/providers/implementations/signature/ml_dsa_sig.c
   $(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c
+  $(OPENSSL_PATH)/providers/implementations/signature/slh_dsa_sig.c
+  $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c
   $(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c
   $(OPENSSL_GEN_PATH)/crypto/params_idx.c
+  $(OPENSSL_PATH)/providers/common/der/der_ml_dsa_key.c
   $(OPENSSL_PATH)/providers/common/der/der_rsa_key.c
+  $(OPENSSL_PATH)/providers/common/der/der_slh_dsa_key.c
   $(OPENSSL_PATH)/providers/common/provider_ctx.c
   $(OPENSSL_PATH)/providers/common/provider_err.c
   $(OPENSSL_PATH)/providers/implementations/ciphers/ciphercommon.c
@@ -637,7 +679,9 @@
   $(OPENSSL_PATH)/providers/implementations/digests/digestcommon.c
   $(OPENSSL_PATH)/ssl/record/methods/tls_pad.c
   $(OPENSSL_GEN_PATH)/providers/common/der/der_digests_gen.c
+  $(OPENSSL_GEN_PATH)/providers/common/der/der_ml_dsa_gen.c
   $(OPENSSL_GEN_PATH)/providers/common/der/der_rsa_gen.c
+  $(OPENSSL_GEN_PATH)/providers/common/der/der_slh_dsa_gen.c
   $(OPENSSL_GEN_PATH)/providers/common/der/der_wrap_gen.c
 # Autogenerated files list ends here
   buildinf.h
diff --git a/CryptoPkg/Library/OpensslLib/OpensslLibFull.inf b/CryptoPkg/Library/OpensslLib/OpensslLibFull.inf
index 75f40b8abf..c1823fe406 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslLibFull.inf
+++ b/CryptoPkg/Library/OpensslLib/OpensslLibFull.inf
@@ -332,7 +332,9 @@
   $(OPENSSL_PATH)/crypto/evp/pmeth_check.c
   $(OPENSSL_PATH)/crypto/evp/pmeth_gn.c
   $(OPENSSL_PATH)/crypto/evp/pmeth_lib.c
+  $(OPENSSL_PATH)/crypto/evp/s_lib.c
   $(OPENSSL_PATH)/crypto/evp/signature.c
+  $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_backend.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_dh.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c
@@ -340,6 +342,7 @@
   $(OPENSSL_PATH)/crypto/ffc/ffc_params.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c
+  $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c
   $(OPENSSL_PATH)/crypto/hashtable/hashtable.c
   $(OPENSSL_PATH)/crypto/hmac/hmac.c
   $(OPENSSL_PATH)/crypto/hpke/hpke.c
@@ -396,6 +399,7 @@
   $(OPENSSL_PATH)/crypto/self_test_core.c
   $(OPENSSL_PATH)/crypto/sleep.c
   $(OPENSSL_PATH)/crypto/sparse_array.c
+  $(OPENSSL_PATH)/crypto/ssl_err.c
   $(OPENSSL_PATH)/crypto/threads_lib.c
   $(OPENSSL_PATH)/crypto/threads_none.c
   $(OPENSSL_PATH)/crypto/threads_pthread.c
@@ -406,6 +410,15 @@
   $(OPENSSL_PATH)/crypto/md5/md5_dgst.c
   $(OPENSSL_PATH)/crypto/md5/md5_one.c
   $(OPENSSL_PATH)/crypto/md5/md5_sha1.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_encoders.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key_compress.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_matrix.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_ntt.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_params.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sample.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sign.c
+  $(OPENSSL_PATH)/crypto/ml_kem/ml_kem.c
   $(OPENSSL_PATH)/crypto/modes/cbc128.c
   $(OPENSSL_PATH)/crypto/modes/ccm128.c
   $(OPENSSL_PATH)/crypto/modes/cfb128.c
@@ -486,6 +499,16 @@
   $(OPENSSL_PATH)/crypto/sha/sha256.c
   $(OPENSSL_PATH)/crypto/sha/sha3.c
   $(OPENSSL_PATH)/crypto/sha/sha512.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_adrs.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_hash_ctx.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_key.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_fors.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_hash.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_hypertree.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_params.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_wots.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_xmss.c
   $(OPENSSL_PATH)/crypto/sm3/legacy_sm3.c
   $(OPENSSL_PATH)/crypto/sm3/sm3.c
   $(OPENSSL_PATH)/crypto/stack/stack.c
@@ -510,13 +533,17 @@
   $(OPENSSL_PATH)/crypto/x509/t_crl.c
   $(OPENSSL_PATH)/crypto/x509/t_req.c
   $(OPENSSL_PATH)/crypto/x509/t_x509.c
+  $(OPENSSL_PATH)/crypto/x509/v3_aaa.c
   $(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c
   $(OPENSSL_PATH)/crypto/x509/v3_addr.c
   $(OPENSSL_PATH)/crypto/x509/v3_admis.c
   $(OPENSSL_PATH)/crypto/x509/v3_akeya.c
   $(OPENSSL_PATH)/crypto/x509/v3_akid.c
   $(OPENSSL_PATH)/crypto/x509/v3_asid.c
+  $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c
+  $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c
   $(OPENSSL_PATH)/crypto/x509/v3_audit_id.c
+  $(OPENSSL_PATH)/crypto/x509/v3_authattid.c
   $(OPENSSL_PATH)/crypto/x509/v3_battcons.c
   $(OPENSSL_PATH)/crypto/x509/v3_bcons.c
   $(OPENSSL_PATH)/crypto/x509/v3_bitst.c
@@ -544,12 +571,14 @@
   $(OPENSSL_PATH)/crypto/x509/v3_pmaps.c
   $(OPENSSL_PATH)/crypto/x509/v3_prn.c
   $(OPENSSL_PATH)/crypto/x509/v3_purp.c
+  $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c
   $(OPENSSL_PATH)/crypto/x509/v3_san.c
   $(OPENSSL_PATH)/crypto/x509/v3_sda.c
   $(OPENSSL_PATH)/crypto/x509/v3_single_use.c
   $(OPENSSL_PATH)/crypto/x509/v3_skid.c
   $(OPENSSL_PATH)/crypto/x509/v3_soa_id.c
   $(OPENSSL_PATH)/crypto/x509/v3_sxnet.c
+  $(OPENSSL_PATH)/crypto/x509/v3_timespec.c
   $(OPENSSL_PATH)/crypto/x509/v3_tlsf.c
   $(OPENSSL_PATH)/crypto/x509/v3_usernotice.c
   $(OPENSSL_PATH)/crypto/x509/v3_utf8.c
@@ -628,6 +657,9 @@
   $(OPENSSL_PATH)/providers/implementations/encode_decode/decode_pvk2key.c
   $(OPENSSL_PATH)/providers/implementations/encode_decode/decode_spki2typespki.c
   $(OPENSSL_PATH)/providers/implementations/encode_decode/endecoder_common.c
+  $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_common_codecs.c
+  $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_dsa_codecs.c
+  $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_kem_codecs.c
   $(OPENSSL_PATH)/providers/implementations/exchange/dh_exch.c
   $(OPENSSL_PATH)/providers/implementations/exchange/ecdh_exch.c
   $(OPENSSL_PATH)/providers/implementations/exchange/ecx_exch.c
@@ -648,13 +680,19 @@
   $(OPENSSL_PATH)/providers/implementations/kem/ec_kem.c
   $(OPENSSL_PATH)/providers/implementations/kem/ecx_kem.c
   $(OPENSSL_PATH)/providers/implementations/kem/kem_util.c
+  $(OPENSSL_PATH)/providers/implementations/kem/ml_kem_kem.c
+  $(OPENSSL_PATH)/providers/implementations/kem/mlx_kem.c
   $(OPENSSL_PATH)/providers/implementations/kem/rsa_kem.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/dh_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/ec_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/ecx_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/kdf_legacy_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/mac_legacy_kmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_dsa_kmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_kem_kmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/keymgmt/mlx_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/rsa_kmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/keymgmt/slh_dsa_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/macs/gmac_prov.c
   $(OPENSSL_PATH)/providers/implementations/macs/hmac_prov.c
   $(OPENSSL_PATH)/providers/implementations/macs/kmac_prov.c
@@ -672,13 +710,19 @@
   $(OPENSSL_PATH)/providers/implementations/signature/ecdsa_sig.c
   $(OPENSSL_PATH)/providers/implementations/signature/eddsa_sig.c
   $(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c
+  $(OPENSSL_PATH)/providers/implementations/signature/ml_dsa_sig.c
   $(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c
+  $(OPENSSL_PATH)/providers/implementations/signature/slh_dsa_sig.c
+  $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c
   $(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c
   $(OPENSSL_GEN_PATH)/crypto/params_idx.c
   $(OPENSSL_PATH)/providers/common/der/der_ec_key.c
   $(OPENSSL_PATH)/providers/common/der/der_ec_sig.c
   $(OPENSSL_PATH)/providers/common/der/der_ecx_key.c
+  $(OPENSSL_PATH)/providers/common/der/der_ml_dsa_key.c
   $(OPENSSL_PATH)/providers/common/der/der_rsa_key.c
+  $(OPENSSL_PATH)/providers/common/der/der_slh_dsa_key.c
   $(OPENSSL_PATH)/providers/common/provider_ctx.c
   $(OPENSSL_PATH)/providers/common/provider_err.c
   $(OPENSSL_PATH)/providers/implementations/ciphers/ciphercommon.c
@@ -693,7 +737,9 @@
   $(OPENSSL_GEN_PATH)/providers/common/der/der_digests_gen.c
   $(OPENSSL_GEN_PATH)/providers/common/der/der_ec_gen.c
   $(OPENSSL_GEN_PATH)/providers/common/der/der_ecx_gen.c
+  $(OPENSSL_GEN_PATH)/providers/common/der/der_ml_dsa_gen.c
   $(OPENSSL_GEN_PATH)/providers/common/der/der_rsa_gen.c
+  $(OPENSSL_GEN_PATH)/providers/common/der/der_slh_dsa_gen.c
   $(OPENSSL_GEN_PATH)/providers/common/der/der_wrap_gen.c
   $(OPENSSL_PATH)/ssl/bio_ssl.c
   $(OPENSSL_PATH)/ssl/d1_lib.c
@@ -709,7 +755,6 @@
   $(OPENSSL_PATH)/ssl/ssl_cert_comp.c
   $(OPENSSL_PATH)/ssl/ssl_ciph.c
   $(OPENSSL_PATH)/ssl/ssl_conf.c
-  $(OPENSSL_PATH)/ssl/ssl_err.c
   $(OPENSSL_PATH)/ssl/ssl_err_legacy.c
   $(OPENSSL_PATH)/ssl/ssl_init.c
   $(OPENSSL_PATH)/ssl/ssl_lib.c
@@ -726,6 +771,8 @@
   $(OPENSSL_PATH)/ssl/tls13_enc.c
   $(OPENSSL_PATH)/ssl/tls_depr.c
   $(OPENSSL_PATH)/ssl/tls_srp.c
+  $(OPENSSL_PATH)/ssl/quic/quic_tls.c
+  $(OPENSSL_PATH)/ssl/quic/quic_tls_api.c
   $(OPENSSL_PATH)/ssl/record/rec_layer_d1.c
   $(OPENSSL_PATH)/ssl/record/rec_layer_s3.c
   $(OPENSSL_PATH)/ssl/record/methods/dtls_meth.c
diff --git a/CryptoPkg/Library/OpensslLib/OpensslLibFullAccel.inf b/CryptoPkg/Library/OpensslLib/OpensslLibFullAccel.inf
index f46659c0a0..64270b816e 100644
--- a/CryptoPkg/Library/OpensslLib/OpensslLibFullAccel.inf
+++ b/CryptoPkg/Library/OpensslLib/OpensslLibFullAccel.inf
@@ -349,7 +349,9 @@
   $(OPENSSL_PATH)/crypto/evp/pmeth_check.c
   $(OPENSSL_PATH)/crypto/evp/pmeth_gn.c
   $(OPENSSL_PATH)/crypto/evp/pmeth_lib.c
+  $(OPENSSL_PATH)/crypto/evp/s_lib.c
   $(OPENSSL_PATH)/crypto/evp/signature.c
+  $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_backend.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_dh.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c
@@ -357,6 +359,7 @@
   $(OPENSSL_PATH)/crypto/ffc/ffc_params.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c
+  $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c
   $(OPENSSL_PATH)/crypto/hashtable/hashtable.c
   $(OPENSSL_PATH)/crypto/hmac/hmac.c
   $(OPENSSL_PATH)/crypto/hpke/hpke.c
@@ -412,6 +415,7 @@
   $(OPENSSL_PATH)/crypto/self_test_core.c
   $(OPENSSL_PATH)/crypto/sleep.c
   $(OPENSSL_PATH)/crypto/sparse_array.c
+  $(OPENSSL_PATH)/crypto/ssl_err.c
   $(OPENSSL_PATH)/crypto/threads_lib.c
   $(OPENSSL_PATH)/crypto/threads_none.c
   $(OPENSSL_PATH)/crypto/threads_pthread.c
@@ -422,6 +426,15 @@
   $(OPENSSL_PATH)/crypto/md5/md5_dgst.c
   $(OPENSSL_PATH)/crypto/md5/md5_one.c
   $(OPENSSL_PATH)/crypto/md5/md5_sha1.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_encoders.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key_compress.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_matrix.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_ntt.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_params.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sample.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sign.c
+  $(OPENSSL_PATH)/crypto/ml_kem/ml_kem.c
   $(OPENSSL_PATH)/crypto/modes/cbc128.c
   $(OPENSSL_PATH)/crypto/modes/ccm128.c
   $(OPENSSL_PATH)/crypto/modes/cfb128.c
@@ -502,6 +515,16 @@
   $(OPENSSL_PATH)/crypto/sha/sha256.c
   $(OPENSSL_PATH)/crypto/sha/sha3.c
   $(OPENSSL_PATH)/crypto/sha/sha512.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_adrs.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_hash_ctx.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_key.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_fors.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_hash.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_hypertree.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_params.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_wots.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_xmss.c
   $(OPENSSL_PATH)/crypto/sm3/legacy_sm3.c
   $(OPENSSL_PATH)/crypto/sm3/sm3.c
   $(OPENSSL_PATH)/crypto/stack/stack.c
@@ -526,13 +549,17 @@
   $(OPENSSL_PATH)/crypto/x509/t_crl.c
   $(OPENSSL_PATH)/crypto/x509/t_req.c
   $(OPENSSL_PATH)/crypto/x509/t_x509.c
+  $(OPENSSL_PATH)/crypto/x509/v3_aaa.c
   $(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c
   $(OPENSSL_PATH)/crypto/x509/v3_addr.c
   $(OPENSSL_PATH)/crypto/x509/v3_admis.c
   $(OPENSSL_PATH)/crypto/x509/v3_akeya.c
   $(OPENSSL_PATH)/crypto/x509/v3_akid.c
   $(OPENSSL_PATH)/crypto/x509/v3_asid.c
+  $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c
+  $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c
   $(OPENSSL_PATH)/crypto/x509/v3_audit_id.c
+  $(OPENSSL_PATH)/crypto/x509/v3_authattid.c
   $(OPENSSL_PATH)/crypto/x509/v3_battcons.c
   $(OPENSSL_PATH)/crypto/x509/v3_bcons.c
   $(OPENSSL_PATH)/crypto/x509/v3_bitst.c
@@ -560,12 +587,14 @@
   $(OPENSSL_PATH)/crypto/x509/v3_pmaps.c
   $(OPENSSL_PATH)/crypto/x509/v3_prn.c
   $(OPENSSL_PATH)/crypto/x509/v3_purp.c
+  $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c
   $(OPENSSL_PATH)/crypto/x509/v3_san.c
   $(OPENSSL_PATH)/crypto/x509/v3_sda.c
   $(OPENSSL_PATH)/crypto/x509/v3_single_use.c
   $(OPENSSL_PATH)/crypto/x509/v3_skid.c
   $(OPENSSL_PATH)/crypto/x509/v3_soa_id.c
   $(OPENSSL_PATH)/crypto/x509/v3_sxnet.c
+  $(OPENSSL_PATH)/crypto/x509/v3_timespec.c
   $(OPENSSL_PATH)/crypto/x509/v3_tlsf.c
   $(OPENSSL_PATH)/crypto/x509/v3_usernotice.c
   $(OPENSSL_PATH)/crypto/x509/v3_utf8.c
@@ -644,6 +673,9 @@
   $(OPENSSL_PATH)/providers/implementations/encode_decode/decode_pvk2key.c
   $(OPENSSL_PATH)/providers/implementations/encode_decode/decode_spki2typespki.c
   $(OPENSSL_PATH)/providers/implementations/encode_decode/endecoder_common.c
+  $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_common_codecs.c
+  $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_dsa_codecs.c
+  $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_kem_codecs.c
   $(OPENSSL_PATH)/providers/implementations/exchange/dh_exch.c
   $(OPENSSL_PATH)/providers/implementations/exchange/ecdh_exch.c
   $(OPENSSL_PATH)/providers/implementations/exchange/ecx_exch.c
@@ -664,13 +696,19 @@
   $(OPENSSL_PATH)/providers/implementations/kem/ec_kem.c
   $(OPENSSL_PATH)/providers/implementations/kem/ecx_kem.c
   $(OPENSSL_PATH)/providers/implementations/kem/kem_util.c
+  $(OPENSSL_PATH)/providers/implementations/kem/ml_kem_kem.c
+  $(OPENSSL_PATH)/providers/implementations/kem/mlx_kem.c
   $(OPENSSL_PATH)/providers/implementations/kem/rsa_kem.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/dh_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/ec_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/ecx_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/kdf_legacy_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/mac_legacy_kmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_dsa_kmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_kem_kmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/keymgmt/mlx_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/rsa_kmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/keymgmt/slh_dsa_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/macs/gmac_prov.c
   $(OPENSSL_PATH)/providers/implementations/macs/hmac_prov.c
   $(OPENSSL_PATH)/providers/implementations/macs/kmac_prov.c
@@ -688,13 +726,19 @@
   $(OPENSSL_PATH)/providers/implementations/signature/ecdsa_sig.c
   $(OPENSSL_PATH)/providers/implementations/signature/eddsa_sig.c
   $(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c
+  $(OPENSSL_PATH)/providers/implementations/signature/ml_dsa_sig.c
   $(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c
+  $(OPENSSL_PATH)/providers/implementations/signature/slh_dsa_sig.c
+  $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c
   $(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c
   $(OPENSSL_GEN_PATH)/crypto/params_idx.c
   $(OPENSSL_PATH)/providers/common/der/der_ec_key.c
   $(OPENSSL_PATH)/providers/common/der/der_ec_sig.c
   $(OPENSSL_PATH)/providers/common/der/der_ecx_key.c
+  $(OPENSSL_PATH)/providers/common/der/der_ml_dsa_key.c
   $(OPENSSL_PATH)/providers/common/der/der_rsa_key.c
+  $(OPENSSL_PATH)/providers/common/der/der_slh_dsa_key.c
   $(OPENSSL_PATH)/providers/common/provider_ctx.c
   $(OPENSSL_PATH)/providers/common/provider_err.c
   $(OPENSSL_PATH)/providers/implementations/ciphers/ciphercommon.c
@@ -709,7 +753,9 @@
   $(OPENSSL_GEN_PATH)/providers/common/der/der_digests_gen.c
   $(OPENSSL_GEN_PATH)/providers/common/der/der_ec_gen.c
   $(OPENSSL_GEN_PATH)/providers/common/der/der_ecx_gen.c
+  $(OPENSSL_GEN_PATH)/providers/common/der/der_ml_dsa_gen.c
   $(OPENSSL_GEN_PATH)/providers/common/der/der_rsa_gen.c
+  $(OPENSSL_GEN_PATH)/providers/common/der/der_slh_dsa_gen.c
   $(OPENSSL_GEN_PATH)/providers/common/der/der_wrap_gen.c
   $(OPENSSL_PATH)/ssl/bio_ssl.c
   $(OPENSSL_PATH)/ssl/d1_lib.c
@@ -725,7 +771,6 @@
   $(OPENSSL_PATH)/ssl/ssl_cert_comp.c
   $(OPENSSL_PATH)/ssl/ssl_ciph.c
   $(OPENSSL_PATH)/ssl/ssl_conf.c
-  $(OPENSSL_PATH)/ssl/ssl_err.c
   $(OPENSSL_PATH)/ssl/ssl_err_legacy.c
   $(OPENSSL_PATH)/ssl/ssl_init.c
   $(OPENSSL_PATH)/ssl/ssl_lib.c
@@ -742,6 +787,8 @@
   $(OPENSSL_PATH)/ssl/tls13_enc.c
   $(OPENSSL_PATH)/ssl/tls_depr.c
   $(OPENSSL_PATH)/ssl/tls_srp.c
+  $(OPENSSL_PATH)/ssl/quic/quic_tls.c
+  $(OPENSSL_PATH)/ssl/quic/quic_tls_api.c
   $(OPENSSL_PATH)/ssl/record/rec_layer_d1.c
   $(OPENSSL_PATH)/ssl/record/rec_layer_s3.c
   $(OPENSSL_PATH)/ssl/record/methods/dtls_meth.c
@@ -1077,7 +1124,9 @@
   $(OPENSSL_PATH)/crypto/evp/pmeth_check.c
   $(OPENSSL_PATH)/crypto/evp/pmeth_gn.c
   $(OPENSSL_PATH)/crypto/evp/pmeth_lib.c
+  $(OPENSSL_PATH)/crypto/evp/s_lib.c
   $(OPENSSL_PATH)/crypto/evp/signature.c
+  $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_backend.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_dh.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c
@@ -1085,6 +1134,7 @@
   $(OPENSSL_PATH)/crypto/ffc/ffc_params.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c
+  $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c
   $(OPENSSL_PATH)/crypto/hashtable/hashtable.c
   $(OPENSSL_PATH)/crypto/hmac/hmac.c
   $(OPENSSL_PATH)/crypto/hpke/hpke.c
@@ -1140,6 +1190,7 @@
   $(OPENSSL_PATH)/crypto/self_test_core.c
   $(OPENSSL_PATH)/crypto/sleep.c
   $(OPENSSL_PATH)/crypto/sparse_array.c
+  $(OPENSSL_PATH)/crypto/ssl_err.c
   $(OPENSSL_PATH)/crypto/threads_lib.c
   $(OPENSSL_PATH)/crypto/threads_none.c
   $(OPENSSL_PATH)/crypto/threads_pthread.c
@@ -1150,6 +1201,15 @@
   $(OPENSSL_PATH)/crypto/md5/md5_dgst.c
   $(OPENSSL_PATH)/crypto/md5/md5_one.c
   $(OPENSSL_PATH)/crypto/md5/md5_sha1.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_encoders.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key_compress.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_matrix.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_ntt.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_params.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sample.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sign.c
+  $(OPENSSL_PATH)/crypto/ml_kem/ml_kem.c
   $(OPENSSL_PATH)/crypto/modes/cbc128.c
   $(OPENSSL_PATH)/crypto/modes/ccm128.c
   $(OPENSSL_PATH)/crypto/modes/cfb128.c
@@ -1229,6 +1289,16 @@
   $(OPENSSL_PATH)/crypto/sha/sha256.c
   $(OPENSSL_PATH)/crypto/sha/sha3.c
   $(OPENSSL_PATH)/crypto/sha/sha512.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_adrs.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_hash_ctx.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_key.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_fors.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_hash.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_hypertree.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_params.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_wots.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_xmss.c
   $(OPENSSL_PATH)/crypto/sm3/legacy_sm3.c
   $(OPENSSL_PATH)/crypto/sm3/sm3.c
   $(OPENSSL_PATH)/crypto/stack/stack.c
@@ -1253,13 +1323,17 @@
   $(OPENSSL_PATH)/crypto/x509/t_crl.c
   $(OPENSSL_PATH)/crypto/x509/t_req.c
   $(OPENSSL_PATH)/crypto/x509/t_x509.c
+  $(OPENSSL_PATH)/crypto/x509/v3_aaa.c
   $(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c
   $(OPENSSL_PATH)/crypto/x509/v3_addr.c
   $(OPENSSL_PATH)/crypto/x509/v3_admis.c
   $(OPENSSL_PATH)/crypto/x509/v3_akeya.c
   $(OPENSSL_PATH)/crypto/x509/v3_akid.c
   $(OPENSSL_PATH)/crypto/x509/v3_asid.c
+  $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c
+  $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c
   $(OPENSSL_PATH)/crypto/x509/v3_audit_id.c
+  $(OPENSSL_PATH)/crypto/x509/v3_authattid.c
   $(OPENSSL_PATH)/crypto/x509/v3_battcons.c
   $(OPENSSL_PATH)/crypto/x509/v3_bcons.c
   $(OPENSSL_PATH)/crypto/x509/v3_bitst.c
@@ -1287,12 +1361,14 @@
   $(OPENSSL_PATH)/crypto/x509/v3_pmaps.c
   $(OPENSSL_PATH)/crypto/x509/v3_prn.c
   $(OPENSSL_PATH)/crypto/x509/v3_purp.c
+  $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c
   $(OPENSSL_PATH)/crypto/x509/v3_san.c
   $(OPENSSL_PATH)/crypto/x509/v3_sda.c
   $(OPENSSL_PATH)/crypto/x509/v3_single_use.c
   $(OPENSSL_PATH)/crypto/x509/v3_skid.c
   $(OPENSSL_PATH)/crypto/x509/v3_soa_id.c
   $(OPENSSL_PATH)/crypto/x509/v3_sxnet.c
+  $(OPENSSL_PATH)/crypto/x509/v3_timespec.c
   $(OPENSSL_PATH)/crypto/x509/v3_tlsf.c
   $(OPENSSL_PATH)/crypto/x509/v3_usernotice.c
   $(OPENSSL_PATH)/crypto/x509/v3_utf8.c
@@ -1371,6 +1447,9 @@
   $(OPENSSL_PATH)/providers/implementations/encode_decode/decode_pvk2key.c
   $(OPENSSL_PATH)/providers/implementations/encode_decode/decode_spki2typespki.c
   $(OPENSSL_PATH)/providers/implementations/encode_decode/endecoder_common.c
+  $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_common_codecs.c
+  $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_dsa_codecs.c
+  $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_kem_codecs.c
   $(OPENSSL_PATH)/providers/implementations/exchange/dh_exch.c
   $(OPENSSL_PATH)/providers/implementations/exchange/ecdh_exch.c
   $(OPENSSL_PATH)/providers/implementations/exchange/ecx_exch.c
@@ -1391,13 +1470,19 @@
   $(OPENSSL_PATH)/providers/implementations/kem/ec_kem.c
   $(OPENSSL_PATH)/providers/implementations/kem/ecx_kem.c
   $(OPENSSL_PATH)/providers/implementations/kem/kem_util.c
+  $(OPENSSL_PATH)/providers/implementations/kem/ml_kem_kem.c
+  $(OPENSSL_PATH)/providers/implementations/kem/mlx_kem.c
   $(OPENSSL_PATH)/providers/implementations/kem/rsa_kem.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/dh_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/ec_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/ecx_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/kdf_legacy_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/mac_legacy_kmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_dsa_kmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_kem_kmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/keymgmt/mlx_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/rsa_kmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/keymgmt/slh_dsa_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/macs/gmac_prov.c
   $(OPENSSL_PATH)/providers/implementations/macs/hmac_prov.c
   $(OPENSSL_PATH)/providers/implementations/macs/kmac_prov.c
@@ -1415,13 +1500,19 @@
   $(OPENSSL_PATH)/providers/implementations/signature/ecdsa_sig.c
   $(OPENSSL_PATH)/providers/implementations/signature/eddsa_sig.c
   $(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c
+  $(OPENSSL_PATH)/providers/implementations/signature/ml_dsa_sig.c
   $(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c
+  $(OPENSSL_PATH)/providers/implementations/signature/slh_dsa_sig.c
+  $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c
   $(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c
   $(OPENSSL_GEN_PATH)/crypto/params_idx.c
   $(OPENSSL_PATH)/providers/common/der/der_ec_key.c
   $(OPENSSL_PATH)/providers/common/der/der_ec_sig.c
   $(OPENSSL_PATH)/providers/common/der/der_ecx_key.c
+  $(OPENSSL_PATH)/providers/common/der/der_ml_dsa_key.c
   $(OPENSSL_PATH)/providers/common/der/der_rsa_key.c
+  $(OPENSSL_PATH)/providers/common/der/der_slh_dsa_key.c
   $(OPENSSL_PATH)/providers/common/provider_ctx.c
   $(OPENSSL_PATH)/providers/common/provider_err.c
   $(OPENSSL_PATH)/providers/implementations/ciphers/ciphercommon.c
@@ -1436,7 +1527,9 @@
   $(OPENSSL_GEN_PATH)/providers/common/der/der_digests_gen.c
   $(OPENSSL_GEN_PATH)/providers/common/der/der_ec_gen.c
   $(OPENSSL_GEN_PATH)/providers/common/der/der_ecx_gen.c
+  $(OPENSSL_GEN_PATH)/providers/common/der/der_ml_dsa_gen.c
   $(OPENSSL_GEN_PATH)/providers/common/der/der_rsa_gen.c
+  $(OPENSSL_GEN_PATH)/providers/common/der/der_slh_dsa_gen.c
   $(OPENSSL_GEN_PATH)/providers/common/der/der_wrap_gen.c
   $(OPENSSL_PATH)/ssl/bio_ssl.c
   $(OPENSSL_PATH)/ssl/d1_lib.c
@@ -1452,7 +1545,6 @@
   $(OPENSSL_PATH)/ssl/ssl_cert_comp.c
   $(OPENSSL_PATH)/ssl/ssl_ciph.c
   $(OPENSSL_PATH)/ssl/ssl_conf.c
-  $(OPENSSL_PATH)/ssl/ssl_err.c
   $(OPENSSL_PATH)/ssl/ssl_err_legacy.c
   $(OPENSSL_PATH)/ssl/ssl_init.c
   $(OPENSSL_PATH)/ssl/ssl_lib.c
@@ -1469,6 +1561,8 @@
   $(OPENSSL_PATH)/ssl/tls13_enc.c
   $(OPENSSL_PATH)/ssl/tls_depr.c
   $(OPENSSL_PATH)/ssl/tls_srp.c
+  $(OPENSSL_PATH)/ssl/quic/quic_tls.c
+  $(OPENSSL_PATH)/ssl/quic/quic_tls_api.c
   $(OPENSSL_PATH)/ssl/record/rec_layer_d1.c
   $(OPENSSL_PATH)/ssl/record/rec_layer_s3.c
   $(OPENSSL_PATH)/ssl/record/methods/dtls_meth.c
@@ -1491,6 +1585,7 @@
   $(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/aesni-sha1-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
   $(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/aesni-sha256-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
   $(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/aesni-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
+  $(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/aesni-xts-avx512.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
   $(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/bsaes-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
   $(OPENSSL_GEN_PATH)/X64-MSFT/crypto/aes/vpaes-x86_64.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
   $(OPENSSL_GEN_PATH)/X64-MSFT/crypto/x86_64cpuid.nasm ||||gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
@@ -1509,6 +1604,7 @@
   $(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/aesni-sha1-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
   $(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/aesni-sha256-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
   $(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/aesni-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
+  $(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/aesni-xts-avx512.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
   $(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/bsaes-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
   $(OPENSSL_GEN_PATH)/X64-GCC/crypto/aes/vpaes-x86_64.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
   $(OPENSSL_GEN_PATH)/X64-GCC/crypto/x86_64cpuid.s ||||!gEfiCryptoPkgTokenSpaceGuid.PcdOpensslLibAssemblySourceStyleNasm
@@ -1823,7 +1919,9 @@
   $(OPENSSL_PATH)/crypto/evp/pmeth_check.c
   $(OPENSSL_PATH)/crypto/evp/pmeth_gn.c
   $(OPENSSL_PATH)/crypto/evp/pmeth_lib.c
+  $(OPENSSL_PATH)/crypto/evp/s_lib.c
   $(OPENSSL_PATH)/crypto/evp/signature.c
+  $(OPENSSL_PATH)/crypto/evp/skeymgmt_meth.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_backend.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_dh.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_key_generate.c
@@ -1831,6 +1929,7 @@
   $(OPENSSL_PATH)/crypto/ffc/ffc_params.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_params_generate.c
   $(OPENSSL_PATH)/crypto/ffc/ffc_params_validate.c
+  $(OPENSSL_PATH)/crypto/hashtable/hashfunc.c
   $(OPENSSL_PATH)/crypto/hashtable/hashtable.c
   $(OPENSSL_PATH)/crypto/hmac/hmac.c
   $(OPENSSL_PATH)/crypto/hpke/hpke.c
@@ -1886,6 +1985,7 @@
   $(OPENSSL_PATH)/crypto/self_test_core.c
   $(OPENSSL_PATH)/crypto/sleep.c
   $(OPENSSL_PATH)/crypto/sparse_array.c
+  $(OPENSSL_PATH)/crypto/ssl_err.c
   $(OPENSSL_PATH)/crypto/threads_lib.c
   $(OPENSSL_PATH)/crypto/threads_none.c
   $(OPENSSL_PATH)/crypto/threads_pthread.c
@@ -1896,6 +1996,15 @@
   $(OPENSSL_PATH)/crypto/md5/md5_dgst.c
   $(OPENSSL_PATH)/crypto/md5/md5_one.c
   $(OPENSSL_PATH)/crypto/md5/md5_sha1.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_encoders.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_key_compress.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_matrix.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_ntt.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_params.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sample.c
+  $(OPENSSL_PATH)/crypto/ml_dsa/ml_dsa_sign.c
+  $(OPENSSL_PATH)/crypto/ml_kem/ml_kem.c
   $(OPENSSL_PATH)/crypto/modes/cbc128.c
   $(OPENSSL_PATH)/crypto/modes/ccm128.c
   $(OPENSSL_PATH)/crypto/modes/cfb128.c
@@ -1975,6 +2084,16 @@
   $(OPENSSL_PATH)/crypto/sha/sha256.c
   $(OPENSSL_PATH)/crypto/sha/sha3.c
   $(OPENSSL_PATH)/crypto/sha/sha512.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_adrs.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_hash_ctx.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_dsa_key.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_fors.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_hash.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_hypertree.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_params.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_wots.c
+  $(OPENSSL_PATH)/crypto/slh_dsa/slh_xmss.c
   $(OPENSSL_PATH)/crypto/sm3/legacy_sm3.c
   $(OPENSSL_PATH)/crypto/sm3/sm3.c
   $(OPENSSL_PATH)/crypto/stack/stack.c
@@ -1999,13 +2118,17 @@
   $(OPENSSL_PATH)/crypto/x509/t_crl.c
   $(OPENSSL_PATH)/crypto/x509/t_req.c
   $(OPENSSL_PATH)/crypto/x509/t_x509.c
+  $(OPENSSL_PATH)/crypto/x509/v3_aaa.c
   $(OPENSSL_PATH)/crypto/x509/v3_ac_tgt.c
   $(OPENSSL_PATH)/crypto/x509/v3_addr.c
   $(OPENSSL_PATH)/crypto/x509/v3_admis.c
   $(OPENSSL_PATH)/crypto/x509/v3_akeya.c
   $(OPENSSL_PATH)/crypto/x509/v3_akid.c
   $(OPENSSL_PATH)/crypto/x509/v3_asid.c
+  $(OPENSSL_PATH)/crypto/x509/v3_attrdesc.c
+  $(OPENSSL_PATH)/crypto/x509/v3_attrmap.c
   $(OPENSSL_PATH)/crypto/x509/v3_audit_id.c
+  $(OPENSSL_PATH)/crypto/x509/v3_authattid.c
   $(OPENSSL_PATH)/crypto/x509/v3_battcons.c
   $(OPENSSL_PATH)/crypto/x509/v3_bcons.c
   $(OPENSSL_PATH)/crypto/x509/v3_bitst.c
@@ -2033,12 +2156,14 @@
   $(OPENSSL_PATH)/crypto/x509/v3_pmaps.c
   $(OPENSSL_PATH)/crypto/x509/v3_prn.c
   $(OPENSSL_PATH)/crypto/x509/v3_purp.c
+  $(OPENSSL_PATH)/crypto/x509/v3_rolespec.c
   $(OPENSSL_PATH)/crypto/x509/v3_san.c
   $(OPENSSL_PATH)/crypto/x509/v3_sda.c
   $(OPENSSL_PATH)/crypto/x509/v3_single_use.c
   $(OPENSSL_PATH)/crypto/x509/v3_skid.c
   $(OPENSSL_PATH)/crypto/x509/v3_soa_id.c
   $(OPENSSL_PATH)/crypto/x509/v3_sxnet.c
+  $(OPENSSL_PATH)/crypto/x509/v3_timespec.c
   $(OPENSSL_PATH)/crypto/x509/v3_tlsf.c
   $(OPENSSL_PATH)/crypto/x509/v3_usernotice.c
   $(OPENSSL_PATH)/crypto/x509/v3_utf8.c
@@ -2117,6 +2242,9 @@
   $(OPENSSL_PATH)/providers/implementations/encode_decode/decode_pvk2key.c
   $(OPENSSL_PATH)/providers/implementations/encode_decode/decode_spki2typespki.c
   $(OPENSSL_PATH)/providers/implementations/encode_decode/endecoder_common.c
+  $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_common_codecs.c
+  $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_dsa_codecs.c
+  $(OPENSSL_PATH)/providers/implementations/encode_decode/ml_kem_codecs.c
   $(OPENSSL_PATH)/providers/implementations/exchange/dh_exch.c
   $(OPENSSL_PATH)/providers/implementations/exchange/ecdh_exch.c
   $(OPENSSL_PATH)/providers/implementations/exchange/ecx_exch.c
@@ -2137,13 +2265,19 @@
   $(OPENSSL_PATH)/providers/implementations/kem/ec_kem.c
   $(OPENSSL_PATH)/providers/implementations/kem/ecx_kem.c
   $(OPENSSL_PATH)/providers/implementations/kem/kem_util.c
+  $(OPENSSL_PATH)/providers/implementations/kem/ml_kem_kem.c
+  $(OPENSSL_PATH)/providers/implementations/kem/mlx_kem.c
   $(OPENSSL_PATH)/providers/implementations/kem/rsa_kem.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/dh_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/ec_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/ecx_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/kdf_legacy_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/mac_legacy_kmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_dsa_kmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/keymgmt/ml_kem_kmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/keymgmt/mlx_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/keymgmt/rsa_kmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/keymgmt/slh_dsa_kmgmt.c
   $(OPENSSL_PATH)/providers/implementations/macs/gmac_prov.c
   $(OPENSSL_PATH)/providers/implementations/macs/hmac_prov.c
   $(OPENSSL_PATH)/providers/implementations/macs/kmac_prov.c
@@ -2161,13 +2295,19 @@
   $(OPENSSL_PATH)/providers/implementations/signature/ecdsa_sig.c
   $(OPENSSL_PATH)/providers/implementations/signature/eddsa_sig.c
   $(OPENSSL_PATH)/providers/implementations/signature/mac_legacy_sig.c
+  $(OPENSSL_PATH)/providers/implementations/signature/ml_dsa_sig.c
   $(OPENSSL_PATH)/providers/implementations/signature/rsa_sig.c
+  $(OPENSSL_PATH)/providers/implementations/signature/slh_dsa_sig.c
+  $(OPENSSL_PATH)/providers/implementations/skeymgmt/aes_skmgmt.c
+  $(OPENSSL_PATH)/providers/implementations/skeymgmt/generic.c
   $(OPENSSL_PATH)/ssl/record/methods/ssl3_cbc.c
   $(OPENSSL_GEN_PATH)/crypto/params_idx.c
   $(OPENSSL_PATH)/providers/common/der/der_ec_key.c
   $(OPENSSL_PATH)/providers/common/der/der_ec_sig.c
   $(OPENSSL_PATH)/providers/common/der/der_ecx_key.c
+  $(OPENSSL_PATH)/providers/common/der/der_ml_dsa_key.c
   $(OPENSSL_PATH)/providers/common/der/der_rsa_key.c
+  $(OPENSSL_PATH)/providers/common/der/der_slh_dsa_key.c
   $(OPENSSL_PATH)/providers/common/provider_ctx.c
   $(OPENSSL_PATH)/providers/common/provider_err.c
   $(OPENSSL_PATH)/providers/implementations/ciphers/ciphercommon.c
@@ -2182,7 +2322,9 @@
   $(OPENSSL_GEN_PATH)/providers/common/der/der_digests_gen.c
   $(OPENSSL_GEN_PATH)/providers/common/der/der_ec_gen.c
   $(OPENSSL_GEN_PATH)/providers/common/der/der_ecx_gen.c
+  $(OPENSSL_GEN_PATH)/providers/common/der/der_ml_dsa_gen.c
   $(OPENSSL_GEN_PATH)/providers/common/der/der_rsa_gen.c
+  $(OPENSSL_GEN_PATH)/providers/common/der/der_slh_dsa_gen.c
   $(OPENSSL_GEN_PATH)/providers/common/der/der_wrap_gen.c
   $(OPENSSL_PATH)/ssl/bio_ssl.c
   $(OPENSSL_PATH)/ssl/d1_lib.c
@@ -2198,7 +2340,6 @@
   $(OPENSSL_PATH)/ssl/ssl_cert_comp.c
   $(OPENSSL_PATH)/ssl/ssl_ciph.c
   $(OPENSSL_PATH)/ssl/ssl_conf.c
-  $(OPENSSL_PATH)/ssl/ssl_err.c
   $(OPENSSL_PATH)/ssl/ssl_err_legacy.c
   $(OPENSSL_PATH)/ssl/ssl_init.c
   $(OPENSSL_PATH)/ssl/ssl_lib.c
@@ -2215,6 +2356,8 @@
   $(OPENSSL_PATH)/ssl/tls13_enc.c
   $(OPENSSL_PATH)/ssl/tls_depr.c
   $(OPENSSL_PATH)/ssl/tls_srp.c
+  $(OPENSSL_PATH)/ssl/quic/quic_tls.c
+  $(OPENSSL_PATH)/ssl/quic/quic_tls_api.c
   $(OPENSSL_PATH)/ssl/record/rec_layer_d1.c
   $(OPENSSL_PATH)/ssl/record/rec_layer_s3.c
   $(OPENSSL_PATH)/ssl/record/methods/dtls_meth.c