From e33651f4168897db99813d271984ac699be98cd8 Mon Sep 17 00:00:00 2001 From: DistroBaker Date: Thu, 10 Dec 2020 01:32:28 +0100 Subject: [PATCH] Merged update from upstream sources This is an automated DistroBaker update from upstream sources. If you do not know what this is about or would like to opt out, contact the OSCI team. Source: https://src.fedoraproject.org/rpms/openssl.git#a07706cf0e50b02a61d3cb10ecad554d4ac4240c --- .gitignore | 1 + openssl-1.1.1-arm-update.patch | 2392 +++++++++++++++++++++++++- openssl-1.1.1-fips-post-rand.patch | 50 +- openssl-1.1.1-version-override.patch | 12 +- openssl.spec | 5 +- sources | 2 +- 6 files changed, 2369 insertions(+), 93 deletions(-) diff --git a/.gitignore b/.gitignore index 3305a0f..d1abce3 100644 --- a/.gitignore +++ b/.gitignore @@ -49,3 +49,4 @@ openssl-1.0.0a-usa.tar.bz2 /openssl-1.1.1f-hobbled.tar.xz /openssl-1.1.1g-hobbled.tar.xz /openssl-1.1.1h-hobbled.tar.xz +/openssl-1.1.1i-hobbled.tar.xz diff --git a/openssl-1.1.1-arm-update.patch b/openssl-1.1.1-arm-update.patch index 998905f..2b8c549 100644 --- a/openssl-1.1.1-arm-update.patch +++ b/openssl-1.1.1-arm-update.patch @@ -1,6 +1,6 @@ -diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl ---- openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update 2019-05-28 15:12:21.000000000 +0200 -+++ openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl 2019-11-20 11:36:22.389506155 +0100 +diff -up openssl-1.1.1i/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1i/crypto/aes/asm/aesv8-armx.pl +--- openssl-1.1.1i/crypto/aes/asm/aesv8-armx.pl.arm-update 2020-12-08 14:20:59.000000000 +0100 ++++ openssl-1.1.1i/crypto/aes/asm/aesv8-armx.pl 2020-12-09 10:39:50.645705385 +0100 @@ -27,44 +27,72 @@ # CBC encrypt case. On Cortex-A57 parallelizable mode performance # seems to be limited by sheer amount of NEON instructions... @@ -85,7 +85,844 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c ___ # Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax, -@@ -514,6 +542,13 @@ $code.=<<___; +@@ -361,6 +389,836 @@ ___ + &gen_block("en"); + &gen_block("de"); + }}} ++ ++# Performance in cycles per byte. ++# Processed with AES-ECB different key size. ++# It shows the value before and after optimization as below: ++# (before/after): ++# ++# AES-128-ECB AES-192-ECB AES-256-ECB ++# Cortex-A57 1.85/0.82 2.16/0.96 2.47/1.10 ++# Cortex-A72 1.64/0.85 1.82/0.99 2.13/1.14 ++ ++# Optimization is implemented by loop unrolling and interleaving. ++# Commonly, we choose the unrolling factor as 5, if the input ++# data size smaller than 5 blocks, but not smaller than 3 blocks, ++# choose 3 as the unrolling factor. ++# If the input data size dsize >= 5*16 bytes, then take 5 blocks ++# as one iteration, every loop the left size lsize -= 5*16. ++# If 5*16 > lsize >= 3*16 bytes, take 3 blocks as one iteration, ++# every loop lsize -=3*16. ++# If lsize < 3*16 bytes, treat them as the tail, interleave the ++# two blocks AES instructions. ++# There is one special case, if the original input data size dsize ++# = 16 bytes, we will treat it seperately to improve the ++# performance: one independent code block without LR, FP load and ++# store, just looks like what the original ECB implementation does. 
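++
++# Put differently, the dispatch above is roughly (a C-like pseudocode
++# sketch for illustration only; do_N_blocks()/do_tail() are placeholder
++# names, and the real control flow is the assembler emitted below):
++#
++#   while (lsize >= 5*16) { do_5_blocks(); lsize -= 5*16; }
++#   while (lsize >= 3*16) { do_3_blocks(); lsize -= 3*16; }
++#   if (lsize > 0)        { do_tail(); }  /* 1-2 blocks, interleaved */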
++ ++{{{ ++my ($inp,$out,$len,$key)=map("x$_",(0..3)); ++my ($enc,$rounds,$cnt,$key_,$step)=("w4","w5","w6","x7","x8"); ++my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$tmp2,$rndlast)=map("q$_",(0..7)); ++ ++my ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1); ++ ++### q7 last round key ++### q10-q15 q7 Last 7 round keys ++### q8-q9 preloaded round keys except last 7 keys for big size ++### q5, q6, q8-q9 preloaded round keys except last 7 keys for only 16 byte ++ ++{ ++my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9)); ++ ++my ($dat3,$in3,$tmp3); # used only in 64-bit mode ++my ($dat4,$in4,$tmp4); ++if ($flavour =~ /64/) { ++ ($dat2,$dat3,$dat4,$in2,$in3,$in4,$tmp3,$tmp4)=map("q$_",(16..23)); ++} ++ ++$code.=<<___; ++.globl ${prefix}_ecb_encrypt ++.type ${prefix}_ecb_encrypt,%function ++.align 5 ++${prefix}_ecb_encrypt: ++___ ++$code.=<<___ if ($flavour =~ /64/); ++ subs $len,$len,#16 ++ // Original input data size bigger than 16, jump to big size processing. ++ b.ne .Lecb_big_size ++ vld1.8 {$dat0},[$inp] ++ cmp $enc,#0 // en- or decrypting? ++ ldr $rounds,[$key,#240] ++ vld1.32 {q5-q6},[$key],#32 // load key schedule... ++ ++ b.eq .Lecb_small_dec ++ aese $dat0,q5 ++ aesmc $dat0,$dat0 ++ vld1.32 {q8-q9},[$key],#32 // load key schedule... ++ aese $dat0,q6 ++ aesmc $dat0,$dat0 ++ subs $rounds,$rounds,#10 // if rounds==10, jump to aes-128-ecb processing ++ b.eq .Lecb_128_enc ++.Lecb_round_loop: ++ aese $dat0,q8 ++ aesmc $dat0,$dat0 ++ vld1.32 {q8},[$key],#16 // load key schedule... ++ aese $dat0,q9 ++ aesmc $dat0,$dat0 ++ vld1.32 {q9},[$key],#16 // load key schedule... ++ subs $rounds,$rounds,#2 // bias ++ b.gt .Lecb_round_loop ++.Lecb_128_enc: ++ vld1.32 {q10-q11},[$key],#32 // load key schedule... ++ aese $dat0,q8 ++ aesmc $dat0,$dat0 ++ aese $dat0,q9 ++ aesmc $dat0,$dat0 ++ vld1.32 {q12-q13},[$key],#32 // load key schedule... ++ aese $dat0,q10 ++ aesmc $dat0,$dat0 ++ aese $dat0,q11 ++ aesmc $dat0,$dat0 ++ vld1.32 {q14-q15},[$key],#32 // load key schedule... ++ aese $dat0,q12 ++ aesmc $dat0,$dat0 ++ aese $dat0,q13 ++ aesmc $dat0,$dat0 ++ vld1.32 {$rndlast},[$key] ++ aese $dat0,q14 ++ aesmc $dat0,$dat0 ++ aese $dat0,q15 ++ veor $dat0,$dat0,$rndlast ++ vst1.8 {$dat0},[$out] ++ b .Lecb_Final_abort ++.Lecb_small_dec: ++ aesd $dat0,q5 ++ aesimc $dat0,$dat0 ++ vld1.32 {q8-q9},[$key],#32 // load key schedule... ++ aesd $dat0,q6 ++ aesimc $dat0,$dat0 ++ subs $rounds,$rounds,#10 // bias ++ b.eq .Lecb_128_dec ++.Lecb_dec_round_loop: ++ aesd $dat0,q8 ++ aesimc $dat0,$dat0 ++ vld1.32 {q8},[$key],#16 // load key schedule... ++ aesd $dat0,q9 ++ aesimc $dat0,$dat0 ++ vld1.32 {q9},[$key],#16 // load key schedule... ++ subs $rounds,$rounds,#2 // bias ++ b.gt .Lecb_dec_round_loop ++.Lecb_128_dec: ++ vld1.32 {q10-q11},[$key],#32 // load key schedule... ++ aesd $dat0,q8 ++ aesimc $dat0,$dat0 ++ aesd $dat0,q9 ++ aesimc $dat0,$dat0 ++ vld1.32 {q12-q13},[$key],#32 // load key schedule... ++ aesd $dat0,q10 ++ aesimc $dat0,$dat0 ++ aesd $dat0,q11 ++ aesimc $dat0,$dat0 ++ vld1.32 {q14-q15},[$key],#32 // load key schedule... ++ aesd $dat0,q12 ++ aesimc $dat0,$dat0 ++ aesd $dat0,q13 ++ aesimc $dat0,$dat0 ++ vld1.32 {$rndlast},[$key] ++ aesd $dat0,q14 ++ aesimc $dat0,$dat0 ++ aesd $dat0,q15 ++ veor $dat0,$dat0,$rndlast ++ vst1.8 {$dat0},[$out] ++ b .Lecb_Final_abort ++.Lecb_big_size: ++___ ++$code.=<<___ if ($flavour =~ /64/); ++ stp x29,x30,[sp,#-16]! 
++ add x29,sp,#0 ++___ ++$code.=<<___ if ($flavour !~ /64/); ++ mov ip,sp ++ stmdb sp!,{r4-r8,lr} ++ vstmdb sp!,{d8-d15} @ ABI specification says so ++ ldmia ip,{r4-r5} @ load remaining args ++ subs $len,$len,#16 ++___ ++$code.=<<___; ++ mov $step,#16 ++ b.lo .Lecb_done ++ cclr $step,eq ++ ++ cmp $enc,#0 // en- or decrypting? ++ ldr $rounds,[$key,#240] ++ and $len,$len,#-16 ++ vld1.8 {$dat},[$inp],$step ++ ++ vld1.32 {q8-q9},[$key] // load key schedule... ++ sub $rounds,$rounds,#6 ++ add $key_,$key,x5,lsl#4 // pointer to last 7 round keys ++ sub $rounds,$rounds,#2 ++ vld1.32 {q10-q11},[$key_],#32 ++ vld1.32 {q12-q13},[$key_],#32 ++ vld1.32 {q14-q15},[$key_],#32 ++ vld1.32 {$rndlast},[$key_] ++ ++ add $key_,$key,#32 ++ mov $cnt,$rounds ++ b.eq .Lecb_dec ++ ++ vld1.8 {$dat1},[$inp],#16 ++ subs $len,$len,#32 // bias ++ add $cnt,$rounds,#2 ++ vorr $in1,$dat1,$dat1 ++ vorr $dat2,$dat1,$dat1 ++ vorr $dat1,$dat,$dat ++ b.lo .Lecb_enc_tail ++ ++ vorr $dat1,$in1,$in1 ++ vld1.8 {$dat2},[$inp],#16 ++___ ++$code.=<<___ if ($flavour =~ /64/); ++ cmp $len,#32 ++ b.lo .Loop3x_ecb_enc ++ ++ vld1.8 {$dat3},[$inp],#16 ++ vld1.8 {$dat4},[$inp],#16 ++ sub $len,$len,#32 // bias ++ mov $cnt,$rounds ++ ++.Loop5x_ecb_enc: ++ aese $dat0,q8 ++ aesmc $dat0,$dat0 ++ aese $dat1,q8 ++ aesmc $dat1,$dat1 ++ aese $dat2,q8 ++ aesmc $dat2,$dat2 ++ aese $dat3,q8 ++ aesmc $dat3,$dat3 ++ aese $dat4,q8 ++ aesmc $dat4,$dat4 ++ vld1.32 {q8},[$key_],#16 ++ subs $cnt,$cnt,#2 ++ aese $dat0,q9 ++ aesmc $dat0,$dat0 ++ aese $dat1,q9 ++ aesmc $dat1,$dat1 ++ aese $dat2,q9 ++ aesmc $dat2,$dat2 ++ aese $dat3,q9 ++ aesmc $dat3,$dat3 ++ aese $dat4,q9 ++ aesmc $dat4,$dat4 ++ vld1.32 {q9},[$key_],#16 ++ b.gt .Loop5x_ecb_enc ++ ++ aese $dat0,q8 ++ aesmc $dat0,$dat0 ++ aese $dat1,q8 ++ aesmc $dat1,$dat1 ++ aese $dat2,q8 ++ aesmc $dat2,$dat2 ++ aese $dat3,q8 ++ aesmc $dat3,$dat3 ++ aese $dat4,q8 ++ aesmc $dat4,$dat4 ++ cmp $len,#0x40 // because .Lecb_enc_tail4x ++ sub $len,$len,#0x50 ++ ++ aese $dat0,q9 ++ aesmc $dat0,$dat0 ++ aese $dat1,q9 ++ aesmc $dat1,$dat1 ++ aese $dat2,q9 ++ aesmc $dat2,$dat2 ++ aese $dat3,q9 ++ aesmc $dat3,$dat3 ++ aese $dat4,q9 ++ aesmc $dat4,$dat4 ++ csel x6,xzr,$len,gt // borrow x6, $cnt, "gt" is not typo ++ mov $key_,$key ++ ++ aese $dat0,q10 ++ aesmc $dat0,$dat0 ++ aese $dat1,q10 ++ aesmc $dat1,$dat1 ++ aese $dat2,q10 ++ aesmc $dat2,$dat2 ++ aese $dat3,q10 ++ aesmc $dat3,$dat3 ++ aese $dat4,q10 ++ aesmc $dat4,$dat4 ++ add $inp,$inp,x6 // $inp is adjusted in such way that ++ // at exit from the loop $dat1-$dat4 ++ // are loaded with last "words" ++ add x6,$len,#0x60 // because .Lecb_enc_tail4x ++ ++ aese $dat0,q11 ++ aesmc $dat0,$dat0 ++ aese $dat1,q11 ++ aesmc $dat1,$dat1 ++ aese $dat2,q11 ++ aesmc $dat2,$dat2 ++ aese $dat3,q11 ++ aesmc $dat3,$dat3 ++ aese $dat4,q11 ++ aesmc $dat4,$dat4 ++ ++ aese $dat0,q12 ++ aesmc $dat0,$dat0 ++ aese $dat1,q12 ++ aesmc $dat1,$dat1 ++ aese $dat2,q12 ++ aesmc $dat2,$dat2 ++ aese $dat3,q12 ++ aesmc $dat3,$dat3 ++ aese $dat4,q12 ++ aesmc $dat4,$dat4 ++ ++ aese $dat0,q13 ++ aesmc $dat0,$dat0 ++ aese $dat1,q13 ++ aesmc $dat1,$dat1 ++ aese $dat2,q13 ++ aesmc $dat2,$dat2 ++ aese $dat3,q13 ++ aesmc $dat3,$dat3 ++ aese $dat4,q13 ++ aesmc $dat4,$dat4 ++ ++ aese $dat0,q14 ++ aesmc $dat0,$dat0 ++ aese $dat1,q14 ++ aesmc $dat1,$dat1 ++ aese $dat2,q14 ++ aesmc $dat2,$dat2 ++ aese $dat3,q14 ++ aesmc $dat3,$dat3 ++ aese $dat4,q14 ++ aesmc $dat4,$dat4 ++ ++ aese $dat0,q15 ++ vld1.8 {$in0},[$inp],#16 ++ aese $dat1,q15 ++ vld1.8 {$in1},[$inp],#16 ++ aese $dat2,q15 ++ vld1.8 {$in2},[$inp],#16 ++ aese 
$dat3,q15 ++ vld1.8 {$in3},[$inp],#16 ++ aese $dat4,q15 ++ vld1.8 {$in4},[$inp],#16 ++ cbz x6,.Lecb_enc_tail4x ++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] ++ veor $tmp0,$rndlast,$dat0 ++ vorr $dat0,$in0,$in0 ++ veor $tmp1,$rndlast,$dat1 ++ vorr $dat1,$in1,$in1 ++ veor $tmp2,$rndlast,$dat2 ++ vorr $dat2,$in2,$in2 ++ veor $tmp3,$rndlast,$dat3 ++ vorr $dat3,$in3,$in3 ++ veor $tmp4,$rndlast,$dat4 ++ vst1.8 {$tmp0},[$out],#16 ++ vorr $dat4,$in4,$in4 ++ vst1.8 {$tmp1},[$out],#16 ++ mov $cnt,$rounds ++ vst1.8 {$tmp2},[$out],#16 ++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] ++ vst1.8 {$tmp3},[$out],#16 ++ vst1.8 {$tmp4},[$out],#16 ++ b.hs .Loop5x_ecb_enc ++ ++ add $len,$len,#0x50 ++ cbz $len,.Lecb_done ++ ++ add $cnt,$rounds,#2 ++ subs $len,$len,#0x30 ++ vorr $dat0,$in2,$in2 ++ vorr $dat1,$in3,$in3 ++ vorr $dat2,$in4,$in4 ++ b.lo .Lecb_enc_tail ++ ++ b .Loop3x_ecb_enc ++ ++.align 4 ++.Lecb_enc_tail4x: ++ veor $tmp1,$rndlast,$dat1 ++ veor $tmp2,$rndlast,$dat2 ++ veor $tmp3,$rndlast,$dat3 ++ veor $tmp4,$rndlast,$dat4 ++ vst1.8 {$tmp1},[$out],#16 ++ vst1.8 {$tmp2},[$out],#16 ++ vst1.8 {$tmp3},[$out],#16 ++ vst1.8 {$tmp4},[$out],#16 ++ ++ b .Lecb_done ++.align 4 ++___ ++$code.=<<___; ++.Loop3x_ecb_enc: ++ aese $dat0,q8 ++ aesmc $dat0,$dat0 ++ aese $dat1,q8 ++ aesmc $dat1,$dat1 ++ aese $dat2,q8 ++ aesmc $dat2,$dat2 ++ vld1.32 {q8},[$key_],#16 ++ subs $cnt,$cnt,#2 ++ aese $dat0,q9 ++ aesmc $dat0,$dat0 ++ aese $dat1,q9 ++ aesmc $dat1,$dat1 ++ aese $dat2,q9 ++ aesmc $dat2,$dat2 ++ vld1.32 {q9},[$key_],#16 ++ b.gt .Loop3x_ecb_enc ++ ++ aese $dat0,q8 ++ aesmc $dat0,$dat0 ++ aese $dat1,q8 ++ aesmc $dat1,$dat1 ++ aese $dat2,q8 ++ aesmc $dat2,$dat2 ++ subs $len,$len,#0x30 ++ mov.lo x6,$len // x6, $cnt, is zero at this point ++ aese $dat0,q9 ++ aesmc $dat0,$dat0 ++ aese $dat1,q9 ++ aesmc $dat1,$dat1 ++ aese $dat2,q9 ++ aesmc $dat2,$dat2 ++ add $inp,$inp,x6 // $inp is adjusted in such way that ++ // at exit from the loop $dat1-$dat2 ++ // are loaded with last "words" ++ mov $key_,$key ++ aese $dat0,q12 ++ aesmc $dat0,$dat0 ++ aese $dat1,q12 ++ aesmc $dat1,$dat1 ++ aese $dat2,q12 ++ aesmc $dat2,$dat2 ++ vld1.8 {$in0},[$inp],#16 ++ aese $dat0,q13 ++ aesmc $dat0,$dat0 ++ aese $dat1,q13 ++ aesmc $dat1,$dat1 ++ aese $dat2,q13 ++ aesmc $dat2,$dat2 ++ vld1.8 {$in1},[$inp],#16 ++ aese $dat0,q14 ++ aesmc $dat0,$dat0 ++ aese $dat1,q14 ++ aesmc $dat1,$dat1 ++ aese $dat2,q14 ++ aesmc $dat2,$dat2 ++ vld1.8 {$in2},[$inp],#16 ++ aese $dat0,q15 ++ aese $dat1,q15 ++ aese $dat2,q15 ++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] ++ add $cnt,$rounds,#2 ++ veor $tmp0,$rndlast,$dat0 ++ veor $tmp1,$rndlast,$dat1 ++ veor $dat2,$dat2,$rndlast ++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] ++ vst1.8 {$tmp0},[$out],#16 ++ vorr $dat0,$in0,$in0 ++ vst1.8 {$tmp1},[$out],#16 ++ vorr $dat1,$in1,$in1 ++ vst1.8 {$dat2},[$out],#16 ++ vorr $dat2,$in2,$in2 ++ b.hs .Loop3x_ecb_enc ++ ++ cmn $len,#0x30 ++ b.eq .Lecb_done ++ nop ++ ++.Lecb_enc_tail: ++ aese $dat1,q8 ++ aesmc $dat1,$dat1 ++ aese $dat2,q8 ++ aesmc $dat2,$dat2 ++ vld1.32 {q8},[$key_],#16 ++ subs $cnt,$cnt,#2 ++ aese $dat1,q9 ++ aesmc $dat1,$dat1 ++ aese $dat2,q9 ++ aesmc $dat2,$dat2 ++ vld1.32 {q9},[$key_],#16 ++ b.gt .Lecb_enc_tail ++ ++ aese $dat1,q8 ++ aesmc $dat1,$dat1 ++ aese $dat2,q8 ++ aesmc $dat2,$dat2 ++ aese $dat1,q9 ++ aesmc $dat1,$dat1 ++ aese $dat2,q9 ++ aesmc $dat2,$dat2 ++ aese $dat1,q12 ++ aesmc $dat1,$dat1 ++ aese $dat2,q12 ++ aesmc $dat2,$dat2 ++ cmn $len,#0x20 ++ aese $dat1,q13 ++ aesmc $dat1,$dat1 ++ aese $dat2,q13 ++ aesmc $dat2,$dat2 ++ 
aese $dat1,q14 ++ aesmc $dat1,$dat1 ++ aese $dat2,q14 ++ aesmc $dat2,$dat2 ++ aese $dat1,q15 ++ aese $dat2,q15 ++ b.eq .Lecb_enc_one ++ veor $tmp1,$rndlast,$dat1 ++ veor $tmp2,$rndlast,$dat2 ++ vst1.8 {$tmp1},[$out],#16 ++ vst1.8 {$tmp2},[$out],#16 ++ b .Lecb_done ++ ++.Lecb_enc_one: ++ veor $tmp1,$rndlast,$dat2 ++ vst1.8 {$tmp1},[$out],#16 ++ b .Lecb_done ++___ ++ ++$code.=<<___; ++.align 5 ++.Lecb_dec: ++ vld1.8 {$dat1},[$inp],#16 ++ subs $len,$len,#32 // bias ++ add $cnt,$rounds,#2 ++ vorr $in1,$dat1,$dat1 ++ vorr $dat2,$dat1,$dat1 ++ vorr $dat1,$dat,$dat ++ b.lo .Lecb_dec_tail ++ ++ vorr $dat1,$in1,$in1 ++ vld1.8 {$dat2},[$inp],#16 ++___ ++$code.=<<___ if ($flavour =~ /64/); ++ cmp $len,#32 ++ b.lo .Loop3x_ecb_dec ++ ++ vld1.8 {$dat3},[$inp],#16 ++ vld1.8 {$dat4},[$inp],#16 ++ sub $len,$len,#32 // bias ++ mov $cnt,$rounds ++ ++.Loop5x_ecb_dec: ++ aesd $dat0,q8 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q8 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q8 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q8 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q8 ++ aesimc $dat4,$dat4 ++ vld1.32 {q8},[$key_],#16 ++ subs $cnt,$cnt,#2 ++ aesd $dat0,q9 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q9 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q9 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q9 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q9 ++ aesimc $dat4,$dat4 ++ vld1.32 {q9},[$key_],#16 ++ b.gt .Loop5x_ecb_dec ++ ++ aesd $dat0,q8 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q8 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q8 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q8 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q8 ++ aesimc $dat4,$dat4 ++ cmp $len,#0x40 // because .Lecb_tail4x ++ sub $len,$len,#0x50 ++ ++ aesd $dat0,q9 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q9 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q9 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q9 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q9 ++ aesimc $dat4,$dat4 ++ csel x6,xzr,$len,gt // borrow x6, $cnt, "gt" is not typo ++ mov $key_,$key ++ ++ aesd $dat0,q10 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q10 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q10 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q10 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q10 ++ aesimc $dat4,$dat4 ++ add $inp,$inp,x6 // $inp is adjusted in such way that ++ // at exit from the loop $dat1-$dat4 ++ // are loaded with last "words" ++ add x6,$len,#0x60 // because .Lecb_tail4x ++ ++ aesd $dat0,q11 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q11 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q11 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q11 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q11 ++ aesimc $dat4,$dat4 ++ ++ aesd $dat0,q12 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q12 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q12 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q12 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q12 ++ aesimc $dat4,$dat4 ++ ++ aesd $dat0,q13 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q13 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q13 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q13 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q13 ++ aesimc $dat4,$dat4 ++ ++ aesd $dat0,q14 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q14 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q14 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q14 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q14 ++ aesimc $dat4,$dat4 ++ ++ aesd $dat0,q15 ++ vld1.8 {$in0},[$inp],#16 ++ aesd $dat1,q15 ++ vld1.8 {$in1},[$inp],#16 ++ aesd $dat2,q15 ++ vld1.8 {$in2},[$inp],#16 ++ aesd $dat3,q15 ++ vld1.8 {$in3},[$inp],#16 ++ aesd $dat4,q15 ++ vld1.8 {$in4},[$inp],#16 ++ cbz x6,.Lecb_tail4x ++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] ++ veor $tmp0,$rndlast,$dat0 ++ vorr $dat0,$in0,$in0 ++ veor $tmp1,$rndlast,$dat1 ++ vorr $dat1,$in1,$in1 ++ veor $tmp2,$rndlast,$dat2 ++ vorr $dat2,$in2,$in2 ++ 
veor $tmp3,$rndlast,$dat3 ++ vorr $dat3,$in3,$in3 ++ veor $tmp4,$rndlast,$dat4 ++ vst1.8 {$tmp0},[$out],#16 ++ vorr $dat4,$in4,$in4 ++ vst1.8 {$tmp1},[$out],#16 ++ mov $cnt,$rounds ++ vst1.8 {$tmp2},[$out],#16 ++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] ++ vst1.8 {$tmp3},[$out],#16 ++ vst1.8 {$tmp4},[$out],#16 ++ b.hs .Loop5x_ecb_dec ++ ++ add $len,$len,#0x50 ++ cbz $len,.Lecb_done ++ ++ add $cnt,$rounds,#2 ++ subs $len,$len,#0x30 ++ vorr $dat0,$in2,$in2 ++ vorr $dat1,$in3,$in3 ++ vorr $dat2,$in4,$in4 ++ b.lo .Lecb_dec_tail ++ ++ b .Loop3x_ecb_dec ++ ++.align 4 ++.Lecb_tail4x: ++ veor $tmp1,$rndlast,$dat1 ++ veor $tmp2,$rndlast,$dat2 ++ veor $tmp3,$rndlast,$dat3 ++ veor $tmp4,$rndlast,$dat4 ++ vst1.8 {$tmp1},[$out],#16 ++ vst1.8 {$tmp2},[$out],#16 ++ vst1.8 {$tmp3},[$out],#16 ++ vst1.8 {$tmp4},[$out],#16 ++ ++ b .Lecb_done ++.align 4 ++___ ++$code.=<<___; ++.Loop3x_ecb_dec: ++ aesd $dat0,q8 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q8 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q8 ++ aesimc $dat2,$dat2 ++ vld1.32 {q8},[$key_],#16 ++ subs $cnt,$cnt,#2 ++ aesd $dat0,q9 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q9 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q9 ++ aesimc $dat2,$dat2 ++ vld1.32 {q9},[$key_],#16 ++ b.gt .Loop3x_ecb_dec ++ ++ aesd $dat0,q8 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q8 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q8 ++ aesimc $dat2,$dat2 ++ subs $len,$len,#0x30 ++ mov.lo x6,$len // x6, $cnt, is zero at this point ++ aesd $dat0,q9 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q9 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q9 ++ aesimc $dat2,$dat2 ++ add $inp,$inp,x6 // $inp is adjusted in such way that ++ // at exit from the loop $dat1-$dat2 ++ // are loaded with last "words" ++ mov $key_,$key ++ aesd $dat0,q12 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q12 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q12 ++ aesimc $dat2,$dat2 ++ vld1.8 {$in0},[$inp],#16 ++ aesd $dat0,q13 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q13 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q13 ++ aesimc $dat2,$dat2 ++ vld1.8 {$in1},[$inp],#16 ++ aesd $dat0,q14 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q14 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q14 ++ aesimc $dat2,$dat2 ++ vld1.8 {$in2},[$inp],#16 ++ aesd $dat0,q15 ++ aesd $dat1,q15 ++ aesd $dat2,q15 ++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] ++ add $cnt,$rounds,#2 ++ veor $tmp0,$rndlast,$dat0 ++ veor $tmp1,$rndlast,$dat1 ++ veor $dat2,$dat2,$rndlast ++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] ++ vst1.8 {$tmp0},[$out],#16 ++ vorr $dat0,$in0,$in0 ++ vst1.8 {$tmp1},[$out],#16 ++ vorr $dat1,$in1,$in1 ++ vst1.8 {$dat2},[$out],#16 ++ vorr $dat2,$in2,$in2 ++ b.hs .Loop3x_ecb_dec ++ ++ cmn $len,#0x30 ++ b.eq .Lecb_done ++ nop ++ ++.Lecb_dec_tail: ++ aesd $dat1,q8 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q8 ++ aesimc $dat2,$dat2 ++ vld1.32 {q8},[$key_],#16 ++ subs $cnt,$cnt,#2 ++ aesd $dat1,q9 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q9 ++ aesimc $dat2,$dat2 ++ vld1.32 {q9},[$key_],#16 ++ b.gt .Lecb_dec_tail ++ ++ aesd $dat1,q8 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q8 ++ aesimc $dat2,$dat2 ++ aesd $dat1,q9 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q9 ++ aesimc $dat2,$dat2 ++ aesd $dat1,q12 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q12 ++ aesimc $dat2,$dat2 ++ cmn $len,#0x20 ++ aesd $dat1,q13 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q13 ++ aesimc $dat2,$dat2 ++ aesd $dat1,q14 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q14 ++ aesimc $dat2,$dat2 ++ aesd $dat1,q15 ++ aesd $dat2,q15 ++ b.eq .Lecb_dec_one ++ veor $tmp1,$rndlast,$dat1 ++ veor $tmp2,$rndlast,$dat2 ++ vst1.8 {$tmp1},[$out],#16 ++ vst1.8 {$tmp2},[$out],#16 ++ b .Lecb_done ++ ++.Lecb_dec_one: ++ veor 
$tmp1,$rndlast,$dat2 ++ vst1.8 {$tmp1},[$out],#16 ++ ++.Lecb_done: ++___ ++} ++$code.=<<___ if ($flavour !~ /64/); ++ vldmia sp!,{d8-d15} ++ ldmia sp!,{r4-r8,pc} ++___ ++$code.=<<___ if ($flavour =~ /64/); ++ ldr x29,[sp],#16 ++___ ++$code.=<<___ if ($flavour =~ /64/); ++.Lecb_Final_abort: ++ ret ++___ ++$code.=<<___; ++.size ${prefix}_ecb_encrypt,.-${prefix}_ecb_encrypt ++___ ++}}} + {{{ + my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4)); my $enc="w5"; + my ($rounds,$cnt,$key_,$step,$step1)=($enc,"w6","x7","x8","x12"); +@@ -519,6 +1377,13 @@ $code.=<<___; ___ { my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9)); @@ -99,7 +936,7 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c $code.=<<___; .align 5 .Lcbc_dec: -@@ -530,7 +565,196 @@ $code.=<<___; +@@ -535,7 +1400,196 @@ $code.=<<___; vorr $in0,$dat,$dat vorr $in1,$dat1,$dat1 vorr $in2,$dat2,$dat2 @@ -225,7 +1062,7 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c + aesimc $dat3,$dat3 + aesd $dat4,q14 + aesimc $dat4,$dat4 -+ + + veor $tmp0,$ivec,$rndlast + aesd $dat0,q15 + veor $tmp1,$in0,$rndlast @@ -277,7 +1114,7 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c + b.lo .Lcbc_dec_tail + + b .Loop3x_cbc_dec - ++ +.align 4 +.Lcbc_tail4x: + veor $tmp1,$tmp0,$dat1 @@ -296,7 +1133,7 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c .Loop3x_cbc_dec: aesd $dat0,q8 aesimc $dat0,$dat0 -@@ -691,6 +915,9 @@ my $step="x12"; # aliases with $tctr2 +@@ -696,6 +1750,9 @@ my $step="x12"; # aliases with $tctr2 my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7)); my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9)); @@ -306,10 +1143,10 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c my ($dat,$tmp)=($dat0,$tmp0); ### q8-q15 preloaded key schedule -@@ -743,6 +970,175 @@ $code.=<<___; - rev $tctr2, $ctr +@@ -751,6 +1808,175 @@ $code.=<<___; + vmov.32 ${ivec}[3],$tctr2 sub $len,$len,#3 // bias - vmov.32 ${dat2}[3],$tctr2 + vorr $dat2,$ivec,$ivec +___ +$code.=<<___ if ($flavour =~ /64/); + cmp $len,#2 @@ -482,7 +1319,1440 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c b .Loop3x_ctr32 .align 4 -@@ -955,7 +1351,7 @@ if ($flavour =~ /64/) { ######## 64-bi +@@ -905,6 +2131,1432 @@ $code.=<<___; + .size ${prefix}_ctr32_encrypt_blocks,.-${prefix}_ctr32_encrypt_blocks + ___ + }}} ++# Performance in cycles per byte. ++# Processed with AES-XTS different key size. ++# It shows the value before and after optimization as below: ++# (before/after): ++# ++# AES-128-XTS AES-256-XTS ++# Cortex-A57 3.36/1.09 4.02/1.37 ++# Cortex-A72 3.03/1.02 3.28/1.33 ++ ++# Optimization is implemented by loop unrolling and interleaving. ++# Commonly, we choose the unrolling factor as 5, if the input ++# data size smaller than 5 blocks, but not smaller than 3 blocks, ++# choose 3 as the unrolling factor. ++# If the input data size dsize >= 5*16 bytes, then take 5 blocks ++# as one iteration, every loop the left size lsize -= 5*16. ++# If lsize < 5*16 bytes, treat them as the tail. Note: left 4*16 bytes ++# will be processed specially, which be integrated into the 5*16 bytes ++# loop to improve the efficiency. ++# There is one special case, if the original input data size dsize ++# = 16 bytes, we will treat it seperately to improve the ++# performance: one independent code block without LR, FP load and ++# store. 
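++# Between consecutive blocks the tweak is advanced by multiplying it
++# by x in GF(2^128) (reduction polynomial 0x87); the extr/and/eor
++# sequences below compute, roughly (a C-like sketch for illustration
++# only, lo/hi being the two 64-bit halves of the tweak):
++#   mask = ((int64_t)hi >> 63) & 0x87;
++#   hi   = (hi << 1) | (lo >> 63);
++#   lo   = (lo << 1) ^ mask;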
++# Encryption will process the (length -tailcnt) bytes as mentioned ++# previously, then encrypt the composite block as last second ++# cipher block. ++# Decryption will process the (length -tailcnt -1) bytes as mentioned ++# previously, then decrypt the last second cipher block to get the ++# last plain block(tail), decrypt the composite block as last second ++# plain text block. ++ ++{{{ ++my ($inp,$out,$len,$key1,$key2,$ivp)=map("x$_",(0..5)); ++my ($rounds0,$rounds,$key_,$step,$ivl,$ivh)=("w5","w6","x7","x8","x9","x10"); ++my ($tmpoutp,$loutp,$l2outp,$tmpinp)=("x13","w14","w15","x20"); ++my ($tailcnt,$midnum,$midnumx,$constnum,$constnumx)=("x21","w22","x22","w19","x19"); ++my ($xoffset,$tmpmx,$tmpmw)=("x6","x11","w11"); ++my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$tmp2,$rndlast)=map("q$_",(0..7)); ++my ($iv0,$iv1,$iv2,$iv3,$iv4)=("v6.16b","v8.16b","v9.16b","v10.16b","v11.16b"); ++my ($ivd00,$ivd01,$ivd20,$ivd21)=("d6","v6.d[1]","d9","v9.d[1]"); ++my ($ivd10,$ivd11,$ivd30,$ivd31,$ivd40,$ivd41)=("d8","v8.d[1]","d10","v10.d[1]","d11","v11.d[1]"); ++ ++my ($tmpin)=("v26.16b"); ++my ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1); ++ ++# q7 last round key ++# q10-q15, q7 Last 7 round keys ++# q8-q9 preloaded round keys except last 7 keys for big size ++# q20, q21, q8-q9 preloaded round keys except last 7 keys for only 16 byte ++ ++ ++my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9)); ++ ++my ($dat3,$in3,$tmp3); # used only in 64-bit mode ++my ($dat4,$in4,$tmp4); ++if ($flavour =~ /64/) { ++ ($dat2,$dat3,$dat4,$in2,$in3,$in4,$tmp3,$tmp4)=map("q$_",(16..23)); ++} ++ ++$code.=<<___ if ($flavour =~ /64/); ++.globl ${prefix}_xts_encrypt ++.type ${prefix}_xts_encrypt,%function ++.align 5 ++${prefix}_xts_encrypt: ++___ ++$code.=<<___ if ($flavour =~ /64/); ++ cmp $len,#16 ++ // Original input data size bigger than 16, jump to big size processing. ++ b.ne .Lxts_enc_big_size ++ // Encrypt the iv with key2, as the first XEX iv. ++ ldr $rounds,[$key2,#240] ++ vld1.8 {$dat},[$key2],#16 ++ vld1.8 {$iv0},[$ivp] ++ sub $rounds,$rounds,#2 ++ vld1.8 {$dat1},[$key2],#16 ++ ++.Loop_enc_iv_enc: ++ aese $iv0,$dat ++ aesmc $iv0,$iv0 ++ vld1.32 {$dat},[$key2],#16 ++ subs $rounds,$rounds,#2 ++ aese $iv0,$dat1 ++ aesmc $iv0,$iv0 ++ vld1.32 {$dat1},[$key2],#16 ++ b.gt .Loop_enc_iv_enc ++ ++ aese $iv0,$dat ++ aesmc $iv0,$iv0 ++ vld1.32 {$dat},[$key2] ++ aese $iv0,$dat1 ++ veor $iv0,$iv0,$dat ++ ++ vld1.8 {$dat0},[$inp] ++ veor $dat0,$iv0,$dat0 ++ ++ ldr $rounds,[$key1,#240] ++ vld1.32 {q20-q21},[$key1],#32 // load key schedule... ++ ++ aese $dat0,q20 ++ aesmc $dat0,$dat0 ++ vld1.32 {q8-q9},[$key1],#32 // load key schedule... ++ aese $dat0,q21 ++ aesmc $dat0,$dat0 ++ subs $rounds,$rounds,#10 // if rounds==10, jump to aes-128-xts processing ++ b.eq .Lxts_128_enc ++.Lxts_enc_round_loop: ++ aese $dat0,q8 ++ aesmc $dat0,$dat0 ++ vld1.32 {q8},[$key1],#16 // load key schedule... ++ aese $dat0,q9 ++ aesmc $dat0,$dat0 ++ vld1.32 {q9},[$key1],#16 // load key schedule... ++ subs $rounds,$rounds,#2 // bias ++ b.gt .Lxts_enc_round_loop ++.Lxts_128_enc: ++ vld1.32 {q10-q11},[$key1],#32 // load key schedule... ++ aese $dat0,q8 ++ aesmc $dat0,$dat0 ++ aese $dat0,q9 ++ aesmc $dat0,$dat0 ++ vld1.32 {q12-q13},[$key1],#32 // load key schedule... ++ aese $dat0,q10 ++ aesmc $dat0,$dat0 ++ aese $dat0,q11 ++ aesmc $dat0,$dat0 ++ vld1.32 {q14-q15},[$key1],#32 // load key schedule... 
++ aese $dat0,q12 ++ aesmc $dat0,$dat0 ++ aese $dat0,q13 ++ aesmc $dat0,$dat0 ++ vld1.32 {$rndlast},[$key1] ++ aese $dat0,q14 ++ aesmc $dat0,$dat0 ++ aese $dat0,q15 ++ veor $dat0,$dat0,$rndlast ++ veor $dat0,$dat0,$iv0 ++ vst1.8 {$dat0},[$out] ++ b .Lxts_enc_final_abort ++ ++.align 4 ++.Lxts_enc_big_size: ++___ ++$code.=<<___ if ($flavour =~ /64/); ++ stp $constnumx,$tmpinp,[sp,#-64]! ++ stp $tailcnt,$midnumx,[sp,#48] ++ stp $ivd10,$ivd20,[sp,#32] ++ stp $ivd30,$ivd40,[sp,#16] ++ ++ // tailcnt store the tail value of length%16. ++ and $tailcnt,$len,#0xf ++ and $len,$len,#-16 ++ subs $len,$len,#16 ++ mov $step,#16 ++ b.lo .Lxts_abort ++ csel $step,xzr,$step,eq ++ ++ // Firstly, encrypt the iv with key2, as the first iv of XEX. ++ ldr $rounds,[$key2,#240] ++ vld1.32 {$dat},[$key2],#16 ++ vld1.8 {$iv0},[$ivp] ++ sub $rounds,$rounds,#2 ++ vld1.32 {$dat1},[$key2],#16 ++ ++.Loop_iv_enc: ++ aese $iv0,$dat ++ aesmc $iv0,$iv0 ++ vld1.32 {$dat},[$key2],#16 ++ subs $rounds,$rounds,#2 ++ aese $iv0,$dat1 ++ aesmc $iv0,$iv0 ++ vld1.32 {$dat1},[$key2],#16 ++ b.gt .Loop_iv_enc ++ ++ aese $iv0,$dat ++ aesmc $iv0,$iv0 ++ vld1.32 {$dat},[$key2] ++ aese $iv0,$dat1 ++ veor $iv0,$iv0,$dat ++ ++ // The iv for second block ++ // $ivl- iv(low), $ivh - iv(high) ++ // the five ivs stored into, $iv0,$iv1,$iv2,$iv3,$iv4 ++ fmov $ivl,$ivd00 ++ fmov $ivh,$ivd01 ++ mov $constnum,#0x87 ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr#31 ++ eor $ivl,$tmpmx,$ivl,lsl#1 ++ fmov $ivd10,$ivl ++ fmov $ivd11,$ivh ++ ++ ldr $rounds0,[$key1,#240] // next starting point ++ vld1.8 {$dat},[$inp],$step ++ ++ vld1.32 {q8-q9},[$key1] // load key schedule... ++ sub $rounds0,$rounds0,#6 ++ add $key_,$key1,$ivp,lsl#4 // pointer to last 7 round keys ++ sub $rounds0,$rounds0,#2 ++ vld1.32 {q10-q11},[$key_],#32 ++ vld1.32 {q12-q13},[$key_],#32 ++ vld1.32 {q14-q15},[$key_],#32 ++ vld1.32 {$rndlast},[$key_] ++ ++ add $key_,$key1,#32 ++ mov $rounds,$rounds0 ++ ++ // Encryption ++.Lxts_enc: ++ vld1.8 {$dat2},[$inp],#16 ++ subs $len,$len,#32 // bias ++ add $rounds,$rounds0,#2 ++ vorr $in1,$dat,$dat ++ vorr $dat1,$dat,$dat ++ vorr $in3,$dat,$dat ++ vorr $in2,$dat2,$dat2 ++ vorr $in4,$dat2,$dat2 ++ b.lo .Lxts_inner_enc_tail ++ veor $dat,$dat,$iv0 // before encryption, xor with iv ++ veor $dat2,$dat2,$iv1 ++ ++ // The iv for third block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr#31 ++ eor $ivl,$tmpmx,$ivl,lsl#1 ++ fmov $ivd20,$ivl ++ fmov $ivd21,$ivh ++ ++ ++ vorr $dat1,$dat2,$dat2 ++ vld1.8 {$dat2},[$inp],#16 ++ vorr $in0,$dat,$dat ++ vorr $in1,$dat1,$dat1 ++ veor $in2,$dat2,$iv2 // the third block ++ veor $dat2,$dat2,$iv2 ++ cmp $len,#32 ++ b.lo .Lxts_outer_enc_tail ++ ++ // The iv for fourth block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr#31 ++ eor $ivl,$tmpmx,$ivl,lsl#1 ++ fmov $ivd30,$ivl ++ fmov $ivd31,$ivh ++ ++ vld1.8 {$dat3},[$inp],#16 ++ // The iv for fifth block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr#31 ++ eor $ivl,$tmpmx,$ivl,lsl#1 ++ fmov $ivd40,$ivl ++ fmov $ivd41,$ivh ++ ++ vld1.8 {$dat4},[$inp],#16 ++ veor $dat3,$dat3,$iv3 // the fourth block ++ veor $dat4,$dat4,$iv4 ++ sub $len,$len,#32 // bias ++ mov $rounds,$rounds0 ++ b .Loop5x_xts_enc ++ ++.align 4 ++.Loop5x_xts_enc: ++ aese $dat0,q8 ++ aesmc $dat0,$dat0 ++ aese $dat1,q8 ++ aesmc $dat1,$dat1 ++ aese $dat2,q8 ++ aesmc $dat2,$dat2 ++ aese $dat3,q8 ++ aesmc $dat3,$dat3 ++ aese 
$dat4,q8 ++ aesmc $dat4,$dat4 ++ vld1.32 {q8},[$key_],#16 ++ subs $rounds,$rounds,#2 ++ aese $dat0,q9 ++ aesmc $dat0,$dat0 ++ aese $dat1,q9 ++ aesmc $dat1,$dat1 ++ aese $dat2,q9 ++ aesmc $dat2,$dat2 ++ aese $dat3,q9 ++ aesmc $dat3,$dat3 ++ aese $dat4,q9 ++ aesmc $dat4,$dat4 ++ vld1.32 {q9},[$key_],#16 ++ b.gt .Loop5x_xts_enc ++ ++ aese $dat0,q8 ++ aesmc $dat0,$dat0 ++ aese $dat1,q8 ++ aesmc $dat1,$dat1 ++ aese $dat2,q8 ++ aesmc $dat2,$dat2 ++ aese $dat3,q8 ++ aesmc $dat3,$dat3 ++ aese $dat4,q8 ++ aesmc $dat4,$dat4 ++ subs $len,$len,#0x50 // because .Lxts_enc_tail4x ++ ++ aese $dat0,q9 ++ aesmc $dat0,$dat0 ++ aese $dat1,q9 ++ aesmc $dat1,$dat1 ++ aese $dat2,q9 ++ aesmc $dat2,$dat2 ++ aese $dat3,q9 ++ aesmc $dat3,$dat3 ++ aese $dat4,q9 ++ aesmc $dat4,$dat4 ++ csel $xoffset,xzr,$len,gt // borrow x6, w6, "gt" is not typo ++ mov $key_,$key1 ++ ++ aese $dat0,q10 ++ aesmc $dat0,$dat0 ++ aese $dat1,q10 ++ aesmc $dat1,$dat1 ++ aese $dat2,q10 ++ aesmc $dat2,$dat2 ++ aese $dat3,q10 ++ aesmc $dat3,$dat3 ++ aese $dat4,q10 ++ aesmc $dat4,$dat4 ++ add $inp,$inp,$xoffset // x0 is adjusted in such way that ++ // at exit from the loop v1.16b-v26.16b ++ // are loaded with last "words" ++ add $xoffset,$len,#0x60 // because .Lxts_enc_tail4x ++ ++ aese $dat0,q11 ++ aesmc $dat0,$dat0 ++ aese $dat1,q11 ++ aesmc $dat1,$dat1 ++ aese $dat2,q11 ++ aesmc $dat2,$dat2 ++ aese $dat3,q11 ++ aesmc $dat3,$dat3 ++ aese $dat4,q11 ++ aesmc $dat4,$dat4 ++ ++ aese $dat0,q12 ++ aesmc $dat0,$dat0 ++ aese $dat1,q12 ++ aesmc $dat1,$dat1 ++ aese $dat2,q12 ++ aesmc $dat2,$dat2 ++ aese $dat3,q12 ++ aesmc $dat3,$dat3 ++ aese $dat4,q12 ++ aesmc $dat4,$dat4 ++ ++ aese $dat0,q13 ++ aesmc $dat0,$dat0 ++ aese $dat1,q13 ++ aesmc $dat1,$dat1 ++ aese $dat2,q13 ++ aesmc $dat2,$dat2 ++ aese $dat3,q13 ++ aesmc $dat3,$dat3 ++ aese $dat4,q13 ++ aesmc $dat4,$dat4 ++ ++ aese $dat0,q14 ++ aesmc $dat0,$dat0 ++ aese $dat1,q14 ++ aesmc $dat1,$dat1 ++ aese $dat2,q14 ++ aesmc $dat2,$dat2 ++ aese $dat3,q14 ++ aesmc $dat3,$dat3 ++ aese $dat4,q14 ++ aesmc $dat4,$dat4 ++ ++ veor $tmp0,$rndlast,$iv0 ++ aese $dat0,q15 ++ // The iv for first block of one iteration ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr#31 ++ eor $ivl,$tmpmx,$ivl,lsl#1 ++ fmov $ivd00,$ivl ++ fmov $ivd01,$ivh ++ veor $tmp1,$rndlast,$iv1 ++ vld1.8 {$in0},[$inp],#16 ++ aese $dat1,q15 ++ // The iv for second block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr#31 ++ eor $ivl,$tmpmx,$ivl,lsl#1 ++ fmov $ivd10,$ivl ++ fmov $ivd11,$ivh ++ veor $tmp2,$rndlast,$iv2 ++ vld1.8 {$in1},[$inp],#16 ++ aese $dat2,q15 ++ // The iv for third block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr#31 ++ eor $ivl,$tmpmx,$ivl,lsl#1 ++ fmov $ivd20,$ivl ++ fmov $ivd21,$ivh ++ veor $tmp3,$rndlast,$iv3 ++ vld1.8 {$in2},[$inp],#16 ++ aese $dat3,q15 ++ // The iv for fourth block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr#31 ++ eor $ivl,$tmpmx,$ivl,lsl#1 ++ fmov $ivd30,$ivl ++ fmov $ivd31,$ivh ++ veor $tmp4,$rndlast,$iv4 ++ vld1.8 {$in3},[$inp],#16 ++ aese $dat4,q15 ++ ++ // The iv for fifth block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd40,$ivl ++ fmov $ivd41,$ivh ++ ++ vld1.8 {$in4},[$inp],#16 ++ cbz $xoffset,.Lxts_enc_tail4x ++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] ++ veor $tmp0,$tmp0,$dat0 ++ veor $dat0,$in0,$iv0 ++ 
veor $tmp1,$tmp1,$dat1 ++ veor $dat1,$in1,$iv1 ++ veor $tmp2,$tmp2,$dat2 ++ veor $dat2,$in2,$iv2 ++ veor $tmp3,$tmp3,$dat3 ++ veor $dat3,$in3,$iv3 ++ veor $tmp4,$tmp4,$dat4 ++ vst1.8 {$tmp0},[$out],#16 ++ veor $dat4,$in4,$iv4 ++ vst1.8 {$tmp1},[$out],#16 ++ mov $rounds,$rounds0 ++ vst1.8 {$tmp2},[$out],#16 ++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] ++ vst1.8 {$tmp3},[$out],#16 ++ vst1.8 {$tmp4},[$out],#16 ++ b.hs .Loop5x_xts_enc ++ ++ ++ // If left 4 blocks, borrow the five block's processing. ++ cmn $len,#0x10 ++ b.ne .Loop5x_enc_after ++ vorr $iv4,$iv3,$iv3 ++ vorr $iv3,$iv2,$iv2 ++ vorr $iv2,$iv1,$iv1 ++ vorr $iv1,$iv0,$iv0 ++ fmov $ivl,$ivd40 ++ fmov $ivh,$ivd41 ++ veor $dat0,$iv0,$in0 ++ veor $dat1,$iv1,$in1 ++ veor $dat2,$in2,$iv2 ++ veor $dat3,$in3,$iv3 ++ veor $dat4,$in4,$iv4 ++ b.eq .Loop5x_xts_enc ++ ++.Loop5x_enc_after: ++ add $len,$len,#0x50 ++ cbz $len,.Lxts_enc_done ++ ++ add $rounds,$rounds0,#2 ++ subs $len,$len,#0x30 ++ b.lo .Lxts_inner_enc_tail ++ ++ veor $dat0,$iv0,$in2 ++ veor $dat1,$iv1,$in3 ++ veor $dat2,$in4,$iv2 ++ b .Lxts_outer_enc_tail ++ ++.align 4 ++.Lxts_enc_tail4x: ++ add $inp,$inp,#16 ++ veor $tmp1,$dat1,$tmp1 ++ vst1.8 {$tmp1},[$out],#16 ++ veor $tmp2,$dat2,$tmp2 ++ vst1.8 {$tmp2},[$out],#16 ++ veor $tmp3,$dat3,$tmp3 ++ veor $tmp4,$dat4,$tmp4 ++ vst1.8 {$tmp3-$tmp4},[$out],#32 ++ ++ b .Lxts_enc_done ++.align 4 ++.Lxts_outer_enc_tail: ++ aese $dat0,q8 ++ aesmc $dat0,$dat0 ++ aese $dat1,q8 ++ aesmc $dat1,$dat1 ++ aese $dat2,q8 ++ aesmc $dat2,$dat2 ++ vld1.32 {q8},[$key_],#16 ++ subs $rounds,$rounds,#2 ++ aese $dat0,q9 ++ aesmc $dat0,$dat0 ++ aese $dat1,q9 ++ aesmc $dat1,$dat1 ++ aese $dat2,q9 ++ aesmc $dat2,$dat2 ++ vld1.32 {q9},[$key_],#16 ++ b.gt .Lxts_outer_enc_tail ++ ++ aese $dat0,q8 ++ aesmc $dat0,$dat0 ++ aese $dat1,q8 ++ aesmc $dat1,$dat1 ++ aese $dat2,q8 ++ aesmc $dat2,$dat2 ++ veor $tmp0,$iv0,$rndlast ++ subs $len,$len,#0x30 ++ // The iv for first block ++ fmov $ivl,$ivd20 ++ fmov $ivh,$ivd21 ++ //mov $constnum,#0x87 ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr#31 ++ eor $ivl,$tmpmx,$ivl,lsl#1 ++ fmov $ivd00,$ivl ++ fmov $ivd01,$ivh ++ veor $tmp1,$iv1,$rndlast ++ csel $xoffset,$len,$xoffset,lo // x6, w6, is zero at this point ++ aese $dat0,q9 ++ aesmc $dat0,$dat0 ++ aese $dat1,q9 ++ aesmc $dat1,$dat1 ++ aese $dat2,q9 ++ aesmc $dat2,$dat2 ++ veor $tmp2,$iv2,$rndlast ++ ++ add $xoffset,$xoffset,#0x20 ++ add $inp,$inp,$xoffset ++ mov $key_,$key1 ++ ++ aese $dat0,q12 ++ aesmc $dat0,$dat0 ++ aese $dat1,q12 ++ aesmc $dat1,$dat1 ++ aese $dat2,q12 ++ aesmc $dat2,$dat2 ++ aese $dat0,q13 ++ aesmc $dat0,$dat0 ++ aese $dat1,q13 ++ aesmc $dat1,$dat1 ++ aese $dat2,q13 ++ aesmc $dat2,$dat2 ++ aese $dat0,q14 ++ aesmc $dat0,$dat0 ++ aese $dat1,q14 ++ aesmc $dat1,$dat1 ++ aese $dat2,q14 ++ aesmc $dat2,$dat2 ++ aese $dat0,q15 ++ aese $dat1,q15 ++ aese $dat2,q15 ++ vld1.8 {$in2},[$inp],#16 ++ add $rounds,$rounds0,#2 ++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] ++ veor $tmp0,$tmp0,$dat0 ++ veor $tmp1,$tmp1,$dat1 ++ veor $dat2,$dat2,$tmp2 ++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] ++ vst1.8 {$tmp0},[$out],#16 ++ vst1.8 {$tmp1},[$out],#16 ++ vst1.8 {$dat2},[$out],#16 ++ cmn $len,#0x30 ++ b.eq .Lxts_enc_done ++.Lxts_encxor_one: ++ vorr $in3,$in1,$in1 ++ vorr $in4,$in2,$in2 ++ nop ++ ++.Lxts_inner_enc_tail: ++ cmn $len,#0x10 ++ veor $dat1,$in3,$iv0 ++ veor $dat2,$in4,$iv1 ++ b.eq .Lxts_enc_tail_loop ++ veor $dat2,$in4,$iv0 ++.Lxts_enc_tail_loop: ++ aese $dat1,q8 ++ aesmc $dat1,$dat1 ++ aese $dat2,q8 
++ aesmc $dat2,$dat2 ++ vld1.32 {q8},[$key_],#16 ++ subs $rounds,$rounds,#2 ++ aese $dat1,q9 ++ aesmc $dat1,$dat1 ++ aese $dat2,q9 ++ aesmc $dat2,$dat2 ++ vld1.32 {q9},[$key_],#16 ++ b.gt .Lxts_enc_tail_loop ++ ++ aese $dat1,q8 ++ aesmc $dat1,$dat1 ++ aese $dat2,q8 ++ aesmc $dat2,$dat2 ++ aese $dat1,q9 ++ aesmc $dat1,$dat1 ++ aese $dat2,q9 ++ aesmc $dat2,$dat2 ++ aese $dat1,q12 ++ aesmc $dat1,$dat1 ++ aese $dat2,q12 ++ aesmc $dat2,$dat2 ++ cmn $len,#0x20 ++ aese $dat1,q13 ++ aesmc $dat1,$dat1 ++ aese $dat2,q13 ++ aesmc $dat2,$dat2 ++ veor $tmp1,$iv0,$rndlast ++ aese $dat1,q14 ++ aesmc $dat1,$dat1 ++ aese $dat2,q14 ++ aesmc $dat2,$dat2 ++ veor $tmp2,$iv1,$rndlast ++ aese $dat1,q15 ++ aese $dat2,q15 ++ b.eq .Lxts_enc_one ++ veor $tmp1,$tmp1,$dat1 ++ vst1.8 {$tmp1},[$out],#16 ++ veor $tmp2,$tmp2,$dat2 ++ vorr $iv0,$iv1,$iv1 ++ vst1.8 {$tmp2},[$out],#16 ++ fmov $ivl,$ivd10 ++ fmov $ivh,$ivd11 ++ mov $constnum,#0x87 ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd00,$ivl ++ fmov $ivd01,$ivh ++ b .Lxts_enc_done ++ ++.Lxts_enc_one: ++ veor $tmp1,$tmp1,$dat2 ++ vorr $iv0,$iv0,$iv0 ++ vst1.8 {$tmp1},[$out],#16 ++ fmov $ivl,$ivd00 ++ fmov $ivh,$ivd01 ++ mov $constnum,#0x87 ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd00,$ivl ++ fmov $ivd01,$ivh ++ b .Lxts_enc_done ++.align 5 ++.Lxts_enc_done: ++ // Process the tail block with cipher stealing. ++ tst $tailcnt,#0xf ++ b.eq .Lxts_abort ++ ++ mov $tmpinp,$inp ++ mov $tmpoutp,$out ++ sub $out,$out,#16 ++.composite_enc_loop: ++ subs $tailcnt,$tailcnt,#1 ++ ldrb $l2outp,[$out,$tailcnt] ++ ldrb $loutp,[$tmpinp,$tailcnt] ++ strb $l2outp,[$tmpoutp,$tailcnt] ++ strb $loutp,[$out,$tailcnt] ++ b.gt .composite_enc_loop ++.Lxts_enc_load_done: ++ vld1.8 {$tmpin},[$out] ++ veor $tmpin,$tmpin,$iv0 ++ ++ // Encrypt the composite block to get the last second encrypted text block ++ ldr $rounds,[$key1,#240] // load key schedule... ++ vld1.8 {$dat},[$key1],#16 ++ sub $rounds,$rounds,#2 ++ vld1.8 {$dat1},[$key1],#16 // load key schedule... 
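++	// At this point [$out] holds the composite block produced by
++	// .composite_enc_loop: its first tailcnt bytes are the tail
++	// plaintext and the remainder comes from the last full ciphertext
++	// block (standard ciphertext stealing). It is now encrypted as an
++	// ordinary XTS block.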
++.Loop_final_enc: ++ aese $tmpin,$dat0 ++ aesmc $tmpin,$tmpin ++ vld1.32 {$dat0},[$key1],#16 ++ subs $rounds,$rounds,#2 ++ aese $tmpin,$dat1 ++ aesmc $tmpin,$tmpin ++ vld1.32 {$dat1},[$key1],#16 ++ b.gt .Loop_final_enc ++ ++ aese $tmpin,$dat0 ++ aesmc $tmpin,$tmpin ++ vld1.32 {$dat0},[$key1] ++ aese $tmpin,$dat1 ++ veor $tmpin,$tmpin,$dat0 ++ veor $tmpin,$tmpin,$iv0 ++ vst1.8 {$tmpin},[$out] ++ ++.Lxts_abort: ++ ldp $tailcnt,$midnumx,[sp,#48] ++ ldp $ivd10,$ivd20,[sp,#32] ++ ldp $ivd30,$ivd40,[sp,#16] ++ ldp $constnumx,$tmpinp,[sp],#64 ++.Lxts_enc_final_abort: ++ ret ++.size ${prefix}_xts_encrypt,.-${prefix}_xts_encrypt ++___ ++ ++}}} ++{{{ ++my ($inp,$out,$len,$key1,$key2,$ivp)=map("x$_",(0..5)); ++my ($rounds0,$rounds,$key_,$step,$ivl,$ivh)=("w5","w6","x7","x8","x9","x10"); ++my ($tmpoutp,$loutp,$l2outp,$tmpinp)=("x13","w14","w15","x20"); ++my ($tailcnt,$midnum,$midnumx,$constnum,$constnumx)=("x21","w22","x22","w19","x19"); ++my ($xoffset,$tmpmx,$tmpmw)=("x6","x11","w11"); ++my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$tmp2,$rndlast)=map("q$_",(0..7)); ++my ($iv0,$iv1,$iv2,$iv3,$iv4,$tmpin)=("v6.16b","v8.16b","v9.16b","v10.16b","v11.16b","v26.16b"); ++my ($ivd00,$ivd01,$ivd20,$ivd21)=("d6","v6.d[1]","d9","v9.d[1]"); ++my ($ivd10,$ivd11,$ivd30,$ivd31,$ivd40,$ivd41)=("d8","v8.d[1]","d10","v10.d[1]","d11","v11.d[1]"); ++ ++my ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1); ++ ++# q7 last round key ++# q10-q15, q7 Last 7 round keys ++# q8-q9 preloaded round keys except last 7 keys for big size ++# q20, q21, q8-q9 preloaded round keys except last 7 keys for only 16 byte ++ ++{ ++my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9)); ++ ++my ($dat3,$in3,$tmp3); # used only in 64-bit mode ++my ($dat4,$in4,$tmp4); ++if ($flavour =~ /64/) { ++ ($dat2,$dat3,$dat4,$in2,$in3,$in4,$tmp3,$tmp4)=map("q$_",(16..23)); ++} ++ ++$code.=<<___ if ($flavour =~ /64/); ++.globl ${prefix}_xts_decrypt ++.type ${prefix}_xts_decrypt,%function ++.align 5 ++${prefix}_xts_decrypt: ++___ ++$code.=<<___ if ($flavour =~ /64/); ++ cmp $len,#16 ++ // Original input data size bigger than 16, jump to big size processing. ++ b.ne .Lxts_dec_big_size ++ // Encrypt the iv with key2, as the first XEX iv. ++ ldr $rounds,[$key2,#240] ++ vld1.8 {$dat},[$key2],#16 ++ vld1.8 {$iv0},[$ivp] ++ sub $rounds,$rounds,#2 ++ vld1.8 {$dat1},[$key2],#16 ++ ++.Loop_dec_small_iv_enc: ++ aese $iv0,$dat ++ aesmc $iv0,$iv0 ++ vld1.32 {$dat},[$key2],#16 ++ subs $rounds,$rounds,#2 ++ aese $iv0,$dat1 ++ aesmc $iv0,$iv0 ++ vld1.32 {$dat1},[$key2],#16 ++ b.gt .Loop_dec_small_iv_enc ++ ++ aese $iv0,$dat ++ aesmc $iv0,$iv0 ++ vld1.32 {$dat},[$key2] ++ aese $iv0,$dat1 ++ veor $iv0,$iv0,$dat ++ ++ vld1.8 {$dat0},[$inp] ++ veor $dat0,$iv0,$dat0 ++ ++ ldr $rounds,[$key1,#240] ++ vld1.32 {q20-q21},[$key1],#32 // load key schedule... ++ ++ aesd $dat0,q20 ++ aesimc $dat0,$dat0 ++ vld1.32 {q8-q9},[$key1],#32 // load key schedule... ++ aesd $dat0,q21 ++ aesimc $dat0,$dat0 ++ subs $rounds,$rounds,#10 // bias ++ b.eq .Lxts_128_dec ++.Lxts_dec_round_loop: ++ aesd $dat0,q8 ++ aesimc $dat0,$dat0 ++ vld1.32 {q8},[$key1],#16 // load key schedule... ++ aesd $dat0,q9 ++ aesimc $dat0,$dat0 ++ vld1.32 {q9},[$key1],#16 // load key schedule... ++ subs $rounds,$rounds,#2 // bias ++ b.gt .Lxts_dec_round_loop ++.Lxts_128_dec: ++ vld1.32 {q10-q11},[$key1],#32 // load key schedule... ++ aesd $dat0,q8 ++ aesimc $dat0,$dat0 ++ aesd $dat0,q9 ++ aesimc $dat0,$dat0 ++ vld1.32 {q12-q13},[$key1],#32 // load key schedule... 
++ aesd $dat0,q10 ++ aesimc $dat0,$dat0 ++ aesd $dat0,q11 ++ aesimc $dat0,$dat0 ++ vld1.32 {q14-q15},[$key1],#32 // load key schedule... ++ aesd $dat0,q12 ++ aesimc $dat0,$dat0 ++ aesd $dat0,q13 ++ aesimc $dat0,$dat0 ++ vld1.32 {$rndlast},[$key1] ++ aesd $dat0,q14 ++ aesimc $dat0,$dat0 ++ aesd $dat0,q15 ++ veor $dat0,$dat0,$rndlast ++ veor $dat0,$iv0,$dat0 ++ vst1.8 {$dat0},[$out] ++ b .Lxts_dec_final_abort ++.Lxts_dec_big_size: ++___ ++$code.=<<___ if ($flavour =~ /64/); ++ stp $constnumx,$tmpinp,[sp,#-64]! ++ stp $tailcnt,$midnumx,[sp,#48] ++ stp $ivd10,$ivd20,[sp,#32] ++ stp $ivd30,$ivd40,[sp,#16] ++ ++ and $tailcnt,$len,#0xf ++ and $len,$len,#-16 ++ subs $len,$len,#16 ++ mov $step,#16 ++ b.lo .Lxts_dec_abort ++ ++ // Encrypt the iv with key2, as the first XEX iv ++ ldr $rounds,[$key2,#240] ++ vld1.8 {$dat},[$key2],#16 ++ vld1.8 {$iv0},[$ivp] ++ sub $rounds,$rounds,#2 ++ vld1.8 {$dat1},[$key2],#16 ++ ++.Loop_dec_iv_enc: ++ aese $iv0,$dat ++ aesmc $iv0,$iv0 ++ vld1.32 {$dat},[$key2],#16 ++ subs $rounds,$rounds,#2 ++ aese $iv0,$dat1 ++ aesmc $iv0,$iv0 ++ vld1.32 {$dat1},[$key2],#16 ++ b.gt .Loop_dec_iv_enc ++ ++ aese $iv0,$dat ++ aesmc $iv0,$iv0 ++ vld1.32 {$dat},[$key2] ++ aese $iv0,$dat1 ++ veor $iv0,$iv0,$dat ++ ++ // The iv for second block ++ // $ivl- iv(low), $ivh - iv(high) ++ // the five ivs stored into, $iv0,$iv1,$iv2,$iv3,$iv4 ++ fmov $ivl,$ivd00 ++ fmov $ivh,$ivd01 ++ mov $constnum,#0x87 ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd10,$ivl ++ fmov $ivd11,$ivh ++ ++ ldr $rounds0,[$key1,#240] // load rounds number ++ ++ // The iv for third block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd20,$ivl ++ fmov $ivd21,$ivh ++ ++ vld1.32 {q8-q9},[$key1] // load key schedule... ++ sub $rounds0,$rounds0,#6 ++ add $key_,$key1,$ivp,lsl#4 // pointer to last 7 round keys ++ sub $rounds0,$rounds0,#2 ++ vld1.32 {q10-q11},[$key_],#32 // load key schedule... 
++ vld1.32 {q12-q13},[$key_],#32 ++ vld1.32 {q14-q15},[$key_],#32 ++ vld1.32 {$rndlast},[$key_] ++ ++ // The iv for fourth block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd30,$ivl ++ fmov $ivd31,$ivh ++ ++ add $key_,$key1,#32 ++ mov $rounds,$rounds0 ++ b .Lxts_dec ++ ++ // Decryption ++.align 5 ++.Lxts_dec: ++ tst $tailcnt,#0xf ++ b.eq .Lxts_dec_begin ++ subs $len,$len,#16 ++ csel $step,xzr,$step,eq ++ vld1.8 {$dat},[$inp],#16 ++ b.lo .Lxts_done ++ sub $inp,$inp,#16 ++.Lxts_dec_begin: ++ vld1.8 {$dat},[$inp],$step ++ subs $len,$len,#32 // bias ++ add $rounds,$rounds0,#2 ++ vorr $in1,$dat,$dat ++ vorr $dat1,$dat,$dat ++ vorr $in3,$dat,$dat ++ vld1.8 {$dat2},[$inp],#16 ++ vorr $in2,$dat2,$dat2 ++ vorr $in4,$dat2,$dat2 ++ b.lo .Lxts_inner_dec_tail ++ veor $dat,$dat,$iv0 // before decryt, xor with iv ++ veor $dat2,$dat2,$iv1 ++ ++ vorr $dat1,$dat2,$dat2 ++ vld1.8 {$dat2},[$inp],#16 ++ vorr $in0,$dat,$dat ++ vorr $in1,$dat1,$dat1 ++ veor $in2,$dat2,$iv2 // third block xox with third iv ++ veor $dat2,$dat2,$iv2 ++ cmp $len,#32 ++ b.lo .Lxts_outer_dec_tail ++ ++ vld1.8 {$dat3},[$inp],#16 ++ ++ // The iv for fifth block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd40,$ivl ++ fmov $ivd41,$ivh ++ ++ vld1.8 {$dat4},[$inp],#16 ++ veor $dat3,$dat3,$iv3 // the fourth block ++ veor $dat4,$dat4,$iv4 ++ sub $len,$len,#32 // bias ++ mov $rounds,$rounds0 ++ b .Loop5x_xts_dec ++ ++.align 4 ++.Loop5x_xts_dec: ++ aesd $dat0,q8 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q8 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q8 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q8 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q8 ++ aesimc $dat4,$dat4 ++ vld1.32 {q8},[$key_],#16 // load key schedule... ++ subs $rounds,$rounds,#2 ++ aesd $dat0,q9 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q9 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q9 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q9 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q9 ++ aesimc $dat4,$dat4 ++ vld1.32 {q9},[$key_],#16 // load key schedule... 
++ b.gt .Loop5x_xts_dec ++ ++ aesd $dat0,q8 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q8 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q8 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q8 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q8 ++ aesimc $dat4,$dat4 ++ subs $len,$len,#0x50 // because .Lxts_dec_tail4x ++ ++ aesd $dat0,q9 ++ aesimc $dat0,$dat ++ aesd $dat1,q9 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q9 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q9 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q9 ++ aesimc $dat4,$dat4 ++ csel $xoffset,xzr,$len,gt // borrow x6, w6, "gt" is not typo ++ mov $key_,$key1 ++ ++ aesd $dat0,q10 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q10 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q10 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q10 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q10 ++ aesimc $dat4,$dat4 ++ add $inp,$inp,$xoffset // x0 is adjusted in such way that ++ // at exit from the loop v1.16b-v26.16b ++ // are loaded with last "words" ++ add $xoffset,$len,#0x60 // because .Lxts_dec_tail4x ++ ++ aesd $dat0,q11 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q11 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q11 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q11 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q11 ++ aesimc $dat4,$dat4 ++ ++ aesd $dat0,q12 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q12 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q12 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q12 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q12 ++ aesimc $dat4,$dat4 ++ ++ aesd $dat0,q13 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q13 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q13 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q13 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q13 ++ aesimc $dat4,$dat4 ++ ++ aesd $dat0,q14 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q14 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q14 ++ aesimc $dat2,$dat2 ++ aesd $dat3,q14 ++ aesimc $dat3,$dat3 ++ aesd $dat4,q14 ++ aesimc $dat4,$dat4 ++ ++ veor $tmp0,$rndlast,$iv0 ++ aesd $dat0,q15 ++ // The iv for first block of next iteration. 
++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd00,$ivl ++ fmov $ivd01,$ivh ++ veor $tmp1,$rndlast,$iv1 ++ vld1.8 {$in0},[$inp],#16 ++ aesd $dat1,q15 ++ // The iv for second block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd10,$ivl ++ fmov $ivd11,$ivh ++ veor $tmp2,$rndlast,$iv2 ++ vld1.8 {$in1},[$inp],#16 ++ aesd $dat2,q15 ++ // The iv for third block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd20,$ivl ++ fmov $ivd21,$ivh ++ veor $tmp3,$rndlast,$iv3 ++ vld1.8 {$in2},[$inp],#16 ++ aesd $dat3,q15 ++ // The iv for fourth block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd30,$ivl ++ fmov $ivd31,$ivh ++ veor $tmp4,$rndlast,$iv4 ++ vld1.8 {$in3},[$inp],#16 ++ aesd $dat4,q15 ++ ++ // The iv for fifth block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd40,$ivl ++ fmov $ivd41,$ivh ++ ++ vld1.8 {$in4},[$inp],#16 ++ cbz $xoffset,.Lxts_dec_tail4x ++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] ++ veor $tmp0,$tmp0,$dat0 ++ veor $dat0,$in0,$iv0 ++ veor $tmp1,$tmp1,$dat1 ++ veor $dat1,$in1,$iv1 ++ veor $tmp2,$tmp2,$dat2 ++ veor $dat2,$in2,$iv2 ++ veor $tmp3,$tmp3,$dat3 ++ veor $dat3,$in3,$iv3 ++ veor $tmp4,$tmp4,$dat4 ++ vst1.8 {$tmp0},[$out],#16 ++ veor $dat4,$in4,$iv4 ++ vst1.8 {$tmp1},[$out],#16 ++ mov $rounds,$rounds0 ++ vst1.8 {$tmp2},[$out],#16 ++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] ++ vst1.8 {$tmp3},[$out],#16 ++ vst1.8 {$tmp4},[$out],#16 ++ b.hs .Loop5x_xts_dec ++ ++ cmn $len,#0x10 ++ b.ne .Loop5x_dec_after ++ // If x2($len) equal to -0x10, the left blocks is 4. ++ // After specially processing, utilize the five blocks processing again. ++ // It will use the following IVs: $iv0,$iv0,$iv1,$iv2,$iv3. 
++ vorr $iv4,$iv3,$iv3 ++ vorr $iv3,$iv2,$iv2 ++ vorr $iv2,$iv1,$iv1 ++ vorr $iv1,$iv0,$iv0 ++ fmov $ivl,$ivd40 ++ fmov $ivh,$ivd41 ++ veor $dat0,$iv0,$in0 ++ veor $dat1,$iv1,$in1 ++ veor $dat2,$in2,$iv2 ++ veor $dat3,$in3,$iv3 ++ veor $dat4,$in4,$iv4 ++ b.eq .Loop5x_xts_dec ++ ++.Loop5x_dec_after: ++ add $len,$len,#0x50 ++ cbz $len,.Lxts_done ++ ++ add $rounds,$rounds0,#2 ++ subs $len,$len,#0x30 ++ b.lo .Lxts_inner_dec_tail ++ ++ veor $dat0,$iv0,$in2 ++ veor $dat1,$iv1,$in3 ++ veor $dat2,$in4,$iv2 ++ b .Lxts_outer_dec_tail ++ ++.align 4 ++.Lxts_dec_tail4x: ++ add $inp,$inp,#16 ++ vld1.32 {$dat0},[$inp],#16 ++ veor $tmp1,$dat1,$tmp0 ++ vst1.8 {$tmp1},[$out],#16 ++ veor $tmp2,$dat2,$tmp2 ++ vst1.8 {$tmp2},[$out],#16 ++ veor $tmp3,$dat3,$tmp3 ++ veor $tmp4,$dat4,$tmp4 ++ vst1.8 {$tmp3-$tmp4},[$out],#32 ++ ++ b .Lxts_done ++.align 4 ++.Lxts_outer_dec_tail: ++ aesd $dat0,q8 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q8 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q8 ++ aesimc $dat2,$dat2 ++ vld1.32 {q8},[$key_],#16 ++ subs $rounds,$rounds,#2 ++ aesd $dat0,q9 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q9 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q9 ++ aesimc $dat2,$dat2 ++ vld1.32 {q9},[$key_],#16 ++ b.gt .Lxts_outer_dec_tail ++ ++ aesd $dat0,q8 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q8 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q8 ++ aesimc $dat2,$dat2 ++ veor $tmp0,$iv0,$rndlast ++ subs $len,$len,#0x30 ++ // The iv for first block ++ fmov $ivl,$ivd20 ++ fmov $ivh,$ivd21 ++ mov $constnum,#0x87 ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd00,$ivl ++ fmov $ivd01,$ivh ++ veor $tmp1,$iv1,$rndlast ++ csel $xoffset,$len,$xoffset,lo // x6, w6, is zero at this point ++ aesd $dat0,q9 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q9 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q9 ++ aesimc $dat2,$dat2 ++ veor $tmp2,$iv2,$rndlast ++ // The iv for second block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd10,$ivl ++ fmov $ivd11,$ivh ++ ++ add $xoffset,$xoffset,#0x20 ++ add $inp,$inp,$xoffset // $inp is adjusted to the last data ++ ++ mov $key_,$key1 ++ ++ // The iv for third block ++ extr $midnumx,$ivh,$ivh,#32 ++ extr $ivh,$ivh,$ivl,#63 ++ and $tmpmw,$constnum,$midnum,asr #31 ++ eor $ivl,$tmpmx,$ivl,lsl #1 ++ fmov $ivd20,$ivl ++ fmov $ivd21,$ivh ++ ++ aesd $dat0,q12 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q12 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q12 ++ aesimc $dat2,$dat2 ++ aesd $dat0,q13 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q13 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q13 ++ aesimc $dat2,$dat2 ++ aesd $dat0,q14 ++ aesimc $dat0,$dat0 ++ aesd $dat1,q14 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q14 ++ aesimc $dat2,$dat2 ++ vld1.8 {$in2},[$inp],#16 ++ aesd $dat0,q15 ++ aesd $dat1,q15 ++ aesd $dat2,q15 ++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] ++ add $rounds,$rounds0,#2 ++ veor $tmp0,$tmp0,$dat0 ++ veor $tmp1,$tmp1,$dat1 ++ veor $dat2,$dat2,$tmp2 ++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] ++ vst1.8 {$tmp0},[$out],#16 ++ vst1.8 {$tmp1},[$out],#16 ++ vst1.8 {$dat2},[$out],#16 ++ ++ cmn $len,#0x30 ++ add $len,$len,#0x30 ++ b.eq .Lxts_done ++ sub $len,$len,#0x30 ++ vorr $in3,$in1,$in1 ++ vorr $in4,$in2,$in2 ++ nop ++ ++.Lxts_inner_dec_tail: ++ // $len == -0x10 means two blocks left. 
++ cmn $len,#0x10 ++ veor $dat1,$in3,$iv0 ++ veor $dat2,$in4,$iv1 ++ b.eq .Lxts_dec_tail_loop ++ veor $dat2,$in4,$iv0 ++.Lxts_dec_tail_loop: ++ aesd $dat1,q8 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q8 ++ aesimc $dat2,$dat2 ++ vld1.32 {q8},[$key_],#16 ++ subs $rounds,$rounds,#2 ++ aesd $dat1,q9 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q9 ++ aesimc $dat2,$dat2 ++ vld1.32 {q9},[$key_],#16 ++ b.gt .Lxts_dec_tail_loop ++ ++ aesd $dat1,q8 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q8 ++ aesimc $dat2,$dat2 ++ aesd $dat1,q9 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q9 ++ aesimc $dat2,$dat2 ++ aesd $dat1,q12 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q12 ++ aesimc $dat2,$dat2 ++ cmn $len,#0x20 ++ aesd $dat1,q13 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q13 ++ aesimc $dat2,$dat2 ++ veor $tmp1,$iv0,$rndlast ++ aesd $dat1,q14 ++ aesimc $dat1,$dat1 ++ aesd $dat2,q14 ++ aesimc $dat2,$dat2 ++ veor $tmp2,$iv1,$rndlast ++ aesd $dat1,q15 ++ aesd $dat2,q15 ++ b.eq .Lxts_dec_one ++ veor $tmp1,$tmp1,$dat1 ++ veor $tmp2,$tmp2,$dat2 ++ vorr $iv0,$iv2,$iv2 ++ vorr $iv1,$iv3,$iv3 ++ vst1.8 {$tmp1},[$out],#16 ++ vst1.8 {$tmp2},[$out],#16 ++ add $len,$len,#16 ++ b .Lxts_done ++ ++.Lxts_dec_one: ++ veor $tmp1,$tmp1,$dat2 ++ vorr $iv0,$iv1,$iv1 ++ vorr $iv1,$iv2,$iv2 ++ vst1.8 {$tmp1},[$out],#16 ++ add $len,$len,#32 ++ ++.Lxts_done: ++ tst $tailcnt,#0xf ++ b.eq .Lxts_dec_abort ++ // Processing the last two blocks with cipher stealing. ++ mov x7,x3 ++ cbnz x2,.Lxts_dec_1st_done ++ vld1.32 {$dat0},[$inp],#16 ++ ++ // Decrypt the last secod block to get the last plain text block ++.Lxts_dec_1st_done: ++ eor $tmpin,$dat0,$iv1 ++ ldr $rounds,[$key1,#240] ++ vld1.32 {$dat0},[$key1],#16 ++ sub $rounds,$rounds,#2 ++ vld1.32 {$dat1},[$key1],#16 ++.Loop_final_2nd_dec: ++ aesd $tmpin,$dat0 ++ aesimc $tmpin,$tmpin ++ vld1.32 {$dat0},[$key1],#16 // load key schedule... ++ subs $rounds,$rounds,#2 ++ aesd $tmpin,$dat1 ++ aesimc $tmpin,$tmpin ++ vld1.32 {$dat1},[$key1],#16 // load key schedule... ++ b.gt .Loop_final_2nd_dec ++ ++ aesd $tmpin,$dat0 ++ aesimc $tmpin,$tmpin ++ vld1.32 {$dat0},[$key1] ++ aesd $tmpin,$dat1 ++ veor $tmpin,$tmpin,$dat0 ++ veor $tmpin,$tmpin,$iv1 ++ vst1.8 {$tmpin},[$out] ++ ++ mov $tmpinp,$inp ++ add $tmpoutp,$out,#16 ++ ++ // Composite the tailcnt "16 byte not aligned block" into the last second plain blocks ++ // to get the last encrypted block. ++.composite_dec_loop: ++ subs $tailcnt,$tailcnt,#1 ++ ldrb $l2outp,[$out,$tailcnt] ++ ldrb $loutp,[$tmpinp,$tailcnt] ++ strb $l2outp,[$tmpoutp,$tailcnt] ++ strb $loutp,[$out,$tailcnt] ++ b.gt .composite_dec_loop ++.Lxts_dec_load_done: ++ vld1.8 {$tmpin},[$out] ++ veor $tmpin,$tmpin,$iv0 ++ ++ // Decrypt the composite block to get the last second plain text block ++ ldr $rounds,[$key_,#240] ++ vld1.8 {$dat},[$key_],#16 ++ sub $rounds,$rounds,#2 ++ vld1.8 {$dat1},[$key_],#16 ++.Loop_final_dec: ++ aesd $tmpin,$dat0 ++ aesimc $tmpin,$tmpin ++ vld1.32 {$dat0},[$key_],#16 // load key schedule... ++ subs $rounds,$rounds,#2 ++ aesd $tmpin,$dat1 ++ aesimc $tmpin,$tmpin ++ vld1.32 {$dat1},[$key_],#16 // load key schedule... 
++ b.gt .Loop_final_dec ++ ++ aesd $tmpin,$dat0 ++ aesimc $tmpin,$tmpin ++ vld1.32 {$dat0},[$key_] ++ aesd $tmpin,$dat1 ++ veor $tmpin,$tmpin,$dat0 ++ veor $tmpin,$tmpin,$iv0 ++ vst1.8 {$tmpin},[$out] ++ ++.Lxts_dec_abort: ++ ldp $tailcnt,$midnumx,[sp,#48] ++ ldp $ivd10,$ivd20,[sp,#32] ++ ldp $ivd30,$ivd40,[sp,#16] ++ ldp $constnumx,$tmpinp,[sp],#64 ++ ++.Lxts_dec_final_abort: ++ ret ++.size ${prefix}_xts_decrypt,.-${prefix}_xts_decrypt ++___ ++} ++}}} + $code.=<<___; + #endif + ___ +@@ -963,7 +3615,7 @@ if ($flavour =~ /64/) { ######## 64-bi # since ARMv7 instructions are always encoded little-endian. # correct solution is to use .inst directive, but older # assemblers don't implement it:-( @@ -491,7 +2761,7 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c $word&0xff,($word>>8)&0xff, ($word>>16)&0xff,($word>>24)&0xff, $mnemonic,$arg; -@@ -996,14 +1392,17 @@ if ($flavour =~ /64/) { ######## 64-bi +@@ -1004,14 +3656,17 @@ if ($flavour =~ /64/) { ######## 64-bi s/\],#[0-9]+/]!/o; s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo or @@ -511,9 +2781,9 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c print $_,"\n"; } } -diff -up openssl-1.1.1c/crypto/aes/asm/vpaes-armv8.pl.arm-update openssl-1.1.1c/crypto/aes/asm/vpaes-armv8.pl ---- openssl-1.1.1c/crypto/aes/asm/vpaes-armv8.pl.arm-update 2019-05-28 15:12:21.000000000 +0200 -+++ openssl-1.1.1c/crypto/aes/asm/vpaes-armv8.pl 2019-11-20 11:36:22.389506155 +0100 +diff -up openssl-1.1.1i/crypto/aes/asm/vpaes-armv8.pl.arm-update openssl-1.1.1i/crypto/aes/asm/vpaes-armv8.pl +--- openssl-1.1.1i/crypto/aes/asm/vpaes-armv8.pl.arm-update 2020-12-08 14:20:59.000000000 +0100 ++++ openssl-1.1.1i/crypto/aes/asm/vpaes-armv8.pl 2020-12-09 10:37:38.405558929 +0100 @@ -30,6 +30,7 @@ # Denver(***) 16.6(**) 15.1/17.8(**) [8.80/9.93 ] # Apple A7(***) 22.7(**) 10.9/14.3 [8.45/10.0 ] @@ -522,9 +2792,9 @@ diff -up openssl-1.1.1c/crypto/aes/asm/vpaes-armv8.pl.arm-update openssl-1.1.1c/ # # (*) ECB denotes approximate result for parallelizable modes # such as CBC decrypt, CTR, etc.; -diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl ---- openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update 2019-05-28 15:12:21.000000000 +0200 -+++ openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl 2019-11-21 16:44:50.814651553 +0100 +diff -up openssl-1.1.1i/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1.1i/crypto/chacha/asm/chacha-armv8.pl +--- openssl-1.1.1i/crypto/chacha/asm/chacha-armv8.pl.arm-update 2020-12-08 14:20:59.000000000 +0100 ++++ openssl-1.1.1i/crypto/chacha/asm/chacha-armv8.pl 2020-12-09 10:40:57.922288627 +0100 @@ -18,32 +18,44 @@ # # ChaCha20 for ARMv8. 
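The XTS routines added to aesv8-armx.pl above advance the 128-bit tweak with the mov/extr/and/eor sequence built around the 0x87 constant: a multiplication by x (alpha) in GF(2^128) modulo x^128 + x^7 + x^2 + x + 1. A minimal C sketch of that tweak update, assuming the tweak is held as two little-endian 64-bit halves the way the assembly holds it in $ivl/$ivh (the helper name is illustrative only, not part of the patch):

#include <stdint.h>

/*
 * Multiply the XTS tweak by x (alpha) in GF(2^128): shift the 128-bit
 * value left by one bit and, when a bit carries out of the top, fold
 * the reduction constant 0x87 back into the low byte. This is what
 * the "and ...,asr #31" / "eor ...,lsl #1" pair above computes.
 */
static void xts_next_tweak(uint64_t *lo, uint64_t *hi)
{
    uint64_t carry = -(*hi >> 63) & 0x87;   /* 0x87 iff the top bit is set */
    *hi = (*hi << 1) | (*lo >> 63);
    *lo = (*lo << 1) ^ carry;
}

Each 16-byte block is XORed with the current tweak before and after the raw AES rounds, and the tweak advances once per block; because the update needs only cheap integer instructions, the next few tweaks can be computed while earlier blocks are still in the AES pipeline, which is what makes the 5x interleaving profitable.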
@@ -585,20 +2855,22 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 *STDOUT=*OUT; sub AUTOLOAD() # thunk [simplified] x86-style perlasm -@@ -120,41 +132,36 @@ my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1) +@@ -120,42 +132,37 @@ my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1) } $code.=<<___; -#include "arm_arch.h" +- +-.text +- +#ifndef __KERNEL__ +# include "arm_arch.h" -+.extern OPENSSL_armcap_P + .extern OPENSSL_armcap_P + .hidden OPENSSL_armcap_P +#endif ++ ++.text - .text - --.extern OPENSSL_armcap_P -- .align 5 .Lsigma: .quad 0x3320646e61707865,0x6b20657479622d32 // endian-neutral @@ -641,7 +2913,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 .Lshort: .inst 0xd503233f // paciasp -@@ -173,7 +180,7 @@ ChaCha20_ctr32: +@@ -174,7 +181,7 @@ ChaCha20_ctr32: ldp @d[2],@d[3],[$key] // load key ldp @d[4],@d[5],[$key,#16] ldp @d[6],@d[7],[$ctr] // load counter @@ -650,7 +2922,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 ror @d[2],@d[2],#32 ror @d[3],@d[3],#32 ror @d[4],@d[4],#32 -@@ -242,7 +249,7 @@ $code.=<<___; +@@ -243,7 +250,7 @@ $code.=<<___; add @x[14],@x[14],@x[15],lsl#32 ldp @x[13],@x[15],[$inp,#48] add $inp,$inp,#64 @@ -659,7 +2931,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 rev @x[0],@x[0] rev @x[2],@x[2] rev @x[4],@x[4] -@@ -299,7 +306,7 @@ $code.=<<___; +@@ -300,7 +307,7 @@ $code.=<<___; add @x[10],@x[10],@x[11],lsl#32 add @x[12],@x[12],@x[13],lsl#32 add @x[14],@x[14],@x[15],lsl#32 @@ -668,7 +2940,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 rev @x[0],@x[0] rev @x[2],@x[2] rev @x[4],@x[4] -@@ -340,46 +347,91 @@ $code.=<<___; +@@ -341,46 +348,91 @@ $code.=<<___; ___ {{{ @@ -789,7 +3061,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 .inst 0xd503233f // paciasp stp x29,x30,[sp,#-96]! 
add x29,sp,#0 -@@ -402,8 +454,9 @@ ChaCha20_neon: +@@ -403,8 +455,9 @@ ChaCha20_neon: ld1 {@K[1],@K[2]},[$key] ldp @d[6],@d[7],[$ctr] // load counter ld1 {@K[3]},[$ctr] @@ -801,7 +3073,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 rev64 @K[0],@K[0] ror @d[2],@d[2],#32 ror @d[3],@d[3],#32 -@@ -412,115 +465,129 @@ ChaCha20_neon: +@@ -413,115 +466,129 @@ ChaCha20_neon: ror @d[6],@d[6],#32 ror @d[7],@d[7],#32 #endif @@ -1013,7 +3285,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 rev @x[0],@x[0] rev @x[2],@x[2] rev @x[4],@x[4] -@@ -530,48 +597,68 @@ $code.=<<___; +@@ -531,48 +598,68 @@ $code.=<<___; rev @x[12],@x[12] rev @x[14],@x[14] #endif @@ -1106,7 +3378,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 ldp x19,x20,[x29,#16] add sp,sp,#64 ldp x21,x22,[x29,#32] -@@ -582,8 +669,10 @@ $code.=<<___; +@@ -583,8 +670,10 @@ $code.=<<___; .inst 0xd50323bf // autiasp ret @@ -1118,7 +3390,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 cmp $len,#64 b.lo .Less_than_64 -@@ -600,7 +689,7 @@ $code.=<<___; +@@ -601,7 +690,7 @@ $code.=<<___; add @x[14],@x[14],@x[15],lsl#32 ldp @x[13],@x[15],[$inp,#48] add $inp,$inp,#64 @@ -1127,7 +3399,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 rev @x[0],@x[0] rev @x[2],@x[2] rev @x[4],@x[4] -@@ -620,48 +709,68 @@ $code.=<<___; +@@ -621,48 +710,68 @@ $code.=<<___; eor @x[14],@x[14],@x[15] stp @x[0],@x[2],[$out,#0] // store output @@ -1220,7 +3492,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 sub $out,$out,#1 add $inp,$inp,$len add $out,$out,$len -@@ -694,9 +803,41 @@ $code.=<<___; +@@ -695,9 +804,41 @@ $code.=<<___; .size ChaCha20_neon,.-ChaCha20_neon ___ { @@ -1263,7 +3535,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 $code.=<<___; .type ChaCha20_512_neon,%function -@@ -716,6 +857,7 @@ ChaCha20_512_neon: +@@ -717,6 +858,7 @@ ChaCha20_512_neon: .L512_or_more_neon: sub sp,sp,#128+64 @@ -1271,7 +3543,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 ldp @d[0],@d[1],[@x[0]] // load sigma ld1 {@K[0]},[@x[0]],#16 ldp @d[2],@d[3],[$key] // load key -@@ -723,8 +865,9 @@ ChaCha20_512_neon: +@@ -724,8 +866,9 @@ ChaCha20_512_neon: ld1 {@K[1],@K[2]},[$key] ldp @d[6],@d[7],[$ctr] // load counter ld1 {@K[3]},[$ctr] @@ -1283,7 +3555,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 rev64 @K[0],@K[0] ror @d[2],@d[2],#32 ror @d[3],@d[3],#32 -@@ -791,9 +934,10 @@ ChaCha20_512_neon: +@@ -792,9 +935,10 @@ ChaCha20_512_neon: mov $C4,@K[2] stp @K[3],@K[4],[sp,#48] // off-load key block, variable part mov $C5,@K[2] @@ -1295,7 +3567,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 subs $len,$len,#512 .Loop_upper_neon: sub $ctr,$ctr,#1 -@@ -866,7 +1010,7 @@ $code.=<<___; +@@ -867,7 +1011,7 @@ $code.=<<___; add @x[14],@x[14],@x[15],lsl#32 ldp @x[13],@x[15],[$inp,#48] add $inp,$inp,#64 @@ -1304,7 +3576,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 rev @x[0],@x[0] rev @x[2],@x[2] rev @x[4],@x[4] -@@ -955,6 +1099,7 @@ $code.=<<___; +@@ -956,6 +1100,7 @@ $code.=<<___; add.32 @x[2],@x[2],@d[1] ldp @K[4],@K[5],[sp,#64] add @x[3],@x[3],@d[1],lsr#32 @@ -1312,7 +3584,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 add $A0,$A0,@K[0] add.32 
@x[4],@x[4],@d[2] add $A1,$A1,@K[0] -@@ -1007,7 +1152,7 @@ $code.=<<___; +@@ -1008,7 +1153,7 @@ $code.=<<___; add $inp,$inp,#64 add $B5,$B5,@K[1] @@ -1321,7 +3593,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 rev @x[0],@x[0] rev @x[2],@x[2] rev @x[4],@x[4] -@@ -1085,26 +1230,26 @@ $code.=<<___; +@@ -1086,26 +1231,26 @@ $code.=<<___; b.hs .Loop_outer_512_neon adds $len,$len,#512 @@ -1356,7 +3628,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 eor @K[1],@K[1],@K[1] eor @K[2],@K[2],@K[2] eor @K[3],@K[3],@K[3] -@@ -1114,6 +1259,7 @@ $code.=<<___; +@@ -1115,6 +1260,7 @@ $code.=<<___; b .Loop_outer .Ldone_512_neon: @@ -1364,7 +3636,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 ldp x19,x20,[x29,#16] add sp,sp,#128+64 ldp x21,x22,[x29,#32] -@@ -1132,9 +1278,11 @@ foreach (split("\n",$code)) { +@@ -1133,9 +1279,11 @@ foreach (split("\n",$code)) { s/\`([^\`]*)\`/eval $1/geo; (s/\b([a-z]+)\.32\b/$1/ and (s/x([0-9]+)/w$1/g or 1)) or @@ -1377,9 +3649,9 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1 (s/\brev32\.16\b/rev32/ and (s/\.4s/\.8h/g or 1)); #s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo; -diff -up openssl-1.1.1c/crypto/modes/asm/ghashv8-armx.pl.arm-update openssl-1.1.1c/crypto/modes/asm/ghashv8-armx.pl ---- openssl-1.1.1c/crypto/modes/asm/ghashv8-armx.pl.arm-update 2019-05-28 15:12:21.000000000 +0200 -+++ openssl-1.1.1c/crypto/modes/asm/ghashv8-armx.pl 2019-11-20 11:36:22.389506155 +0100 +diff -up openssl-1.1.1i/crypto/modes/asm/ghashv8-armx.pl.arm-update openssl-1.1.1i/crypto/modes/asm/ghashv8-armx.pl +--- openssl-1.1.1i/crypto/modes/asm/ghashv8-armx.pl.arm-update 2020-12-08 14:20:59.000000000 +0100 ++++ openssl-1.1.1i/crypto/modes/asm/ghashv8-armx.pl 2020-12-09 10:37:38.408558954 +0100 @@ -42,6 +42,7 @@ # Denver 0.51 0.65 6.02 # Mongoose 0.65 1.10 8.06 @@ -1388,9 +3660,9 @@ diff -up openssl-1.1.1c/crypto/modes/asm/ghashv8-armx.pl.arm-update openssl-1.1. # # (*) presented for reference/comparison purposes; -diff -up openssl-1.1.1c/crypto/poly1305/asm/poly1305-armv8.pl.arm-update openssl-1.1.1c/crypto/poly1305/asm/poly1305-armv8.pl ---- openssl-1.1.1c/crypto/poly1305/asm/poly1305-armv8.pl.arm-update 2019-05-28 15:12:21.000000000 +0200 -+++ openssl-1.1.1c/crypto/poly1305/asm/poly1305-armv8.pl 2019-11-20 11:36:22.390506137 +0100 +diff -up openssl-1.1.1i/crypto/poly1305/asm/poly1305-armv8.pl.arm-update openssl-1.1.1i/crypto/poly1305/asm/poly1305-armv8.pl +--- openssl-1.1.1i/crypto/poly1305/asm/poly1305-armv8.pl.arm-update 2020-12-08 14:20:59.000000000 +0100 ++++ openssl-1.1.1i/crypto/poly1305/asm/poly1305-armv8.pl 2020-12-09 10:37:38.408558954 +0100 @@ -29,6 +29,7 @@ # X-Gene 2.13/+68% 2.27 # Mongoose 1.77/+75% 1.12 @@ -1399,9 +3671,9 @@ diff -up openssl-1.1.1c/crypto/poly1305/asm/poly1305-armv8.pl.arm-update openssl # # (*) estimate based on resources availability is less than 1.0, # i.e. 
measured result is worse than expected, presumably binary -diff -up openssl-1.1.1c/crypto/sha/asm/keccak1600-armv8.pl.arm-update openssl-1.1.1c/crypto/sha/asm/keccak1600-armv8.pl ---- openssl-1.1.1c/crypto/sha/asm/keccak1600-armv8.pl.arm-update 2019-05-28 15:12:21.000000000 +0200 -+++ openssl-1.1.1c/crypto/sha/asm/keccak1600-armv8.pl 2019-11-20 11:36:22.390506137 +0100 +diff -up openssl-1.1.1i/crypto/sha/asm/keccak1600-armv8.pl.arm-update openssl-1.1.1i/crypto/sha/asm/keccak1600-armv8.pl +--- openssl-1.1.1i/crypto/sha/asm/keccak1600-armv8.pl.arm-update 2020-12-08 14:20:59.000000000 +0100 ++++ openssl-1.1.1i/crypto/sha/asm/keccak1600-armv8.pl 2020-12-09 10:37:38.408558954 +0100 @@ -51,6 +51,7 @@ # Kryo 12 # Denver 7.8 @@ -1410,9 +3682,9 @@ diff -up openssl-1.1.1c/crypto/sha/asm/keccak1600-armv8.pl.arm-update openssl-1. # # (*) Corresponds to SHA3-256. No improvement coefficients are listed # because they vary too much from compiler to compiler. Newer -diff -up openssl-1.1.1c/crypto/sha/asm/sha1-armv8.pl.arm-update openssl-1.1.1c/crypto/sha/asm/sha1-armv8.pl ---- openssl-1.1.1c/crypto/sha/asm/sha1-armv8.pl.arm-update 2019-05-28 15:12:21.000000000 +0200 -+++ openssl-1.1.1c/crypto/sha/asm/sha1-armv8.pl 2019-11-20 11:36:22.390506137 +0100 +diff -up openssl-1.1.1i/crypto/sha/asm/sha1-armv8.pl.arm-update openssl-1.1.1i/crypto/sha/asm/sha1-armv8.pl +--- openssl-1.1.1i/crypto/sha/asm/sha1-armv8.pl.arm-update 2020-12-08 14:20:59.000000000 +0100 ++++ openssl-1.1.1i/crypto/sha/asm/sha1-armv8.pl 2020-12-09 10:37:38.408558954 +0100 @@ -27,6 +27,7 @@ # X-Gene 8.80 (+200%) # Mongoose 2.05 6.50 (+160%) @@ -1421,9 +3693,9 @@ diff -up openssl-1.1.1c/crypto/sha/asm/sha1-armv8.pl.arm-update openssl-1.1.1c/c # # (*) Software results are presented mostly for reference purposes. 
# (**) Keep in mind that Denver relies on binary translation, which -diff -up openssl-1.1.1c/crypto/sha/asm/sha512-armv8.pl.arm-update openssl-1.1.1c/crypto/sha/asm/sha512-armv8.pl ---- openssl-1.1.1c/crypto/sha/asm/sha512-armv8.pl.arm-update 2019-05-28 15:12:21.000000000 +0200 -+++ openssl-1.1.1c/crypto/sha/asm/sha512-armv8.pl 2019-11-20 11:36:22.390506137 +0100 +diff -up openssl-1.1.1i/crypto/sha/asm/sha512-armv8.pl.arm-update openssl-1.1.1i/crypto/sha/asm/sha512-armv8.pl +--- openssl-1.1.1i/crypto/sha/asm/sha512-armv8.pl.arm-update 2020-12-08 14:20:59.000000000 +0100 ++++ openssl-1.1.1i/crypto/sha/asm/sha512-armv8.pl 2020-12-09 10:37:38.408558954 +0100 @@ -28,6 +28,7 @@ # X-Gene 20.0 (+100%) 12.8 (+300%(***)) # Mongoose 2.36 13.0 (+50%) 8.36 (+33%) diff --git a/openssl-1.1.1-fips-post-rand.patch b/openssl-1.1.1-fips-post-rand.patch index 18a01fe..027dc55 100644 --- a/openssl-1.1.1-fips-post-rand.patch +++ b/openssl-1.1.1-fips-post-rand.patch @@ -1,6 +1,6 @@ -diff -up openssl-1.1.1e/crypto/fips/fips.c.fips-post-rand openssl-1.1.1e/crypto/fips/fips.c ---- openssl-1.1.1e/crypto/fips/fips.c.fips-post-rand 2020-03-17 18:06:16.822418854 +0100 -+++ openssl-1.1.1e/crypto/fips/fips.c 2020-03-17 18:06:16.861418172 +0100 +diff -up openssl-1.1.1i/crypto/fips/fips.c.fips-post-rand openssl-1.1.1i/crypto/fips/fips.c +--- openssl-1.1.1i/crypto/fips/fips.c.fips-post-rand 2020-12-09 10:26:41.634106328 +0100 ++++ openssl-1.1.1i/crypto/fips/fips.c 2020-12-09 10:26:41.652106475 +0100 @@ -68,6 +68,7 @@ # include @@ -51,10 +51,10 @@ diff -up openssl-1.1.1e/crypto/fips/fips.c.fips-post-rand openssl-1.1.1e/crypto/ ret = 1; goto end; } -diff -up openssl-1.1.1e/crypto/rand/drbg_lib.c.fips-post-rand openssl-1.1.1e/crypto/rand/drbg_lib.c ---- openssl-1.1.1e/crypto/rand/drbg_lib.c.fips-post-rand 2020-03-17 15:31:17.000000000 +0100 -+++ openssl-1.1.1e/crypto/rand/drbg_lib.c 2020-03-17 18:07:35.305045521 +0100 -@@ -1009,6 +1009,20 @@ size_t rand_drbg_seedlen(RAND_DRBG *drbg +diff -up openssl-1.1.1i/crypto/rand/drbg_lib.c.fips-post-rand openssl-1.1.1i/crypto/rand/drbg_lib.c +--- openssl-1.1.1i/crypto/rand/drbg_lib.c.fips-post-rand 2020-12-08 14:20:59.000000000 +0100 ++++ openssl-1.1.1i/crypto/rand/drbg_lib.c 2020-12-09 10:26:41.652106475 +0100 +@@ -1005,6 +1005,20 @@ size_t rand_drbg_seedlen(RAND_DRBG *drbg return min_entropy > min_entropylen ? 
min_entropy : min_entropylen; } @@ -75,9 +75,9 @@ diff -up openssl-1.1.1e/crypto/rand/drbg_lib.c.fips-post-rand openssl-1.1.1e/cry /* Implements the default OpenSSL RAND_add() method */ static int drbg_add(const void *buf, int num, double randomness) { -diff -up openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1e/crypto/rand/rand_unix.c ---- openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand 2020-03-17 15:31:17.000000000 +0100 -+++ openssl-1.1.1e/crypto/rand/rand_unix.c 2020-03-17 18:09:01.503537189 +0100 +diff -up openssl-1.1.1i/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1i/crypto/rand/rand_unix.c +--- openssl-1.1.1i/crypto/rand/rand_unix.c.fips-post-rand 2020-12-08 14:20:59.000000000 +0100 ++++ openssl-1.1.1i/crypto/rand/rand_unix.c 2020-12-09 10:36:59.531221903 +0100 @@ -17,10 +17,12 @@ #include #include "rand_local.h" @@ -91,7 +91,7 @@ diff -up openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1e/cr # ifdef DEVRANDOM_WAIT # include # include -@@ -342,7 +344,7 @@ static ssize_t sysctl_random(char *buf, +@@ -344,7 +346,7 @@ static ssize_t sysctl_random(char *buf, * syscall_random(): Try to get random data using a system call * returns the number of bytes returned in buf, or < 0 on error. */ @@ -100,15 +100,15 @@ diff -up openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1e/cr { /* * Note: 'buflen' equals the size of the buffer which is used by the -@@ -364,6 +366,7 @@ static ssize_t syscall_random(void *buf, - * - Linux since 3.17 with glibc 2.25 - * - FreeBSD since 12.0 (1200061) +@@ -369,6 +371,7 @@ static ssize_t syscall_random(void *buf, + * Note: Sometimes getentropy() can be provided but not implemented + * internally. So we need to check errno for ENOSYS */ +# if 0 # if defined(__GNUC__) && __GNUC__>=2 && defined(__ELF__) && !defined(__hpux) extern int getentropy(void *buffer, size_t length) __attribute__((weak)); -@@ -385,10 +388,10 @@ static ssize_t syscall_random(void *buf, +@@ -394,10 +397,10 @@ static ssize_t syscall_random(void *buf, if (p_getentropy.p != NULL) return p_getentropy.f(buf, buflen) == 0 ? 
(ssize_t)buflen : -1; # endif @@ -122,7 +122,7 @@ diff -up openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1e/cr # elif (defined(__FreeBSD__) || defined(__NetBSD__)) && defined(KERN_ARND) return sysctl_random(buf, buflen); # else -@@ -623,6 +626,9 @@ size_t rand_pool_acquire_entropy(RAND_PO +@@ -633,6 +636,9 @@ size_t rand_pool_acquire_entropy(RAND_PO size_t entropy_available; # if defined(OPENSSL_RAND_SEED_GETRANDOM) @@ -132,7 +132,7 @@ diff -up openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1e/cr { size_t bytes_needed; unsigned char *buffer; -@@ -633,7 +639,7 @@ size_t rand_pool_acquire_entropy(RAND_PO +@@ -643,7 +649,7 @@ size_t rand_pool_acquire_entropy(RAND_PO bytes_needed = rand_pool_bytes_needed(pool, 1 /*entropy_factor*/); while (bytes_needed != 0 && attempts-- > 0) { buffer = rand_pool_add_begin(pool, bytes_needed); @@ -141,7 +141,7 @@ diff -up openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1e/cr if (bytes > 0) { rand_pool_add_end(pool, bytes, 8 * bytes); bytes_needed -= bytes; -@@ -668,8 +674,10 @@ size_t rand_pool_acquire_entropy(RAND_PO +@@ -678,8 +684,10 @@ size_t rand_pool_acquire_entropy(RAND_PO int attempts = 3; const int fd = get_random_device(i); @@ -153,7 +153,7 @@ diff -up openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1e/cr while (bytes_needed != 0 && attempts-- > 0) { buffer = rand_pool_add_begin(pool, bytes_needed); -@@ -732,7 +740,9 @@ size_t rand_pool_acquire_entropy(RAND_PO +@@ -742,7 +750,9 @@ size_t rand_pool_acquire_entropy(RAND_PO return entropy_available; } # endif @@ -164,9 +164,9 @@ diff -up openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1e/cr return rand_pool_entropy_available(pool); # endif } -diff -up openssl-1.1.1e/include/crypto/fips.h.fips-post-rand openssl-1.1.1e/include/crypto/fips.h ---- openssl-1.1.1e/include/crypto/fips.h.fips-post-rand 2020-03-17 18:06:16.831418696 +0100 -+++ openssl-1.1.1e/include/crypto/fips.h 2020-03-17 18:06:16.861418172 +0100 +diff -up openssl-1.1.1i/include/crypto/fips.h.fips-post-rand openssl-1.1.1i/include/crypto/fips.h +--- openssl-1.1.1i/include/crypto/fips.h.fips-post-rand 2020-12-09 10:26:41.639106369 +0100 ++++ openssl-1.1.1i/include/crypto/fips.h 2020-12-09 10:26:41.657106516 +0100 @@ -77,6 +77,8 @@ int FIPS_selftest_hmac(void); int FIPS_selftest_drbg(void); int FIPS_selftest_cmac(void); @@ -176,9 +176,9 @@ diff -up openssl-1.1.1e/include/crypto/fips.h.fips-post-rand openssl-1.1.1e/incl int fips_pkey_signature_test(EVP_PKEY *pkey, const unsigned char *tbs, int tbslen, const unsigned char *kat, -diff -up openssl-1.1.1e/include/crypto/rand.h.fips-post-rand openssl-1.1.1e/include/crypto/rand.h ---- openssl-1.1.1e/include/crypto/rand.h.fips-post-rand 2020-03-17 15:31:17.000000000 +0100 -+++ openssl-1.1.1e/include/crypto/rand.h 2020-03-17 18:07:35.303045555 +0100 +diff -up openssl-1.1.1i/include/crypto/rand.h.fips-post-rand openssl-1.1.1i/include/crypto/rand.h +--- openssl-1.1.1i/include/crypto/rand.h.fips-post-rand 2020-12-08 14:20:59.000000000 +0100 ++++ openssl-1.1.1i/include/crypto/rand.h 2020-12-09 10:26:41.657106516 +0100 @@ -24,6 +24,7 @@ typedef struct rand_pool_st RAND_POOL; diff --git a/openssl-1.1.1-version-override.patch b/openssl-1.1.1-version-override.patch index ff69bdb..727cc26 100644 --- a/openssl-1.1.1-version-override.patch +++ b/openssl-1.1.1-version-override.patch @@ -1,12 +1,12 @@ -diff -up openssl-1.1.1g/include/openssl/opensslv.h.version-override openssl-1.1.1g/include/openssl/opensslv.h ---- 
openssl-1.1.1g/include/openssl/opensslv.h.version-override 2020-04-23 13:29:37.802673513 +0200 -+++ openssl-1.1.1g/include/openssl/opensslv.h 2020-04-23 13:30:13.064008458 +0200 +diff -up openssl-1.1.1i/include/openssl/opensslv.h.version-override openssl-1.1.1i/include/openssl/opensslv.h +--- openssl-1.1.1i/include/openssl/opensslv.h.version-override 2020-12-09 10:25:12.042374409 +0100 ++++ openssl-1.1.1i/include/openssl/opensslv.h 2020-12-09 10:26:00.362769170 +0100 @@ -40,7 +40,7 @@ extern "C" { * major minor fix final patch/beta) */ - # define OPENSSL_VERSION_NUMBER 0x1010108fL --# define OPENSSL_VERSION_TEXT "OpenSSL 1.1.1h 22 Sep 2020" -+# define OPENSSL_VERSION_TEXT "OpenSSL 1.1.1h FIPS 22 Sep 2020" + # define OPENSSL_VERSION_NUMBER 0x1010109fL +-# define OPENSSL_VERSION_TEXT "OpenSSL 1.1.1i 8 Dec 2020" ++# define OPENSSL_VERSION_TEXT "OpenSSL 1.1.1i FIPS 8 Dec 2020" /*- * The macros below are to be used for shared library (.so, .dll, ...) diff --git a/openssl.spec b/openssl.spec index 3f6403c..2e26e49 100644 --- a/openssl.spec +++ b/openssl.spec @@ -21,7 +21,7 @@ Summary: Utilities from the general purpose cryptography library with TLS implementation Name: openssl -Version: 1.1.1h +Version: 1.1.1i Release: 1%{?dist} Epoch: 1 # We have to remove certain patented algorithms from the openssl source @@ -473,6 +473,9 @@ export LD_LIBRARY_PATH %ldconfig_scriptlets libs %changelog +* Wed Dec 9 2020 Tomáš Mráz 1.1.1i-1 +- Update to the 1.1.1i release fixing CVE-2020-1971 + * Mon Nov 9 2020 Sahana Prasad - 1.1.1h-1 - Upgrade to version 1.1.1.h diff --git a/sources b/sources index 2bae151..4c1e648 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (openssl-1.1.1h-hobbled.tar.xz) = 75e1d3f34f93462b97db92aa6538fd4f2f091ad717438e51d147508738be720d7d0bf4a9b1fda3a1943a4c13aae2a39da3add05f7da833b3c6de40a97bc97908 +SHA512 (openssl-1.1.1i-hobbled.tar.xz) = e131a05e88690a7be7c3d74cbb26620130498ced2ce3d7fd55979aab5ea736ec8b268ba92268bd5bc347989325a3950a066883007cb20c2dd9739fd1eafc513f
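In the fips-post-rand hunks above, syscall_random() gains a nonblock parameter and the weak-symbol getentropy() shortcut is compiled out, giving rand_pool_acquire_entropy() explicit control over whether seeding may block until the kernel CRNG is fully initialized. A rough C sketch of the resulting call shape on Linux, assuming getrandom(2) is available; the wrapper name is illustrative, not the actual rand_unix.c code:

#include <errno.h>
#include <sys/random.h>
#include <sys/types.h>

/*
 * Illustrative wrapper in the spirit of the patched syscall_random():
 * with nonblock set, getrandom(2) fails with EAGAIN instead of
 * blocking while the kernel CRNG is still uninitialized, so the
 * caller can retry later or fall back to a /dev/random descriptor.
 */
static ssize_t get_kernel_random(void *buf, size_t buflen, int nonblock)
{
    ssize_t n = getrandom(buf, buflen, nonblock ? GRND_NONBLOCK : 0);
    if (n < 0 && errno == ENOSYS)
        return -1;  /* no getrandom(2): caller must use another source */
    return n;
}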