ceph/0001-src-common-crc32c_intel_fast.patch

231 lines
6.8 KiB
Diff
Raw Normal View History

--- ceph-15.2.2/src/yasm-wrapper.orig 2020-03-09 12:57:37.603837466 -0400
+++ ceph-15.2.2/src/yasm-wrapper 2020-03-09 13:02:31.496796609 -0400
@@ -1,10 +1,11 @@
-#!/bin/sh -e
+#!/bin/sh
# libtool and yasm do not get along.
# filter out any crap that libtool feeds us that yasm does not understand.
#echo $0: got $*
new=""
touch=""
+object="" # output path captured from "-o <file>"; stays empty when -o is never passed
while [ -n "$*" ]; do
case "$1" in
-f )
@@ -29,6 +30,12 @@
touch="$1"
shift
;;
+ -o ) # intercept the output option instead of letting it fall into the catch-all arm
+ shift # drop "-o"; $1 is now the output path
+ object="$1" # remembered so the object can be relinked once yasm has run
+ new="$new -o $1" # still forwarded to yasm unchanged
+ shift
+ ;;
* )
new="$new $1"
shift
@@ -36,8 +43,16 @@
esac
done
-#echo $0: yasm $new
-yasm $new
+#echo ${0}: yasm ${new}
+yasm ${new} || exit $? # no "sh -e" in effect any more: propagate assembler failures explicitly

+# Relink the object so it is marked IBT/SHSTK (CET) capable and non-exec-stack; best effort, only when -o was given.
+# NOTE(review): "fast*asm" is a regex, not a glob; it never matches crc32c_intel_fast_asm.s, so every object is relinked.
+if [ -n "${object}" ] && ! echo "${new}" | grep -q -- "crc32c_intel_fast*asm\.s"; then
+ ld -r -z ibt -z shstk -z noexecstack -o "${object}.tmp" "${object}" &&
+ mv "${object}.tmp" "${object}"
+fi

[ -n "$touch" ] && touch $touch
--- ceph-15.2.2/src/common/crc32c_intel_fast_asm.s.orig 2020-05-26 08:34:32.226201974 -0400
+++ ceph-15.2.2/src/common/crc32c_intel_fast_asm.s 2020-05-26 17:19:20.327201974 -0400
@@ -1,5 +1,5 @@
;
-; Copyright 2012-2013 Intel Corporation All Rights Reserved.
+; Copyright 2012-2015 Intel Corporation All Rights Reserved.
; All rights reserved.
;
; http://opensource.org/licenses/BSD-3-Clause
@@ -59,16 +59,34 @@
xor rbx, rbx ;; rbx = crc1 = 0;
xor r10, r10 ;; r10 = crc2 = 0;
+ cmp len, %%bSize*3*2 ;; prefetching variant is used only when len > 2 * (3 * bSize)
+ jbe %%non_prefetch ;; unsigned compare: len <= 6 * bSize takes the plain loop
+
%assign i 0
%rep %%bSize/8 - 1
- crc32 rax, [bufptmp+i + 0*%%bSize] ;; update crc0
- crc32 rbx, [bufptmp+i + 1*%%bSize] ;; update crc1
- crc32 r10, [bufptmp+i + 2*%%bSize] ;; update crc2
+ %if i < %%bSize*3/4
+ prefetchnta [bufptmp+ %%bSize*3 + i*4] ;; non-temporal prefetch, 32-byte stride, over the 3*bSize bytes that follow the three blocks read below
+ %endif
+ crc32 rax, qword [bufptmp+i + 0*%%bSize] ;; update crc0
+ crc32 rbx, qword [bufptmp+i + 1*%%bSize] ;; update crc1
+ crc32 r10, qword [bufptmp+i + 2*%%bSize] ;; update crc2
%assign i (i+8)
%endrep
- crc32 rax, [bufptmp+i + 0*%%bSize] ;; update crc0
- crc32 rbx, [bufptmp+i + 1*%%bSize] ;; update crc1
-; SKIP ;crc32 r10, [bufptmp+i + 2*%%bSize] ;; update crc2
+ jmp %%next %+ %1 ;; skip the prefetch-free copy of the unrolled loop
+
+%%non_prefetch: ;; same unrolled loop, without the prefetches
+ %assign i 0
+ %rep %%bSize/8 - 1
+ crc32 rax, qword [bufptmp+i + 0*%%bSize] ;; update crc0
+ crc32 rbx, qword [bufptmp+i + 1*%%bSize] ;; update crc1
+ crc32 r10, qword [bufptmp+i + 2*%%bSize] ;; update crc2
+ %assign i (i+8)
+ %endrep
+
+%%next %+ %1: ;; both variants rejoin here; i == bSize-8 (last qword of each block)
+ crc32 rax, qword [bufptmp+i + 0*%%bSize] ;; update crc0
+ crc32 rbx, qword [bufptmp+i + 1*%%bSize] ;; update crc1
+; SKIP ;crc32 r10, qword [bufptmp+i + 2*%%bSize] ;; update crc2
; merge in crc0
movzx bufp_dw, al
@@ -180,12 +198,15 @@
%define crc_init_dw r8d
%endif
-
+ endbranch ;; presumably the CET/IBT landing pad (endbr64); the macro is defined outside this patch view — TODO confirm
push rdi
push rbx
mov rax, crc_init ;; rax = crc_init;
+ cmp len, 8 ;; short-buffer test, now performed before the alignment step
+ jb less_than_8 ;; len < 8 (unsigned): taken even when the pointer is already aligned
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 1) ALIGN: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -195,9 +216,6 @@
;; amount of the address
je proc_block ;; Skip if aligned
- cmp len, 8
- jb less_than_8
-
;;;; Calculate CRC of unaligned bytes of the buffer (if any) ;;;;
mov rbx, [bufptmp] ;; load a quadword from the buffer
add bufptmp, bufp ;; align buffer pointer for
@@ -233,7 +251,7 @@
jnc bit7 ;; jump to bit-6 if bit-7 == 0
%assign i 0
%rep 16
- crc32 rax, [bufptmp+i] ;; compute crc32 of 8-byte data
+ crc32 rax, qword [bufptmp+i] ;; fold the 8 bytes at bufptmp+i into the crc (operand size now explicit)
%assign i (i+8)
%endrep
je do_return ;; return if remaining data is zero
@@ -244,7 +262,7 @@
jnc bit6 ;; jump to bit-6 if bit-7 == 0
%assign i 0
%rep 8
- crc32 rax, [bufptmp+i] ;; compute crc32 of 8-byte data
+ crc32 rax, qword [bufptmp+i] ;; fold the 8 bytes at bufptmp+i into the crc (operand size now explicit)
%assign i (i+8)
%endrep
je do_return ;; return if remaining data is zero
@@ -254,7 +272,7 @@
jnc bit5 ;; jump to bit-5 if bit-6 == 0
%assign i 0
%rep 4
- crc32 rax, [bufptmp+i] ;; compute crc32 of 8-byte data
+ crc32 rax, qword [bufptmp+i] ;; fold the 8 bytes at bufptmp+i into the crc (operand size now explicit)
%assign i (i+8)
%endrep
je do_return ;; return if remaining data is zero
@@ -264,7 +282,7 @@
jnc bit4 ;; jump to bit-4 if bit-5 == 0
%assign i 0
%rep 2
- crc32 rax, [bufptmp+i] ;; compute crc32 of 8-byte data
+ crc32 rax, qword [bufptmp+i] ;; fold the 8 bytes at bufptmp+i into the crc (operand size now explicit)
%assign i (i+8)
%endrep
je do_return ;; return if remaining data is zero
@@ -272,11 +290,11 @@
bit4:
shl len_b, 1 ;; shift-out MSB (bit-4)
jnc bit3 ;; jump to bit-3 if bit-4 == 0
- crc32 rax, [bufptmp] ;; compute crc32 of 8-byte data
+ crc32 rax, qword [bufptmp] ;; fold the 8 bytes at bufptmp into the crc (operand size now explicit)
je do_return ;; return if remaining data is zero
add bufptmp, 8 ;; buf +=8; (next 8 bytes)
bit3:
- mov rbx, [bufptmp] ;; load a 8-bytes from the buffer:
+ mov rbx, qword [bufptmp] ;; load 8 bytes from the buffer; consumed piecewise from rbx below
shl len_b, 1 ;; shift-out MSB (bit-3)
jnc bit2 ;; jump to bit-2 if bit-3 == 0
crc32 eax, ebx ;; compute crc32 of 4-byte data
--- ceph-15.2.2/src/common/crc32c_intel_fast_zero_asm.s.orig 2020-05-26 08:34:32.226201974 -0400
+++ ceph-15.2.2/src/common/crc32c_intel_fast_zero_asm.s 2020-05-26 17:19:32.497201974 -0400
@@ -1,5 +1,5 @@
;
-; Copyright 2012-2013 Intel Corporation All Rights Reserved.
+; Copyright 2012-2015 Intel Corporation All Rights Reserved.
; All rights reserved.
;
; http://opensource.org/licenses/BSD-3-Clause
@@ -59,6 +59,19 @@
xor rbx, rbx ;; rbx = crc1 = 0;
xor r10, r10 ;; r10 = crc2 = 0;
+ cmp len, %%bSize*3*2 ;; len > 2 * (3 * bSize)?
+ jbe %%non_prefetch ;; NOTE(review): the two paths below are instruction-for-instruction identical (no prefetch is issued in this file) — confirm the split is wanted
+
+ %assign i 0
+ %rep %%bSize/8 - 1
+ crc32 rax, bufptmp ;; update crc0
+ crc32 rbx, bufptmp ;; update crc1
+ crc32 r10, bufptmp ;; update crc2
+ %assign i (i+8)
+ %endrep
+ jmp %%next %+ %1 ;; skip the second copy of the unrolled loop
+
+%%non_prefetch: ;; path taken when len <= 6 * bSize
%assign i 0
%rep %%bSize/8 - 1
crc32 rax, bufptmp ;; update crc0
@@ -66,6 +79,8 @@
crc32 r10, bufptmp ;; update crc2
%assign i (i+8)
%endrep
+
+%%next %+ %1: ;; both paths rejoin here for the final crc0/crc1 update
crc32 rax, bufptmp ;; update crc0
crc32 rbx, bufptmp ;; update crc1
; SKIP ;crc32 r10, bufptmp ;; update crc2
@@ -180,12 +195,15 @@
%define crc_init_dw r8d
%endif
-
+ endbranch
push rdi
push rbx
mov rax, crc_init ;; rax = crc_init;
+ cmp len, 8
+ jb less_than_8
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 1) ALIGN: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;