257 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			257 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| /*
 | |
|  * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 | |
|  *
 | |
|  * This file is subject to the terms and conditions of the GNU General Public
 | |
|  * License.  See the file COPYING in the main directory of this archive
 | |
|  * for more details. No warranty for anything given at all.
 | |
|  */
 | |
| #include <linux/linkage.h>
 | |
| #include <asm/errno.h>
 | |
| #include <asm/asm.h>
 | |
| 
 | |
| /*
 | |
|  * Checksum copy with exception handling.
 | |
|  * On a faulting source or destination access this routine returns 0 in
 | |
|  * eax; it sets no error pointer and does not zero the destination itself.
 | |
|  *
 | |
|  * Input
 | |
|  * rdi  source
 | |
|  * rsi  destination
 | |
|  * edx  len (32bit)
 | |
|  *
 | |
|  * Output
 | |
|  * eax  sum folded to 32 bits; 0 if an access faulted.
 | |
|  *
 | |
|  * Wrappers need to take care of valid exception sum and zeroing.
 | |
|  * They also should align source or destination to 8 bytes.
 | |
|  */
 | |
| 
 | |
/*
 * Fault-annotation helpers for csum_partial_copy_generic.
 *
 * Each macro drops a numeric local label on the instruction that follows
 * its use and hands that label to _ASM_EXTABLE_UA together with .Lfault,
 * so that a fault on that single access ends up at the .Lfault exit of
 * the function below.  "source" is used in front of loads through %rdi,
 * "dest" in front of stores through %rsi.
 */
	.macro source
10:
	_ASM_EXTABLE_UA(10b, .Lfault)
	.endm

	.macro dest
20:
	_ASM_EXTABLE_UA(20b, .Lfault)
	.endm

/*
 * csum_partial_copy_generic - copy len bytes while summing them.
 *
 * In:   %rdi = source, %rsi = destination, %edx = len (32 bit)
 * Out:  %eax = sum folded to 32 bits, or 0 if an annotated access faulted
 *
 * The running sum is seeded with 0xffffffff.
 *
 * Register roles
 *   %rax   running sum (64 bit until .Lfold, 32 bit afterwards)
 *   %r9    constant zero; "adc %r9" absorbs a pending carry
 *   %rcx   remaining length; bit 63 is set by .Lodd when the destination
 *          started on an odd address
 *   %r10   data temporary in the main loop, then a copy of %rcx from
 *          .Lhandle_tail / .Lshort onwards (low bits = leftover length,
 *          bit 63 = rotate-by-8 flag)
 *   %r12   count of 64-byte blocks
 *   %rbx, %r8, %r11, %rdx, %r15, %r14, %r13   data temporaries
 *
 * %rbx and %r12-%r15 are spilled to a 5*8 byte frame on entry and
 * reloaded at .Lout, which every exit path (including .Lfault) uses.
 *
 * Flag discipline in the loops: the adc chains carry CF from one
 * iteration to the next.  Between the last adc and the loop branch only
 * dec (leaves CF alone, sets ZF), mov and lea (leave all flags alone) are
 * executed, so CF reaches the next adc and ZF from dec reaches the jnz.
 * Do not reorder these instructions or replace dec with sub.
 */
SYM_FUNC_START(csum_partial_copy_generic)
	subq	$5*8, %rsp
	movq	%rbx, 0*8(%rsp)
	movq	%r12, 1*8(%rsp)
	movq	%r14, 2*8(%rsp)
	movq	%r13, 3*8(%rsp)
	movq	%r15, 4*8(%rsp)

	movl	$-1, %eax		/* sum = 0xffffffff, upper half of rax = 0 */
	xorl	%r9d, %r9d		/* r9 = 0 for the carry folds */
	movl	%edx, %ecx		/* rcx = len, zero-extended */
	cmpl	$8, %ecx
	jb	.Lshort			/* < 8 bytes: word loop and odd byte only */

	testb	$7, %sil
	jne	.Lunaligned		/* destination is not 8-byte aligned */
.Laligned:
	movl	%ecx, %r12d		/* low 32 bits only: drops the bit 63 flag */

	shrq	$6, %r12		/* r12 = number of 64-byte blocks */
	jz	.Lhandle_tail		/* < 64 */

	clc

	/* main loop. copy and sum in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11:	temp3, rdx: temp4, r12 loopcnt */
	/* r10:	temp5, r15: temp6, r14 temp7, r13 temp8 */
	.p2align 4
.Lloop:
	source
	movq	(%rdi), %rbx
	source
	movq	8(%rdi), %r8
	source
	movq	16(%rdi), %r11
	source
	movq	24(%rdi), %rdx

	source
	movq	32(%rdi), %r10
	source
	movq	40(%rdi), %r15
	source
	movq	48(%rdi), %r14
	source
	movq	56(%rdi), %r13

30:
	/*
	 * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
	 * potentially unmapped kernel address.
	 */
	_ASM_EXTABLE(30b, 2f)
	prefetcht0 5*64(%rdi)
2:
	/* CF enters from clc (first pass) or from the previous iteration. */
	adcq	%rbx, %rax
	adcq	%r8, %rax
	adcq	%r11, %rax
	adcq	%rdx, %rax
	adcq	%r10, %rax
	adcq	%r15, %rax
	adcq	%r14, %rax
	adcq	%r13, %rax

	decl	%r12d			/* sets ZF for the jnz below, keeps CF */

	dest
	movq	%rbx, (%rsi)
	dest
	movq	%r8, 8(%rsi)
	dest
	movq	%r11, 16(%rsi)
	dest
	movq	%rdx, 24(%rsi)

	dest
	movq	%r10, 32(%rsi)
	dest
	movq	%r15, 40(%rsi)
	dest
	movq	%r14, 48(%rsi)
	dest
	movq	%r13, 56(%rsi)

	leaq	64(%rdi), %rdi		/* lea: advance without touching flags */
	leaq	64(%rsi), %rsi

	jnz	.Lloop

	adcq	%r9, %rax		/* fold the last carry into the sum */

	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx:	count, rcx.63: the end result needs to be rol8 */
	movq	%rcx, %r10		/* keep full length and bit 63 for later */
	andl	$63, %ecx
	shrl	$3, %ecx		/* ecx = leftover quadwords (0..7) */
	jz	.Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq	(%rdi), %rbx
	adcq	%rbx, %rax
	decl	%ecx
	dest
	movq	%rbx, (%rsi)
	leaq	8(%rsi), %rsi		/* preserve carry */
	leaq	8(%rdi), %rdi
	jnz	.Lloop_8
	adcq	%r9, %rax		/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl	%eax, %ebx		/* ebx = low half */
	shrq	$32, %rax		/* eax = high half */
	addl	%ebx, %eax
	adcl	%r9d, %eax		/* end-around carry */

	/* do last up to 6 bytes */
.Lhandle_7:
	movl	%r10d, %ecx
	andl	$7, %ecx
.L1:				/* .Lshort rejoins the common path here */
	shrl	$1, %ecx		/* ecx = leftover 16-bit words (0..3) */
	jz	.Lhandle_1
	xorl	%ebx, %ebx		/* clear bits 16..31 before the movw loads */
	clc
	.p2align 4
.Lloop_1:
	source
	movw	(%rdi), %bx
	adcl	%ebx, %eax
	decl	%ecx
	dest
	movw	%bx, (%rsi)
	leaq	2(%rdi), %rdi
	leaq	2(%rsi), %rsi
	jnz	.Lloop_1
	adcl	%r9d, %eax		/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testb	$1, %r10b		/* bit 0 of the leftover length */
	jz	.Lende
	xorl	%ebx, %ebx
	source
	movb	(%rdi), %bl
	dest
	movb	%bl, (%rsi)
	addl	%ebx, %eax
	adcl	%r9d, %eax		/* carry */

.Lende:
	testq	%r10, %r10
	js	.Lwas_odd		/* bit 63 set: .Lodd ran, rotate the result */
.Lout:
	/* Common exit: reload the registers spilled on entry. */
	movq	0*8(%rsp), %rbx
	movq	1*8(%rsp), %r12
	movq	2*8(%rsp), %r14
	movq	3*8(%rsp), %r13
	movq	4*8(%rsp), %r15
	addq	$5*8, %rsp
	RET

	/* len < 8: r10d = len (bit 63 clear), ecx still holds len for .L1 */
.Lshort:
	movl	%ecx, %r10d
	jmp	.L1

	/*
	 * Destination not 8-byte aligned and len >= 8: copy 1, 2 and/or 4
	 * bytes until %rsi is aligned, adding each piece to the 64-bit sum.
	 * At most 7 bytes are added to a sum whose upper half is still zero,
	 * so the plain addq cannot carry out of rax.
	 */
.Lunaligned:
	xorl	%ebx, %ebx		/* upper bits of rbx stay zero below */
	testb	$1, %sil
	jne	.Lodd
1:	testb	$2, %sil
	je	2f
	source
	movw	(%rdi), %bx
	dest
	movw	%bx, (%rsi)
	leaq	2(%rdi), %rdi
	subq	$2, %rcx
	leaq	2(%rsi), %rsi
	addq	%rbx, %rax
2:	testb	$4, %sil
	je	.Laligned
	source
	movl	(%rdi), %ebx
	dest
	movl	%ebx, (%rsi)
	leaq	4(%rdi), %rdi
	subq	$4, %rcx
	leaq	4(%rsi), %rsi
	addq	%rbx, %rax
	jmp	.Laligned

	/* Odd destination address: move one byte, add it shifted left by 8. */
.Lodd:
	source
	movb	(%rdi), %bl
	dest
	movb	%bl, (%rsi)
	leaq	1(%rdi), %rdi
	leaq	1(%rsi), %rsi
	/* decrement, set MSB */
	leaq	-1(%rcx, %rcx), %rcx	/* rcx = 2*len - 1 (bit 0 is 1) */
	rorq	$1, %rcx		/* rcx = len - 1 with bit 63 set */
	shll	$8, %ebx
	addq	%rbx, %rax
	jmp	1b

	/* Undo the byte shift introduced by .Lodd. */
.Lwas_odd:
	roll	$8, %eax
	jmp	.Lout

	/* Exception: just return 0 */
.Lfault:
	xorl	%eax, %eax
	jmp	.Lout
SYM_FUNC_END(csum_partial_copy_generic)
 |