243 lines
		
	
	
		
			4.1 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			243 lines
		
	
	
		
			4.1 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) IBM Corporation, 2011
 * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
 * Author - Balbir Singh <bsingharora@gmail.com>
 */
 | |
| #include <linux/export.h>
 | |
| #include <asm/ppc_asm.h>
 | |
| #include <asm/errno.h>
 | |
| 
 | |
/*
 * Fault-annotation macros.
 *
 * Each macro is written directly in front of a load or store, e.g.
 * "err1; ld r0,0(r4)".  It places a local numeric label at that
 * instruction and passes the label, together with a fixup label, to
 * EX_TABLE (provided by an included header, not defined in this file;
 * presumably it records an exception-table entry so that a fault on the
 * tagged instruction resumes at the fixup).
 *
 *   err1 -> .Ldo_err1  used where no stack frame is live
 *   err2 -> .Ldo_err2  used while the frame holding r14-r22 is live
 *   err3 -> .Ldone     used inside the byte-wise retry loop
 */
	.macro err1
100:
	EX_TABLE(100b,.Ldo_err1)
	.endm

	.macro err2
200:
	EX_TABLE(200b,.Ldo_err2)
	.endm

	.macro err3
300:
	EX_TABLE(300b,.Ldone)
	.endm
 | |
| 
 | |
/*
 * Fixup target for accesses tagged err2, i.e. faults taken while the
 * stack frame created in copy_mc_generic is still live.  Reload the
 * registers r14-r22 that were saved in that frame, pop the frame, then
 * fall through into the common fixup below.
 */
.Ldo_err2:
	ld	r22,STK_REG(R22)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
	addi	r1,r1,STACKFRAMESIZE

/*
 * Fixup target for accesses tagged err1 (and the tail of .Ldo_err2).
 *
 * On entry: r3 = destination cursor, r4 = source cursor,
 *           r7 = number of bytes still to be copied.
 *
 * Retries the remaining r7 bytes one at a time with CTR as the counter.
 * If every byte goes through, r3 = 0 is returned.  If one of the byte
 * accesses faults, err3 diverts to .Ldone.
 */
.Ldo_err1:
	/* Do a byte by byte copy to get the exact remaining size */
	mtctr	r7			/* CTR = bytes left to copy */
46:
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	46b			/* CTR drops only after the byte is stored */
	li	r3,0			/* whole remainder copied */
	blr

/*
 * Fixup target for err3: a byte access in the retry loop faulted.
 * CTR still counts the byte that failed, so it is returned in r3 as
 * the number of bytes that were not copied.
 */
.Ldone:
	mfctr	r3
	blr
 | |
| 
 | |
| 
 | |
/*
 * copy_mc_generic - copy a buffer, recovering from faults on the
 * tagged loads and stores.
 *
 * In:   r3 = destination address
 *       r4 = source address
 *       r5 = number of bytes to copy
 * Out:  r3 = 0 when every byte was copied.  When a tagged access
 *       faults, control moves to .Ldo_err1/.Ldo_err2 (elsewhere in this
 *       file), which retry byte-wise and return the count of bytes
 *       left uncopied.
 *
 * Register roles:
 *   r3, r4   running destination / source cursors
 *   r5       bytes left; its bits are moved into cr7 to select which
 *            power-of-two sized chunks to copy
 *   r7       bytes left, updated after every completed chunk; consumed
 *            only by the fixup code
 *   r0, r6, r8-r12, r14-r22
 *            data registers; r14-r22 are saved in a stack frame around
 *            the 128B and 64B stages and restored at label 7
 *
 * Invariant for the fixups: r3, r4 and r7 are advanced together and
 * only after the last store of a chunk.  Whichever load or store of a
 * chunk faults, the three registers therefore still describe the start
 * of that chunk and the byte-wise retry resumes from a consistent
 * source/destination pair.
 */
_GLOBAL(copy_mc_generic)
	mr	r7,r5			/* r7 = exact bytes left, for the fixups */
	cmpldi	r5,16
	blt	.Lshort_copy		/* fewer than 16 bytes: tail code only */

.Lcopy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6			/* cr7 = low 4 bits of -source */
	clrldi	r6,r6,(64-3)		/* r6 = bytes needed to reach 8B alignment */

	bf	cr7*4+3,1f		/* 1 byte needed? */
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)
	addi	r4,r4,1
	addi	r3,r3,1
	subi	r7,r7,1

1:	bf	cr7*4+2,2f		/* 2 bytes needed? */
err1;	lhz	r0,0(r4)
err1;	sth	r0,0(r3)
	addi	r4,r4,2
	addi	r3,r3,2
	subi	r7,r7,2

2:	bf	cr7*4+1,3f		/* 4 bytes needed? */
err1;	lwz	r0,0(r4)
err1;	stw	r0,0(r3)
	addi	r4,r4,4
	addi	r3,r3,4
	subi	r7,r7,4

3:	sub	r5,r5,r6		/* r5 = bytes left after alignment */
	cmpldi	r5,128			/* cr0 is consumed by the blt below */

	/*
	 * Create a stack frame and save r14-r22, which the 128B and 64B
	 * stages use as data registers.  LR is stored at
	 * STACKFRAMESIZE+16(r1), the conventional LR save slot; nothing in
	 * this routine reloads it.
	 */
	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

	blt	5f			/* fewer than 128 bytes left */
	srdi	r6,r5,7
	mtctr	r6			/* CTR = number of 128B blocks */

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
err2;	ld	r15,64(r4)
err2;	ld	r16,72(r4)
err2;	ld	r17,80(r4)
err2;	ld	r18,88(r4)
err2;	ld	r19,96(r4)
err2;	ld	r20,104(r4)
err2;	ld	r21,112(r4)
err2;	ld	r22,120(r4)
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
err2;	std	r15,64(r3)
err2;	std	r16,72(r3)
err2;	std	r17,80(r3)
err2;	std	r18,88(r3)
err2;	std	r19,96(r3)
err2;	std	r20,104(r3)
err2;	std	r21,112(r3)
err2;	std	r22,120(r3)
	/* Block fully stored: advance both cursors and the byte count together */
	addi	r4,r4,128
	addi	r3,r3,128
	subi	r7,r7,128
	bdnz	4b

	clrldi	r5,r5,(64-7)		/* r5 = bytes left modulo 128 */

	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6			/* cr7 bits 1/2/3 = 64B/32B/16B chunk present */

6:	bf	cr7*4+1,7f		/* 64B chunk? */
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
	addi	r4,r4,64
	addi	r3,r3,64
	subi	r7,r7,64

	/* Restore r14-r22 and pop the frame; err1 is used from here on */
7:	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 63B to go */
	bf	cr7*4+2,8f		/* 32B chunk? */
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r8,16(r4)
err1;	ld	r9,24(r4)
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r8,16(r3)
err1;	std	r9,24(r3)
	addi	r4,r4,32
	addi	r3,r3,32
	subi	r7,r7,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f		/* 16B chunk? */
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r4,r4,16
	addi	r3,r3,16
	subi	r7,r7,16

9:	clrldi	r5,r5,(64-4)		/* r5 = bytes left modulo 16 */

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5			/* cr7 bits 0/1/2/3 = 8B/4B/2B/1B chunk present */
	bf	cr7*4+0,12f
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r4,r4,8
	addi	r3,r3,8
	subi	r7,r7,8

12:	bf	cr7*4+1,13f
err1;	lwz	r0,0(r4)
err1;	stw	r0,0(r3)
	addi	r4,r4,4
	addi	r3,r3,4
	subi	r7,r7,4

13:	bf	cr7*4+2,14f
err1;	lhz	r0,0(r4)
err1;	sth	r0,0(r3)
	addi	r4,r4,2
	addi	r3,r3,2
	subi	r7,r7,2

	/* Last byte: nothing follows, so the cursors are not advanced */
14:	bf	cr7*4+3,15f
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)

15:	li	r3,0			/* success: 0 bytes left uncopied */
	blr

EXPORT_SYMBOL_GPL(copy_mc_generic);
 |