aarch64: Drop strchrnul.S revert, apply fix from Richard Earnshaw.

2014-12-10 12:12:56 -05:00 · 2014-12-10 12:12:56 -05:00 · fd8c9e7125
commit fd8c9e7125
parent c13214a08b
3 changed files with 23 additions and 145 deletions
--- a/glibc-aarch64-fix-strchrnul-clobbering-v15.patch
+++ b/glibc-aarch64-fix-strchrnul-clobbering-v15.patch
@ -0,0 +1,19 @@
+commit aa76a5c7010e98c737d79f37aa6ae668f60f7a00
+Author: Richard Earnshaw <rearnsha@arm.com>
+Date:   Wed Dec 10 09:54:09 2014 +0000
+
+    [AArch64] Fix strchrnul clobbering v15
+
+diff --git a/sysdeps/aarch64/strchrnul.S b/sysdeps/aarch64/strchrnul.S
+index b98c2e9..57835d8 100644
+--- a/sysdeps/aarch64/strchrnul.S
+++ b/sysdeps/aarch64/strchrnul.S
+@@ -46,7 +46,7 @@
+ #define vhas_nul2	v4
+ #define vhas_chr1	v5
+ #define vhas_chr2	v6
+-#define vrepmask	v15
++#define vrepmask	v7
+ #define vend1		v16
+ 
+ /* Core algorithm.
--- a/glibc-aarch64-strchrnul-revert.patch
+++ b/glibc-aarch64-strchrnul-revert.patch
@ -1,144 +0,0 @@
-commit 3ec7d8a6b30659b34693730a374c0265a191c4ba
-Author: Kyle McMartin <kyle@mcmartin.ca>
-Date:   Wed Dec 3 12:26:23 2014 -0500
-
-    Revert "[AArch64] Add optimized strchrnul."
-    
-    This reverts commit be9d4ccc7fe62751db1a5fdcb31958561dbbda9a.
-
-diff --git a/sysdeps/aarch64/strchrnul.S b/sysdeps/aarch64/strchrnul.S
-deleted file mode 100644
-index b98c2e9..0000000
---- a/sysdeps/aarch64/strchrnul.S
-+++ /dev/null
-@@ -1,130 +0,0 @@
-/* strchrnul - find a character or nul in a string
-
-   Copyright (C) 2014 Free Software Foundation, Inc.
-
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64
- * Neon Available.
- */
-
-/* Arguments and results.  */
-#define srcin		x0
-#define chrin		w1
-
-#define result		x0
-
-/* Locals and temporaries.  */
-
-#define src		x2
-#define tmp1		x3
-#define wtmp2		w4
-#define tmp3		x5
-
-#define vrepchr		v0
-#define vdata1		v1
-#define vdata2		v2
-#define vhas_nul1	v3
-#define vhas_nul2	v4
-#define vhas_chr1	v5
-#define vhas_chr2	v6
-#define vrepmask	v15
-#define vend1		v16
-
-/* Core algorithm.
-
-   For each 32-byte hunk we calculate a 64-bit syndrome value, with
-   two bits per byte (LSB is always in bits 0 and 1, for both big
-   and little-endian systems).  For each tuple, bit 0 is set iff
-   the relevant byte matched the requested character or nul.  Since the
-   bits in the syndrome reflect exactly the order in which things occur
-   in the original string a count_trailing_zeros() operation will
-   identify exactly which byte is causing the termination.  */
-
-ENTRY (__strchrnul)
-	/* Magic constant 0x40100401 to allow us to identify which lane
-	   matches the termination condition.  */
-	mov	wtmp2, #0x0401
-	movk	wtmp2, #0x4010, lsl #16
-	dup	vrepchr.16b, chrin
-	bic	src, srcin, #31		/* Work with aligned 32-byte hunks.  */
-	dup	vrepmask.4s, wtmp2
-	ands	tmp1, srcin, #31
-	b.eq	L(loop)
-
-	/* Input string is not 32-byte aligned.  Rather than forcing
-	   the padding bytes to a safe value, we calculate the syndrome
-	   for all the bytes, but then mask off those bits of the
-	   syndrome that are related to the padding.  */
-	ld1	{vdata1.16b, vdata2.16b}, [src], #32
-	neg	tmp1, tmp1
-	cmeq	vhas_nul1.16b, vdata1.16b, #0
-	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
-	cmeq	vhas_nul2.16b, vdata2.16b, #0
-	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
-	orr	vhas_chr1.16b, vhas_chr1.16b, vhas_nul1.16b
-	orr	vhas_chr2.16b, vhas_chr2.16b, vhas_nul2.16b
-	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
-	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
-	lsl	tmp1, tmp1, #1
-	addp	vend1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
-	mov	tmp3, #~0
-	addp	vend1.16b, vend1.16b, vend1.16b		// 128->64
-	lsr	tmp1, tmp3, tmp1
-
-	mov	tmp3, vend1.2d[0]
-	bic	tmp1, tmp3, tmp1	// Mask padding bits.
-	cbnz	tmp1, L(tail)
-
-L(loop):
-	ld1	{vdata1.16b, vdata2.16b}, [src], #32
-	cmeq	vhas_nul1.16b, vdata1.16b, #0
-	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
-	cmeq	vhas_nul2.16b, vdata2.16b, #0
-	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
-	/* Use a fast check for the termination condition.  */
-	orr	vhas_chr1.16b, vhas_nul1.16b, vhas_chr1.16b
-	orr	vhas_chr2.16b, vhas_nul2.16b, vhas_chr2.16b
-	orr	vend1.16b, vhas_chr1.16b, vhas_chr2.16b
-	addp	vend1.2d, vend1.2d, vend1.2d
-	mov	tmp1, vend1.2d[0]
-	cbz	tmp1, L(loop)
-
-	/* Termination condition found.  Now need to establish exactly why
-	   we terminated.  */
-	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
-	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
-	addp	vend1.16b, vhas_chr1.16b, vhas_chr2.16b		// 256->128
-	addp	vend1.16b, vend1.16b, vend1.16b		// 128->64
-
-	mov	tmp1, vend1.2d[0]
-L(tail):
-	/* Count the trailing zeros, by bit reversing...  */
-	rbit	tmp1, tmp1
-	/* Re-bias source.  */
-	sub	src, src, #32
-	clz	tmp1, tmp1	/* ... and counting the leading zeros.  */
-	/* tmp1 is twice the offset into the fragment.  */
-	add	result, src, tmp1, lsr #1
-	ret
-
-END(__strchrnul)
-weak_alias (__strchrnul, strchrnul)
--- a/glibc.spec
+++ b/glibc.spec
@ -237,7 +237,7 @@ Patch2031: %{name}-rh1070416.patch

 Patch2033: %{name}-aarch64-tls-fixes.patch
 Patch2034: %{name}-aarch64-workaround-nzcv-clobber-in-tlsdesc.patch
-Patch2035: %{name}-aarch64-strchrnul-revert.patch
+Patch2035: %{name}-aarch64-fix-strchrnul-clobbering-v15.patch

 ##############################################################################
 # End of glibc patches.
@ -1745,6 +1745,9 @@ rm -f *.filelist*
 %endif

 %changelog
+* Wed Dec 10 2014 Kyle McMartin <kmcmarti@redhat.com>
+- aarch64: Drop strchrnul.S revert, apply fix from Richard Earnshaw.
+
 * Fri Dec 05 2014 Carlos O'Donell <carlos@redhat.com> - 2.20.90-13
 - Fix permission of debuginfo source files to allow multiarch
  debuginfo packages to be installed and upgraded.