601650f878
* Tue May 31 2022 Arjun Shankar <arjun@redhat.com> - 2.34-35 - Sync with upstream branch release/2.34/master, commit ff450cdbdee0b8cb6b9d653d6d2fa892de29be31: - Fix deadlock when pthread_atfork handler calls pthread_atfork or dlclose - x86: Fallback {str|wcs}cmp RTM in the ncmp overflow case [BZ #29127] - string.h: fix __fortified_attr_access macro call [BZ #29162] - linux: Add a getauxval test [BZ #23293] - rtld: Use generic argv adjustment in ld.so [BZ #23293] - S390: Enable static PIE * Thu May 19 2022 Florian Weimer <fweimer@redhat.com> - 2.34-34 - Sync with upstream branch release/2.34/master, commit ede8d94d154157d269b18f3601440ac576c1f96a: - csu: Implement and use _dl_early_allocate during static startup - Linux: Introduce __brk_call for invoking the brk system call - Linux: Implement a useful version of _startup_fatal - ia64: Always define IA64_USE_NEW_STUB as a flag macro - Linux: Define MMAP_CALL_INTERNAL - i386: Honor I386_USE_SYSENTER for 6-argument Linux system calls - i386: Remove OPTIMIZE_FOR_GCC_5 from Linux libc-do-syscall.S - elf: Remove __libc_init_secure - Linux: Consolidate auxiliary vector parsing (redo) - Linux: Include <dl-auxv.h> in dl-sysdep.c only for SHARED - Revert "Linux: Consolidate auxiliary vector parsing" - Linux: Consolidate auxiliary vector parsing - Linux: Assume that NEED_DL_SYSINFO_DSO is always defined - Linux: Remove DL_FIND_ARG_COMPONENTS - Linux: Remove HAVE_AUX_SECURE, HAVE_AUX_XID, HAVE_AUX_PAGESIZE - elf: Merge dl-sysdep.c into the Linux version - elf: Remove unused NEED_DL_BASE_ADDR and _dl_base_addr - x86: Optimize {str|wcs}rchr-evex - x86: Optimize {str|wcs}rchr-avx2 - x86: Optimize {str|wcs}rchr-sse2 - x86: Cleanup page cross code in memcmp-avx2-movbe.S - x86: Remove memcmp-sse4.S - x86: Small improvements for wcslen - x86: Remove AVX str{n}casecmp - x86: Add EVEX optimized str{n}casecmp - x86: Add AVX2 optimized str{n}casecmp - x86: Optimize str{n}casecmp TOLOWER logic in strcmp-sse42.S - x86: Optimize 
str{n}casecmp TOLOWER logic in strcmp.S - x86: Remove strspn-sse2.S and use the generic implementation - x86: Remove strpbrk-sse2.S and use the generic implementation - x86: Remove strcspn-sse2.S and use the generic implementation - x86: Optimize strspn in strspn-c.c - x86: Optimize strcspn and strpbrk in strcspn-c.c - x86: Code cleanup in strchr-evex and comment justifying branch - x86: Code cleanup in strchr-avx2 and comment justifying branch - x86_64: Remove bcopy optimizations - x86-64: Remove bzero weak alias in SS2 memset - x86_64/multiarch: Sort sysdep_routines and put one entry per line - x86: Improve L to support L(XXX_SYMBOL (YYY, ZZZ)) - fortify: Ensure that __glibc_fortify condition is a constant [BZ #29141] * Thu May 12 2022 Florian Weimer <fweimer@redhat.com> - 2.34-33 - Sync with upstream branch release/2.34/master, commit 91c2e6c3db44297bf4cb3a2e3c40236c5b6a0b23: - dlfcn: Implement the RTLD_DI_PHDR request type for dlinfo - manual: Document the dlinfo function - x86: Fix fallback for wcsncmp_avx2 in strcmp-avx2.S [BZ #28896] - x86: Fix bug in strncmp-evex and strncmp-avx2 [BZ #28895] - x86: Set .text section in memset-vec-unaligned-erms - x86-64: Optimize bzero - x86: Remove SSSE3 instruction for broadcast in memset.S (SSE2 Only) - x86: Improve vec generation in memset-vec-unaligned-erms.S - x86-64: Fix strcmp-evex.S - x86-64: Fix strcmp-avx2.S - x86: Optimize strcmp-evex.S - x86: Optimize strcmp-avx2.S - manual: Clarify that abbreviations of long options are allowed - Add HWCAP2_AFP, HWCAP2_RPRES from Linux 5.17 to AArch64 bits/hwcap.h - aarch64: Add HWCAP2_ECV from Linux 5.16 - Add SOL_MPTCP, SOL_MCTP from Linux 5.16 to bits/socket.h - Update kernel version to 5.17 in tst-mman-consts.py - Update kernel version to 5.16 in tst-mman-consts.py - Update syscall lists for Linux 5.17 - Add ARPHRD_CAN, ARPHRD_MCTP to net/if_arp.h - Update kernel version to 5.15 in tst-mman-consts.py - Add PF_MCTP, AF_MCTP from Linux 5.15 to bits/socket.h Resolves: #2091541
165 lines
5.7 KiB
Diff
165 lines
5.7 KiB
Diff
commit 0dafa75e3c42994d0f23db62651d1802577272f2
|
|
Author: Noah Goldstein <goldstein.w.n@gmail.com>
|
|
Date: Wed Mar 23 16:57:26 2022 -0500
|
|
|
|
x86: Remove strcspn-sse2.S and use the generic implementation
|
|
|
|
The generic implementation is faster.
|
|
|
|
geometric_mean(N=20) of all benchmarks New / Original: .678
|
|
|
|
All string/memory tests pass.
|
|
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
|
|
|
|
(cherry picked from commit fe28e7d9d9535ebab4081d195c553b4fbf39d9ae)
|
|
|
|
diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.S b/sysdeps/x86_64/multiarch/strcspn-sse2.c
|
|
similarity index 89%
|
|
rename from sysdeps/x86_64/multiarch/strcspn-sse2.S
|
|
rename to sysdeps/x86_64/multiarch/strcspn-sse2.c
|
|
index 63b260a9ed265230..9bd3dac82d90b3a5 100644
|
|
--- a/sysdeps/x86_64/multiarch/strcspn-sse2.S
|
|
+++ b/sysdeps/x86_64/multiarch/strcspn-sse2.c
|
|
@@ -19,10 +19,10 @@
|
|
#if IS_IN (libc)
|
|
|
|
# include <sysdep.h>
|
|
-# define strcspn __strcspn_sse2
|
|
+# define STRCSPN __strcspn_sse2
|
|
|
|
# undef libc_hidden_builtin_def
|
|
-# define libc_hidden_builtin_def(strcspn)
|
|
+# define libc_hidden_builtin_def(STRCSPN)
|
|
#endif
|
|
|
|
-#include <sysdeps/x86_64/strcspn.S>
|
|
+#include <string/strcspn.c>
|
|
diff --git a/sysdeps/x86_64/strcspn.S b/sysdeps/x86_64/strcspn.S
|
|
deleted file mode 100644
|
|
index 6035a274c87bafb0..0000000000000000
|
|
--- a/sysdeps/x86_64/strcspn.S
|
|
+++ /dev/null
|
|
@@ -1,122 +0,0 @@
|
|
-/* strcspn (str, ss) -- Return the length of the initial segment of STR
|
|
- which contains no characters from SS.
|
|
- For AMD x86-64.
|
|
- Copyright (C) 1994-2021 Free Software Foundation, Inc.
|
|
- This file is part of the GNU C Library.
|
|
- Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>.
|
|
- Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>.
|
|
- Adopted for x86-64 by Andreas Jaeger <aj@suse.de>.
|
|
-
|
|
- The GNU C Library is free software; you can redistribute it and/or
|
|
- modify it under the terms of the GNU Lesser General Public
|
|
- License as published by the Free Software Foundation; either
|
|
- version 2.1 of the License, or (at your option) any later version.
|
|
-
|
|
- The GNU C Library is distributed in the hope that it will be useful,
|
|
- but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
- Lesser General Public License for more details.
|
|
-
|
|
- You should have received a copy of the GNU Lesser General Public
|
|
- License along with the GNU C Library; if not, see
|
|
- <https://www.gnu.org/licenses/>. */
|
|
-
|
|
-#include <sysdep.h>
|
|
-#include "asm-syntax.h"
|
|
-
|
|
- .text
|
|
-ENTRY (strcspn)
|
|
-
|
|
- movq %rdi, %rdx /* Save SRC. */
|
|
-
|
|
- /* First we create a table with flags for all possible characters.
|
|
- For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
|
|
- supported by the C string functions we have 256 characters.
|
|
- Before inserting marks for the stop characters we clear the whole
|
|
- table. */
|
|
- movq %rdi, %r8 /* Save value. */
|
|
- subq $256, %rsp /* Make space for 256 bytes. */
|
|
- cfi_adjust_cfa_offset(256)
|
|
- movl $32, %ecx /* 32*8 bytes = 256 bytes. */
|
|
- movq %rsp, %rdi
|
|
- xorl %eax, %eax /* We store 0s. */
|
|
- cld
|
|
- rep
|
|
- stosq
|
|
-
|
|
- movq %rsi, %rax /* Setup skipset. */
|
|
-
|
|
-/* For understanding the following code remember that %rcx == 0 now.
|
|
- Although all the following instruction only modify %cl we always
|
|
- have a correct zero-extended 64-bit value in %rcx. */
|
|
-
|
|
- .p2align 4
|
|
-L(2): movb (%rax), %cl /* get byte from skipset */
|
|
- testb %cl, %cl /* is NUL char? */
|
|
- jz L(1) /* yes => start compare loop */
|
|
- movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */
|
|
-
|
|
- movb 1(%rax), %cl /* get byte from skipset */
|
|
- testb $0xff, %cl /* is NUL char? */
|
|
- jz L(1) /* yes => start compare loop */
|
|
- movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */
|
|
-
|
|
- movb 2(%rax), %cl /* get byte from skipset */
|
|
- testb $0xff, %cl /* is NUL char? */
|
|
- jz L(1) /* yes => start compare loop */
|
|
- movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */
|
|
-
|
|
- movb 3(%rax), %cl /* get byte from skipset */
|
|
- addq $4, %rax /* increment skipset pointer */
|
|
- movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */
|
|
- testb $0xff, %cl /* is NUL char? */
|
|
- jnz L(2) /* no => process next dword from skipset */
|
|
-
|
|
-L(1): leaq -4(%rdx), %rax /* prepare loop */
|
|
-
|
|
- /* We use a neat trick for the following loop. Normally we would
|
|
- have to test for two termination conditions
|
|
- 1. a character in the skipset was found
|
|
- and
|
|
- 2. the end of the string was found
|
|
- But as a sign that the character is in the skipset we store its
|
|
- value in the table. But the value of NUL is NUL so the loop
|
|
- terminates for NUL in every case. */
|
|
-
|
|
- .p2align 4
|
|
-L(3): addq $4, %rax /* adjust pointer for full loop round */
|
|
-
|
|
- movb (%rax), %cl /* get byte from string */
|
|
- cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */
|
|
- je L(4) /* yes => return */
|
|
-
|
|
- movb 1(%rax), %cl /* get byte from string */
|
|
- cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */
|
|
- je L(5) /* yes => return */
|
|
-
|
|
- movb 2(%rax), %cl /* get byte from string */
|
|
- cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */
|
|
- jz L(6) /* yes => return */
|
|
-
|
|
- movb 3(%rax), %cl /* get byte from string */
|
|
- cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */
|
|
- jne L(3) /* no => start loop again */
|
|
-
|
|
- incq %rax /* adjust pointer */
|
|
-L(6): incq %rax
|
|
-L(5): incq %rax
|
|
-
|
|
-L(4): addq $256, %rsp /* remove skipset */
|
|
- cfi_adjust_cfa_offset(-256)
|
|
-#ifdef USE_AS_STRPBRK
|
|
- xorl %edx,%edx
|
|
- orb %cl, %cl /* was last character NUL? */
|
|
- cmovzq %rdx, %rax /* Yes: return NULL */
|
|
-#else
|
|
- subq %rdx, %rax /* we have to return the number of valid
|
|
- characters, so compute distance to first
|
|
- non-valid character */
|
|
-#endif
|
|
- ret
|
|
-END (strcspn)
|
|
-libc_hidden_builtin_def (strcspn)
|