Auto-sync with upstream branch master

Upstream commit: dd32e1db386c77c61850a7cbd0c126b7b3c63ece - Revert "x86: Prepare `strrchr-evex` and `strrchr-evex512` for AVX10" (#2244688) - Revert "elf: Always call destructors in reverse constructor order (bug 30785)" - Revert "elf: Fix compile error with -DNDEBUG [BZ #18755]" - Add strlcat/wcslcat testcase. - Add strlcpy/wcslcpy testcase - Add LE DSCP code point from RFC-8622. - Add HWCAP2_MOPS from Linux 6.5 to AArch64 bits/hwcap.h - Add SCM_SECURITY, SCM_PIDFD to bits/socket.h - Add AT_HANDLE_FID from Linux 6.5 to bits/fcntl-linux.h - Avoid maybe-uninitialized warning in __kernel_rem_pio2 - Fix WAIT_FOR_DEBUGGER for container tests.
2023-10-18 11:41:45 +02:00 · 2023-10-18 11:41:45 +02:00 · 2e44c0f9f3
commit 2e44c0f9f3
parent dff2117453
3 changed files with 986 additions and 3 deletions
--- a/glibc-rh2244688.patch
+++ b/glibc-rh2244688.patch
@ -0,0 +1,967 @@
+Author: Florian Weimer <fweimer@redhat.com>
+Date:   Wed Oct 18 11:12:29 2023 +0200
+
+    Revert "x86: Prepare `strrchr-evex` and `strrchr-evex512` for AVX10"
+    
+    This reverts commit a3c50bf46a1ca6d9d2b7d879176d345abf95a9de.
+
+diff --git a/sysdeps/x86_64/multiarch/strrchr-evex-base.S b/sysdeps/x86_64/multiarch/strrchr-evex-base.S
+index cd6a0a870a02b9bd..58b2853ab69265e8 100644
+--- a/sysdeps/x86_64/multiarch/strrchr-evex-base.S
+++ b/sysdeps/x86_64/multiarch/strrchr-evex-base.S
+@@ -1,4 +1,4 @@
+-/* Implementation for strrchr using evex256 and evex512.
+/* Placeholder function, not used by any processor at the moment.
+    Copyright (C) 2022-2023 Free Software Foundation, Inc.
+    This file is part of the GNU C Library.
+ 
+@@ -16,6 +16,8 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+/* UNUSED. Exists purely as reference implementation.  */
+
+ #include <isa-level.h>
+ 
+ #if ISA_SHOULD_BUILD (4)
+@@ -23,351 +25,240 @@
+ # include <sysdep.h>
+ 
+ # ifdef USE_AS_WCSRCHR
+-#  if VEC_SIZE == 64
+-#   define RCX_M	cx
+-#   define KORTEST_M	kortestw
+-#  else
+-#   define RCX_M	cl
+-#   define KORTEST_M	kortestb
+-#  endif
+-
+-#  define SHIFT_REG	VRCX
+ #  define CHAR_SIZE	4
+-#  define VPCMP		vpcmpd
+-#  define VPMIN		vpminud
+-#  define VPCOMPRESS	vpcompressd
+-#  define VPTESTN	vptestnmd
+-#  define VPTEST	vptestmd
+-#  define VPBROADCAST	vpbroadcastd
+#  define VPBROADCAST   vpbroadcastd
+ #  define VPCMPEQ	vpcmpeqd
+-
+#  define VPMINU	vpminud
+#  define VPTESTN	vptestnmd
+ # else
+-#  define SHIFT_REG	VRDI
+ #  define CHAR_SIZE	1
+-#  define VPCMP		vpcmpb
+-#  define VPMIN		vpminub
+-#  define VPCOMPRESS	vpcompressb
+-#  define VPTESTN	vptestnmb
+-#  define VPTEST	vptestmb
+-#  define VPBROADCAST	vpbroadcastb
+#  define VPBROADCAST   vpbroadcastb
+ #  define VPCMPEQ	vpcmpeqb
+-
+-#  define RCX_M		VRCX
+-#  define KORTEST_M	KORTEST
+#  define VPMINU	vpminub
+#  define VPTESTN	vptestnmb
+ # endif
+ 
+-# define VMATCH		VMM(0)
+-# define CHAR_PER_VEC	(VEC_SIZE / CHAR_SIZE)
+ # define PAGE_SIZE	4096
+# define CHAR_PER_VEC	(VEC_SIZE / CHAR_SIZE)
+ 
+ 	.section SECTION(.text), "ax", @progbits
+-	/* Aligning entry point to 64 byte, provides better performance for
+-	   one vector length string.  */
+-ENTRY_P2ALIGN(STRRCHR, 6)
+-	movl	%edi, %eax
+-	/* Broadcast CHAR to VMATCH.  */
+-	VPBROADCAST %esi, %VMATCH
+/* Aligning entry point to 64 byte, provides better performance for
+   one vector length string.  */
+ENTRY_P2ALIGN (STRRCHR, 6)
+ 
+-	andl	$(PAGE_SIZE - 1), %eax
+-	cmpl	$(PAGE_SIZE - VEC_SIZE), %eax
+-	jg	L(cross_page_boundary)
+	/* Broadcast CHAR to VMM(0).  */
+	VPBROADCAST %esi, %VMM(0)
+	movl	%edi, %eax
+	sall	$20, %eax
+	cmpl	$((PAGE_SIZE - VEC_SIZE) << 20), %eax
+	ja	L(page_cross)
+ 
+L(page_cross_continue):
+	/* Compare [w]char for null, mask bit will be set for match.  */
+ 	VMOVU	(%rdi), %VMM(1)
+-	/* k0 has a 1 for each zero CHAR in YMM1.  */
+-	VPTESTN	%VMM(1), %VMM(1), %k0
+-	KMOV	%k0, %VGPR(rsi)
+-	test	%VGPR(rsi), %VGPR(rsi)
+-	jz	L(aligned_more)
+-	/* fallthrough: zero CHAR in first VEC.  */
+-L(page_cross_return):
+-	/* K1 has a 1 for each search CHAR match in VEC(1).  */
+-	VPCMPEQ	%VMATCH, %VMM(1), %k1
+-	KMOV	%k1, %VGPR(rax)
+-	/* Build mask up until first zero CHAR (used to mask of
+-	   potential search CHAR matches past the end of the string).  */
+-	blsmsk	%VGPR(rsi), %VGPR(rsi)
+-	/* Use `and` here to remove any out of bounds matches so we can
+-	   do a reverse scan on `rax` to find the last match.  */
+-	and	%VGPR(rsi), %VGPR(rax)
+-	jz	L(ret0)
+-	/* Get last match.  */
+-	bsr	%VGPR(rax), %VGPR(rax)
+
+	VPTESTN	%VMM(1), %VMM(1), %k1
+	KMOV	%k1, %VRCX
+	test	%VRCX, %VRCX
+	jz	L(align_more)
+
+	VPCMPEQ	%VMM(1), %VMM(0), %k0
+	KMOV	%k0, %VRAX
+	BLSMSK	%VRCX, %VRCX
+	and	%VRCX, %VRAX
+	jz	L(ret)
+
+	BSR	%VRAX, %VRAX
+ # ifdef USE_AS_WCSRCHR
+ 	leaq	(%rdi, %rax, CHAR_SIZE), %rax
+ # else
+-	addq	%rdi, %rax
+	add	%rdi, %rax
+ # endif
+-L(ret0):
+L(ret):
+ 	ret
+ 
+-	/* Returns for first vec x1/x2/x3 have hard coded backward
+-	   search path for earlier matches.  */
+-	.p2align 4,, 6
+-L(first_vec_x1):
+-	VPCMPEQ	%VMATCH, %VMM(2), %k1
+-	KMOV	%k1, %VGPR(rax)
+-	blsmsk	%VGPR(rcx), %VGPR(rcx)
+-	/* eax non-zero if search CHAR in range.  */
+-	and	%VGPR(rcx), %VGPR(rax)
+-	jnz	L(first_vec_x1_return)
+-
+-	/* fallthrough: no match in YMM2 then need to check for earlier
+-	   matches (in YMM1).  */
+-	.p2align 4,, 4
+-L(first_vec_x0_test):
+-	VPCMPEQ	%VMATCH, %VMM(1), %k1
+-	KMOV	%k1, %VGPR(rax)
+-	test	%VGPR(rax), %VGPR(rax)
+-	jz	L(ret1)
+-	bsr	%VGPR(rax), %VGPR(rax)
+L(vector_x2_end):
+	VPCMPEQ	%VMM(2), %VMM(0), %k2
+	KMOV	%k2, %VRAX
+	BLSMSK	%VRCX, %VRCX
+	and	%VRCX, %VRAX
+	jz	L(vector_x1_ret)
+
+	BSR	%VRAX, %VRAX
+	leaq	(VEC_SIZE)(%rdi, %rax, CHAR_SIZE), %rax
+	ret
+
+	/* Check the first vector at very last to look for match.  */
+L(vector_x1_ret):
+	VPCMPEQ %VMM(1), %VMM(0), %k2
+	KMOV	%k2, %VRAX
+	test	%VRAX, %VRAX
+	jz	L(ret)
+
+	BSR	%VRAX, %VRAX
+ # ifdef USE_AS_WCSRCHR
+ 	leaq	(%rsi, %rax, CHAR_SIZE), %rax
+ # else
+-	addq	%rsi, %rax
+	add	%rsi, %rax
+ # endif
+-L(ret1):
+ 	ret
+ 
+-	.p2align 4,, 10
+-L(first_vec_x3):
+-	VPCMPEQ	%VMATCH, %VMM(4), %k1
+-	KMOV	%k1, %VGPR(rax)
+-	blsmsk	%VGPR(rcx), %VGPR(rcx)
+-	/* If no search CHAR match in range check YMM1/YMM2/YMM3.  */
+-	and	%VGPR(rcx), %VGPR(rax)
+-	jz	L(first_vec_x1_or_x2)
+-	bsr	%VGPR(rax), %VGPR(rax)
+-	leaq	(VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax
+-	ret
+-	.p2align 4,, 4
+-
+-L(first_vec_x2):
+-	VPCMPEQ	%VMATCH, %VMM(3), %k1
+-	KMOV	%k1, %VGPR(rax)
+-	blsmsk	%VGPR(rcx), %VGPR(rcx)
+-	/* Check YMM3 for last match first. If no match try YMM2/YMM1.  */
+-	and	%VGPR(rcx), %VGPR(rax)
+-	jz	L(first_vec_x0_x1_test)
+-	bsr	%VGPR(rax), %VGPR(rax)
+-	leaq	(VEC_SIZE * 2)(%r8, %rax, CHAR_SIZE), %rax
+-	ret
+-
+-	.p2align 4,, 6
+-L(first_vec_x0_x1_test):
+-	VPCMPEQ	%VMATCH, %VMM(2), %k1
+-	KMOV	%k1, %VGPR(rax)
+-	/* Check YMM2 for last match first. If no match try YMM1.  */
+-	test	%VGPR(rax), %VGPR(rax)
+-	jz	L(first_vec_x0_test)
+-	.p2align 4,, 4
+-L(first_vec_x1_return):
+-	bsr	%VGPR(rax), %VGPR(rax)
+-	leaq	(VEC_SIZE)(%r8, %rax, CHAR_SIZE), %rax
+-	ret
+-
+-	.p2align 4,, 12
+-L(aligned_more):
+-L(page_cross_continue):
+-	/* Need to keep original pointer incase VEC(1) has last match.  */
+L(align_more):
+	/* Zero r8 to store match result.  */
+	xorl	%r8d, %r8d
+	/* Save pointer to first vector, in case no later match is found.  */
+ 	movq	%rdi, %rsi
+	/* Align pointer to vector size.  */
+ 	andq	$-VEC_SIZE, %rdi
+-
+-	VMOVU	VEC_SIZE(%rdi), %VMM(2)
+	/* Loop unroll for 2 vector loop.  */
+	VMOVA	(VEC_SIZE)(%rdi), %VMM(2)
+ 	VPTESTN	%VMM(2), %VMM(2), %k0
+ 	KMOV	%k0, %VRCX
+-	movq	%rdi, %r8
+ 	test	%VRCX, %VRCX
+-	jnz	L(first_vec_x1)
+-
+-	VMOVU	(VEC_SIZE * 2)(%rdi), %VMM(3)
+-	VPTESTN	%VMM(3), %VMM(3), %k0
+-	KMOV	%k0, %VRCX
+-
+-	test	%VRCX, %VRCX
+-	jnz	L(first_vec_x2)
+-
+-	VMOVU	(VEC_SIZE * 3)(%rdi), %VMM(4)
+-	VPTESTN	%VMM(4), %VMM(4), %k0
+-	KMOV	%k0, %VRCX
+-
+-	/* Intentionally use 64-bit here.  EVEX256 version needs 1-byte
+-	   padding for efficient nop before loop alignment.  */
+-	test	%rcx, %rcx
+-	jnz	L(first_vec_x3)
+	jnz	L(vector_x2_end)
+ 
+	/* Save pointer of second vector, in case if no match
+	   found.  */
+	movq	%rdi, %r9
+	/* Align address to VEC_SIZE * 2 for loop.  */
+ 	andq	$-(VEC_SIZE * 2), %rdi
+-	.p2align 4
+-L(first_aligned_loop):
+-	/* Preserve VEC(1), VEC(2), VEC(3), and VEC(4) until we can
+-	   gurantee they don't store a match.  */
+-	VMOVA	(VEC_SIZE * 4)(%rdi), %VMM(5)
+-	VMOVA	(VEC_SIZE * 5)(%rdi), %VMM(6)
+-
+-	VPCMP	$4, %VMM(5), %VMATCH, %k2
+-	VPCMP	$4, %VMM(6), %VMATCH, %k3{%k2}
+-
+-	VPMIN	%VMM(5), %VMM(6), %VMM(7)
+ 
+-	VPTEST	%VMM(7), %VMM(7), %k1{%k3}
+-	subq	$(VEC_SIZE * -2), %rdi
+-	KORTEST_M %k1, %k1
+-	jc	L(first_aligned_loop)
+	.p2align 4,,11
+L(loop):
+	/* 2 vector loop, as it provides better performance as compared
+	   to 4 vector loop.  */
+	VMOVA	(VEC_SIZE * 2)(%rdi), %VMM(3)
+	VMOVA	(VEC_SIZE * 3)(%rdi), %VMM(4)
+	VPCMPEQ	%VMM(3), %VMM(0), %k1
+	VPCMPEQ	%VMM(4), %VMM(0), %k2
+	VPMINU	%VMM(3), %VMM(4), %VMM(5)
+	VPTESTN	%VMM(5), %VMM(5), %k0
+	KOR	%k1, %k2, %k3
+	subq	$-(VEC_SIZE * 2), %rdi
+	/* If k0 and k3 zero, match and end of string not found.  */
+	KORTEST	%k0, %k3
+	jz	L(loop)
+
+	/* If k0 is non zero, end of string found.  */
+	KORTEST %k0, %k0
+	jnz	L(endloop)
+
+	lea	VEC_SIZE(%rdi), %r8
+	/* A match was found; it needs to be stored in r8 before the
+	   loop continues.  */
+	/* Check second vector first.  */
+	KMOV	%k2, %VRDX
+	test	%VRDX, %VRDX
+	jnz	L(loop_vec_x2_match)
+ 
+-	VPTESTN	%VMM(7), %VMM(7), %k1
+ 	KMOV	%k1, %VRDX
+-	test	%VRDX, %VRDX
+-	jz	L(second_aligned_loop_prep)
+	/* Match is in the first vector, so the address in r8 needs to
+	   be reduced by VEC_SIZE.  */
+	sub	$VEC_SIZE, %r8
+
+	/* If second vector doesn't have match, first vector must
+	   have match.  */
+L(loop_vec_x2_match):
+	BSR	%VRDX, %VRDX
+# ifdef USE_AS_WCSRCHR
+	sal	$2, %rdx
+# endif
+	add	%rdx, %r8
+	jmp	L(loop)
+ 
+-	KORTEST_M %k3, %k3
+-	jnc	L(return_first_aligned_loop)
+L(endloop):
+	/* Check if the string ends in the first loop vector.  */
+	VPTESTN	%VMM(3), %VMM(3), %k0
+	KMOV	%k0, %VRCX
+	test	%VRCX, %VRCX
+	jnz	L(loop_vector_x1_end)
+ 
+-	.p2align 4,, 6
+-L(first_vec_x1_or_x2_or_x3):
+-	VPCMPEQ	%VMM(4), %VMATCH, %k4
+-	KMOV	%k4, %VRAX
+	/* Check if it has match in first loop vector.  */
+	KMOV	%k1, %VRAX
+ 	test	%VRAX, %VRAX
+-	jz	L(first_vec_x1_or_x2)
+-	bsr	%VRAX, %VRAX
+-	leaq	(VEC_SIZE * 3)(%r8, %rax, CHAR_SIZE), %rax
+-	ret
+	jz	L(loop_vector_x2_end)
+ 
+-	.p2align 4,, 8
+-L(return_first_aligned_loop):
+-	VPTESTN	%VMM(5), %VMM(5), %k0
+	BSR	%VRAX, %VRAX
+	leaq	(%rdi, %rax, CHAR_SIZE), %r8
+
+	/* String must end in second loop vector.  */
+L(loop_vector_x2_end):
+	VPTESTN	%VMM(4), %VMM(4), %k0
+ 	KMOV	%k0, %VRCX
+-	blsmsk	%VRCX, %VRCX
+-	jnc	L(return_first_new_match_first)
+-	blsmsk	%VRDX, %VRDX
+-	VPCMPEQ	%VMM(6), %VMATCH, %k0
+-	KMOV	%k0, %VRAX
+-	addq	$VEC_SIZE, %rdi
+-	and	%VRDX, %VRAX
+-	jnz	L(return_first_new_match_ret)
+-	subq	$VEC_SIZE, %rdi
+-L(return_first_new_match_first):
+ 	KMOV	%k2, %VRAX
+-# ifdef USE_AS_WCSRCHR
+-	xorl	$((1 << CHAR_PER_VEC)- 1), %VRAX
+	BLSMSK	%VRCX, %VRCX
+	/* Check if it has match in second loop vector.  */
+ 	and	%VRCX, %VRAX
+-# else
+-	andn	%VRCX, %VRAX, %VRAX
+-# endif
+-	jz	L(first_vec_x1_or_x2_or_x3)
+-L(return_first_new_match_ret):
+-	bsr	%VRAX, %VRAX
+-	leaq	(VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
+-	ret
+	jz	L(check_last_match)
+ 
+-	.p2align 4,, 10
+-L(first_vec_x1_or_x2):
+-	VPCMPEQ	%VMM(3), %VMATCH, %k3
+-	KMOV	%k3, %VRAX
+-	test	%VRAX, %VRAX
+-	jz	L(first_vec_x0_x1_test)
+-	bsr	%VRAX, %VRAX
+-	leaq	(VEC_SIZE * 2)(%r8, %rax, CHAR_SIZE), %rax
+	BSR	%VRAX, %VRAX
+	leaq	(VEC_SIZE)(%rdi, %rax, CHAR_SIZE), %rax
+ 	ret
+ 
+-	.p2align 4
+-	/* We can throw away the work done for the first 4x checks here
+-	   as we have a later match. This is the 'fast' path persay.  */
+-L(second_aligned_loop_prep):
+-L(second_aligned_loop_set_furthest_match):
+-	movq	%rdi, %rsi
+-	VMOVA	%VMM(5), %VMM(7)
+-	VMOVA	%VMM(6), %VMM(8)
+-	.p2align 4
+-L(second_aligned_loop):
+-	VMOVU	(VEC_SIZE * 4)(%rdi), %VMM(5)
+-	VMOVU	(VEC_SIZE * 5)(%rdi), %VMM(6)
+-	VPCMP	$4, %VMM(5), %VMATCH, %k2
+-	VPCMP	$4, %VMM(6), %VMATCH, %k3{%k2}
+-
+-	VPMIN	%VMM(5), %VMM(6), %VMM(4)
+-
+-	VPTEST	%VMM(4), %VMM(4), %k1{%k3}
+-	subq	$(VEC_SIZE * -2), %rdi
+-	KMOV	%k1, %VRCX
+-	inc	%RCX_M
+-	jz	L(second_aligned_loop)
+-	VPTESTN	%VMM(4), %VMM(4), %k1
+-	KMOV	%k1, %VRDX
+-	test	%VRDX, %VRDX
+-	jz	L(second_aligned_loop_set_furthest_match)
+-
+-	KORTEST_M %k3, %k3
+-	jnc	L(return_new_match)
+-	/* branch here because there is a significant advantage interms
+-	   of output dependency chance in using edx.  */
+	/* String ends in the first loop vector.  */
+L(loop_vector_x1_end):
+	KMOV	%k1, %VRAX
+	BLSMSK	%VRCX, %VRCX
+	/* Check if it has match in first loop vector.  */
+	and	%VRCX, %VRAX
+	jz	L(check_last_match)
+ 
+-L(return_old_match):
+-	VPCMPEQ	%VMM(8), %VMATCH, %k0
+-	KMOV	%k0, %VRCX
+-	bsr	%VRCX, %VRCX
+-	jnz	L(return_old_match_ret)
+	BSR	%VRAX, %VRAX
+	leaq	(%rdi, %rax, CHAR_SIZE), %rax
+	ret
+ 
+-	VPCMPEQ	%VMM(7), %VMATCH, %k0
+-	KMOV	%k0, %VRCX
+-	bsr	%VRCX, %VRCX
+-	subq	$VEC_SIZE, %rsi
+-L(return_old_match_ret):
+-	leaq	(VEC_SIZE * 3)(%rsi, %rcx, CHAR_SIZE), %rax
+	/* No match in first and second loop vector.  */
+L(check_last_match):
+	/* Check if any match recorded in r8.  */
+	test	%r8, %r8
+	jz	L(vector_x2_ret)
+	movq	%r8, %rax
+ 	ret
+ 
+-L(return_new_match):
+-	VPTESTN	%VMM(5), %VMM(5), %k0
+-	KMOV	%k0, %VRCX
+-	blsmsk	%VRCX, %VRCX
+-	jnc	L(return_new_match_first)
+-	dec	%VRDX
+-	VPCMPEQ	%VMM(6), %VMATCH, %k0
+-	KMOV	%k0, %VRAX
+-	addq	$VEC_SIZE, %rdi
+-	and	%VRDX, %VRAX
+-	jnz	L(return_new_match_ret)
+-	subq	$VEC_SIZE, %rdi
+-L(return_new_match_first):
+	/* No match recorded in r8.  Check the second vector that was
+	   saved at the beginning.  */
+L(vector_x2_ret):
+	VPCMPEQ %VMM(2), %VMM(0), %k2
+ 	KMOV	%k2, %VRAX
+-# ifdef USE_AS_WCSRCHR
+-	xorl	$((1 << CHAR_PER_VEC)- 1), %VRAX
+-	and	%VRCX, %VRAX
+-# else
+-	andn	%VRCX, %VRAX, %VRAX
+-# endif
+-	jz	L(return_old_match)
+-L(return_new_match_ret):
+-	bsr	%VRAX, %VRAX
+-	leaq	(VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
+	test	%VRAX, %VRAX
+	jz	L(vector_x1_ret)
+
+	/* Match found in the second saved vector.  */
+	BSR	%VRAX, %VRAX
+	leaq	(VEC_SIZE)(%r9, %rax, CHAR_SIZE), %rax
+ 	ret
+ 
+-	.p2align 4,, 4
+-L(cross_page_boundary):
+-	xorq	%rdi, %rax
+-	mov	$-1, %VRDX
+-	VMOVU	(PAGE_SIZE - VEC_SIZE)(%rax), %VMM(6)
+-	VPTESTN	%VMM(6), %VMM(6), %k0
+-	KMOV	%k0, %VRSI
+L(page_cross):
+	mov	%rdi, %rax
+	movl	%edi, %ecx
+ 
+ # ifdef USE_AS_WCSRCHR
+-	movl	%edi, %ecx
+-	and	$(VEC_SIZE - 1), %ecx
+-	shrl	$2, %ecx
+	/* Calculate number of compare result bits to be skipped for
+	   wide string alignment adjustment.  */
+	andl	$(VEC_SIZE - 1), %ecx
+	sarl	$2, %ecx
+ # endif
+-	shlx	%SHIFT_REG, %VRDX, %VRDX
+-
+	/* ecx contains number of w[char] to be skipped as a result
+	   of address alignment.  */
+	andq    $-VEC_SIZE, %rax
+	VMOVA	(%rax), %VMM(1)
+	VPTESTN	%VMM(1), %VMM(1), %k1
+	KMOV	%k1, %VRAX
+	SHR     %cl, %VRAX
+	jz	L(page_cross_continue)
+	VPCMPEQ	%VMM(1), %VMM(0), %k0
+	KMOV	%k0, %VRDX
+	SHR     %cl, %VRDX
+	BLSMSK	%VRAX, %VRAX
+	and	%VRDX, %VRAX
+	jz	L(ret)
+	BSR	%VRAX, %VRAX
+ # ifdef USE_AS_WCSRCHR
+-	kmovw	%edx, %k1
+	leaq	(%rdi, %rax, CHAR_SIZE), %rax
+ # else
+-	KMOV	%VRDX, %k1
+	add	%rdi, %rax
+ # endif
+ 
+-	VPCOMPRESS %VMM(6), %VMM(1){%k1}{z}
+-	/* We could technically just jmp back after the vpcompress but
+-	   it doesn't save any 16-byte blocks.  */
+-	shrx	%SHIFT_REG, %VRSI, %VRSI
+-	test	%VRSI, %VRSI
+-	jnz	L(page_cross_return)
+-	jmp	L(page_cross_continue)
+-	/* 1-byte from cache line.  */
+-END(STRRCHR)
+	ret
+END (STRRCHR)
+ #endif
+diff --git a/sysdeps/x86_64/multiarch/strrchr-evex.S b/sysdeps/x86_64/multiarch/strrchr-evex.S
+index 3bf6a5101422e4d1..85e3b0119f5dc923 100644
+--- a/sysdeps/x86_64/multiarch/strrchr-evex.S
+++ b/sysdeps/x86_64/multiarch/strrchr-evex.S
+@@ -1,8 +1,394 @@
+/* strrchr/wcsrchr optimized with 256-bit EVEX instructions.
+   Copyright (C) 2021-2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (4)
+
+# include <sysdep.h>
+
+ # ifndef STRRCHR
+ #  define STRRCHR	__strrchr_evex
+ # endif
+ 
+-#include "x86-evex256-vecs.h"
+-#include "reg-macros.h"
+# include "x86-evex256-vecs.h"
+
+# ifdef USE_AS_WCSRCHR
+#  define SHIFT_REG	rsi
+#  define kunpck_2x	kunpckbw
+#  define kmov_2x	kmovd
+#  define maskz_2x	ecx
+#  define maskm_2x	eax
+#  define CHAR_SIZE	4
+#  define VPMIN	vpminud
+#  define VPTESTN	vptestnmd
+#  define VPTEST	vptestmd
+#  define VPBROADCAST	vpbroadcastd
+#  define VPCMPEQ	vpcmpeqd
+#  define VPCMP	vpcmpd
+
+#  define USE_WIDE_CHAR
+# else
+#  define SHIFT_REG	rdi
+#  define kunpck_2x	kunpckdq
+#  define kmov_2x	kmovq
+#  define maskz_2x	rcx
+#  define maskm_2x	rax
+
+#  define CHAR_SIZE	1
+#  define VPMIN	vpminub
+#  define VPTESTN	vptestnmb
+#  define VPTEST	vptestmb
+#  define VPBROADCAST	vpbroadcastb
+#  define VPCMPEQ	vpcmpeqb
+#  define VPCMP	vpcmpb
+# endif
+
+# include "reg-macros.h"
+
+# define VMATCH	VMM(0)
+# define CHAR_PER_VEC	(VEC_SIZE / CHAR_SIZE)
+# define PAGE_SIZE	4096
+
+	.section SECTION(.text), "ax", @progbits
+ENTRY_P2ALIGN(STRRCHR, 6)
+	movl	%edi, %eax
+	/* Broadcast CHAR to VMATCH.  */
+	VPBROADCAST %esi, %VMATCH
+
+	andl	$(PAGE_SIZE - 1), %eax
+	cmpl	$(PAGE_SIZE - VEC_SIZE), %eax
+	jg	L(cross_page_boundary)
+L(page_cross_continue):
+	VMOVU	(%rdi), %VMM(1)
+	/* k0 has a 1 for each zero CHAR in VEC(1).  */
+	VPTESTN	%VMM(1), %VMM(1), %k0
+	KMOV	%k0, %VRSI
+	test	%VRSI, %VRSI
+	jz	L(aligned_more)
+	/* fallthrough: zero CHAR in first VEC.  */
+	/* K1 has a 1 for each search CHAR match in VEC(1).  */
+	VPCMPEQ	%VMATCH, %VMM(1), %k1
+	KMOV	%k1, %VRAX
+	/* Build mask up until first zero CHAR (used to mask off
+	   potential search CHAR matches past the end of the string).
+	 */
+	blsmsk	%VRSI, %VRSI
+	and	%VRSI, %VRAX
+	jz	L(ret0)
+	/* Get last match (the `and` removed any out of bounds matches).
+	 */
+	bsr	%VRAX, %VRAX
+# ifdef USE_AS_WCSRCHR
+	leaq	(%rdi, %rax, CHAR_SIZE), %rax
+# else
+	addq	%rdi, %rax
+# endif
+L(ret0):
+	ret
+
+	/* Returns for first vec x1/x2/x3 have hard coded backward
+	   search path for earlier matches.  */
+	.p2align 4,, 6
+L(first_vec_x1):
+	VPCMPEQ	%VMATCH, %VMM(2), %k1
+	KMOV	%k1, %VRAX
+	blsmsk	%VRCX, %VRCX
+	/* eax non-zero if search CHAR in range.  */
+	and	%VRCX, %VRAX
+	jnz	L(first_vec_x1_return)
+
+	/* fallthrough: no match in VEC(2) then need to check for
+	   earlier matches (in VEC(1)).  */
+	.p2align 4,, 4
+L(first_vec_x0_test):
+	VPCMPEQ	%VMATCH, %VMM(1), %k1
+	KMOV	%k1, %VRAX
+	test	%VRAX, %VRAX
+	jz	L(ret1)
+	bsr	%VRAX, %VRAX
+# ifdef USE_AS_WCSRCHR
+	leaq	(%rsi, %rax, CHAR_SIZE), %rax
+# else
+	addq	%rsi, %rax
+# endif
+L(ret1):
+	ret
+
+	.p2align 4,, 10
+L(first_vec_x1_or_x2):
+	VPCMPEQ	%VMM(3), %VMATCH, %k3
+	VPCMPEQ	%VMM(2), %VMATCH, %k2
+	/* K2 and K3 have 1 for any search CHAR match. Test if any
+	   matches between either of them. Otherwise check VEC(1).  */
+	KORTEST %k2, %k3
+	jz	L(first_vec_x0_test)
+
+	/* Guaranteed that VEC(2) and VEC(3) are within range so merge
+	   the two bitmasks then get last result.  */
+	kunpck_2x %k2, %k3, %k3
+	kmov_2x	%k3, %maskm_2x
+	bsr	%maskm_2x, %maskm_2x
+	leaq	(VEC_SIZE * 1)(%r8, %rax, CHAR_SIZE), %rax
+	ret
+
+	.p2align 4,, 7
+L(first_vec_x3):
+	VPCMPEQ	%VMATCH, %VMM(4), %k1
+	KMOV	%k1, %VRAX
+	blsmsk	%VRCX, %VRCX
+	/* If no search CHAR match in range check VEC(1)/VEC(2)/VEC(3).
+	 */
+	and	%VRCX, %VRAX
+	jz	L(first_vec_x1_or_x2)
+	bsr	%VRAX, %VRAX
+	leaq	(VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax
+	ret
+
+
+	.p2align 4,, 6
+L(first_vec_x0_x1_test):
+	VPCMPEQ	%VMATCH, %VMM(2), %k1
+	KMOV	%k1, %VRAX
+	/* Check VEC(2) for last match first. If no match try VEC(1).
+	 */
+	test	%VRAX, %VRAX
+	jz	L(first_vec_x0_test)
+	.p2align 4,, 4
+L(first_vec_x1_return):
+	bsr	%VRAX, %VRAX
+	leaq	(VEC_SIZE)(%rdi, %rax, CHAR_SIZE), %rax
+	ret
+
+
+	.p2align 4,, 10
+L(first_vec_x2):
+	VPCMPEQ	%VMATCH, %VMM(3), %k1
+	KMOV	%k1, %VRAX
+	blsmsk	%VRCX, %VRCX
+	/* Check VEC(3) for last match first. If no match try
+	   VEC(2)/VEC(1).  */
+	and	%VRCX, %VRAX
+	jz	L(first_vec_x0_x1_test)
+	bsr	%VRAX, %VRAX
+	leaq	(VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
+	ret
+
+
+	.p2align 4,, 12
+L(aligned_more):
+	/* Need to keep original pointer in case VEC(1) has last match.
+	 */
+	movq	%rdi, %rsi
+	andq	$-VEC_SIZE, %rdi
+
+	VMOVU	VEC_SIZE(%rdi), %VMM(2)
+	VPTESTN	%VMM(2), %VMM(2), %k0
+	KMOV	%k0, %VRCX
+
+	test	%VRCX, %VRCX
+	jnz	L(first_vec_x1)
+
+	VMOVU	(VEC_SIZE * 2)(%rdi), %VMM(3)
+	VPTESTN	%VMM(3), %VMM(3), %k0
+	KMOV	%k0, %VRCX
+
+	test	%VRCX, %VRCX
+	jnz	L(first_vec_x2)
+
+	VMOVU	(VEC_SIZE * 3)(%rdi), %VMM(4)
+	VPTESTN	%VMM(4), %VMM(4), %k0
+	KMOV	%k0, %VRCX
+	movq	%rdi, %r8
+	test	%VRCX, %VRCX
+	jnz	L(first_vec_x3)
+
+	andq	$-(VEC_SIZE * 2), %rdi
+	.p2align 4,, 10
+L(first_aligned_loop):
+	/* Preserve VEC(1), VEC(2), VEC(3), and VEC(4) until we can
+	   guarantee they don't store a match.  */
+	VMOVA	(VEC_SIZE * 4)(%rdi), %VMM(5)
+	VMOVA	(VEC_SIZE * 5)(%rdi), %VMM(6)
+
+	VPCMPEQ	%VMM(5), %VMATCH, %k2
+	vpxord	%VMM(6), %VMATCH, %VMM(7)
+
+	VPMIN	%VMM(5), %VMM(6), %VMM(8)
+	VPMIN	%VMM(8), %VMM(7), %VMM(7)
+
+	VPTESTN	%VMM(7), %VMM(7), %k1
+	subq	$(VEC_SIZE * -2), %rdi
+	KORTEST %k1, %k2
+	jz	L(first_aligned_loop)
+
+	VPCMPEQ	%VMM(6), %VMATCH, %k3
+	VPTESTN	%VMM(8), %VMM(8), %k1
+
+	/* If k1 is zero, then we found a CHAR match but no null-term.
+	   We can now safely throw out VEC1-4.  */
+	KTEST	%k1, %k1
+	jz	L(second_aligned_loop_prep)
+
+	KORTEST %k2, %k3
+	jnz	L(return_first_aligned_loop)
+
+
+	.p2align 4,, 6
+L(first_vec_x1_or_x2_or_x3):
+	VPCMPEQ	%VMM(4), %VMATCH, %k4
+	KMOV	%k4, %VRAX
+	bsr	%VRAX, %VRAX
+	jz	L(first_vec_x1_or_x2)
+	leaq	(VEC_SIZE * 3)(%r8, %rax, CHAR_SIZE), %rax
+	ret
+
+
+	.p2align 4,, 8
+L(return_first_aligned_loop):
+	VPTESTN	%VMM(5), %VMM(5), %k0
+
+	/* Combined results from VEC5/6.  */
+	kunpck_2x %k0, %k1, %k0
+	kmov_2x	%k0, %maskz_2x
+
+	blsmsk	%maskz_2x, %maskz_2x
+	kunpck_2x %k2, %k3, %k3
+	kmov_2x	%k3, %maskm_2x
+	and	%maskz_2x, %maskm_2x
+	jz	L(first_vec_x1_or_x2_or_x3)
+
+	bsr	%maskm_2x, %maskm_2x
+	leaq	(VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
+	ret
+
+	.p2align 4
+	/* We can throw away the work done for the first 4x checks here
+	   as we have a later match. This is the 'fast' path per se.
+	 */
+L(second_aligned_loop_prep):
+L(second_aligned_loop_set_furthest_match):
+	movq	%rdi, %rsi
+	/* Ideally we would save k2/k3 but `kmov/kunpck` take uops on
+	   port0 and have noticeable overhead in the loop.  */
+	VMOVA	%VMM(5), %VMM(7)
+	VMOVA	%VMM(6), %VMM(8)
+	.p2align 4
+L(second_aligned_loop):
+	VMOVU	(VEC_SIZE * 4)(%rdi), %VMM(5)
+	VMOVU	(VEC_SIZE * 5)(%rdi), %VMM(6)
+	VPCMPEQ	%VMM(5), %VMATCH, %k2
+	vpxord	%VMM(6), %VMATCH, %VMM(3)
+
+	VPMIN	%VMM(5), %VMM(6), %VMM(4)
+	VPMIN	%VMM(3), %VMM(4), %VMM(3)
+
+	VPTESTN	%VMM(3), %VMM(3), %k1
+	subq	$(VEC_SIZE * -2), %rdi
+	KORTEST %k1, %k2
+	jz	L(second_aligned_loop)
+	VPCMPEQ	%VMM(6), %VMATCH, %k3
+	VPTESTN	%VMM(4), %VMM(4), %k1
+	KTEST	%k1, %k1
+	jz	L(second_aligned_loop_set_furthest_match)
+
+	/* branch here because we know we have a match in VEC7/8 but
+	   might not in VEC5/6 so the latter is expected to be less
+	   likely.  */
+	KORTEST %k2, %k3
+	jnz	L(return_new_match)
+
+L(return_old_match):
+	VPCMPEQ	%VMM(8), %VMATCH, %k0
+	KMOV	%k0, %VRCX
+	bsr	%VRCX, %VRCX
+	jnz	L(return_old_match_ret)
+
+	VPCMPEQ	%VMM(7), %VMATCH, %k0
+	KMOV	%k0, %VRCX
+	bsr	%VRCX, %VRCX
+	subq	$VEC_SIZE, %rsi
+L(return_old_match_ret):
+	leaq	(VEC_SIZE * 3)(%rsi, %rcx, CHAR_SIZE), %rax
+	ret
+
+	.p2align 4,, 10
+L(return_new_match):
+	VPTESTN	%VMM(5), %VMM(5), %k0
+
+	/* Combined results from VEC5/6.  */
+	kunpck_2x %k0, %k1, %k0
+	kmov_2x	%k0, %maskz_2x
+
+	blsmsk	%maskz_2x, %maskz_2x
+	kunpck_2x %k2, %k3, %k3
+	kmov_2x	%k3, %maskm_2x
+
+	/* Match at end was out-of-bounds so use last known match.  */
+	and	%maskz_2x, %maskm_2x
+	jz	L(return_old_match)
+
+	bsr	%maskm_2x, %maskm_2x
+	leaq	(VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
+	ret
+
+L(cross_page_boundary):
+	/* eax contains all the page offset bits of src (rdi). `xor rdi,
+	   rax` sets pointer with all page offset bits cleared so
+	   offset of (PAGE_SIZE - VEC_SIZE) will get last aligned VEC
+	   before page cross (guaranteed to be safe to read). Doing this
+	   as opposed to `movq %rdi, %rax; andq $-VEC_SIZE, %rax` saves
+	   a bit of code size.  */
+	xorq	%rdi, %rax
+	VMOVU	(PAGE_SIZE - VEC_SIZE)(%rax), %VMM(1)
+	VPTESTN	%VMM(1), %VMM(1), %k0
+	KMOV	%k0, %VRCX
+
+	/* Shift out zero CHAR matches that are before the beginning of
+	   src (rdi).  */
+# ifdef USE_AS_WCSRCHR
+	movl	%edi, %esi
+	andl	$(VEC_SIZE - 1), %esi
+	shrl	$2, %esi
+# endif
+	shrx	%VGPR(SHIFT_REG), %VRCX, %VRCX
+
+	test	%VRCX, %VRCX
+	jz	L(page_cross_continue)
+ 
+-#include "strrchr-evex-base.S"
+	/* Found zero CHAR so need to test for search CHAR.  */
+	VPCMP	$0, %VMATCH, %VMM(1), %k1
+	KMOV	%k1, %VRAX
+	/* Shift out search CHAR matches that are before the beginning of
+	   src (rdi).  */
+	shrx	%VGPR(SHIFT_REG), %VRAX, %VRAX
+
+	/* Check if any search CHAR match in range.  */
+	blsmsk	%VRCX, %VRCX
+	and	%VRCX, %VRAX
+	jz	L(ret3)
+	bsr	%VRAX, %VRAX
+# ifdef USE_AS_WCSRCHR
+	leaq	(%rdi, %rax, CHAR_SIZE), %rax
+# else
+	addq	%rdi, %rax
+# endif
+L(ret3):
+	ret
+END(STRRCHR)
+#endif
+diff --git a/sysdeps/x86_64/multiarch/wcsrchr-evex.S b/sysdeps/x86_64/multiarch/wcsrchr-evex.S
+index a584cd3f430ba9d5..e5c5fe3bf28a5966 100644
+--- a/sysdeps/x86_64/multiarch/wcsrchr-evex.S
+++ b/sysdeps/x86_64/multiarch/wcsrchr-evex.S
+@@ -4,5 +4,4 @@
+ 
+ #define STRRCHR	WCSRCHR
+ #define USE_AS_WCSRCHR 1
+-#define USE_WIDE_CHAR 1
+ #include "strrchr-evex.S"
--- a/glibc.spec
+++ b/glibc.spec
@ -1,4 +1,4 @@
-%global glibcsrcdir glibc-2.38.9000-170-g69239bd7a2
+%global glibcsrcdir glibc-2.38.9000-180-gdd32e1db38
 %global glibcversion 2.38.9000
 # Pre-release tarballs are pulled in from git using a command that is
 # effectively:
@ -159,7 +159,7 @@ Version: %{glibcversion}
 # - It allows using the Release number without the %%dist tag in the dependency
 #   generator to make the generated requires interchangeable between Rawhide
 #   and ELN (.elnYY < .fcXX).
-%global baserelease 13
+%global baserelease 14
 Release: %{baserelease}%{?dist}

 # In general, GPLv2+ is used by programs, LGPLv2+ is used for
@ -230,6 +230,7 @@ Patch9: glibc-rh827510.patch
 Patch13: glibc-fedora-localedata-rh61908.patch
 Patch17: glibc-cs-path.patch
 Patch23: glibc-python3.patch
+Patch24: glibc-rh2244688.patch

 ##############################################################################
 # Continued list of core "glibc" package information:
@ -2198,6 +2199,21 @@ update_gconv_modules_cache ()
 %files -f compat-libpthread-nonshared.filelist -n compat-libpthread-nonshared

 %changelog
+* Wed Oct 18 2023 Florian Weimer <fweimer@redhat.com> - 2.38.9000-14
+- Revert "x86: Prepare `strrchr-evex` and `strrchr-evex512` for AVX10" (#2244688)
+- Auto-sync with upstream branch master,
+  commit dd32e1db386c77c61850a7cbd0c126b7b3c63ece:
+- Revert "elf: Always call destructors in reverse constructor order (bug 30785)"
+- Revert "elf: Fix compile error with -DNDEBUG [BZ #18755]"
+- Add strlcat/wcslcat testcase.
+- Add strlcpy/wcslcpy testcase
+- Add LE DSCP code point from RFC-8622.
+- Add HWCAP2_MOPS from Linux 6.5 to AArch64 bits/hwcap.h
+- Add SCM_SECURITY, SCM_PIDFD to bits/socket.h
+- Add AT_HANDLE_FID from Linux 6.5 to bits/fcntl-linux.h
+- Avoid maybe-uninitialized warning in __kernel_rem_pio2
+- Fix WAIT_FOR_DEBUGGER for container tests.
+
 * Thu Oct 12 2023 Florian Weimer <fweimer@redhat.com> - 2.38.9000-13
 - Drop glibc-disable-werror-tst-realloc.patch, GCC was fixed long ago.
 - Auto-sync with upstream branch master,
--- a/2
+++ b/2
@ -1 +1 @@
-SHA512 (glibc-2.38.9000-170-g69239bd7a2.tar.xz) = 4dfb7ca7631d77effa335e075e6648e6dccb7edeba5f5a32d26fd0b2e6e84809c3c79508bdfb38a04b0c85803dc7bcf3aed3aea0e93b6ddb6d5072e4e104eec3
+SHA512 (glibc-2.38.9000-180-gdd32e1db38.tar.xz) = de61ce6e63bd4d424076ad5b902014b67ba5ba1123e14a6cbe9fe0b6c486cf96d7bde62f09ab98c477735b0397356828b95d0aa8caa90dbb4fd234cbf63a6cb3