From 2e44c0f9f3d371c7bf44b6dcc1b609fe7a4c0cc2 Mon Sep 17 00:00:00 2001
From: Florian Weimer <fweimer@redhat.com>
Date: Wed, 18 Oct 2023 11:41:45 +0200
Subject: [PATCH] Auto-sync with upstream branch master

Upstream commit: dd32e1db386c77c61850a7cbd0c126b7b3c63ece

- Revert "x86: Prepare `strrchr-evex` and `strrchr-evex512` for AVX10" (#2244688)
- Revert "elf: Always call destructors in reverse constructor order (bug 30785)"
- Revert "elf: Fix compile error with -DNDEBUG [BZ #18755]"
- Add strlcat/wcslcat testcase.
- Add strlcpy/wcslcpy testcase
- Add LE DSCP code point from RFC-8622.
- Add HWCAP2_MOPS from Linux 6.5 to AArch64 bits/hwcap.h
- Add SCM_SECURITY, SCM_PIDFD to bits/socket.h
- Add AT_HANDLE_FID from Linux 6.5 to bits/fcntl-linux.h
- Avoid maybe-uninitialized warning in __kernel_rem_pio2
- Fix WAIT_FOR_DEBUGGER for container tests.
---
 glibc-rh2244688.patch | 967 ++++++++++++++++++++++++++++++++++++++++++
 glibc.spec            |  20 +-
 sources               |   2 +-
 3 files changed, 986 insertions(+), 3 deletions(-)
 create mode 100644 glibc-rh2244688.patch

diff --git a/glibc-rh2244688.patch b/glibc-rh2244688.patch
new file mode 100644
index 0000000..e2c5f37
--- /dev/null
+++ b/glibc-rh2244688.patch
@@ -0,0 +1,967 @@
+Author: Florian Weimer <fweimer@redhat.com>
+Date:   Wed Oct 18 11:12:29 2023 +0200
+
+    Revert "x86: Prepare `strrchr-evex` and `strrchr-evex512` for AVX10"
+
+    This reverts commit a3c50bf46a1ca6d9d2b7d879176d345abf95a9de.
+
+diff --git a/sysdeps/x86_64/multiarch/strrchr-evex-base.S b/sysdeps/x86_64/multiarch/strrchr-evex-base.S
+index cd6a0a870a02b9bd..58b2853ab69265e8 100644
+--- a/sysdeps/x86_64/multiarch/strrchr-evex-base.S
++++ b/sysdeps/x86_64/multiarch/strrchr-evex-base.S
+@@ -1,4 +1,4 @@
+-/* Implementation for strrchr using evex256 and evex512.
++/* Placeholder function, not used by any processor at the moment.
+    Copyright (C) 2022-2023 Free Software Foundation, Inc.
+    This file is part of the GNU C Library.
+
+@@ -16,6 +16,8 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+
++/* UNUSED. Exists purely as reference implementation. */
++
+ #include <isa-level.h>
+
+ #if ISA_SHOULD_BUILD (4)
+@@ -23,351 +25,240 @@
+ # include <sysdep.h>
+
+ # ifdef USE_AS_WCSRCHR
+-# if VEC_SIZE == 64
+-# define RCX_M cx
+-# define KORTEST_M kortestw
+-# else
+-# define RCX_M cl
+-# define KORTEST_M kortestb
+-# endif
+-
+-# define SHIFT_REG VRCX
+ # define CHAR_SIZE 4
+-# define VPCMP vpcmpd
+-# define VPMIN vpminud
+-# define VPCOMPRESS vpcompressd
+-# define VPTESTN vptestnmd
+-# define VPTEST vptestmd
+-# define VPBROADCAST vpbroadcastd
++# define VPBROADCAST vpbroadcastd
+ # define VPCMPEQ vpcmpeqd
+-
++# define VPMINU vpminud
++# define VPTESTN vptestnmd
+ # else
+-# define SHIFT_REG VRDI
+ # define CHAR_SIZE 1
+-# define VPCMP vpcmpb
+-# define VPMIN vpminub
+-# define VPCOMPRESS vpcompressb
+-# define VPTESTN vptestnmb
+-# define VPTEST vptestmb
+-# define VPBROADCAST vpbroadcastb
++# define VPBROADCAST vpbroadcastb
+ # define VPCMPEQ vpcmpeqb
+-
+-# define RCX_M VRCX
+-# define KORTEST_M KORTEST
++# define VPMINU vpminub
++# define VPTESTN vptestnmb
+ # endif
+
+-# define VMATCH VMM(0)
+-# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
+ # define PAGE_SIZE 4096
++# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
+
+ 	.section SECTION(.text), "ax", @progbits
+-	/* Aligning entry point to 64 byte, provides better performance for
+-	   one vector length string. */
+-ENTRY_P2ALIGN(STRRCHR, 6)
+-	movl	%edi, %eax
+-	/* Broadcast CHAR to VMATCH.  */
+-	VPBROADCAST %esi, %VMATCH
++/* Aligning entry point to 64 byte, provides better performance for
++   one vector length string. */
++ENTRY_P2ALIGN (STRRCHR, 6)
+
+-	andl	$(PAGE_SIZE - 1), %eax
+-	cmpl	$(PAGE_SIZE - VEC_SIZE), %eax
+-	jg	L(cross_page_boundary)
++	/* Broadcast CHAR to VMM(0). */
++	VPBROADCAST %esi, %VMM(0)
++	movl	%edi, %eax
++	sall	$20, %eax
++	cmpl	$((PAGE_SIZE - VEC_SIZE) << 20), %eax
++	ja	L(page_cross)
+
++L(page_cross_continue):
++	/* Compare [w]char for null, mask bit will be set for match. */
+ 	VMOVU	(%rdi), %VMM(1)
+-	/* k0 has a 1 for each zero CHAR in YMM1. */
+-	VPTESTN	%VMM(1), %VMM(1), %k0
+-	KMOV	%k0, %VGPR(rsi)
+-	test	%VGPR(rsi), %VGPR(rsi)
+-	jz	L(aligned_more)
+-	/* fallthrough: zero CHAR in first VEC. */
+-L(page_cross_return):
+-	/* K1 has a 1 for each search CHAR match in VEC(1). */
+-	VPCMPEQ	%VMATCH, %VMM(1), %k1
+-	KMOV	%k1, %VGPR(rax)
+-	/* Build mask up until first zero CHAR (used to mask of
+-	   potential search CHAR matches past the end of the string). */
+-	blsmsk	%VGPR(rsi), %VGPR(rsi)
+-	/* Use `and` here to remove any out of bounds matches so we can
+-	   do a reverse scan on `rax` to find the last match. */
+-	and	%VGPR(rsi), %VGPR(rax)
+-	jz	L(ret0)
+-	/* Get last match. */
+-	bsr	%VGPR(rax), %VGPR(rax)
++
++	VPTESTN	%VMM(1), %VMM(1), %k1
++	KMOV	%k1, %VRCX
++	test	%VRCX, %VRCX
++	jz	L(align_more)
++
++	VPCMPEQ	%VMM(1), %VMM(0), %k0
++	KMOV	%k0, %VRAX
++	BLSMSK	%VRCX, %VRCX
++	and	%VRCX, %VRAX
++	jz	L(ret)
++
++	BSR	%VRAX, %VRAX
+ # ifdef USE_AS_WCSRCHR
+ 	leaq	(%rdi, %rax, CHAR_SIZE), %rax
+ # else
+-	addq	%rdi, %rax
++	add	%rdi, %rax
+ # endif
+-L(ret0):
++L(ret):
+ 	ret
+
+-	/* Returns for first vec x1/x2/x3 have hard coded backward
+-	   search path for earlier matches. */
+-	.p2align 4,, 6
+-L(first_vec_x1):
+-	VPCMPEQ	%VMATCH, %VMM(2), %k1
+-	KMOV	%k1, %VGPR(rax)
+-	blsmsk	%VGPR(rcx), %VGPR(rcx)
+-	/* eax non-zero if search CHAR in range. */
+-	and	%VGPR(rcx), %VGPR(rax)
+-	jnz	L(first_vec_x1_return)
+-
+-	/* fallthrough: no match in YMM2 then need to check for earlier
+-	   matches (in YMM1). */
+-	.p2align 4,, 4
+-L(first_vec_x0_test):
+-	VPCMPEQ	%VMATCH, %VMM(1), %k1
+-	KMOV	%k1, %VGPR(rax)
+-	test	%VGPR(rax), %VGPR(rax)
+-	jz	L(ret1)
+-	bsr	%VGPR(rax), %VGPR(rax)
++L(vector_x2_end):
++	VPCMPEQ	%VMM(2), %VMM(0), %k2
++	KMOV	%k2, %VRAX
++	BLSMSK	%VRCX, %VRCX
++	and	%VRCX, %VRAX
++	jz	L(vector_x1_ret)
++
++	BSR	%VRAX, %VRAX
++	leaq	(VEC_SIZE)(%rdi, %rax, CHAR_SIZE), %rax
++	ret
++
++	/* Check the first vector at very last to look for match. */
++L(vector_x1_ret):
++	VPCMPEQ %VMM(1), %VMM(0), %k2
++	KMOV	%k2, %VRAX
++	test	%VRAX, %VRAX
++	jz	L(ret)
++
++	BSR	%VRAX, %VRAX
+ # ifdef USE_AS_WCSRCHR
+ 	leaq	(%rsi, %rax, CHAR_SIZE), %rax
+ # else
+-	addq	%rsi, %rax
++	add	%rsi, %rax
+ # endif
+-L(ret1):
+ 	ret
+
+-	.p2align 4,, 10
+-L(first_vec_x3):
+-	VPCMPEQ	%VMATCH, %VMM(4), %k1
+-	KMOV	%k1, %VGPR(rax)
+-	blsmsk	%VGPR(rcx), %VGPR(rcx)
+-	/* If no search CHAR match in range check YMM1/YMM2/YMM3. */
+-	and	%VGPR(rcx), %VGPR(rax)
+-	jz	L(first_vec_x1_or_x2)
+-	bsr	%VGPR(rax), %VGPR(rax)
+-	leaq	(VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax
+-	ret
+-	.p2align 4,, 4
+-
+-L(first_vec_x2):
+-	VPCMPEQ	%VMATCH, %VMM(3), %k1
+-	KMOV	%k1, %VGPR(rax)
+-	blsmsk	%VGPR(rcx), %VGPR(rcx)
+-	/* Check YMM3 for last match first. If no match try YMM2/YMM1.  */
+-	and	%VGPR(rcx), %VGPR(rax)
+-	jz	L(first_vec_x0_x1_test)
+-	bsr	%VGPR(rax), %VGPR(rax)
+-	leaq	(VEC_SIZE * 2)(%r8, %rax, CHAR_SIZE), %rax
+-	ret
+-
+-	.p2align 4,, 6
+-L(first_vec_x0_x1_test):
+-	VPCMPEQ	%VMATCH, %VMM(2), %k1
+-	KMOV	%k1, %VGPR(rax)
+-	/* Check YMM2 for last match first. If no match try YMM1. */
+-	test	%VGPR(rax), %VGPR(rax)
+-	jz	L(first_vec_x0_test)
+-	.p2align 4,, 4
+-L(first_vec_x1_return):
+-	bsr	%VGPR(rax), %VGPR(rax)
+-	leaq	(VEC_SIZE)(%r8, %rax, CHAR_SIZE), %rax
+-	ret
+-
+-	.p2align 4,, 12
+-L(aligned_more):
+-L(page_cross_continue):
+-	/* Need to keep original pointer incase VEC(1) has last match. */
++L(align_more):
++	/* Zero r8 to store match result. */
++	xorl	%r8d, %r8d
++	/* Save pointer of first vector, in case if no match found. */
+ 	movq	%rdi, %rsi
++	/* Align pointer to vector size. */
+ 	andq	$-VEC_SIZE, %rdi
+-
+-	VMOVU	VEC_SIZE(%rdi), %VMM(2)
++	/* Loop unroll for 2 vector loop. */
++	VMOVA	(VEC_SIZE)(%rdi), %VMM(2)
+ 	VPTESTN	%VMM(2), %VMM(2), %k0
+ 	KMOV	%k0, %VRCX
+-	movq	%rdi, %r8
+ 	test	%VRCX, %VRCX
+-	jnz	L(first_vec_x1)
+-
+-	VMOVU	(VEC_SIZE * 2)(%rdi), %VMM(3)
+-	VPTESTN	%VMM(3), %VMM(3), %k0
+-	KMOV	%k0, %VRCX
+-
+-	test	%VRCX, %VRCX
+-	jnz	L(first_vec_x2)
+-
+-	VMOVU	(VEC_SIZE * 3)(%rdi), %VMM(4)
+-	VPTESTN	%VMM(4), %VMM(4), %k0
+-	KMOV	%k0, %VRCX
+-
+-	/* Intentionally use 64-bit here. EVEX256 version needs 1-byte
+-	   padding for efficient nop before loop alignment. */
+-	test	%rcx, %rcx
+-	jnz	L(first_vec_x3)
++	jnz	L(vector_x2_end)
+
++	/* Save pointer of second vector, in case if no match
++	   found. */
++	movq	%rdi, %r9
++	/* Align address to VEC_SIZE * 2 for loop. */
+ 	andq	$-(VEC_SIZE * 2), %rdi
+-	.p2align 4
+-L(first_aligned_loop):
+-	/* Preserve VEC(1), VEC(2), VEC(3), and VEC(4) until we can
+-	   gurantee they don't store a match. */
+-	VMOVA	(VEC_SIZE * 4)(%rdi), %VMM(5)
+-	VMOVA	(VEC_SIZE * 5)(%rdi), %VMM(6)
+-
+-	VPCMP	$4, %VMM(5), %VMATCH, %k2
+-	VPCMP	$4, %VMM(6), %VMATCH, %k3{%k2}
+-
+-	VPMIN	%VMM(5), %VMM(6), %VMM(7)
+
+-	VPTEST	%VMM(7), %VMM(7), %k1{%k3}
+-	subq	$(VEC_SIZE * -2), %rdi
+-	KORTEST_M %k1, %k1
+-	jc	L(first_aligned_loop)
++	.p2align 4,,11
++L(loop):
++	/* 2 vector loop, as it provide better performance as compared
++	   to 4 vector loop. */
++	VMOVA	(VEC_SIZE * 2)(%rdi), %VMM(3)
++	VMOVA	(VEC_SIZE * 3)(%rdi), %VMM(4)
++	VPCMPEQ	%VMM(3), %VMM(0), %k1
++	VPCMPEQ	%VMM(4), %VMM(0), %k2
++	VPMINU	%VMM(3), %VMM(4), %VMM(5)
++	VPTESTN	%VMM(5), %VMM(5), %k0
++	KOR	%k1, %k2, %k3
++	subq	$-(VEC_SIZE * 2), %rdi
++	/* If k0 and k3 zero, match and end of string not found. */
++	KORTEST	%k0, %k3
++	jz	L(loop)
++
++	/* If k0 is non zero, end of string found. */
++	KORTEST %k0, %k0
++	jnz	L(endloop)
++
++	lea	VEC_SIZE(%rdi), %r8
++	/* A match found, it need to be stored in r8 before loop
++	   continue. */
++	/* Check second vector first. */
++	KMOV	%k2, %VRDX
++	test	%VRDX, %VRDX
++	jnz	L(loop_vec_x2_match)
+
+ 	KMOV	%k1, %VRDX
+-	test	%VRDX, %VRDX
+-	jz	L(second_aligned_loop_prep)
++	/* Match is in first vector, rdi offset need to be subtracted
++	   by VEC_SIZE. */
++	sub	$VEC_SIZE, %r8
++
++	/* If second vector doesn't have match, first vector must
++	   have match. */
++L(loop_vec_x2_match):
++	BSR	%VRDX, %VRDX
++# ifdef USE_AS_WCSRCHR
++	sal	$2, %rdx
++# endif
++	add	%rdx, %r8
++	jmp	L(loop)
+
+-	KORTEST_M %k3, %k3
+-	jnc	L(return_first_aligned_loop)
++L(endloop):
++	/* Check if string end in first loop vector.  */
++	VPTESTN	%VMM(3), %VMM(3), %k0
++	KMOV	%k0, %VRCX
++	test	%VRCX, %VRCX
++	jnz	L(loop_vector_x1_end)
+
+-	.p2align 4,, 6
+-L(first_vec_x1_or_x2_or_x3):
+-	VPCMPEQ	%VMM(4), %VMATCH, %k4
+-	KMOV	%k4, %VRAX
++	/* Check if it has match in first loop vector. */
++	KMOV	%k1, %VRAX
+ 	test	%VRAX, %VRAX
+-	jz	L(first_vec_x1_or_x2)
+-	bsr	%VRAX, %VRAX
+-	leaq	(VEC_SIZE * 3)(%r8, %rax, CHAR_SIZE), %rax
+-	ret
++	jz	L(loop_vector_x2_end)
+
+-	.p2align 4,, 8
+-L(return_first_aligned_loop):
+-	VPTESTN	%VMM(5), %VMM(5), %k0
++	BSR	%VRAX, %VRAX
++	leaq	(%rdi, %rax, CHAR_SIZE), %r8
++
++	/* String must end in second loop vector. */
++L(loop_vector_x2_end):
++	VPTESTN	%VMM(4), %VMM(4), %k0
+ 	KMOV	%k0, %VRCX
+-	blsmsk	%VRCX, %VRCX
+-	jnc	L(return_first_new_match_first)
+-	blsmsk	%VRDX, %VRDX
+-	VPCMPEQ	%VMM(6), %VMATCH, %k0
+-	KMOV	%k0, %VRAX
+-	addq	$VEC_SIZE, %rdi
+-	and	%VRDX, %VRAX
+-	jnz	L(return_first_new_match_ret)
+-	subq	$VEC_SIZE, %rdi
+-L(return_first_new_match_first):
+ 	KMOV	%k2, %VRAX
+-# ifdef USE_AS_WCSRCHR
+-	xorl	$((1 << CHAR_PER_VEC)- 1), %VRAX
++	BLSMSK	%VRCX, %VRCX
++	/* Check if it has match in second loop vector. */
+ 	and	%VRCX, %VRAX
+-# else
+-	andn	%VRCX, %VRAX, %VRAX
+-# endif
+-	jz	L(first_vec_x1_or_x2_or_x3)
+-L(return_first_new_match_ret):
+-	bsr	%VRAX, %VRAX
+-	leaq	(VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
+-	ret
++	jz	L(check_last_match)
+
+-	.p2align 4,, 10
+-L(first_vec_x1_or_x2):
+-	VPCMPEQ	%VMM(3), %VMATCH, %k3
+-	KMOV	%k3, %VRAX
+-	test	%VRAX, %VRAX
+-	jz	L(first_vec_x0_x1_test)
+-	bsr	%VRAX, %VRAX
+-	leaq	(VEC_SIZE * 2)(%r8, %rax, CHAR_SIZE), %rax
++	BSR	%VRAX, %VRAX
++	leaq	(VEC_SIZE)(%rdi, %rax, CHAR_SIZE), %rax
+ 	ret
+
+-	.p2align 4
+-	/* We can throw away the work done for the first 4x checks here
+-	   as we have a later match. This is the 'fast' path persay. */
+-L(second_aligned_loop_prep):
+-L(second_aligned_loop_set_furthest_match):
+-	movq	%rdi, %rsi
+-	VMOVA	%VMM(5), %VMM(7)
+-	VMOVA	%VMM(6), %VMM(8)
+-	.p2align 4
+-L(second_aligned_loop):
+-	VMOVU	(VEC_SIZE * 4)(%rdi), %VMM(5)
+-	VMOVU	(VEC_SIZE * 5)(%rdi), %VMM(6)
+-	VPCMP	$4, %VMM(5), %VMATCH, %k2
+-	VPCMP	$4, %VMM(6), %VMATCH, %k3{%k2}
+-
+-	VPMIN	%VMM(5), %VMM(6), %VMM(4)
+-
+-	VPTEST	%VMM(4), %VMM(4), %k1{%k3}
+-	subq	$(VEC_SIZE * -2), %rdi
+-	KMOV	%k1, %VRCX
+-	inc	%RCX_M
+-	jz	L(second_aligned_loop)
+-	VPTESTN	%VMM(4), %VMM(4), %k1
+-	KMOV	%k1, %VRDX
+-	test	%VRDX, %VRDX
+-	jz	L(second_aligned_loop_set_furthest_match)
+-
+-	KORTEST_M %k3, %k3
+-	jnc	L(return_new_match)
+-	/* branch here because there is a significant advantage interms
+-	   of output dependency chance in using edx. */
++	/* String end in first loop vector. */
++L(loop_vector_x1_end):
++	KMOV	%k1, %VRAX
++	BLSMSK	%VRCX, %VRCX
++	/* Check if it has match in second loop vector. */
++	and	%VRCX, %VRAX
++	jz	L(check_last_match)
+
+-L(return_old_match):
+-	VPCMPEQ	%VMM(8), %VMATCH, %k0
+-	KMOV	%k0, %VRCX
+-	bsr	%VRCX, %VRCX
+-	jnz	L(return_old_match_ret)
++	BSR	%VRAX, %VRAX
++	leaq	(%rdi, %rax, CHAR_SIZE), %rax
++	ret
+
+-	VPCMPEQ	%VMM(7), %VMATCH, %k0
+-	KMOV	%k0, %VRCX
+-	bsr	%VRCX, %VRCX
+-	subq	$VEC_SIZE, %rsi
+-L(return_old_match_ret):
+-	leaq	(VEC_SIZE * 3)(%rsi, %rcx, CHAR_SIZE), %rax
++	/* No match in first and second loop vector. */
++L(check_last_match):
++	/* Check if any match recorded in r8.  */
++	test	%r8, %r8
++	jz	L(vector_x2_ret)
++	movq	%r8, %rax
+ 	ret
+
+-L(return_new_match):
+-	VPTESTN	%VMM(5), %VMM(5), %k0
+-	KMOV	%k0, %VRCX
+-	blsmsk	%VRCX, %VRCX
+-	jnc	L(return_new_match_first)
+-	dec	%VRDX
+-	VPCMPEQ	%VMM(6), %VMATCH, %k0
+-	KMOV	%k0, %VRAX
+-	addq	$VEC_SIZE, %rdi
+-	and	%VRDX, %VRAX
+-	jnz	L(return_new_match_ret)
+-	subq	$VEC_SIZE, %rdi
+-L(return_new_match_first):
++	/* No match recorded in r8. Check the second saved vector
++	   in beginning. */
++L(vector_x2_ret):
++	VPCMPEQ %VMM(2), %VMM(0), %k2
+ 	KMOV	%k2, %VRAX
+-# ifdef USE_AS_WCSRCHR
+-	xorl	$((1 << CHAR_PER_VEC)- 1), %VRAX
+-	and	%VRCX, %VRAX
+-# else
+-	andn	%VRCX, %VRAX, %VRAX
+-# endif
+-	jz	L(return_old_match)
+-L(return_new_match_ret):
+-	bsr	%VRAX, %VRAX
+-	leaq	(VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
++	test	%VRAX, %VRAX
++	jz	L(vector_x1_ret)
++
++	/* Match found in the second saved vector. */
++	BSR	%VRAX, %VRAX
++	leaq	(VEC_SIZE)(%r9, %rax, CHAR_SIZE), %rax
+ 	ret
+
+-	.p2align 4,, 4
+-L(cross_page_boundary):
+-	xorq	%rdi, %rax
+-	mov	$-1, %VRDX
+-	VMOVU	(PAGE_SIZE - VEC_SIZE)(%rax), %VMM(6)
+-	VPTESTN	%VMM(6), %VMM(6), %k0
+-	KMOV	%k0, %VRSI
++L(page_cross):
++	mov	%rdi, %rax
++	movl	%edi, %ecx
+
+ # ifdef USE_AS_WCSRCHR
+-	movl	%edi, %ecx
+-	and	$(VEC_SIZE - 1), %ecx
+-	shrl	$2, %ecx
++	/* Calculate number of compare result bits to be skipped for
++	   wide string alignment adjustment. */
++	andl	$(VEC_SIZE - 1), %ecx
++	sarl	$2, %ecx
+ # endif
+-	shlx	%SHIFT_REG, %VRDX, %VRDX
+-
++	/* ecx contains number of w[char] to be skipped as a result
++	   of address alignment. */
++	andq	$-VEC_SIZE, %rax
++	VMOVA	(%rax), %VMM(1)
++	VPTESTN	%VMM(1), %VMM(1), %k1
++	KMOV	%k1, %VRAX
++	SHR	%cl, %VRAX
++	jz	L(page_cross_continue)
++	VPCMPEQ	%VMM(1), %VMM(0), %k0
++	KMOV	%k0, %VRDX
++	SHR	%cl, %VRDX
++	BLSMSK	%VRAX, %VRAX
++	and	%VRDX, %VRAX
++	jz	L(ret)
++	BSR	%VRAX, %VRAX
+ # ifdef USE_AS_WCSRCHR
+-	kmovw	%edx, %k1
++	leaq	(%rdi, %rax, CHAR_SIZE), %rax
+ # else
+-	KMOV	%VRDX, %k1
++	add	%rdi, %rax
+ # endif
+
+-	VPCOMPRESS %VMM(6), %VMM(1){%k1}{z}
+-	/* We could technically just jmp back after the vpcompress but
+-	   it doesn't save any 16-byte blocks. */
+-	shrx	%SHIFT_REG, %VRSI, %VRSI
+-	test	%VRSI, %VRSI
+-	jnz	L(page_cross_return)
+-	jmp	L(page_cross_continue)
+-	/* 1-byte from cache line. */
+-END(STRRCHR)
++	ret
++END (STRRCHR)
+ #endif
+diff --git a/sysdeps/x86_64/multiarch/strrchr-evex.S b/sysdeps/x86_64/multiarch/strrchr-evex.S
+index 3bf6a5101422e4d1..85e3b0119f5dc923 100644
+--- a/sysdeps/x86_64/multiarch/strrchr-evex.S
++++ b/sysdeps/x86_64/multiarch/strrchr-evex.S
+@@ -1,8 +1,394 @@
++/* strrchr/wcsrchr optimized with 256-bit EVEX instructions.
++   Copyright (C) 2021-2023 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <isa-level.h>
++
++#if ISA_SHOULD_BUILD (4)
++
++# include <sysdep.h>
++
+ # ifndef STRRCHR
+ # define STRRCHR __strrchr_evex
+ # endif
+
+-#include "x86-evex256-vecs.h"
+-#include "reg-macros.h"
++# include "x86-evex256-vecs.h"
++
++# ifdef USE_AS_WCSRCHR
++# define SHIFT_REG rsi
++# define kunpck_2x kunpckbw
++# define kmov_2x kmovd
++# define maskz_2x ecx
++# define maskm_2x eax
++# define CHAR_SIZE 4
++# define VPMIN vpminud
++# define VPTESTN vptestnmd
++# define VPTEST vptestmd
++# define VPBROADCAST vpbroadcastd
++# define VPCMPEQ vpcmpeqd
++# define VPCMP vpcmpd
++
++# define USE_WIDE_CHAR
++# else
++# define SHIFT_REG rdi
++# define kunpck_2x kunpckdq
++# define kmov_2x kmovq
++# define maskz_2x rcx
++# define maskm_2x rax
++
++# define CHAR_SIZE 1
++# define VPMIN vpminub
++# define VPTESTN vptestnmb
++# define VPTEST vptestmb
++# define VPBROADCAST vpbroadcastb
++# define VPCMPEQ vpcmpeqb
++# define VPCMP vpcmpb
++# endif
++
++# include "reg-macros.h"
++
++# define VMATCH VMM(0)
++# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
++# define PAGE_SIZE 4096
++
++	.section SECTION(.text), "ax", @progbits
++ENTRY_P2ALIGN(STRRCHR, 6)
++	movl	%edi, %eax
++	/* Broadcast CHAR to VMATCH. */
++	VPBROADCAST %esi, %VMATCH
++
++	andl	$(PAGE_SIZE - 1), %eax
++	cmpl	$(PAGE_SIZE - VEC_SIZE), %eax
++	jg	L(cross_page_boundary)
++L(page_cross_continue):
++	VMOVU	(%rdi), %VMM(1)
++	/* k0 has a 1 for each zero CHAR in VEC(1). */
++	VPTESTN	%VMM(1), %VMM(1), %k0
++	KMOV	%k0, %VRSI
++	test	%VRSI, %VRSI
++	jz	L(aligned_more)
++	/* fallthrough: zero CHAR in first VEC. */
++	/* K1 has a 1 for each search CHAR match in VEC(1). */
++	VPCMPEQ	%VMATCH, %VMM(1), %k1
++	KMOV	%k1, %VRAX
++	/* Build mask up until first zero CHAR (used to mask of
++	   potential search CHAR matches past the end of the string).
++	 */
++	blsmsk	%VRSI, %VRSI
++	and	%VRSI, %VRAX
++	jz	L(ret0)
++	/* Get last match (the `and` removed any out of bounds matches).
++	 */
++	bsr	%VRAX, %VRAX
++# ifdef USE_AS_WCSRCHR
++	leaq	(%rdi, %rax, CHAR_SIZE), %rax
++# else
++	addq	%rdi, %rax
++# endif
++L(ret0):
++	ret
++
++	/* Returns for first vec x1/x2/x3 have hard coded backward
++	   search path for earlier matches. */
++	.p2align 4,, 6
++L(first_vec_x1):
++	VPCMPEQ	%VMATCH, %VMM(2), %k1
++	KMOV	%k1, %VRAX
++	blsmsk	%VRCX, %VRCX
++	/* eax non-zero if search CHAR in range. */
++	and	%VRCX, %VRAX
++	jnz	L(first_vec_x1_return)
++
++	/* fallthrough: no match in VEC(2) then need to check for
++	   earlier matches (in VEC(1)). */
++	.p2align 4,, 4
++L(first_vec_x0_test):
++	VPCMPEQ	%VMATCH, %VMM(1), %k1
++	KMOV	%k1, %VRAX
++	test	%VRAX, %VRAX
++	jz	L(ret1)
++	bsr	%VRAX, %VRAX
++# ifdef USE_AS_WCSRCHR
++	leaq	(%rsi, %rax, CHAR_SIZE), %rax
++# else
++	addq	%rsi, %rax
++# endif
++L(ret1):
++	ret
++
++	.p2align 4,, 10
++L(first_vec_x1_or_x2):
++	VPCMPEQ	%VMM(3), %VMATCH, %k3
++	VPCMPEQ	%VMM(2), %VMATCH, %k2
++	/* K2 and K3 have 1 for any search CHAR match. Test if any
++	   matches between either of them. Otherwise check VEC(1). */
++	KORTEST	%k2, %k3
++	jz	L(first_vec_x0_test)
++
++	/* Guaranteed that VEC(2) and VEC(3) are within range so merge
++	   the two bitmasks then get last result. */
++	kunpck_2x %k2, %k3, %k3
++	kmov_2x	%k3, %maskm_2x
++	bsr	%maskm_2x, %maskm_2x
++	leaq	(VEC_SIZE * 1)(%r8, %rax, CHAR_SIZE), %rax
++	ret
++
++	.p2align 4,, 7
++L(first_vec_x3):
++	VPCMPEQ	%VMATCH, %VMM(4), %k1
++	KMOV	%k1, %VRAX
++	blsmsk	%VRCX, %VRCX
++	/* If no search CHAR match in range check VEC(1)/VEC(2)/VEC(3).
++	 */
++	and	%VRCX, %VRAX
++	jz	L(first_vec_x1_or_x2)
++	bsr	%VRAX, %VRAX
++	leaq	(VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax
++	ret
++
++
++	.p2align 4,, 6
++L(first_vec_x0_x1_test):
++	VPCMPEQ	%VMATCH, %VMM(2), %k1
++	KMOV	%k1, %VRAX
++	/* Check VEC(2) for last match first. If no match try VEC(1).
++	 */
++	test	%VRAX, %VRAX
++	jz	L(first_vec_x0_test)
++	.p2align 4,, 4
++L(first_vec_x1_return):
++	bsr	%VRAX, %VRAX
++	leaq	(VEC_SIZE)(%rdi, %rax, CHAR_SIZE), %rax
++	ret
++
++
++	.p2align 4,, 10
++L(first_vec_x2):
++	VPCMPEQ	%VMATCH, %VMM(3), %k1
++	KMOV	%k1, %VRAX
++	blsmsk	%VRCX, %VRCX
++	/* Check VEC(3) for last match first. If no match try
++	   VEC(2)/VEC(1). */
++	and	%VRCX, %VRAX
++	jz	L(first_vec_x0_x1_test)
++	bsr	%VRAX, %VRAX
++	leaq	(VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
++	ret
++
++
++	.p2align 4,, 12
++L(aligned_more):
++	/* Need to keep original pointer in case VEC(1) has last match.
++	 */
++	movq	%rdi, %rsi
++	andq	$-VEC_SIZE, %rdi
++
++	VMOVU	VEC_SIZE(%rdi), %VMM(2)
++	VPTESTN	%VMM(2), %VMM(2), %k0
++	KMOV	%k0, %VRCX
++
++	test	%VRCX, %VRCX
++	jnz	L(first_vec_x1)
++
++	VMOVU	(VEC_SIZE * 2)(%rdi), %VMM(3)
++	VPTESTN	%VMM(3), %VMM(3), %k0
++	KMOV	%k0, %VRCX
++
++	test	%VRCX, %VRCX
++	jnz	L(first_vec_x2)
++
++	VMOVU	(VEC_SIZE * 3)(%rdi), %VMM(4)
++	VPTESTN	%VMM(4), %VMM(4), %k0
++	KMOV	%k0, %VRCX
++	movq	%rdi, %r8
++	test	%VRCX, %VRCX
++	jnz	L(first_vec_x3)
++
++	andq	$-(VEC_SIZE * 2), %rdi
++	.p2align 4,, 10
++L(first_aligned_loop):
++	/* Preserve VEC(1), VEC(2), VEC(3), and VEC(4) until we can
++	   guarantee they don't store a match. */
++	VMOVA	(VEC_SIZE * 4)(%rdi), %VMM(5)
++	VMOVA	(VEC_SIZE * 5)(%rdi), %VMM(6)
++
++	VPCMPEQ	%VMM(5), %VMATCH, %k2
++	vpxord	%VMM(6), %VMATCH, %VMM(7)
++
++	VPMIN	%VMM(5), %VMM(6), %VMM(8)
++	VPMIN	%VMM(8), %VMM(7), %VMM(7)
++
++	VPTESTN	%VMM(7), %VMM(7), %k1
++	subq	$(VEC_SIZE * -2), %rdi
++	KORTEST	%k1, %k2
++	jz	L(first_aligned_loop)
++
++	VPCMPEQ	%VMM(6), %VMATCH, %k3
++	VPTESTN	%VMM(8), %VMM(8), %k1
++
++	/* If k1 is zero, then we found a CHAR match but no null-term.
++	   We can now safely throw out VEC1-4. */
++	KTEST	%k1, %k1
++	jz	L(second_aligned_loop_prep)
++
++	KORTEST	%k2, %k3
++	jnz	L(return_first_aligned_loop)
++
++
++	.p2align 4,, 6
++L(first_vec_x1_or_x2_or_x3):
++	VPCMPEQ	%VMM(4), %VMATCH, %k4
++	KMOV	%k4, %VRAX
++	bsr	%VRAX, %VRAX
++	jz	L(first_vec_x1_or_x2)
++	leaq	(VEC_SIZE * 3)(%r8, %rax, CHAR_SIZE), %rax
++	ret
++
++
++	.p2align 4,, 8
++L(return_first_aligned_loop):
++	VPTESTN	%VMM(5), %VMM(5), %k0
++
++	/* Combined results from VEC5/6. */
++	kunpck_2x %k0, %k1, %k0
++	kmov_2x	%k0, %maskz_2x
++
++	blsmsk	%maskz_2x, %maskz_2x
++	kunpck_2x %k2, %k3, %k3
++	kmov_2x	%k3, %maskm_2x
++	and	%maskz_2x, %maskm_2x
++	jz	L(first_vec_x1_or_x2_or_x3)
++
++	bsr	%maskm_2x, %maskm_2x
++	leaq	(VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
++	ret
++
++	.p2align 4
++	/* We can throw away the work done for the first 4x checks here
++	   as we have a later match. This is the 'fast' path persay.
++	 */
++L(second_aligned_loop_prep):
++L(second_aligned_loop_set_furthest_match):
++	movq	%rdi, %rsi
++	/* Ideally we would safe k2/k3 but `kmov/kunpck` take uops on
++	   port0 and have noticeable overhead in the loop.  */
++	VMOVA	%VMM(5), %VMM(7)
++	VMOVA	%VMM(6), %VMM(8)
++	.p2align 4
++L(second_aligned_loop):
++	VMOVU	(VEC_SIZE * 4)(%rdi), %VMM(5)
++	VMOVU	(VEC_SIZE * 5)(%rdi), %VMM(6)
++	VPCMPEQ	%VMM(5), %VMATCH, %k2
++	vpxord	%VMM(6), %VMATCH, %VMM(3)
++
++	VPMIN	%VMM(5), %VMM(6), %VMM(4)
++	VPMIN	%VMM(3), %VMM(4), %VMM(3)
++
++	VPTESTN	%VMM(3), %VMM(3), %k1
++	subq	$(VEC_SIZE * -2), %rdi
++	KORTEST	%k1, %k2
++	jz	L(second_aligned_loop)
++	VPCMPEQ	%VMM(6), %VMATCH, %k3
++	VPTESTN	%VMM(4), %VMM(4), %k1
++	KTEST	%k1, %k1
++	jz	L(second_aligned_loop_set_furthest_match)
++
++	/* branch here because we know we have a match in VEC7/8 but
++	   might not in VEC5/6 so the latter is expected to be less
++	   likely. */
++	KORTEST	%k2, %k3
++	jnz	L(return_new_match)
++
++L(return_old_match):
++	VPCMPEQ	%VMM(8), %VMATCH, %k0
++	KMOV	%k0, %VRCX
++	bsr	%VRCX, %VRCX
++	jnz	L(return_old_match_ret)
++
++	VPCMPEQ	%VMM(7), %VMATCH, %k0
++	KMOV	%k0, %VRCX
++	bsr	%VRCX, %VRCX
++	subq	$VEC_SIZE, %rsi
++L(return_old_match_ret):
++	leaq	(VEC_SIZE * 3)(%rsi, %rcx, CHAR_SIZE), %rax
++	ret
++
++	.p2align 4,, 10
++L(return_new_match):
++	VPTESTN	%VMM(5), %VMM(5), %k0
++
++	/* Combined results from VEC5/6. */
++	kunpck_2x %k0, %k1, %k0
++	kmov_2x	%k0, %maskz_2x
++
++	blsmsk	%maskz_2x, %maskz_2x
++	kunpck_2x %k2, %k3, %k3
++	kmov_2x	%k3, %maskm_2x
++
++	/* Match at end was out-of-bounds so use last known match. */
++	and	%maskz_2x, %maskm_2x
++	jz	L(return_old_match)
++
++	bsr	%maskm_2x, %maskm_2x
++	leaq	(VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
++	ret
++
++L(cross_page_boundary):
++	/* eax contains all the page offset bits of src (rdi). `xor rdi,
++	   rax` sets pointer will all page offset bits cleared so
++	   offset of (PAGE_SIZE - VEC_SIZE) will get last aligned VEC
++	   before page cross (guaranteed to be safe to read). Doing this
++	   as opposed to `movq %rdi, %rax; andq $-VEC_SIZE, %rax` saves
++	   a bit of code size. */
++	xorq	%rdi, %rax
++	VMOVU	(PAGE_SIZE - VEC_SIZE)(%rax), %VMM(1)
++	VPTESTN	%VMM(1), %VMM(1), %k0
++	KMOV	%k0, %VRCX
++
++	/* Shift out zero CHAR matches that are before the beginning of
++	   src (rdi). */
++# ifdef USE_AS_WCSRCHR
++	movl	%edi, %esi
++	andl	$(VEC_SIZE - 1), %esi
++	shrl	$2, %esi
++# endif
++	shrx	%VGPR(SHIFT_REG), %VRCX, %VRCX
++
++	test	%VRCX, %VRCX
++	jz	L(page_cross_continue)
+
+-#include "strrchr-evex-base.S"
++	/* Found zero CHAR so need to test for search CHAR. */
++	VPCMP	$0, %VMATCH, %VMM(1), %k1
++	KMOV	%k1, %VRAX
++	/* Shift out search CHAR matches that are before the beginning of
++	   src (rdi). */
++	shrx	%VGPR(SHIFT_REG), %VRAX, %VRAX
++
++	/* Check if any search CHAR match in range.  */
++	blsmsk	%VRCX, %VRCX
++	and	%VRCX, %VRAX
++	jz	L(ret3)
++	bsr	%VRAX, %VRAX
++# ifdef USE_AS_WCSRCHR
++	leaq	(%rdi, %rax, CHAR_SIZE), %rax
++# else
++	addq	%rdi, %rax
++# endif
++L(ret3):
++	ret
++END(STRRCHR)
++#endif
+diff --git a/sysdeps/x86_64/multiarch/wcsrchr-evex.S b/sysdeps/x86_64/multiarch/wcsrchr-evex.S
+index a584cd3f430ba9d5..e5c5fe3bf28a5966 100644
+--- a/sysdeps/x86_64/multiarch/wcsrchr-evex.S
++++ b/sysdeps/x86_64/multiarch/wcsrchr-evex.S
+@@ -4,5 +4,4 @@
+ 
+ #define STRRCHR WCSRCHR
+ #define USE_AS_WCSRCHR 1
+-#define USE_WIDE_CHAR 1
+ #include "strrchr-evex.S"
diff --git a/glibc.spec b/glibc.spec
index c791311..8bf170e 100644
--- a/glibc.spec
+++ b/glibc.spec
@@ -1,4 +1,4 @@
-%global glibcsrcdir glibc-2.38.9000-170-g69239bd7a2
+%global glibcsrcdir glibc-2.38.9000-180-gdd32e1db38
 %global glibcversion 2.38.9000
 # Pre-release tarballs are pulled in from git using a command that is
 # effectively:
@@ -159,7 +159,7 @@ Version: %{glibcversion}
 # - It allows using the Release number without the %%dist tag in the dependency
 #   generator to make the generated requires interchangeable between Rawhide
 #   and ELN (.elnYY < .fcXX).
-%global baserelease 13
+%global baserelease 14
 Release: %{baserelease}%{?dist}
 
 # In general, GPLv2+ is used by programs, LGPLv2+ is used for
@@ -230,6 +230,7 @@ Patch9: glibc-rh827510.patch
 Patch13: glibc-fedora-localedata-rh61908.patch
 Patch17: glibc-cs-path.patch
 Patch23: glibc-python3.patch
+Patch24: glibc-rh2244688.patch
 
 ##############################################################################
 # Continued list of core "glibc" package information:
@@ -2198,6 +2199,21 @@ update_gconv_modules_cache ()
 %files -f compat-libpthread-nonshared.filelist -n compat-libpthread-nonshared
 
 %changelog
+* Wed Oct 18 2023 Florian Weimer <fweimer@redhat.com> - 2.38.9000-14
+- Revert "x86: Prepare `strrchr-evex` and `strrchr-evex512` for AVX10" (#2244688)
+- Auto-sync with upstream branch master,
+  commit dd32e1db386c77c61850a7cbd0c126b7b3c63ece:
+- Revert "elf: Always call destructors in reverse constructor order (bug 30785)"
+- Revert "elf: Fix compile error with -DNDEBUG [BZ #18755]"
+- Add strlcat/wcslcat testcase.
+- Add strlcpy/wcslcpy testcase
+- Add LE DSCP code point from RFC-8622.
+- Add HWCAP2_MOPS from Linux 6.5 to AArch64 bits/hwcap.h
+- Add SCM_SECURITY, SCM_PIDFD to bits/socket.h
+- Add AT_HANDLE_FID from Linux 6.5 to bits/fcntl-linux.h
+- Avoid maybe-uninitialized warning in __kernel_rem_pio2
+- Fix WAIT_FOR_DEBUGGER for container tests.
+
 * Thu Oct 12 2023 Florian Weimer <fweimer@redhat.com> - 2.38.9000-13
 - Drop glibc-disable-werror-tst-realloc.patch, GCC was fixed long ago.
 - Auto-sync with upstream branch master,
diff --git a/sources b/sources
index 48a809e..90df9ab 100644
--- a/sources
+++ b/sources
@@ -1 +1 @@
-SHA512 (glibc-2.38.9000-170-g69239bd7a2.tar.xz) = 4dfb7ca7631d77effa335e075e6648e6dccb7edeba5f5a32d26fd0b2e6e84809c3c79508bdfb38a04b0c85803dc7bcf3aed3aea0e93b6ddb6d5072e4e104eec3
+SHA512 (glibc-2.38.9000-180-gdd32e1db38.tar.xz) = de61ce6e63bd4d424076ad5b902014b67ba5ba1123e14a6cbe9fe0b6c486cf96d7bde62f09ab98c477735b0397356828b95d0aa8caa90dbb4fd234cbf63a6cb3