glibc/glibc-upstream-2.34-179.patch
Carlos O'Donell 73667d0be6 Import glibc-2.34-32.fc35 from f35
* Thu Apr 28 2022 Carlos O'Donell <carlos@redhat.com> - 2.34-32
- Sync with upstream branch release/2.34/master,
  commit c66c92181ddbd82306537a608e8c0282587131de:
- posix/glob.c: update from gnulib (BZ#25659)
- linux: Fix fchmodat with AT_SYMLINK_NOFOLLOW for 64 bit time_t (BZ#29097)

* Wed Apr 27 2022 Carlos O'Donell <carlos@redhat.com> - 2.34-31
- Sync with upstream branch release/2.34/master,
  commit 55640ed3fde48360a8e8083be4843bd2dc7cecfe:
- i386: Regenerate ulps
- linux: Fix missing internal 64 bit time_t stat usage
- x86: Optimize L(less_vec) case in memcmp-evex-movbe.S
- x86: Don't set Prefer_No_AVX512 for processors with AVX512 and AVX-VNNI
- x86-64: Use notl in EVEX strcmp [BZ #28646]
- x86: Shrink memcmp-sse4.S code size
- x86: Double size of ERMS rep_movsb_threshold in dl-cacheinfo.h
- x86: Optimize memmove-vec-unaligned-erms.S
- x86-64: Replace movzx with movzbl
- x86-64: Remove Prefer_AVX2_STRCMP
- x86-64: Improve EVEX strcmp with masked load
- x86: Replace sse2 instructions with avx in memcmp-evex-movbe.S
- x86: Optimize memset-vec-unaligned-erms.S
- x86: Optimize memcmp-evex-movbe.S for frontend behavior and size
- x86: Modify ENTRY in sysdep.h so that p2align can be specified
- x86-64: Optimize load of all bits set into ZMM register [BZ #28252]
- scripts/glibcelf.py: Mark as UNSUPPORTED on Python 3.5 and earlier
- dlfcn: Do not use rtld_active () to determine ld.so state (bug 29078)
- INSTALL: Rephrase -with-default-link documentation
- misc: Fix rare fortify crash on wchar funcs. [BZ 29030]
- Default to --with-default-link=no (bug 25812)
- scripts: Add glibcelf.py module

* Thu Apr 21 2022 Carlos O'Donell <carlos@redhat.com> - 2.34-30
- Sync with upstream branch release/2.34/master,
  commit 71326f1f2fd09dafb9c34404765fb88129e94237:
- nptl: Fix pthread_cancel cancelhandling atomic operations
- mips: Fix mips64n32 64 bit time_t stat support (BZ#29069)
- hurd: Fix arbitrary error code
- nptl: Handle spurious EINTR when thread cancellation is disabled (BZ#29029)
- S390: Add new s390 platform z16.
- NEWS: Update fixed bug list for LD_AUDIT backports.
- hppa: Fix bind-now audit (BZ #28857)
- elf: Replace tst-audit24bmod2.so with tst-audit24bmod2
- Fix elf/tst-audit25a with default bind now toolchains
- elf: Fix runtime linker auditing on aarch64 (BZ #26643)
- elf: Issue la_symbind for bind-now (BZ #23734)
- elf: Fix initial-exec TLS access on audit modules (BZ #28096)
- elf: Add la_activity during application exit
- elf: Do not fail for failed dlmopen on audit modules (BZ #28061)
- elf: Issue audit la_objopen for vDSO
- elf: Add audit tests for modules with TLSDESC
- elf: Avoid unnecessary slowdown from profiling with audit (BZ#15533)
- elf: Add _dl_audit_pltexit
- elf: Add _dl_audit_pltenter
- elf: Add _dl_audit_preinit
- elf: Add _dl_audit_symbind_alt and _dl_audit_symbind
- elf: Add _dl_audit_objclose
- elf: Add _dl_audit_objsearch
- elf: Add _dl_audit_activity_map and _dl_audit_activity_nsid
- elf: Add _dl_audit_objopen
- elf: Move la_activity (LA_ACT_ADD) after _dl_add_to_namespace_list() (BZ #28062)
- elf: Move LAV_CURRENT to link_lavcurrent.h
- elf: Fix elf_get_dynamic_info() for bootstrap
- elf: Fix dynamic-link.h usage on rtld.c
- elf: Fix elf_get_dynamic_info definition
- elf: Avoid nested functions in the loader [BZ #27220]
- powerpc: Delete unneeded ELF_MACHINE_BEFORE_RTLD_RELOC
- hppa: Use END instead of PSEUDO_END in swapcontext.S
- hppa: Implement swapcontext in assembler (bug 28960)

Resolves: #2003291
Resolves: #2064181
Resolves: #2072328
Resolves: #2075713
Resolves: #2077838
2022-04-29 11:01:33 -04:00

86 lines
3.7 KiB
Diff

commit a182bb7a3922404f79def09d79ef89678b4049f0
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Fri Oct 29 12:56:53 2021 -0700
x86-64: Remove Prefer_AVX2_STRCMP
Remove Prefer_AVX2_STRCMP to enable EVEX strcmp. When comparing two 32-byte
strings, EVEX strcmp has been improved to require 1 load, 1 VPTESTM, 1
VPCMP, 1 KMOVD and 1 INCL (instead of 2 loads, 3 VPCMPs, 2 KORDs, 1 KMOVD
and 1 TESTL), while AVX2 strcmp requires 1 load, 2 VPCMPEQs, 1 VPMINU, 1
VPMOVMSKB and 1 TESTL. EVEX strcmp is now faster than AVX2 strcmp by up
to 40% on Tiger Lake and Ice Lake.
(cherry picked from commit 14dbbf46a007ae5df36646b51ad0c9e5f5259f30)
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index de4e3c3b7258120d..f4d4049e391cbabd 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -574,14 +574,6 @@ disable_tsx:
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
|= bit_arch_Prefer_No_VZEROUPPER;
-
- /* Since to compare 2 32-byte strings, 256-bit EVEX strcmp
- requires 2 loads, 3 VPCMPs and 2 KORDs while AVX2 strcmp
- requires 1 load, 2 VPCMPEQs, 1 VPMINU and 1 VPMOVMSKB,
- AVX2 strcmp is faster than EVEX strcmp. */
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX2))
- cpu_features->preferred[index_arch_Prefer_AVX2_STRCMP]
- |= bit_arch_Prefer_AVX2_STRCMP;
}
/* Avoid avoid short distance REP MOVSB on processor with FSRM. */
diff --git a/sysdeps/x86/cpu-tunables.c b/sysdeps/x86/cpu-tunables.c
index 58f2fad4323d5d91..957db3ad229ba39f 100644
--- a/sysdeps/x86/cpu-tunables.c
+++ b/sysdeps/x86/cpu-tunables.c
@@ -239,8 +239,6 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features,
Fast_Copy_Backward,
disable, 18);
- CHECK_GLIBC_IFUNC_PREFERRED_NEED_BOTH
- (n, cpu_features, Prefer_AVX2_STRCMP, AVX2, disable, 18);
}
break;
case 19:
diff --git a/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def b/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def
index 3bdc76cf71007948..8250bfcbecd29a9f 100644
--- a/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def
+++ b/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def
@@ -31,5 +31,4 @@ BIT (Prefer_ERMS)
BIT (Prefer_No_AVX512)
BIT (MathVec_Prefer_No_AVX512)
BIT (Prefer_FSRM)
-BIT (Prefer_AVX2_STRCMP)
BIT (Avoid_Short_Distance_REP_MOVSB)
diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c
index 62b7abeeee646ab4..7c2901bf44456259 100644
--- a/sysdeps/x86_64/multiarch/strcmp.c
+++ b/sysdeps/x86_64/multiarch/strcmp.c
@@ -43,8 +43,7 @@ IFUNC_SELECTOR (void)
{
if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
&& CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
- && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
- && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_AVX2_STRCMP))
+ && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
return OPTIMIZE (evex);
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
diff --git a/sysdeps/x86_64/multiarch/strncmp.c b/sysdeps/x86_64/multiarch/strncmp.c
index 60ba0fe356b31779..f94a421784bfe923 100644
--- a/sysdeps/x86_64/multiarch/strncmp.c
+++ b/sysdeps/x86_64/multiarch/strncmp.c
@@ -43,8 +43,7 @@ IFUNC_SELECTOR (void)
{
if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
&& CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
- && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
- && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_AVX2_STRCMP))
+ && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
return OPTIMIZE (evex);
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))