From 2ec74d3b281ae834fbbbb573a55d43471c8cab9e Mon Sep 17 00:00:00 2001 From: Marek Polacek Date: Mon, 3 Jun 2024 12:20:49 -0400 Subject: [PATCH] 13.3.1-1 Resolves: RHEL-35627 --- .gitignore | 1 + gcc.spec | 84 +- gcc13-libstdc++-compat.patch | 32 +- gcc13-libstdc++-docs.patch | 4 +- gcc13-pr110792.patch | 163 -- ...uite-aarch64-add-fno-stack-protector.patch | 500 ---- gcc13-znver5.patch | 2246 +++++++++++++++++ sources | 2 +- 8 files changed, 2340 insertions(+), 692 deletions(-) delete mode 100644 gcc13-pr110792.patch delete mode 100644 gcc13-testsuite-aarch64-add-fno-stack-protector.patch create mode 100644 gcc13-znver5.patch diff --git a/.gitignore b/.gitignore index 6954633..b6c5006 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /gcc-13.1.1-20230614.tar.xz /gcc-13.2.1-20231205.tar.xz +/gcc-13.3.1-20240522.tar.xz diff --git a/gcc.spec b/gcc.spec index 05a3432..d2edbae 100644 --- a/gcc.spec +++ b/gcc.spec @@ -5,13 +5,13 @@ BuildRequires: scl-utils-build %{?scl:%global __strip %%{_scl_root}/usr/bin/strip} %{?scl:%global __objdump %%{_scl_root}/usr/bin/objdump} %{?scl:%scl_package gcc} -%global DATE 20231205 -%global gitrev f783814ad6a04ae5ef44595216596a2b75eda15b -%global gcc_version 13.2.1 +%global DATE 20240522 +%global gitrev 5632a1dc05eae4936ac0d8d7e8a5cb717bce5960 +%global gcc_version 13.3.1 %global gcc_major 13 # Note, gcc_release must be integer, if you want to add suffixes to # %%{release}, append them after %%{gcc_release} on Release: line. -%global gcc_release 6 +%global gcc_release 1 %global nvptx_tools_gitrev aa3404ad5a496cda5d79a50bedb1344fd63e8763 %global newlib_cygwin_gitrev 9e09d6ed83cce4777a5950412647ccc603040409 %global mpc_version 1.0.3 @@ -149,7 +149,7 @@ BuildRequires: scl-utils-build Summary: GCC version %{gcc_major} Name: %{?scl_prefix}gcc Version: %{gcc_version} -Release: %{gcc_release}.3%{?dist} +Release: %{gcc_release}%{?dist} # libgcc, libgfortran, libgomp, libstdc++ and crtstuff have # GCC Runtime Exception. 
License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD @@ -329,7 +329,7 @@ Patch8: gcc13-no-add-needed.patch Patch9: gcc13-Wno-format-security.patch Patch10: gcc13-rh1574936.patch Patch11: gcc13-d-shared-libphobos.patch -Patch12: gcc13-pr110792.patch +Patch12: gcc13-znver5.patch Patch50: isl-rh2155127.patch @@ -361,7 +361,6 @@ Patch3018: 0021-libstdc++-disable-tests.patch Patch3019: 0022-libstdc++-revert-behavior.patch Patch3021: gcc13-testsuite-p10.patch Patch3023: gcc13-testsuite-dwarf.patch -Patch3024: gcc13-testsuite-aarch64-add-fno-stack-protector.patch %if 0%{?rhel} == 9 %global nonsharedver 110 @@ -696,7 +695,7 @@ so that there cannot be any synchronization problems. %patch -P10 -p0 -b .rh1574936~ %endif %patch -P11 -p0 -b .d-shared-libphobos~ -%patch -P12 -p0 -b .pr110792~ +%patch -P12 -p1 -b .znver5~ %if 0%{?rhel} >= 6 %patch -P100 -p1 -b .fortran-fdec-duplicates~ @@ -744,7 +743,6 @@ rm -f libphobos/testsuite/libphobos.gc/forkgc2.d %endif %patch -P3021 -p1 -b .dts-test-21~ %patch -P3023 -p1 -b .dts-test-23~ -%patch -P3024 -p1 -b .dts-test-24~ find gcc/testsuite -name \*.pr96939~ | xargs rm -f @@ -1182,7 +1180,8 @@ CC="`$ANNOBIN_FLAGS --build-cc`" CXX="`$ANNOBIN_FLAGS --build-cxx`" \ CFLAGS="$ANNOBIN_CFLAGS1 $ANNOBIN_CFLAGS2 $ANNOBIN_LDFLAGS" \ CXXFLAGS="$ANNOBIN_CFLAGS1 `$ANNOBIN_FLAGS --build-includes` $ANNOBIN_CFLAGS2 $ANNOBIN_LDFLAGS" \ ./configure --with-gcc-plugin-dir=%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin \ - --without-annocheck --without-tests --without-docs --disable-rpath --without-debuginfod + --without-annocheck --without-tests --without-docs --disable-rpath --without-debuginfod \ + --without-clang-plugin --without-llvm-plugin make cd ../.. 
%endif @@ -2908,6 +2907,71 @@ fi %endif %changelog +* Thu May 30 2024 Marek Polacek 13.3.1-1 +- update from releases/gcc-13 branch + - GCC 13.3 release + - PRs analyzer/104042, analyzer/108171, analyzer/109251, analyzer/109577, + analyzer/110014, analyzer/110112, analyzer/110700, analyzer/110882, + analyzer/111289, analyzer/112790, analyzer/112889, analyzer/112969, + analyzer/113253, analyzer/113333, analyzer/114408, analyzer/114473, + bootstrap/106472, bootstrap/114369, c/112571, c/114780, c++/89224, + c++/97990, c++/100667, c++/103825, c++/110006, c++/111284, c++/112769, + c++/113141, c++/113966, c++/114303, c++/114377, c++/114537, + c++/114561, c++/114562, c++/114572, c++/114580, c++/114634, + c++/114691, c++/114709, debug/112718, driver/111700, fortran/36337, + fortran/50410, fortran/55978, fortran/89462, fortran/93678, + fortran/95374, fortran/101135, fortran/102003, fortran/103707, + fortran/103715, fortran/103716, fortran/104352, fortran/106987, + fortran/106999, fortran/107426, fortran/110987, fortran/112407, + fortran/113799, fortran/113866, fortran/113885, fortran/113956, + fortran/114001, fortran/114474, fortran/114535, fortran/114739, + fortran/114825, fortran/115039, gcov-profile/114115, + gcov-profile/114715, ipa/92606, ipa/108007, ipa/111571, ipa/112616, + ipa/113359, ipa/113907, ipa/113964, jit/110466, libgcc/111731, + libquadmath/114533, libstdc++/66146, libstdc++/93672, + libstdc++/104606, libstdc++/107800, libstdc++/108976, + libstdc++/110050, libstdc++/110054, libstdc++/113841, + libstdc++/114147, libstdc++/114316, libstdc++/114359, + libstdc++/114367, libstdc++/114401, libstdc++/114750, + libstdc++/114803, libstdc++/114863, libstdc++/115063, lto/114655, + middle-end/110027, middle-end/111151, middle-end/111632, + middle-end/111683, middle-end/112684, middle-end/112732, + middle-end/113396, middle-end/113622, middle-end/114070, + middle-end/114348, middle-end/114552, middle-end/114599, + middle-end/114734, middle-end/114753, middle-end/114907, + 
rtl-optimization/54052, rtl-optimization/114415, + rtl-optimization/114768, rtl-optimization/114924, sanitizer/97696, + sanitizer/114687, sanitizer/114743, sanitizer/114956, + sanitizer/115172, target/88309, target/101865, target/105522, + target/110621, target/111234, target/111600, target/111610, + target/111822, target/112397, target/113095, target/113233, + target/113950, target/114049, target/114130, target/114160, + target/114172, target/114175, target/114272, target/114747, + target/114752, target/114794, target/114837, target/114848, + target/114981, testsuite/111066, testsuite/112297, testsuite/114034, + testsuite/114036, testsuite/114662, tree-optimization/91838, + tree-optimization/109925, tree-optimization/110838, + tree-optimization/111009, tree-optimization/111268, + tree-optimization/111407, tree-optimization/111736, + tree-optimization/111882, tree-optimization/112281, + tree-optimization/112303, tree-optimization/112793, + tree-optimization/112961, tree-optimization/112991, + tree-optimization/113552, tree-optimization/113630, + tree-optimization/113670, tree-optimization/113831, + tree-optimization/113910, tree-optimization/114027, + tree-optimization/114115, tree-optimization/114121, + tree-optimization/114203, tree-optimization/114231, + tree-optimization/114246, tree-optimization/114375, + tree-optimization/114396, tree-optimization/114485, + tree-optimization/114566, tree-optimization/114672, + tree-optimization/114733, tree-optimization/114736, + tree-optimization/114749, tree-optimization/114787, + tree-optimization/114799, tree-optimization/114876, + tree-optimization/114965, tree-optimization/115143, + tree-optimization/115152, tree-optimization/115154 +- add --without-clang-plugin --without-llvm-plugin to annobin configure + options + * Tue Jan 9 2024 Marek Polacek 13.2.1-6.3 - use the system dir in --with-libstdcxx-zoneinfo (RHEL-21093) diff --git a/gcc13-libstdc++-compat.patch b/gcc13-libstdc++-compat.patch index c3fc803..6990125 100644 
--- a/gcc13-libstdc++-compat.patch +++ b/gcc13-libstdc++-compat.patch @@ -12759,8 +12759,8 @@ lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF --#line 12190 "configure" -+#line 12192 "configure" +-#line 12192 "configure" ++#line 12194 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -12768,8 +12768,8 @@ lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF --#line 12296 "configure" -+#line 12298 "configure" +-#line 12298 "configure" ++#line 12300 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -12777,8 +12777,8 @@ # Fake what AC_TRY_COMPILE does. cat > conftest.$ac_ext << EOF --#line 16020 "configure" -+#line 16022 "configure" +-#line 16036 "configure" ++#line 16038 "configure" int main() { typedef bool atomic_type; @@ -12786,8 +12786,8 @@ rm -f conftest* cat > conftest.$ac_ext << EOF --#line 16055 "configure" -+#line 16057 "configure" +-#line 16071 "configure" ++#line 16073 "configure" int main() { typedef short atomic_type; @@ -12795,8 +12795,8 @@ rm -f conftest* cat > conftest.$ac_ext << EOF --#line 16090 "configure" -+#line 16092 "configure" +-#line 16106 "configure" ++#line 16108 "configure" int main() { // NB: _Atomic_word not necessarily int. @@ -12804,8 +12804,8 @@ rm -f conftest* cat > conftest.$ac_ext << EOF --#line 16126 "configure" -+#line 16128 "configure" +-#line 16142 "configure" ++#line 16144 "configure" int main() { typedef long long atomic_type; @@ -12813,8 +12813,8 @@ # unnecessary for this test. cat > conftest.$ac_ext << EOF --#line 16282 "configure" -+#line 16284 "configure" +-#line 16298 "configure" ++#line 16300 "configure" int main() { _Decimal32 d1; @@ -12822,8 +12822,8 @@ # unnecessary for this test. 
cat > conftest.$ac_ext << EOF --#line 16324 "configure" -+#line 16326 "configure" +-#line 16340 "configure" ++#line 16342 "configure" template struct same { typedef T2 type; }; diff --git a/gcc13-libstdc++-docs.patch b/gcc13-libstdc++-docs.patch index ce94f5d..b007822 100644 --- a/gcc13-libstdc++-docs.patch +++ b/gcc13-libstdc++-docs.patch @@ -4,7 +4,7 @@ FSF

-+ Release 13.2.1 ++ Release 13.3.1 +

Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation @@ -17,7 +17,7 @@

- The API documentation, rendered into HTML, can be viewed online + The API documentation, rendered into HTML, can be viewed locally -+ for the 13.2.1 release, ++ for the 13.3.1 release, + online for each GCC release and diff --git a/gcc13-pr110792.patch b/gcc13-pr110792.patch deleted file mode 100644 index ce129f9..0000000 --- a/gcc13-pr110792.patch +++ /dev/null @@ -1,163 +0,0 @@ -PR target/110792: Early clobber issues with rot32di2_doubleword on i386. - -This patch is a conservative fix for PR target/110792, a wrong-code -regression affecting doubleword rotations by BITS_PER_WORD, which -effectively swaps the highpart and lowpart words, when the source to be -rotated resides in memory. The issue is that if the register used to -hold the lowpart of the destination is mentioned in the address of -the memory operand, the current define_insn_and_split unintentionally -clobbers it before reading the highpart. - -Hence, for the testcase, the incorrectly generated code looks like: - - salq $4, %rdi // calculate address - movq WHIRL_S+8(%rdi), %rdi // accidentally clobber addr - movq WHIRL_S(%rdi), %rbp // load (wrong) lowpart - -Traditionally, the textbook way to fix this would be to add an -explicit early clobber to the instruction's constraints. - - (define_insn_and_split "32di2_doubleword" -- [(set (match_operand:DI 0 "register_operand" "=r,r,r") -+ [(set (match_operand:DI 0 "register_operand" "=r,r,&r") - (any_rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0,r,o") - (const_int 32)))] - -but unfortunately this currently generates significantly worse code, -due to a strange choice of reloads (effectively memcpy), which ends up -looking like: - - salq $4, %rdi // calculate address - movdqa WHIRL_S(%rdi), %xmm0 // load the double word in SSE reg. 
- movaps %xmm0, -16(%rsp) // store the SSE reg back to the stack - movq -8(%rsp), %rdi // load highpart - movq -16(%rsp), %rbp // load lowpart - -Note that reload's "&" doesn't distinguish between the memory being -early clobbered, vs the registers used in an addressing mode being -early clobbered. - -The fix proposed in this patch is to remove the third alternative, that -allowed offsetable memory as an operand, forcing reload to place the -operand into a register before the rotation. This results in: - - salq $4, %rdi - movq WHIRL_S(%rdi), %rax - movq WHIRL_S+8(%rdi), %rdi - movq %rax, %rbp - -I believe there's a more advanced solution, by swapping the order of -the loads (if first destination register is mentioned in the address), -or inserting a lea insn (if both destination registers are mentioned -in the address), but this fix is a minimal "safe" solution, that -should hopefully be suitable for backporting. - -2023-08-06 Roger Sayle - -gcc/testsuite/ChangeLog - PR target/110792 - * gcc.target/i386/pr110792.c: Remove dg-final scan-assembler-not. - -2023-08-03 Roger Sayle - -gcc/ChangeLog - PR target/110792 - * config/i386/i386.md (ti3): For rotations by 64 bits - place operand in a register before gen_64ti2_doubleword. - (di3): Likewise, for rotations by 32 bits, place - operand in a register before gen_32di2_doubleword. - (32di2_doubleword): Constrain operand to be in register. - (64ti2_doubleword): Likewise. - -gcc/testsuite/ChangeLog - PR target/110792 - * g++.target/i386/pr110792.C: New 32-bit C++ test case. - * gcc.target/i386/pr110792.c: New 64-bit C test case. 
- ---- gcc/config/i386/i386.md -+++ gcc/config/i386/i386.md -@@ -15341,7 +15341,10 @@ (define_expand "ti3" - emit_insn (gen_ix86_ti3_doubleword - (operands[0], operands[1], operands[2])); - else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 64) -- emit_insn (gen_64ti2_doubleword (operands[0], operands[1])); -+ { -+ operands[1] = force_reg (TImode, operands[1]); -+ emit_insn (gen_64ti2_doubleword (operands[0], operands[1])); -+ } - else - { - rtx amount = force_reg (QImode, operands[2]); -@@ -15376,7 +15379,10 @@ (define_expand "di3" - emit_insn (gen_ix86_di3_doubleword - (operands[0], operands[1], operands[2])); - else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 32) -- emit_insn (gen_32di2_doubleword (operands[0], operands[1])); -+ { -+ operands[1] = force_reg (DImode, operands[1]); -+ emit_insn (gen_32di2_doubleword (operands[0], operands[1])); -+ } - else - FAIL; - -@@ -15544,8 +15550,8 @@ (define_insn_and_split "ix86_rotr3_doubleword" - }) - - (define_insn_and_split "32di2_doubleword" -- [(set (match_operand:DI 0 "register_operand" "=r,r,r") -- (any_rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0,r,o") -+ [(set (match_operand:DI 0 "register_operand" "=r,r") -+ (any_rotate:DI (match_operand:DI 1 "register_operand" "0,r") - (const_int 32)))] - "!TARGET_64BIT" - "#" -@@ -15562,8 +15568,8 @@ (define_insn_and_split "32di2_doubleword" - }) - - (define_insn_and_split "64ti2_doubleword" -- [(set (match_operand:TI 0 "register_operand" "=r,r,r") -- (any_rotate:TI (match_operand:TI 1 "nonimmediate_operand" "0,r,o") -+ [(set (match_operand:TI 0 "register_operand" "=r,r") -+ (any_rotate:TI (match_operand:TI 1 "register_operand" "0,r") - (const_int 64)))] - "TARGET_64BIT" - "#" ---- gcc/testsuite/g++.target/i386/pr110792.C -+++ gcc/testsuite/g++.target/i386/pr110792.C -@@ -0,0 +1,16 @@ -+/* { dg-do compile { target ia32 } } */ -+/* { dg-options "-O2" } */ -+ -+template -+inline T rotr(T input) -+{ -+ return static_cast((input >> ROT) | (input 
<< (8 * sizeof(T) - ROT))); -+} -+ -+unsigned long long WHIRL_S[256] = {0x18186018C07830D8}; -+unsigned long long whirl(unsigned char x0) -+{ -+ const unsigned long long s4 = WHIRL_S[x0&0xFF]; -+ return rotr<32>(s4); -+} -+/* { dg-final { scan-assembler-not "movl\tWHIRL_S\\+4\\(,%eax,8\\), %eax" } } */ ---- gcc/testsuite/gcc.target/i386/pr110792.c -+++ gcc/testsuite/gcc.target/i386/pr110792.c -@@ -0,0 +1,17 @@ -+/* { dg-do compile { target int128 } } */ -+/* { dg-options "-O2" } */ -+ -+static inline unsigned __int128 rotr(unsigned __int128 input) -+{ -+ return ((input >> 64) | (input << (64))); -+} -+ -+unsigned __int128 WHIRL_S[256] = {((__int128)0x18186018C07830D8) << 64 |0x18186018C07830D8}; -+unsigned __int128 whirl(unsigned char x0) -+{ -+ register int t __asm("rdi") = x0&0xFF; -+ const unsigned __int128 s4 = WHIRL_S[t]; -+ register unsigned __int128 tt __asm("rdi") = rotr(s4); -+ asm("":::"memory"); -+ return tt; -+} diff --git a/gcc13-testsuite-aarch64-add-fno-stack-protector.patch b/gcc13-testsuite-aarch64-add-fno-stack-protector.patch deleted file mode 100644 index 01ca906..0000000 --- a/gcc13-testsuite-aarch64-add-fno-stack-protector.patch +++ /dev/null @@ -1,500 +0,0 @@ -From 3439b79cb7f97464d65316a94d40d49505fb2150 Mon Sep 17 00:00:00 2001 -From: Marek Polacek -Date: Wed, 6 Dec 2023 15:34:24 -0500 -Subject: [PATCH] aarch64: add -fno-stack-protector to tests - -These tests fail when the testsuite is executed with -fstack-protector-strong. -To avoid this, this patch adds -fno-stack-protector to dg-options. - -The list of FAILs is appended. As you can see, it's mostly about -scan-assembler-* which are sort of expected to fail with the stack -protector on. 
- -FAIL: gcc.target/aarch64/ldp_stp_unaligned_2.c scan-assembler-not mov\\tx[0-9]+, sp -FAIL: gcc.target/aarch64/shadow_call_stack_5.c scan-assembler-times stp\\\\tx29, x30, \\\\[sp\\\\] 1 -FAIL: gcc.target/aarch64/shadow_call_stack_5.c scan-assembler ldr\\\\tx29, \\\\[sp\\\\] -FAIL: gcc.target/aarch64/shadow_call_stack_6.c scan-assembler-times str\\\\tx30, \\\\[sp\\\\] 1 -FAIL: gcc.target/aarch64/shadow_call_stack_7.c scan-assembler-times stp\\\\tx19, x30, \\\\[sp, -[0-9]+\\\\]! 1 -FAIL: gcc.target/aarch64/shadow_call_stack_7.c scan-assembler ldr\\\\tx19, \\\\[sp\\\\], [0-9]+ -FAIL: gcc.target/aarch64/shadow_call_stack_8.c scan-assembler-times stp\\\\tx19, x20, \\\\[sp, -[0-9]+\\\\]! 1 -FAIL: gcc.target/aarch64/shadow_call_stack_8.c scan-assembler ldp\\\\tx19, x20, \\\\[sp\\\\], [0-9]+ -FAIL: gcc.target/aarch64/stack-check-12.c scan-assembler-times str\\\\txzr, 2 -FAIL: gcc.target/aarch64/stack-check-prologue-11.c scan-assembler-times str\\\\s+xzr, \\\\[sp, 1024\\\\] 1 -FAIL: gcc.target/aarch64/stack-check-prologue-12.c scan-assembler-times str\\\\s+xzr, \\\\[sp, 1024\\\\] 1 -FAIL: gcc.target/aarch64/stack-check-prologue-13.c scan-assembler-times str\\\\s+xzr, \\\\[sp, 1024\\\\] 1 -FAIL: gcc.target/aarch64/stack-check-prologue-13.c scan-assembler-times str\\\\s+x30, \\\\[sp\\\\] 1 -FAIL: gcc.target/aarch64/stack-check-prologue-14.c scan-assembler-times str\\\\s+xzr, \\\\[sp, 1024\\\\] 1 -FAIL: gcc.target/aarch64/stack-check-prologue-14.c scan-assembler-times str\\\\s+x30, \\\\[sp\\\\] 1 -FAIL: gcc.target/aarch64/stack-check-prologue-15.c scan-assembler-times str\\\\s+xzr, \\\\[sp, 1024\\\\] 1 -FAIL: gcc.target/aarch64/stack-check-prologue-15.c scan-assembler-times str\\\\s+x30, \\\\[sp\\\\] 1 -FAIL: gcc.target/aarch64/stack-check-prologue-17.c check-function-bodies test1 -FAIL: gcc.target/aarch64/stack-check-prologue-17.c check-function-bodies test2 -FAIL: gcc.target/aarch64/stack-check-prologue-18.c check-function-bodies test1 -FAIL: 
gcc.target/aarch64/stack-check-prologue-18.c check-function-bodies test2 -FAIL: gcc.target/aarch64/stack-check-prologue-18.c check-function-bodies test3 -FAIL: gcc.target/aarch64/stack-check-prologue-19.c check-function-bodies test1 -FAIL: gcc.target/aarch64/stack-check-prologue-19.c check-function-bodies test2 -FAIL: gcc.target/aarch64/stack-check-prologue-19.c check-function-bodies test3 -FAIL: gcc.target/aarch64/stack-check-prologue-2.c scan-assembler-times str\\\\s+xzr, 0 -FAIL: gcc.target/aarch64/stack-check-prologue-5.c scan-assembler-times str\\\\s+xzr, \\\\[sp, 1024\\\\] 1 -FAIL: gcc.target/aarch64/stack-check-prologue-6.c scan-assembler-times str\\\\s+xzr, \\\\[sp, 1024\\\\] 1 -FAIL: gcc.target/aarch64/stack-check-prologue-8.c scan-assembler-times str\\\\s+xzr, \\\\[sp, 1024\\\\] 2 -FAIL: gcc.target/aarch64/stack-check-prologue-9.c scan-assembler-times str\\\\s+xzr, \\\\[sp, 1024\\\\] 1 -FAIL: gcc.target/aarch64/test_frame_1.c scan-assembler-times str\\tx30, \\\\[sp, -[0-9]+\\\\]! 2 -FAIL: gcc.target/aarch64/test_frame_10.c scan-assembler-times stp\\tx19, x30, \\\\[sp, [0-9]+\\\\] 1 -FAIL: gcc.target/aarch64/test_frame_10.c scan-assembler ldp\\tx19, x30, \\\\[sp, [0-9]+\\\\] -FAIL: gcc.target/aarch64/test_frame_11.c scan-assembler-times stp\\tx29, x30, \\\\[sp, -[0-9]+\\\\]! 2 -FAIL: gcc.target/aarch64/test_frame_13.c scan-assembler-times stp\\tx29, x30, \\\\[sp\\\\] 1 -FAIL: gcc.target/aarch64/test_frame_15.c scan-assembler-times stp\\tx29, x30, \\\\[sp, [0-9]+\\\\] 1 -FAIL: gcc.target/aarch64/test_frame_2.c scan-assembler-times stp\\tx19, x30, \\\\[sp, -[0-9]+\\\\]! 1 -FAIL: gcc.target/aarch64/test_frame_2.c scan-assembler ldp\\tx19, x30, \\\\[sp\\\\], [0-9]+ -FAIL: gcc.target/aarch64/test_frame_4.c scan-assembler-times stp\\tx19, x30, \\\\[sp, -[0-9]+\\\\]! 
1 -FAIL: gcc.target/aarch64/test_frame_4.c scan-assembler ldp\\tx19, x30, \\\\[sp\\\\], [0-9]+ -FAIL: gcc.target/aarch64/test_frame_6.c scan-assembler-times str\\tx30, \\\\[sp\\\\] 1 -FAIL: gcc.target/aarch64/test_frame_7.c scan-assembler-times stp\\tx19, x30, \\\\[sp] 1 -FAIL: gcc.target/aarch64/test_frame_8.c scan-assembler-times str\\tx30, \\\\[sp, [0-9]+\\\\] 1 -FAIL: gcc.target/aarch64/test_frame_8.c scan-assembler ldr\\tx30, \\\\[sp, [0-9]+\\\\] -FAIL: gcc.target/aarch64/sve/struct_vect_24.c scan-assembler-times cmp\\\\s+x[0-9]+, 61440 4 -FAIL: gcc.target/aarch64/sve/struct_vect_24.c scan-assembler-times sub\\\\s+x[0-9]+, x[0-9]+, 61440 4 -FAIL: gcc.target/aarch64/sve/struct_vect_24.c scan-assembler-times cmp\\s+x[0-9]+, 61440 4 -FAIL: gcc.target/aarch64/sve/struct_vect_24.c scan-assembler-times sub\\s+x[0-9]+, x[0-9]+, 61440 4 - -gcc/testsuite/ChangeLog: - - * gcc.target/aarch64/ldp_stp_unaligned_2.c: Use -fno-stack-protector. - * gcc.target/aarch64/shadow_call_stack_5.c: Likewise. - * gcc.target/aarch64/shadow_call_stack_6.c: Likewise. - * gcc.target/aarch64/shadow_call_stack_7.c: Likewise. - * gcc.target/aarch64/shadow_call_stack_8.c: Likewise. - * gcc.target/aarch64/stack-check-12.c: Likewise. - * gcc.target/aarch64/stack-check-prologue-11.c: Likewise. - * gcc.target/aarch64/stack-check-prologue-12.c: Likewise. - * gcc.target/aarch64/stack-check-prologue-13.c: Likewise. - * gcc.target/aarch64/stack-check-prologue-14.c: Likewise. - * gcc.target/aarch64/stack-check-prologue-15.c: Likewise. - * gcc.target/aarch64/stack-check-prologue-17.c: Likewise. - * gcc.target/aarch64/stack-check-prologue-18.c: Likewise. - * gcc.target/aarch64/stack-check-prologue-19.c: Likewise. - * gcc.target/aarch64/stack-check-prologue-2.c: Likewise. - * gcc.target/aarch64/stack-check-prologue-5.c: Likewise. - * gcc.target/aarch64/stack-check-prologue-6.c: Likewise. - * gcc.target/aarch64/stack-check-prologue-8.c: Likewise. - * gcc.target/aarch64/stack-check-prologue-9.c: Likewise. 
- * gcc.target/aarch64/sve/struct_vect_24.c: Likewise. - * gcc.target/aarch64/test_frame_1.c: Likewise. - * gcc.target/aarch64/test_frame_10.c: Likewise. - * gcc.target/aarch64/test_frame_11.c: Likewise. - * gcc.target/aarch64/test_frame_13.c: Likewise. - * gcc.target/aarch64/test_frame_15.c: Likewise. - * gcc.target/aarch64/test_frame_2.c: Likewise. - * gcc.target/aarch64/test_frame_4.c: Likewise. - * gcc.target/aarch64/test_frame_6.c: Likewise. - * gcc.target/aarch64/test_frame_7.c: Likewise. - * gcc.target/aarch64/test_frame_8.c: Likewise. ---- - gcc/testsuite/gcc.target/aarch64/ldp_stp_unaligned_2.c | 2 +- - gcc/testsuite/gcc.target/aarch64/shadow_call_stack_5.c | 2 +- - gcc/testsuite/gcc.target/aarch64/shadow_call_stack_6.c | 2 +- - gcc/testsuite/gcc.target/aarch64/shadow_call_stack_7.c | 2 +- - gcc/testsuite/gcc.target/aarch64/shadow_call_stack_8.c | 2 +- - gcc/testsuite/gcc.target/aarch64/stack-check-12.c | 2 +- - gcc/testsuite/gcc.target/aarch64/stack-check-prologue-11.c | 2 +- - gcc/testsuite/gcc.target/aarch64/stack-check-prologue-12.c | 2 +- - gcc/testsuite/gcc.target/aarch64/stack-check-prologue-13.c | 4 ++-- - gcc/testsuite/gcc.target/aarch64/stack-check-prologue-14.c | 4 ++-- - gcc/testsuite/gcc.target/aarch64/stack-check-prologue-15.c | 2 +- - gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c | 2 +- - gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c | 2 +- - gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c | 2 +- - gcc/testsuite/gcc.target/aarch64/stack-check-prologue-2.c | 2 +- - gcc/testsuite/gcc.target/aarch64/stack-check-prologue-5.c | 2 +- - gcc/testsuite/gcc.target/aarch64/stack-check-prologue-6.c | 2 +- - gcc/testsuite/gcc.target/aarch64/stack-check-prologue-8.c | 2 +- - gcc/testsuite/gcc.target/aarch64/stack-check-prologue-9.c | 2 +- - gcc/testsuite/gcc.target/aarch64/sve/struct_vect_24.c | 2 +- - gcc/testsuite/gcc.target/aarch64/test_frame_1.c | 2 +- - gcc/testsuite/gcc.target/aarch64/test_frame_10.c | 2 +- - 
gcc/testsuite/gcc.target/aarch64/test_frame_11.c | 2 +- - gcc/testsuite/gcc.target/aarch64/test_frame_13.c | 2 +- - gcc/testsuite/gcc.target/aarch64/test_frame_15.c | 2 +- - gcc/testsuite/gcc.target/aarch64/test_frame_2.c | 2 +- - gcc/testsuite/gcc.target/aarch64/test_frame_4.c | 2 +- - gcc/testsuite/gcc.target/aarch64/test_frame_6.c | 2 +- - gcc/testsuite/gcc.target/aarch64/test_frame_7.c | 2 +- - gcc/testsuite/gcc.target/aarch64/test_frame_8.c | 2 +- - 30 files changed, 32 insertions(+), 32 deletions(-) - -diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_unaligned_2.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_unaligned_2.c -index 1e46755a39a..50d7d7a2d5d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/ldp_stp_unaligned_2.c -+++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_unaligned_2.c -@@ -1,4 +1,4 @@ --/* { dg-options "-O2 -fomit-frame-pointer" } */ -+/* { dg-options "-O2 -fomit-frame-pointer -fno-stack-protector" } */ - - /* Check that we split unaligned LDP/STP into base and aligned offset. */ - -diff --git a/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_5.c b/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_5.c -index d88357ca04d..d7f82984ff5 100644 ---- a/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_5.c -+++ b/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_5.c -@@ -7,7 +7,7 @@ - * optimized code should use "ldr x29, [sp]" to restore x29 only. 
*/ - - /* { dg-do compile } */ --/* { dg-options "-O2 -fno-omit-frame-pointer -fsanitize=shadow-call-stack -fno-exceptions -ffixed-x18 --save-temps" } */ -+/* { dg-options "-O2 -fno-omit-frame-pointer -fsanitize=shadow-call-stack -fno-exceptions -ffixed-x18 --save-temps -fno-stack-protector" } */ - - #include "test_frame_common.h" - -diff --git a/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_6.c b/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_6.c -index 83b74834c6a..8d088aecc20 100644 ---- a/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_6.c -+++ b/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_6.c -@@ -7,7 +7,7 @@ - * optimized code should not restore x30 in epilogue. */ - - /* { dg-do compile } */ --/* { dg-options "-O2 -fomit-frame-pointer -fsanitize=shadow-call-stack -fno-exceptions -ffixed-x18 --save-temps" } */ -+/* { dg-options "-O2 -fomit-frame-pointer -fsanitize=shadow-call-stack -fno-exceptions -ffixed-x18 --save-temps -fno-stack-protector" } */ - - #include "test_frame_common.h" - -diff --git a/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_7.c b/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_7.c -index 5537fb3293a..a2f376e0091 100644 ---- a/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_7.c -+++ b/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_7.c -@@ -7,7 +7,7 @@ - * optimized code should use "ldr x19, [sp], x" to restore x19 only. 
*/ - - /* { dg-do compile } */ --/* { dg-options "-O2 -fomit-frame-pointer -fsanitize=shadow-call-stack -fno-exceptions -ffixed-x18 --save-temps" } */ -+/* { dg-options "-O2 -fomit-frame-pointer -fsanitize=shadow-call-stack -fno-exceptions -ffixed-x18 --save-temps -fno-stack-protector" } */ - - #include "test_frame_common.h" - -diff --git a/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_8.c b/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_8.c -index b03f26f7bcf..5162cbb3917 100644 ---- a/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_8.c -+++ b/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_8.c -@@ -9,7 +9,7 @@ - * optimized code should not restore x30 in epilogue. */ - - /* { dg-do compile } */ --/* { dg-options "-O0 -fomit-frame-pointer -fsanitize=shadow-call-stack -fno-exceptions -ffixed-x18 --save-temps" } */ -+/* { dg-options "-O0 -fomit-frame-pointer -fsanitize=shadow-call-stack -fno-exceptions -ffixed-x18 --save-temps -fno-stack-protector" } */ - - int func1 (void) - { -diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-12.c b/gcc/testsuite/gcc.target/aarch64/stack-check-12.c -index be5a57a9ec6..e1a4c67b041 100644 ---- a/gcc/testsuite/gcc.target/aarch64/stack-check-12.c -+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-12.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fno-asynchronous-unwind-tables -fno-unwind-tables" } */ -+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fno-asynchronous-unwind-tables -fno-unwind-tables -fno-stack-protector" } */ - /* { dg-require-effective-target supports_stack_clash_protection } */ - - typedef unsigned __attribute__((mode(DI))) uint64_t; -diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-11.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-11.c -index 741f2f5fadc..d57aece05bb 100644 ---- 
a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-11.c -+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-11.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */ -+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fno-stack-protector" } */ - /* { dg-require-effective-target supports_stack_clash_protection } */ - - #define SIZE (6 * 64 * 1024) + (1 * 32 * 1024) -diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-12.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-12.c -index ece68003ade..895d130e4fa 100644 ---- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-12.c -+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-12.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fomit-frame-pointer -momit-leaf-frame-pointer" } */ -+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fomit-frame-pointer -momit-leaf-frame-pointer -fno-stack-protector" } */ - /* { dg-require-effective-target supports_stack_clash_protection } */ - - void -diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-13.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-13.c -index 0fc900c6943..1f1a6c497be 100644 ---- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-13.c -+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-13.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fomit-frame-pointer -momit-leaf-frame-pointer" } */ -+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fomit-frame-pointer -momit-leaf-frame-pointer -fno-stack-protector" } */ - /* { dg-require-effective-target supports_stack_clash_protection } */ - - void h 
(void) __attribute__ ((noreturn)); -@@ -17,4 +17,4 @@ f (void) - - /* SIZE is more than 1 guard-size, but only one 64KB page is used, expect only 1 - probe. Leaf function and omitting leaf pointers, tail call to noreturn which -- may only omit an epilogue and not a prologue. Checking for LR saving. */ -\ No newline at end of file -+ may only omit an epilogue and not a prologue. Checking for LR saving. */ -diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-14.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-14.c -index ea733f861e7..facb3cb72a7 100644 ---- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-14.c -+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-14.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fomit-frame-pointer -momit-leaf-frame-pointer" } */ -+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fomit-frame-pointer -momit-leaf-frame-pointer -fno-stack-protector" } */ - /* { dg-require-effective-target supports_stack_clash_protection } */ - - void h (void) __attribute__ ((noreturn)); -@@ -21,4 +21,4 @@ f (void) - probe at 1024 and one implicit probe due to LR being saved. Leaf function - and omitting leaf pointers, tail call to noreturn which may only omit an - epilogue and not a prologue and control flow in between. Checking for -- LR saving. */ -\ No newline at end of file -+ LR saving. 
*/ -diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-15.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-15.c -index 63df4a5609a..f2ac60a6214 100644 ---- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-15.c -+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-15.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fomit-frame-pointer -momit-leaf-frame-pointer" } */ -+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fomit-frame-pointer -momit-leaf-frame-pointer -fno-stack-protector" } */ - /* { dg-require-effective-target supports_stack_clash_protection } */ - - void g (volatile int *x) ; -diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c -index f0ec1389771..1cf6fbbb085 100644 ---- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c -+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c -@@ -1,4 +1,4 @@ --/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */ -+/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12 -fno-stack-protector" } */ - /* { dg-final { check-function-bodies "**" "" } } */ - - void f(int, ...); -diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c -index 6383bec5ebc..2e06346c158 100644 ---- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c -+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c -@@ -1,4 +1,4 @@ --/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */ -+/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12 
-fno-stack-protector" } */ - /* { dg-final { check-function-bodies "**" "" } } */ - - void f(int, ...); -diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c -index 562039b5e9b..38eab4d36ab 100644 ---- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c -+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c -@@ -1,4 +1,4 @@ --/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12 -fsanitize=shadow-call-stack -ffixed-x18" } */ -+/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12 -fsanitize=shadow-call-stack -ffixed-x18 -fno-stack-protector" } */ - /* { dg-final { check-function-bodies "**" "" } } */ - - void f(int, ...); -diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-2.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-2.c -index 61c52a251a7..b37f62cad27 100644 ---- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-2.c -+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-2.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */ -+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fno-stack-protector" } */ - /* { dg-require-effective-target supports_stack_clash_protection } */ - - #define SIZE 2 * 1024 -diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-5.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-5.c -index 2ee16350127..34a438671d0 100644 ---- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-5.c -+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-5.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */ -+/* { dg-options "-O2 -fstack-clash-protection 
--param stack-clash-protection-guard-size=16 -fno-stack-protector" } */ - /* { dg-require-effective-target supports_stack_clash_protection } */ - - #define SIZE 64 * 1024 -diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-6.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-6.c -index 3c9b606cbe0..a4e34e2fe6a 100644 ---- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-6.c -+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-6.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */ -+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fno-stack-protector" } */ - /* { dg-require-effective-target supports_stack_clash_protection } */ - - #define SIZE 65 * 1024 -diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-8.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-8.c -index 333f5fcc360..277dce4c71e 100644 ---- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-8.c -+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-8.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */ -+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fno-stack-protector" } */ - /* { dg-require-effective-target supports_stack_clash_protection } */ - - #define SIZE 128 * 1024 -diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-9.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-9.c -index a3ff89b5581..a21305541c1 100644 ---- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-9.c -+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-9.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */ -+/* { dg-options "-O2 -fstack-clash-protection --param 
stack-clash-protection-guard-size=16 -fno-stack-protector" } */ - /* { dg-require-effective-target supports_stack_clash_protection } */ - - #define SIZE 6 * 64 * 1024 -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_24.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_24.c -index 68a9d5e3d2e..19be6de0c2e 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_24.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_24.c -@@ -1,6 +1,6 @@ - /* { dg-do compile } */ - /* { dg-require-effective-target supports_stack_clash_protection } */ --/* { dg-options "-O3 -fopenmp-simd -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */ -+/* { dg-options "-O3 -fopenmp-simd -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fno-stack-protector" } */ - - #include - -diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_1.c b/gcc/testsuite/gcc.target/aarch64/test_frame_1.c -index f906b073545..c9b8822b4b1 100644 ---- a/gcc/testsuite/gcc.target/aarch64/test_frame_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_1.c -@@ -6,7 +6,7 @@ - * optimized code should use "str !" for stack adjustment. */ - - /* { dg-do run } */ --/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ -+/* { dg-options "-O2 -fomit-frame-pointer --save-temps -fno-stack-protector" } */ - - #include "test_frame_common.h" - -diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_10.c b/gcc/testsuite/gcc.target/aarch64/test_frame_10.c -index c54ab2d0ccb..fe5cbd9ed05 100644 ---- a/gcc/testsuite/gcc.target/aarch64/test_frame_10.c -+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_10.c -@@ -7,7 +7,7 @@ - * Use a single stack adjustment, no writeback. 
*/ - - /* { dg-do run } */ --/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ -+/* { dg-options "-O2 -fomit-frame-pointer --save-temps -fno-stack-protector" } */ - - #include "test_frame_common.h" - -diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_11.c b/gcc/testsuite/gcc.target/aarch64/test_frame_11.c -index f162cc091e0..11cf471168d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/test_frame_11.c -+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_11.c -@@ -5,7 +5,7 @@ - * optimized code should use "stp !" for stack adjustment. */ - - /* { dg-do run } */ --/* { dg-options "-O2 --save-temps" } */ -+/* { dg-options "-O2 --save-temps -fno-stack-protector" } */ - - #include "test_frame_common.h" - -diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_13.c b/gcc/testsuite/gcc.target/aarch64/test_frame_13.c -index 74b3370fa46..ec56963c038 100644 ---- a/gcc/testsuite/gcc.target/aarch64/test_frame_13.c -+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_13.c -@@ -5,7 +5,7 @@ - * Use a single stack adjustment, no writeback. */ - - /* { dg-do run } */ --/* { dg-options "-O2 --save-temps" } */ -+/* { dg-options "-O2 --save-temps -fno-stack-protector" } */ - - #include "test_frame_common.h" - -diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_15.c b/gcc/testsuite/gcc.target/aarch64/test_frame_15.c -index bed6714b4fe..4247008de8e 100644 ---- a/gcc/testsuite/gcc.target/aarch64/test_frame_15.c -+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_15.c -@@ -6,7 +6,7 @@ - * Use a single stack adjustment, no writeback. 
*/ - - /* { dg-do run } */ --/* { dg-options "-O2 --save-temps" } */ -+/* { dg-options "-O2 --save-temps -fno-stack-protector" } */ - - #include "test_frame_common.h" - -diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_2.c b/gcc/testsuite/gcc.target/aarch64/test_frame_2.c -index 0d715314cb8..9c4243b6480 100644 ---- a/gcc/testsuite/gcc.target/aarch64/test_frame_2.c -+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_2.c -@@ -6,7 +6,7 @@ - * optimized code should use "stp !" for stack adjustment. */ - - /* { dg-do run } */ --/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ -+/* { dg-options "-O2 -fomit-frame-pointer --save-temps -fno-stack-protector" } */ - - #include "test_frame_common.h" - -diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_4.c b/gcc/testsuite/gcc.target/aarch64/test_frame_4.c -index b41229c42f4..8d0bed93e44 100644 ---- a/gcc/testsuite/gcc.target/aarch64/test_frame_4.c -+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_4.c -@@ -6,7 +6,7 @@ - * we can use "stp !" to optimize stack adjustment. */ - - /* { dg-do run } */ --/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ -+/* { dg-options "-O2 -fomit-frame-pointer --save-temps -fno-stack-protector" } */ - - #include "test_frame_common.h" - -diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_6.c b/gcc/testsuite/gcc.target/aarch64/test_frame_6.c -index 56259c945d2..2944a8bbe16 100644 ---- a/gcc/testsuite/gcc.target/aarch64/test_frame_6.c -+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_6.c -@@ -6,7 +6,7 @@ - * use a single stack adjustment, no writeback. 
*/ - - /* { dg-do run } */ --/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ -+/* { dg-options "-O2 -fomit-frame-pointer --save-temps -fno-stack-protector" } */ - - #include "test_frame_common.h" - -diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_7.c b/gcc/testsuite/gcc.target/aarch64/test_frame_7.c -index 5702656a5da..ca371632d81 100644 ---- a/gcc/testsuite/gcc.target/aarch64/test_frame_7.c -+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_7.c -@@ -6,7 +6,7 @@ - * use a single stack adjustment, no writeback. */ - - /* { dg-do run } */ --/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ -+/* { dg-options "-O2 -fomit-frame-pointer --save-temps -fno-stack-protector" } */ - - #include "test_frame_common.h" - -diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_8.c b/gcc/testsuite/gcc.target/aarch64/test_frame_8.c -index 75a68b41e08..084e8fac373 100644 ---- a/gcc/testsuite/gcc.target/aarch64/test_frame_8.c -+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_8.c -@@ -5,7 +5,7 @@ - * number of callee-saved reg == 1. */ - - /* { dg-do run } */ --/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ -+/* { dg-options "-O2 -fomit-frame-pointer --save-temps -fno-stack-protector" } */ - - #include "test_frame_common.h" - - -base-commit: 1bd15d87031e8bf8fe9585fbc166b315303f676c --- -2.43.0 - diff --git a/gcc13-znver5.patch b/gcc13-znver5.patch new file mode 100644 index 0000000..967d073 --- /dev/null +++ b/gcc13-znver5.patch @@ -0,0 +1,2246 @@ +From 9ae3d1ceadd90c134fc4365b36ceb552decb227f Mon Sep 17 00:00:00 2001 +From: Marek Polacek +Date: Thu, 30 May 2024 16:21:45 -0400 +Subject: [PATCH] Add AMD znver5 processor enablement with scheduler model + +2024-02-14 Jan Hubicka + Karthiban Anbazhagan + +gcc/ChangeLog: + * common/config/i386/cpuinfo.h (get_amd_cpu): Recognize znver5. + * common/config/i386/i386-common.cc (processor_names): Add znver5. + (processor_alias_table): Likewise. 
+ * common/config/i386/i386-cpuinfo.h (processor_types): Add new zen + family. + (processor_subtypes): Add znver5. + * config.gcc (x86_64-*-* |...): Likewise. + * config/i386/driver-i386.cc (host_detect_local_cpu): Let + march=native detect znver5 cpu's. + * config/i386/i386-c.cc (ix86_target_macros_internal): Add + znver5. + * config/i386/i386-options.cc (m_ZNVER5): New definition + (processor_cost_table): Add znver5. + * config/i386/i386.cc (ix86_reassociation_width): Likewise. + * config/i386/i386.h (processor_type): Add PROCESSOR_ZNVER5 + (PTA_ZNVER5): New definition. + * config/i386/i386.md (define_attr "cpu"): Add znver5. + (Scheduling descriptions) Add znver5.md. + * config/i386/x86-tune-costs.h (znver5_cost): New definition. + * config/i386/x86-tune-sched.cc (ix86_issue_rate): Add znver5. + (ix86_adjust_cost): Likewise. + * config/i386/x86-tune.def (avx512_move_by_pieces): Add m_ZNVER5. + (avx512_store_by_pieces): Add m_ZNVER5. + * doc/extend.texi: Add znver5. + * doc/invoke.texi: Likewise. + * config/i386/znver4.md: Rename to zn4zn5.md; combine znver4 and znver5 Scheduler. + +gcc/testsuite/ChangeLog: + * g++.target/i386/mv29.C: Handle znver5 arch. + * gcc.target/i386/funcspec-56.inc:Likewise. 
+--- + gcc/common/config/i386/cpuinfo.h | 16 + + gcc/common/config/i386/i386-common.cc | 6 +- + gcc/common/config/i386/i386-cpuinfo.h | 2 + + gcc/config.gcc | 14 +- + gcc/config/i386/driver-i386.cc | 5 + + gcc/config/i386/i386-c.cc | 7 + + gcc/config/i386/i386-options.cc | 6 +- + gcc/config/i386/i386.cc | 3 +- + gcc/config/i386/i386.h | 3 + + gcc/config/i386/i386.md | 5 +- + gcc/config/i386/x86-tune-costs.h | 136 +++ + gcc/config/i386/x86-tune-sched.cc | 2 + + gcc/config/i386/x86-tune.def | 4 +- + gcc/config/i386/{znver4.md => zn4zn5.md} | 817 ++++++++++++++++-- + gcc/doc/extend.texi | 3 + + gcc/doc/invoke.texi | 10 + + gcc/testsuite/g++.target/i386/mv29.C | 6 + + gcc/testsuite/gcc.target/i386/funcspec-56.inc | 2 + + 18 files changed, 986 insertions(+), 61 deletions(-) + rename gcc/config/i386/{znver4.md => zn4zn5.md} (55%) + +diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h +index 441fae0cdc9..a2e28e47a7d 100644 +--- a/gcc/common/config/i386/cpuinfo.h ++++ b/gcc/common/config/i386/cpuinfo.h +@@ -310,6 +310,22 @@ get_amd_cpu (struct __processor_model *cpu_model, + cpu_model->__cpu_subtype = AMDFAM19H_ZNVER3; + } + break; ++ case 0x1a: ++ cpu_model->__cpu_type = AMDFAM1AH; ++ if (model <= 0x77) ++ { ++ cpu = "znver5"; ++ CHECK___builtin_cpu_is ("znver5"); ++ cpu_model->__cpu_subtype = AMDFAM1AH_ZNVER5; ++ } ++ else if (has_cpu_feature (cpu_model, cpu_features2, ++ FEATURE_AVX512VP2INTERSECT)) ++ { ++ cpu = "znver5"; ++ CHECK___builtin_cpu_is ("znver5"); ++ cpu_model->__cpu_subtype = AMDFAM1AH_ZNVER5; ++ } ++ break; + default: + break; + } +diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc +index a8809889360..f3610155807 100644 +--- a/gcc/common/config/i386/i386-common.cc ++++ b/gcc/common/config/i386/i386-common.cc +@@ -1983,7 +1983,8 @@ const char *const processor_names[] = + "znver1", + "znver2", + "znver3", +- "znver4" ++ "znver4", ++ "znver5" + }; + + /* Guarantee that the array is aligned 
with enum processor_type. */ +@@ -2243,6 +2244,9 @@ const pta processor_alias_table[] = + {"znver4", PROCESSOR_ZNVER4, CPU_ZNVER4, + PTA_ZNVER4, + M_CPU_SUBTYPE (AMDFAM19H_ZNVER4), P_PROC_AVX512F}, ++ {"znver5", PROCESSOR_ZNVER5, CPU_ZNVER5, ++ PTA_ZNVER5, ++ M_CPU_SUBTYPE (AMDFAM1AH_ZNVER5), P_PROC_AVX512F}, + {"btver1", PROCESSOR_BTVER1, CPU_GENERIC, + PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 + | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW +diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h +index 254dfec70e5..77db97e36b3 100644 +--- a/gcc/common/config/i386/i386-cpuinfo.h ++++ b/gcc/common/config/i386/i386-cpuinfo.h +@@ -62,6 +62,7 @@ enum processor_types + ZHAOXIN_FAM7H, + INTEL_SIERRAFOREST, + INTEL_GRANDRIDGE, ++ AMDFAM1AH, + CPU_TYPE_MAX, + BUILTIN_CPU_TYPE_MAX = CPU_TYPE_MAX + }; +@@ -99,6 +100,7 @@ enum processor_subtypes + AMDFAM19H_ZNVER4, + INTEL_COREI7_GRANITERAPIDS, + INTEL_COREI7_GRANITERAPIDS_D, ++ AMDFAM1AH_ZNVER5, + CPU_SUBTYPE_MAX + }; + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index c3b73d05eb7..ae332a88768 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -675,9 +675,9 @@ c7 esther" + # 64-bit x86 processors supported by --with-arch=. Each processor + # MUST be separated by exactly one space. 
+ x86_64_archs="amdfam10 athlon64 athlon64-sse3 barcelona bdver1 bdver2 \ +-bdver3 bdver4 znver1 znver2 znver3 znver4 btver1 btver2 k8 k8-sse3 opteron \ +-opteron-sse3 nocona core2 corei7 corei7-avx core-avx-i core-avx2 atom \ +-slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \ ++bdver3 bdver4 znver1 znver2 znver3 znver4 znver5 btver1 btver2 k8 k8-sse3 \ ++opteron opteron-sse3 nocona core2 corei7 corei7-avx core-avx-i core-avx2 \ ++atom slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \ + silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \ + skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \ + sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 \ +@@ -3681,6 +3681,10 @@ case ${target} in + arch=znver4 + cpu=znver4 + ;; ++ znver5-*) ++ arch=znver5 ++ cpu=znver5 ++ ;; + bdver4-*) + arch=bdver4 + cpu=bdver4 +@@ -3814,6 +3818,10 @@ case ${target} in + arch=znver4 + cpu=znver4 + ;; ++ znver5-*) ++ arch=znver5 ++ cpu=znver5 ++ ;; + bdver4-*) + arch=bdver4 + cpu=bdver4 +diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc +index 6a998f5dea5..f4eee496924 100644 +--- a/gcc/config/i386/driver-i386.cc ++++ b/gcc/config/i386/driver-i386.cc +@@ -465,6 +465,8 @@ const char *host_detect_local_cpu (int argc, const char **argv) + processor = PROCESSOR_GEODE; + else if (has_feature (FEATURE_MOVBE) && family == 22) + processor = PROCESSOR_BTVER2; ++ else if (has_feature (FEATURE_AVX512VP2INTERSECT)) ++ processor = PROCESSOR_ZNVER5; + else if (has_feature (FEATURE_AVX512F)) + processor = PROCESSOR_ZNVER4; + else if (has_feature (FEATURE_VAES)) +@@ -793,6 +795,9 @@ const char *host_detect_local_cpu (int argc, const char **argv) + case PROCESSOR_ZNVER4: + cpu = "znver4"; + break; ++ case PROCESSOR_ZNVER5: ++ cpu = "znver5"; ++ break; + case PROCESSOR_BTVER1: + cpu = "btver1"; + break; +diff --git a/gcc/config/i386/i386-c.cc 
b/gcc/config/i386/i386-c.cc +index 5b886e8a760..bbefb66e194 100644 +--- a/gcc/config/i386/i386-c.cc ++++ b/gcc/config/i386/i386-c.cc +@@ -136,6 +136,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, + def_or_undef (parse_in, "__znver4"); + def_or_undef (parse_in, "__znver4__"); + break; ++ case PROCESSOR_ZNVER5: ++ def_or_undef (parse_in, "__znver5"); ++ def_or_undef (parse_in, "__znver5__"); ++ break; + case PROCESSOR_BTVER1: + def_or_undef (parse_in, "__btver1"); + def_or_undef (parse_in, "__btver1__"); +@@ -353,6 +357,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, + case PROCESSOR_ZNVER4: + def_or_undef (parse_in, "__tune_znver4__"); + break; ++ case PROCESSOR_ZNVER5: ++ def_or_undef (parse_in, "__tune_znver5__"); ++ break; + case PROCESSOR_BTVER1: + def_or_undef (parse_in, "__tune_btver1__"); + break; +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index c1229475138..86102851d4d 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -165,11 +165,12 @@ along with GCC; see the file COPYING3. If not see + #define m_ZNVER2 (HOST_WIDE_INT_1U<integer move cost is 2. */ ++ ++ /* reg-reg moves are done by renaming and thus they are even cheaper than ++ 1 cycle. Because reg-reg move cost is 2 and following tables correspond ++ to doubles of latencies, we do not model this correctly. It does not ++ seem to make practical difference to bump prices up even more. */ ++ 6, /* cost for loading QImode using ++ movzbl. */ ++ {6, 6, 6}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {8, 8, 8}, /* cost of storing integer ++ registers. */ ++ 2, /* cost of reg,reg fld/fst. */ ++ {14, 14, 17}, /* cost of loading fp registers ++ in SFmode, DFmode and XFmode. */ ++ {12, 12, 16}, /* cost of storing fp registers ++ in SFmode, DFmode and XFmode. */ ++ 2, /* cost of moving MMX register. 
*/ ++ {6, 6}, /* cost of loading MMX registers ++ in SImode and DImode. */ ++ {8, 8}, /* cost of storing MMX registers ++ in SImode and DImode. */ ++ 2, 2, 3, /* cost of moving XMM,YMM,ZMM ++ register. */ ++ {6, 6, 10, 10, 12}, /* cost of loading SSE registers ++ in 32,64,128,256 and 512-bit. */ ++ {8, 8, 8, 12, 12}, /* cost of storing SSE registers ++ in 32,64,128,256 and 512-bit. */ ++ 6, 8, /* SSE->integer and integer->SSE ++ moves. */ ++ 8, 8, /* mask->integer and integer->mask moves */ ++ {6, 6, 6}, /* cost of loading mask register ++ in QImode, HImode, SImode. */ ++ {8, 8, 8}, /* cost of storing mask register ++ in QImode, HImode, SImode. */ ++ 2, /* cost of moving mask register. */ ++ /* End of register allocator costs. */ ++ }, ++ ++ COSTS_N_INSNS (1), /* cost of an add instruction. */ ++ /* TODO: Lea with 3 components has cost 2. */ ++ COSTS_N_INSNS (1), /* cost of a lea instruction. */ ++ COSTS_N_INSNS (1), /* variable shift costs. */ ++ COSTS_N_INSNS (1), /* constant shift costs. */ ++ {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */ ++ COSTS_N_INSNS (3), /* HI. */ ++ COSTS_N_INSNS (3), /* SI. */ ++ COSTS_N_INSNS (3), /* DI. */ ++ COSTS_N_INSNS (3)}, /* other. */ ++ 0, /* cost of multiply per each bit ++ set. */ ++ {COSTS_N_INSNS (10), /* cost of a divide/mod for QI. */ ++ COSTS_N_INSNS (11), /* HI. */ ++ COSTS_N_INSNS (13), /* SI. */ ++ COSTS_N_INSNS (16), /* DI. */ ++ COSTS_N_INSNS (16)}, /* other. */ ++ COSTS_N_INSNS (1), /* cost of movsx. */ ++ COSTS_N_INSNS (1), /* cost of movzx. */ ++ 8, /* "large" insn. */ ++ 9, /* MOVE_RATIO. */ ++ 6, /* CLEAR_RATIO */ ++ {6, 6, 6}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {8, 8, 8}, /* cost of storing integer ++ registers.
*/ ++ {6, 6, 10, 10, 12}, /* cost of loading SSE registers ++ in 32bit, 64bit, 128bit, 256bit and 512bit */ ++ {8, 8, 8, 12, 12}, /* cost of storing SSE register ++ in 32bit, 64bit, 128bit, 256bit and 512bit */ ++ {6, 6, 6, 6, 6}, /* cost of unaligned loads. */ ++ {8, 8, 8, 8, 8}, /* cost of unaligned stores. */ ++ 2, 2, 2, /* cost of moving XMM,YMM,ZMM ++ register. */ ++ 6, /* cost of moving SSE register to integer. */ ++ /* VGATHERDPD is 17 uops and throughput is 4, VGATHERDPS is 24 uops, ++ throughput 5. Approx 7 uops do not depend on vector size and every load ++ is 5 uops. */ ++ 14, 10, /* Gather load static, per_elt. */ ++ 14, 20, /* Gather store static, per_elt. */ ++ 32, /* size of l1 cache. */ ++ 1024, /* size of l2 cache. */ ++ 64, /* size of prefetch block. */ ++ /* New AMD processors never drop prefetches; if they cannot be performed ++ immediately, they are queued. We set number of simultaneous prefetches ++ to a large constant to reflect this (it probably is not a good idea not ++ to limit number of prefetches at all, as their execution also takes some ++ time). */ ++ 100, /* number of parallel prefetches. */ ++ 3, /* Branch cost. */ ++ COSTS_N_INSNS (7), /* cost of FADD and FSUB insns. */ ++ COSTS_N_INSNS (7), /* cost of FMUL instruction. */ ++ /* Latency of fdiv is 8-15. */ ++ COSTS_N_INSNS (15), /* cost of FDIV instruction. */ ++ COSTS_N_INSNS (1), /* cost of FABS instruction. */ ++ COSTS_N_INSNS (1), /* cost of FCHS instruction. */ ++ /* Latency of fsqrt is 4-10. */ ++ COSTS_N_INSNS (25), /* cost of FSQRT instruction. */ ++ ++ COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ ++ COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ ++ COSTS_N_INSNS (3), /* cost of MULSS instruction. */ ++ COSTS_N_INSNS (3), /* cost of MULSD instruction. */ ++ COSTS_N_INSNS (4), /* cost of FMA SS instruction. */ ++ COSTS_N_INSNS (4), /* cost of FMA SD instruction. */ ++ COSTS_N_INSNS (10), /* cost of DIVSS instruction. */ ++ /* 9-13. 
*/ ++ COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ ++ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ ++ COSTS_N_INSNS (20), /* cost of SQRTSD instruction. */ ++ /* Zen can execute 4 integer operations per cycle. FP operations ++ take 3 cycles and it can execute 2 integer additions and 2 ++ multiplications thus reassociation may make sense up to width of 6. ++ SPEC2k6 benchmarks suggest ++ that 4 works better than 6 probably due to register pressure. ++ ++ Integer vector operations are taken by FP unit and execute 3 vector ++ plus/minus operations per cycle but only one multiply. This is adjusted ++ in ix86_reassociation_width. */ ++ 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */ ++ znver2_memcpy, ++ znver2_memset, ++ COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ ++ COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */ ++ "16", /* Loop alignment. */ ++ "16", /* Jump alignment. */ ++ "0:0:8", /* Label alignment. */ ++ "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ ++}; ++ + /* skylake_cost should produce code tuned for Skylake familly of CPUs. */ + static stringop_algs skylake_memcpy[2] = { + {libcall, +diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc +index df4113d4eab..cbaba5f9e3c 100644 +--- a/gcc/config/i386/x86-tune-sched.cc ++++ b/gcc/config/i386/x86-tune-sched.cc +@@ -69,6 +69,7 @@ ix86_issue_rate (void) + case PROCESSOR_ZNVER2: + case PROCESSOR_ZNVER3: + case PROCESSOR_ZNVER4: ++ case PROCESSOR_ZNVER5: + case PROCESSOR_CORE2: + case PROCESSOR_NEHALEM: + case PROCESSOR_SANDYBRIDGE: +@@ -417,6 +418,7 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, + case PROCESSOR_ZNVER2: + case PROCESSOR_ZNVER3: + case PROCESSOR_ZNVER4: ++ case PROCESSOR_ZNVER5: + /* Stack engine allows to execute push&pop instructions in parall.
*/ + if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) + && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)) +diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def +index 0fd5bb4430e..48ca9167bf4 100644 +--- a/gcc/config/i386/x86-tune.def ++++ b/gcc/config/i386/x86-tune.def +@@ -572,12 +572,12 @@ DEF_TUNE (X86_TUNE_AVX256_STORE_BY_PIECES, "avx256_store_by_pieces", + /* X86_TUNE_AVX512_MOVE_BY_PIECES: Optimize move_by_pieces with 512-bit + AVX instructions. */ + DEF_TUNE (X86_TUNE_AVX512_MOVE_BY_PIECES, "avx512_move_by_pieces", +- m_SAPPHIRERAPIDS | m_ZNVER4) ++ m_SAPPHIRERAPIDS | m_ZNVER4 | m_ZNVER5) + + /* X86_TUNE_AVX512_STORE_BY_PIECES: Optimize store_by_pieces with 512-bit + AVX instructions. */ + DEF_TUNE (X86_TUNE_AVX512_STORE_BY_PIECES, "avx512_store_by_pieces", +- m_SAPPHIRERAPIDS | m_ZNVER4) ++ m_SAPPHIRERAPIDS | m_ZNVER4 | m_ZNVER5) + + /*****************************************************************************/ + /*****************************************************************************/ +diff --git a/gcc/config/i386/znver4.md b/gcc/config/i386/zn4zn5.md +similarity index 55% +rename from gcc/config/i386/znver4.md +rename to gcc/config/i386/zn4zn5.md +index 47802a7de2b..ba9cfbb5dfc 100644 +--- a/gcc/config/i386/znver4.md ++++ b/gcc/config/i386/zn4zn5.md +@@ -1,4 +1,4 @@ +-;; Copyright (C) 2012-2023 Free Software Foundation, Inc. ++;; Copyright (C) 2012-2024 Free Software Foundation, Inc. + ;; + ;; This file is part of GCC. + ;; +@@ -21,7 +21,7 @@ + (define_attr "znver4_decode" "direct,vector,double" + (const_string "direct")) + +-;; AMD znver4 Scheduling ++;; AMD znver4 and znver5 Scheduling + ;; Modeling automatons for zen decoders, integer execution pipes, + ;; AGU pipes, branch, floating point execution and fp store units. 
+ (define_automaton "znver4, znver4_ieu, znver4_idiv, znver4_fdiv, znver4_agu, znver4_fpu, znver4_fp_store") +@@ -44,32 +44,44 @@ (define_reservation "znver4-direct" "znver4-decode0|znver4-decode1|znver4-decode + (define_reservation "znver4-double" "znver4-direct") + + +-;; Integer unit 4 ALU pipes. ++;; Integer unit 4 ALU pipes in znver4 and 6 ALU pipes in znver5. + (define_cpu_unit "znver4-ieu0" "znver4_ieu") + (define_cpu_unit "znver4-ieu1" "znver4_ieu") + (define_cpu_unit "znver4-ieu2" "znver4_ieu") + (define_cpu_unit "znver4-ieu3" "znver4_ieu") ++(define_cpu_unit "znver5-ieu4" "znver4_ieu") ++(define_cpu_unit "znver5-ieu5" "znver4_ieu") ++ + ;; Znver4 has an additional branch unit. + (define_cpu_unit "znver4-bru0" "znver4_ieu") ++ + (define_reservation "znver4-ieu" "znver4-ieu0|znver4-ieu1|znver4-ieu2|znver4-ieu3") ++(define_reservation "znver5-ieu" "znver4-ieu0|znver4-ieu1|znver4-ieu2|znver4-ieu3|znver5-ieu4|znver5-ieu5") + +-;; 3 AGU pipes in znver4 ++;; 3 AGU pipes in znver4 and 4 AGU pipes in znver5 + (define_cpu_unit "znver4-agu0" "znver4_agu") + (define_cpu_unit "znver4-agu1" "znver4_agu") + (define_cpu_unit "znver4-agu2" "znver4_agu") ++(define_cpu_unit "znver5-agu3" "znver4_agu") ++ + (define_reservation "znver4-agu-reserve" "znver4-agu0|znver4-agu1|znver4-agu2") ++(define_reservation "znver5-agu-reserve" "znver4-agu0|znver4-agu1|znver4-agu2|znver5-agu3") + + ;; Load is 4 cycles. We do not model reservation of load unit. + (define_reservation "znver4-load" "znver4-agu-reserve") + (define_reservation "znver4-store" "znver4-agu-reserve") ++(define_reservation "znver5-load" "znver5-agu-reserve") ++(define_reservation "znver5-store" "znver5-agu-reserve") + + ;; vectorpath (microcoded) instructions are single issue instructions. + ;; So, they occupy all the integer units. ++;; This is used for both Znver4 and Znver5, since reserving extra units not used otherwise ++;; is harmless.
+ (define_reservation "znver4-ivector" "znver4-ieu0+znver4-ieu1 +- +znver4-ieu2+znver4-ieu3+znver4-bru0 +- +znver4-agu0+znver4-agu1+znver4-agu2") ++ +znver4-ieu2+znver4-ieu3+znver5-ieu4+znver5-ieu5+znver4-bru0 ++ +znver4-agu0+znver4-agu1+znver4-agu2+znver5-agu3") + +-;; Floating point unit 4 FP pipes. ++;; Floating point unit 4 FP pipes in znver4 and znver5. + (define_cpu_unit "znver4-fpu0" "znver4_fpu") + (define_cpu_unit "znver4-fpu1" "znver4_fpu") + (define_cpu_unit "znver4-fpu2" "znver4_fpu") +@@ -77,10 +89,6 @@ (define_cpu_unit "znver4-fpu3" "znver4_fpu") + + (define_reservation "znver4-fpu" "znver4-fpu0|znver4-fpu1|znver4-fpu2|znver4-fpu3") + +-(define_reservation "znver4-fvector" "znver4-fpu0+znver4-fpu1 +- +znver4-fpu2+znver4-fpu3 +- +znver4-agu0+znver4-agu1+znver4-agu2") +- + ;; DIV units + (define_cpu_unit "znver4-idiv" "znver4_idiv") + (define_cpu_unit "znver4-fdiv" "znver4_fdiv") +@@ -89,6 +97,19 @@ (define_cpu_unit "znver4-fdiv" "znver4_fdiv") + ;; throughput is limited to only one per cycle. + (define_cpu_unit "znver4-fp-store" "znver4_fp_store") + ++;; Floating point store unit 2 FP pipes in znver5. ++(define_cpu_unit "znver5-fp-store0" "znver4_fp_store") ++(define_cpu_unit "znver5-fp-store1" "znver4_fp_store") ++ ++;; This is used for both Znver4 and Znver5, since reserving extra units not used otherwise ++;; is harmless. 
++(define_reservation "znver4-fvector" "znver4-fpu0+znver4-fpu1 ++ +znver4-fpu2+znver4-fpu3+znver5-fp-store0+znver5-fp-store1 ++ +znver4-agu0+znver4-agu1+znver4-agu2+znver5-agu3") ++ ++(define_reservation "znver5-fp-store256" "znver5-fp-store0|znver5-fp-store1") ++(define_reservation "znver5-fp-store-512" "znver5-fp-store0+znver5-fp-store1") ++ + + ;; Integer Instructions + ;; Move instructions +@@ -100,6 +121,13 @@ (define_insn_reservation "znver4_imov_double" 1 + (eq_attr "memory" "none")))) + "znver4-double,znver4-ieu") + ++(define_insn_reservation "znver5_imov_double" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "znver1_decode" "double") ++ (and (eq_attr "type" "imov") ++ (eq_attr "memory" "none")))) ++ "znver4-double,znver5-ieu") ++ + (define_insn_reservation "znver4_imov_double_load" 5 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "znver1_decode" "double") +@@ -107,6 +135,13 @@ (define_insn_reservation "znver4_imov_double_load" 5 + (eq_attr "memory" "load")))) + "znver4-double,znver4-load,znver4-ieu") + ++(define_insn_reservation "znver5_imov_double_load" 5 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "znver1_decode" "double") ++ (and (eq_attr "type" "imov") ++ (eq_attr "memory" "load")))) ++ "znver4-double,znver5-load,znver5-ieu") ++ + ;; imov, imovx + (define_insn_reservation "znver4_imov" 1 + (and (eq_attr "cpu" "znver4") +@@ -114,12 +149,24 @@ (define_insn_reservation "znver4_imov" 1 + (eq_attr "memory" "none"))) + "znver4-direct,znver4-ieu") + ++(define_insn_reservation "znver5_imov" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "imov,imovx") ++ (eq_attr "memory" "none"))) ++ "znver4-direct,znver5-ieu") ++ + (define_insn_reservation "znver4_imov_load" 5 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "imov,imovx") + (eq_attr "memory" "load"))) + "znver4-direct,znver4-load,znver4-ieu") + ++(define_insn_reservation "znver5_imov_load" 5 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "imov,imovx") ++ (eq_attr 
"memory" "load"))) ++ "znver4-direct,znver5-load,znver5-ieu") ++ + ;; Push Instruction + (define_insn_reservation "znver4_push" 1 + (and (eq_attr "cpu" "znver4") +@@ -127,12 +174,24 @@ (define_insn_reservation "znver4_push" 1 + (eq_attr "memory" "store"))) + "znver4-direct,znver4-store") + ++(define_insn_reservation "znver5_push" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "push") ++ (eq_attr "memory" "store"))) ++ "znver4-direct,znver5-store") ++ + (define_insn_reservation "znver4_push_mem" 5 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "push") + (eq_attr "memory" "both"))) + "znver4-direct,znver4-load,znver4-store") + ++(define_insn_reservation "znver5_push_mem" 5 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "push") ++ (eq_attr "memory" "both"))) ++ "znver4-direct,znver5-load,znver5-store") ++ + ;; Pop instruction + (define_insn_reservation "znver4_pop" 4 + (and (eq_attr "cpu" "znver4") +@@ -140,16 +199,28 @@ (define_insn_reservation "znver4_pop" 4 + (eq_attr "memory" "load"))) + "znver4-direct,znver4-load") + ++(define_insn_reservation "znver5_pop" 4 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "pop") ++ (eq_attr "memory" "load"))) ++ "znver4-direct,znver5-load") ++ + (define_insn_reservation "znver4_pop_mem" 5 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "pop") + (eq_attr "memory" "both"))) + "znver4-direct,znver4-load,znver4-store") + ++(define_insn_reservation "znver5_pop_mem" 5 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "pop") ++ (eq_attr "memory" "both"))) ++ "znver4-direct,znver5-load,znver5-store") ++ + ;; Integer Instructions or General instructions + ;; Multiplications + (define_insn_reservation "znver4_imul" 3 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "imul") + (eq_attr "memory" "none"))) + "znver4-direct,znver4-ieu1") +@@ -160,30 +231,36 @@ (define_insn_reservation "znver4_imul_load" 7 + (eq_attr "memory" "load"))) + 
"znver4-direct,znver4-load,znver4-ieu1") + ++(define_insn_reservation "znver5_imul_load" 7 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "imul") ++ (eq_attr "memory" "load"))) ++ "znver4-direct,znver5-load,znver4-ieu1") ++ + ;; Divisions + (define_insn_reservation "znver4_idiv_DI" 18 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "DI") + (eq_attr "memory" "none")))) + "znver4-double,znver4-idiv*10") + + (define_insn_reservation "znver4_idiv_SI" 12 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "SI") + (eq_attr "memory" "none")))) + "znver4-double,znver4-idiv*6") + + (define_insn_reservation "znver4_idiv_HI" 10 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "HI") + (eq_attr "memory" "none")))) + "znver4-double,znver4-idiv*4") + + (define_insn_reservation "znver4_idiv_QI" 9 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "QI") + (eq_attr "memory" "none")))) +@@ -196,6 +273,13 @@ (define_insn_reservation "znver4_idiv_DI_load" 22 + (eq_attr "memory" "load")))) + "znver4-double,znver4-load,znver4-idiv*10") + ++(define_insn_reservation "znver5_idiv_DI_load" 22 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "DI") ++ (eq_attr "memory" "load")))) ++ "znver4-double,znver5-load,znver4-idiv*10") ++ + (define_insn_reservation "znver4_idiv_SI_load" 16 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "idiv") +@@ -203,6 +287,13 @@ (define_insn_reservation "znver4_idiv_SI_load" 16 + (eq_attr "memory" "load")))) + "znver4-double,znver4-load,znver4-idiv*6") + ++(define_insn_reservation "znver5_idiv_SI_load" 16 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "SI") ++ 
(eq_attr "memory" "load")))) ++ "znver4-double,znver5-load,znver4-idiv*6") ++ + (define_insn_reservation "znver4_idiv_HI_load" 14 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "idiv") +@@ -210,6 +301,13 @@ (define_insn_reservation "znver4_idiv_HI_load" 14 + (eq_attr "memory" "load")))) + "znver4-double,znver4-load,znver4-idiv*4") + ++(define_insn_reservation "znver5_idiv_HI_load" 14 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "HI") ++ (eq_attr "memory" "load")))) ++ "znver4-double,znver5-load,znver4-idiv*4") ++ + (define_insn_reservation "znver4_idiv_QI_load" 13 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "idiv") +@@ -217,6 +315,13 @@ (define_insn_reservation "znver4_idiv_QI_load" 13 + (eq_attr "memory" "load")))) + "znver4-double,znver4-load,znver4-idiv*4") + ++(define_insn_reservation "znver5_idiv_QI_load" 13 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "QI") ++ (eq_attr "memory" "load")))) ++ "znver4-double,znver5-load,znver4-idiv*4") ++ + ;; INTEGER/GENERAL Instructions + (define_insn_reservation "znver4_insn" 1 + (and (eq_attr "cpu" "znver4") +@@ -224,14 +329,26 @@ (define_insn_reservation "znver4_insn" 1 + (eq_attr "memory" "none,unknown"))) + "znver4-direct,znver4-ieu") + ++(define_insn_reservation "znver5_insn" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "alu,alu1,negnot,rotate1,ishift1,test,incdec,icmp") ++ (eq_attr "memory" "none,unknown"))) ++ "znver4-direct,znver5-ieu") ++ + (define_insn_reservation "znver4_insn_load" 5 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "alu,alu1,negnot,rotate1,ishift1,test,incdec,icmp") + (eq_attr "memory" "load"))) + "znver4-direct,znver4-load,znver4-ieu") + ++(define_insn_reservation "znver5_insn_load" 5 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "alu,alu1,negnot,rotate1,ishift1,test,incdec,icmp") ++ (eq_attr "memory" "load"))) ++ "znver4-direct,znver5-load,znver5-ieu") ++ + 
(define_insn_reservation "znver4_insn2" 1 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "icmov,setcc") + (eq_attr "memory" "none,unknown"))) + "znver4-direct,znver4-ieu0|znver4-ieu3") +@@ -242,8 +359,14 @@ (define_insn_reservation "znver4_insn2_load" 5 + (eq_attr "memory" "load"))) + "znver4-direct,znver4-load,znver4-ieu0|znver4-ieu3") + ++(define_insn_reservation "znver5_insn2_load" 5 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "icmov,setcc") ++ (eq_attr "memory" "load"))) ++ "znver4-direct,znver5-load,znver4-ieu0|znver4-ieu3") ++ + (define_insn_reservation "znver4_rotate" 1 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "rotate") + (eq_attr "memory" "none,unknown"))) + "znver4-direct,znver4-ieu1|znver4-ieu2") +@@ -254,27 +377,51 @@ (define_insn_reservation "znver4_rotate_load" 5 + (eq_attr "memory" "load"))) + "znver4-direct,znver4-load,znver4-ieu1|znver4-ieu2") + ++(define_insn_reservation "znver5_rotate_load" 5 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "rotate") ++ (eq_attr "memory" "load"))) ++ "znver4-direct,znver5-load,znver4-ieu1|znver4-ieu2") ++ + (define_insn_reservation "znver4_insn_store" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "alu,alu1,negnot,rotate1,ishift1,test,incdec,icmp") + (eq_attr "memory" "store"))) + "znver4-direct,znver4-ieu,znver4-store") + ++(define_insn_reservation "znver5_insn_store" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "alu,alu1,negnot,rotate1,ishift1,test,incdec,icmp") ++ (eq_attr "memory" "store"))) ++ "znver4-direct,znver4-ieu,znver5-store") ++ + (define_insn_reservation "znver4_insn2_store" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "icmov,setcc") + (eq_attr "memory" "store"))) + "znver4-direct,znver4-ieu0|znver4-ieu3,znver4-store") + ++(define_insn_reservation "znver5_insn2_store" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "icmov,setcc") 
++ (eq_attr "memory" "store"))) ++ "znver4-direct,znver4-ieu0|znver4-ieu3,znver5-store") ++ + (define_insn_reservation "znver4_rotate_store" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "rotate") + (eq_attr "memory" "store"))) + "znver4-direct,znver4-ieu1|znver4-ieu2,znver4-store") + ++(define_insn_reservation "znver5_rotate_store" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "rotate") ++ (eq_attr "memory" "store"))) ++ "znver4-direct,znver4-ieu1|znver4-ieu2,znver5-store") ++ + ;; alu1 instructions + (define_insn_reservation "znver4_alu1_vector" 3 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "znver1_decode" "vector") + (and (eq_attr "type" "alu1") + (eq_attr "memory" "none,unknown")))) +@@ -287,15 +434,27 @@ (define_insn_reservation "znver4_alu1_vector_load" 7 + (eq_attr "memory" "load")))) + "znver4-vector,znver4-load,znver4-ivector*3") + ++(define_insn_reservation "znver5_alu1_vector_load" 7 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "znver1_decode" "vector") ++ (and (eq_attr "type" "alu1") ++ (eq_attr "memory" "load")))) ++ "znver4-vector,znver5-load,znver4-ivector*3") ++ + ;; Call Instruction + (define_insn_reservation "znver4_call" 1 + (and (eq_attr "cpu" "znver4") + (eq_attr "type" "call,callv")) + "znver4-double,znver4-ieu0|znver4-bru0,znver4-store") + ++(define_insn_reservation "znver5_call" 1 ++ (and (eq_attr "cpu" "znver5") ++ (eq_attr "type" "call,callv")) ++ "znver4-double,znver4-ieu0|znver4-bru0,znver5-store") ++ + ;; Branches + (define_insn_reservation "znver4_branch" 1 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "ibr") + (eq_attr "memory" "none"))) + "znver4-direct,znver4-ieu0|znver4-bru0") +@@ -306,8 +465,14 @@ (define_insn_reservation "znver4_branch_load" 5 + (eq_attr "memory" "load"))) + "znver4-direct,znver4-load,znver4-ieu0|znver4-bru0") + ++(define_insn_reservation "znver5_branch_load" 5 ++ (and (eq_attr "cpu" 
"znver5") ++ (and (eq_attr "type" "ibr") ++ (eq_attr "memory" "load"))) ++ "znver4-direct,znver5-load,znver4-ieu0|znver4-bru0") ++ + (define_insn_reservation "znver4_branch_vector" 2 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "ibr") + (eq_attr "memory" "none,unknown"))) + "znver4-vector,znver4-ivector*2") +@@ -318,21 +483,36 @@ (define_insn_reservation "znver4_branch_vector_load" 6 + (eq_attr "memory" "load"))) + "znver4-vector,znver4-load,znver4-ivector*2") + ++(define_insn_reservation "znver5_branch_vector_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ibr") ++ (eq_attr "memory" "load"))) ++ "znver4-vector,znver5-load,znver4-ivector*2") ++ + ;; LEA instruction with simple addressing + (define_insn_reservation "znver4_lea" 1 + (and (eq_attr "cpu" "znver4") + (eq_attr "type" "lea")) + "znver4-direct,znver4-ieu") + ++(define_insn_reservation "znver5_lea" 1 ++ (and (eq_attr "cpu" "znver5") ++ (eq_attr "type" "lea")) ++ "znver4-direct,znver5-ieu") + ;; Leave + (define_insn_reservation "znver4_leave" 1 + (and (eq_attr "cpu" "znver4") + (eq_attr "type" "leave")) + "znver4-double,znver4-ieu,znver4-store") + ++(define_insn_reservation "znver5_leave" 1 ++ (and (eq_attr "cpu" "znver5") ++ (eq_attr "type" "leave")) ++ "znver4-double,znver5-ieu,znver5-store") ++ + ;; STR and ISHIFT are microcoded. 
+ (define_insn_reservation "znver4_str" 3 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "str") + (eq_attr "memory" "none"))) + "znver4-vector,znver4-ivector*3") +@@ -343,8 +523,14 @@ (define_insn_reservation "znver4_str_load" 7 + (eq_attr "memory" "load"))) + "znver4-vector,znver4-load,znver4-ivector*3") + ++(define_insn_reservation "znver5_str_load" 7 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "str") ++ (eq_attr "memory" "load"))) ++ "znver4-vector,znver5-load,znver4-ivector*3") ++ + (define_insn_reservation "znver4_ishift" 2 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "ishift") + (eq_attr "memory" "none"))) + "znver4-vector,znver4-ivector*2") +@@ -355,9 +541,15 @@ (define_insn_reservation "znver4_ishift_load" 6 + (eq_attr "memory" "load"))) + "znver4-vector,znver4-load,znver4-ivector*2") + ++(define_insn_reservation "znver5_ishift_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ishift") ++ (eq_attr "memory" "load"))) ++ "znver4-vector,znver5-load,znver4-ivector*2") ++ + ;; Other vector type + (define_insn_reservation "znver4_ieu_vector" 5 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "other,multi") + (eq_attr "memory" "none,unknown"))) + "znver4-vector,znver4-ivector*5") +@@ -368,15 +560,21 @@ (define_insn_reservation "znver4_ieu_vector_load" 9 + (eq_attr "memory" "load"))) + "znver4-vector,znver4-load,znver4-ivector*5") + ++(define_insn_reservation "znver5_ieu_vector_load" 9 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "other,multi") ++ (eq_attr "memory" "load"))) ++ "znver4-vector,znver5-load,znver4-ivector*5") ++ + ;; Floating Point + ;; FP movs + (define_insn_reservation "znver4_fp_cmov" 4 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (eq_attr "type" "fcmov")) + "znver4-vector,znver4-fvector*3") + + (define_insn_reservation 
"znver4_fp_mov_direct" 1 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (eq_attr "type" "fmov")) + "znver4-direct,znver4-fpu0|znver4-fpu1") + +@@ -388,6 +586,13 @@ (define_insn_reservation "znver4_fp_mov_direct_load" 6 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu0|znver4-fpu1") + ++(define_insn_reservation "znver5_fp_mov_direct_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "znver1_decode" "direct") ++ (and (eq_attr "type" "fmov") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu0|znver4-fpu1") ++ + ;;FST + (define_insn_reservation "znver4_fp_mov_direct_store" 6 + (and (eq_attr "cpu" "znver4") +@@ -396,6 +601,13 @@ (define_insn_reservation "znver4_fp_mov_direct_store" 6 + (eq_attr "memory" "store")))) + "znver4-direct,znver4-fpu0|znver4-fpu1,znver4-fp-store") + ++(define_insn_reservation "znver5_fp_mov_direct_store" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "znver1_decode" "direct") ++ (and (eq_attr "type" "fmov") ++ (eq_attr "memory" "store")))) ++ "znver4-direct,znver4-fpu0|znver4-fpu1,znver5-fp-store256") ++ + ;;FILD + (define_insn_reservation "znver4_fp_mov_double_load" 13 + (and (eq_attr "cpu" "znver4") +@@ -404,6 +616,13 @@ (define_insn_reservation "znver4_fp_mov_double_load" 13 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu1") + ++(define_insn_reservation "znver5_fp_mov_double_load" 13 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "znver1_decode" "double") ++ (and (eq_attr "type" "fmov") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu1") ++ + ;;FIST + (define_insn_reservation "znver4_fp_mov_double_store" 7 + (and (eq_attr "cpu" "znver4") +@@ -412,9 +631,16 @@ (define_insn_reservation "znver4_fp_mov_double_store" 7 + (eq_attr "memory" "store")))) + "znver4-double,znver4-fpu1,znver4-fp-store") + ++(define_insn_reservation "znver5_fp_mov_double_store" 7 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr 
"znver1_decode" "double") ++ (and (eq_attr "type" "fmov") ++ (eq_attr "memory" "store")))) ++ "znver4-double,znver4-fpu1,znver5-fp-store256") ++ + ;; FSQRT + (define_insn_reservation "znver4_fsqrt" 22 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "fpspc") + (and (eq_attr "mode" "XF") + (eq_attr "memory" "none")))) +@@ -422,20 +648,20 @@ (define_insn_reservation "znver4_fsqrt" 22 + + ;; FPSPC instructions + (define_insn_reservation "znver4_fp_spc" 6 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "fpspc") + (eq_attr "memory" "none"))) + "znver4-vector,znver4-fvector*6") + + (define_insn_reservation "znver4_fp_insn_vector" 6 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "znver1_decode" "vector") + (eq_attr "type" "mmxcvt,sselog1,ssemov"))) + "znver4-vector,znver4-fvector*6") + + ;; FADD, FSUB, FMUL + (define_insn_reservation "znver4_fp_op_mul" 7 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "fop,fmul") + (eq_attr "memory" "none"))) + "znver4-direct,znver4-fpu0") +@@ -446,9 +672,14 @@ (define_insn_reservation "znver4_fp_op_mul_load" 12 + (eq_attr "memory" "load"))) + "znver4-direct,znver4-load,znver4-fpu0") + ++(define_insn_reservation "znver5_fp_op_mul_load" 12 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "fop,fmul") ++ (eq_attr "memory" "load"))) ++ "znver4-direct,znver5-load,znver4-fpu0") + ;; FDIV + (define_insn_reservation "znver4_fp_div" 15 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "fdiv") + (eq_attr "memory" "none"))) + "znver4-direct,znver4-fdiv*6") +@@ -459,6 +690,12 @@ (define_insn_reservation "znver4_fp_div_load" 20 + (eq_attr "memory" "load"))) + "znver4-direct,znver4-load,znver4-fdiv*6") + ++(define_insn_reservation "znver5_fp_div_load" 20 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "fdiv") 
++ (eq_attr "memory" "load"))) ++ "znver4-direct,znver5-load,znver4-fdiv*6") ++ + (define_insn_reservation "znver4_fp_idiv_load" 24 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "fdiv") +@@ -466,15 +703,27 @@ (define_insn_reservation "znver4_fp_idiv_load" 24 + (eq_attr "memory" "load")))) + "znver4-double,znver4-load,znver4-fdiv*6") + ++(define_insn_reservation "znver5_fp_idiv_load" 24 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "fdiv") ++ (and (eq_attr "fp_int_src" "true") ++ (eq_attr "memory" "load")))) ++ "znver4-double,znver5-load,znver4-fdiv*6") ++ + ;; FABS, FCHS + (define_insn_reservation "znver4_fp_fsgn" 1 + (and (eq_attr "cpu" "znver4") + (eq_attr "type" "fsgn")) + "znver4-direct,znver4-fpu0|znver4-fpu1") + ++(define_insn_reservation "znver5_fp_fsgn" 1 ++ (and (eq_attr "cpu" "znver5") ++ (eq_attr "type" "fsgn")) ++ "znver4-direct,znver4-fpu1|znver4-fpu2") ++ + ;; FCMP + (define_insn_reservation "znver4_fp_fcmp" 3 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "fcmp") + (eq_attr "memory" "none"))) + "znver4-direct,znver4-fpu1") +@@ -486,14 +735,21 @@ (define_insn_reservation "znver4_fp_fcmp_double" 4 + (eq_attr "memory" "none")))) + "znver4-double,znver4-fpu1,znver4-fpu2") + ++(define_insn_reservation "znver5_fp_fcmp_double" 4 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "fcmp") ++ (and (eq_attr "znver1_decode" "double") ++ (eq_attr "memory" "none")))) ++ "znver4-double,znver4-fpu1,znver5-fp-store256") ++ + ;; MMX, SSE, SSEn.n instructions + (define_insn_reservation "znver4_fp_mmx " 1 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (eq_attr "type" "mmx")) + "znver4-direct,znver4-fpu1|znver4-fpu2") + + (define_insn_reservation "znver4_mmx_add_cmp" 1 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "mmxadd,mmxcmp") + (eq_attr "memory" "none"))) + "znver4-direct,znver4-fpu") +@@ -504,32 +760,62 @@ 
(define_insn_reservation "znver4_mmx_add_cmp_load" 6 + (eq_attr "memory" "load"))) + "znver4-direct,znver4-load,znver4-fpu") + ++(define_insn_reservation "znver5_mmx_add_cmp_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "mmxadd,mmxcmp") ++ (eq_attr "memory" "load"))) ++ "znver4-direct,znver5-load,znver4-fpu") ++ + (define_insn_reservation "znver4_mmx_insn" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1,mmxshft") + (eq_attr "memory" "none"))) + "znver4-direct,znver4-fpu1|znver4-fpu2") + ++(define_insn_reservation "znver5_mmx_insn" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1,mmxshft") ++ (eq_attr "memory" "none"))) ++ "znver4-direct,znver4-fpu0|znver4-fpu1|znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_mmx_insn_load" 6 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1,mmxshft") + (eq_attr "memory" "load"))) + "znver4-direct,znver4-load,znver4-fpu1|znver4-fpu2") + ++(define_insn_reservation "znver5_mmx_insn_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1,mmxshft") ++ (eq_attr "memory" "load"))) ++ "znver4-direct,znver5-load,znver4-fpu0|znver4-fpu1|znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_mmx_mov" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "mmxmov") + (eq_attr "memory" "store"))) + "znver4-direct,znver4-fp-store") + ++(define_insn_reservation "znver5_mmx_mov" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "mmxmov") ++ (eq_attr "memory" "store"))) ++ "znver4-direct,znver5-fp-store256") ++ + (define_insn_reservation "znver4_mmx_mov_load" 6 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "mmxmov") + (eq_attr "memory" "both"))) + "znver4-direct,znver4-load,znver4-fp-store") + ++(define_insn_reservation "znver5_mmx_mov_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "mmxmov") ++ (eq_attr "memory" "both"))) ++ 
"znver4-direct,znver5-load,znver5-fp-store256") ++ + (define_insn_reservation "znver4_mmx_mul" 3 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "mmxmul") + (eq_attr "memory" "none"))) + "znver4-direct,znver4-fpu0|znver4-fpu3") +@@ -540,9 +826,15 @@ (define_insn_reservation "znver4_mmx_mul_load" 8 + (eq_attr "memory" "load"))) + "znver4-direct,znver4-load,znver4-fpu0|znver4-fpu3") + ++(define_insn_reservation "znver5_mmx_mul_load" 8 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "mmxmul") ++ (eq_attr "memory" "load"))) ++ "znver4-direct,znver5-load,znver4-fpu0|znver4-fpu3") ++ + ;; AVX instructions + (define_insn_reservation "znver4_sse_log" 1 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "sselog") + (and (eq_attr "mode" "V4SF,V8SF,V2DF,V4DF,QI,HI,SI,DI,TI,OI") + (eq_attr "memory" "none")))) +@@ -555,6 +847,13 @@ (define_insn_reservation "znver4_sse_log_load" 6 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu") + ++(define_insn_reservation "znver5_sse_log_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "mode" "V4SF,V8SF,V2DF,V4DF,QI,HI,SI,DI,TI,OI") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu") ++ + (define_insn_reservation "znver4_sse_log1" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sselog1") +@@ -562,6 +861,13 @@ (define_insn_reservation "znver4_sse_log1" 1 + (eq_attr "memory" "store")))) + "znver4-direct,znver4-fpu1|znver4-fpu2,znver4-fp-store") + ++(define_insn_reservation "znver5_sse_log1" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sselog1") ++ (and (eq_attr "mode" "V4SF,V8SF,V2DF,V4DF,QI,HI,SI,DI,TI,OI") ++ (eq_attr "memory" "store")))) ++ "znver4-direct,znver4-fpu1|znver4-fpu2,znver5-fp-store256") ++ + (define_insn_reservation "znver4_sse_log1_load" 6 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sselog1") +@@ -569,20 
+875,39 @@ (define_insn_reservation "znver4_sse_log1_load" 6 + (eq_attr "memory" "both")))) + "znver4-direct,znver4-load,znver4-fpu1|znver4-fpu2,znver4-fp-store") + ++(define_insn_reservation "znver5_sse_log1_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sselog1") ++ (and (eq_attr "mode" "V4SF,V8SF,V2DF,V4DF,QI,HI,SI,DI,TI,OI") ++ (eq_attr "memory" "both")))) ++ "znver4-direct,znver5-load,znver4-fpu1|znver4-fpu2,znver5-fp-store256") ++ + (define_insn_reservation "znver4_sse_comi" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssecomi") + (eq_attr "memory" "store"))) + "znver4-double,znver4-fpu2|znver4-fpu3,znver4-fp-store") + ++(define_insn_reservation "znver5_sse_comi" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecomi") ++ (eq_attr "memory" "store"))) ++ "znver4-double,znver4-fpu2|znver4-fpu3,znver5-fp-store256") ++ + (define_insn_reservation "znver4_sse_comi_load" 6 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssecomi") + (eq_attr "memory" "both"))) + "znver4-double,znver4-load,znver4-fpu2|znver4-fpu3,znver4-fp-store") + ++(define_insn_reservation "znver5_sse_comi_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecomi") ++ (eq_attr "memory" "both"))) ++ "znver4-double,znver5-load,znver4-fpu2|znver4-fpu3,znver5-fp-store256") ++ + (define_insn_reservation "znver4_sse_test" 1 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "prefix_extra" "1") + (and (eq_attr "type" "ssecomi") + (eq_attr "memory" "none")))) +@@ -595,8 +920,15 @@ (define_insn_reservation "znver4_sse_test_load" 6 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu1|znver4-fpu2") + ++(define_insn_reservation "znver5_sse_test_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "prefix_extra" "1") ++ (and (eq_attr "type" "ssecomi") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu1|znver4-fpu2") ++ + (define_insn_reservation 
"znver4_sse_imul" 3 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "sseimul") + (and (eq_attr "mode" "QI,HI,SI,DI,TI,OI") + (eq_attr "memory" "none")))) +@@ -609,8 +941,15 @@ (define_insn_reservation "znver4_sse_imul_load" 8 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu0|znver4-fpu1") + ++(define_insn_reservation "znver5_sse_imul_load" 8 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseimul") ++ (and (eq_attr "mode" "QI,HI,SI,DI,TI,OI") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu0|znver4-fpu1") ++ + (define_insn_reservation "znver4_sse_mov" 1 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "ssemov") + (and (eq_attr "mode" "QI,HI,SI,DI,TI,OI") + (eq_attr "memory" "none")))) +@@ -623,6 +962,13 @@ (define_insn_reservation "znver4_sse_mov_load" 6 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu1|znver4-fpu2") + ++(define_insn_reservation "znver5_sse_mov_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "mode" "QI,HI,SI,DI,TI,OI") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu1|znver4-fpu2") ++ + (define_insn_reservation "znver4_sse_mov_store" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssemov") +@@ -630,8 +976,15 @@ (define_insn_reservation "znver4_sse_mov_store" 1 + (eq_attr "memory" "store")))) + "znver4-direct,znver4-fpu1|znver4-fpu2,znver4-fp-store") + ++(define_insn_reservation "znver5_sse_mov_store" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "mode" "QI,HI,SI,DI,TI,OI") ++ (eq_attr "memory" "store")))) ++ "znver4-direct,znver4-fpu1|znver4-fpu2,znver5-fp-store256") ++ + (define_insn_reservation "znver4_sse_mov_fp" 1 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "ssemov") + (and (eq_attr "mode" 
"V16SF,V8DF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") + (eq_attr "memory" "none")))) +@@ -644,6 +997,13 @@ (define_insn_reservation "znver4_sse_mov_fp_load" 6 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu") + ++(define_insn_reservation "znver5_sse_mov_fp_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "mode" "V16SF,V8DF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu") ++ + (define_insn_reservation "znver4_sse_mov_fp_store" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssemov") +@@ -651,8 +1011,22 @@ (define_insn_reservation "znver4_sse_mov_fp_store" 1 + (eq_attr "memory" "store")))) + "znver4-direct,znver4-fp-store") + ++(define_insn_reservation "znver5_sse_mov_fp_store" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") ++ (eq_attr "memory" "store")))) ++ "znver4-direct,znver5-fp-store256") ++ ++(define_insn_reservation "znver5_sse_mov_fp_store_512" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "mode" "V16SF,V8DF") ++ (eq_attr "memory" "store")))) ++ "znver4-direct,znver5-fp-store-512") ++ + (define_insn_reservation "znver4_sse_add" 3 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "sseadd") + (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") + (eq_attr "memory" "none")))) +@@ -665,8 +1039,15 @@ (define_insn_reservation "znver4_sse_add_load" 8 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu2|znver4-fpu3") + ++(define_insn_reservation "znver5_sse_add_load" 8 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseadd") ++ (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_sse_add1" 4 +- (and (eq_attr "cpu" 
"znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "sseadd1") + (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") + (eq_attr "memory" "none")))) +@@ -679,8 +1060,15 @@ (define_insn_reservation "znver4_sse_add1_load" 9 + (eq_attr "memory" "load")))) + "znver4-vector,znver4-load,znver4-fvector*2") + ++(define_insn_reservation "znver5_sse_add1_load" 9 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseadd1") ++ (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") ++ (eq_attr "memory" "load")))) ++ "znver4-vector,znver5-load,znver4-fvector*2") ++ + (define_insn_reservation "znver4_sse_iadd" 1 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "sseiadd") + (and (eq_attr "mode" "QI,HI,SI,DI,TI,OI") + (eq_attr "memory" "none")))) +@@ -693,8 +1081,15 @@ (define_insn_reservation "znver4_sse_iadd_load" 6 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu") + ++(define_insn_reservation "znver5_sse_iadd_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseiadd") ++ (and (eq_attr "mode" "QI,HI,SI,DI,TI,OI") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu") ++ + (define_insn_reservation "znver4_sse_mul" 3 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "ssemul") + (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") + (eq_attr "memory" "none")))) +@@ -707,15 +1102,22 @@ (define_insn_reservation "znver4_sse_mul_load" 8 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu0|znver4-fpu1") + ++(define_insn_reservation "znver5_sse_mul_load" 8 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssemul") ++ (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu0|znver4-fpu1") ++ + (define_insn_reservation "znver4_sse_div_pd" 13 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" 
"znver4,znver5") + (and (eq_attr "type" "ssediv") + (and (eq_attr "mode" "V4DF,V2DF,V1DF") + (eq_attr "memory" "none")))) + "znver4-direct,znver4-fdiv*5") + + (define_insn_reservation "znver4_sse_div_ps" 10 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "ssediv") + (and (eq_attr "mode" "V8SF,V4SF,V2SF,SF") + (eq_attr "memory" "none")))) +@@ -728,6 +1130,13 @@ (define_insn_reservation "znver4_sse_div_pd_load" 18 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fdiv*5") + ++(define_insn_reservation "znver5_sse_div_pd_load" 18 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssediv") ++ (and (eq_attr "mode" "V4DF,V2DF,V1DF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fdiv*5") ++ + (define_insn_reservation "znver4_sse_div_ps_load" 15 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssediv") +@@ -735,8 +1144,15 @@ (define_insn_reservation "znver4_sse_div_ps_load" 15 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fdiv*3") + ++(define_insn_reservation "znver5_sse_div_ps_load" 15 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssediv") ++ (and (eq_attr "mode" "V8SF,V4SF,V2SF,SF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fdiv*3") ++ + (define_insn_reservation "znver4_sse_cmp_avx" 1 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "ssecmp") + (and (eq_attr "prefix" "vex") + (eq_attr "memory" "none")))) +@@ -749,20 +1165,39 @@ (define_insn_reservation "znver4_sse_cmp_avx_load" 6 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu0|znver4-fpu1") + ++(define_insn_reservation "znver5_sse_cmp_avx_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "prefix" "vex") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu0|znver4-fpu1") ++ + (define_insn_reservation "znver4_sse_comi_avx" 1 + (and 
(eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssecomi") + (eq_attr "memory" "store"))) + "znver4-direct,znver4-fpu2+znver4-fpu3,znver4-fp-store") + ++(define_insn_reservation "znver5_sse_comi_avx" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecomi") ++ (eq_attr "memory" "store"))) ++ "znver4-direct,znver4-fpu2+znver4-fpu3,znver5-fp-store256") ++ + (define_insn_reservation "znver4_sse_comi_avx_load" 6 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssecomi") + (eq_attr "memory" "both"))) + "znver4-direct,znver4-load,znver4-fpu2+znver4-fpu3,znver4-fp-store") + ++(define_insn_reservation "znver5_sse_comi_avx_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecomi") ++ (eq_attr "memory" "both"))) ++ "znver4-direct,znver5-load,znver4-fpu2+znver4-fpu3,znver5-fp-store256") ++ + (define_insn_reservation "znver4_sse_cvt" 3 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") + (eq_attr "memory" "none")))) +@@ -775,8 +1210,15 @@ (define_insn_reservation "znver4_sse_cvt_load" 8 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu2|znver4-fpu3") + ++(define_insn_reservation "znver5_sse_cvt_load" 8 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecvt") ++ (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_sse_icvt" 3 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "mode" "SI") + (eq_attr "memory" "none")))) +@@ -789,6 +1231,13 @@ (define_insn_reservation "znver4_sse_icvt_store" 4 + (eq_attr "memory" "store")))) + "znver4-double,znver4-fpu2|znver4-fpu3,znver4-fp-store") + ++(define_insn_reservation "znver5_sse_icvt_store" 4 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecvt") ++ 
(and (eq_attr "mode" "SI") ++ (eq_attr "memory" "store")))) ++ "znver4-double,znver4-fpu2|znver4-fpu3,znver5-fp-store256") ++ + (define_insn_reservation "znver4_sse_shuf" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseshuf") +@@ -796,6 +1245,13 @@ (define_insn_reservation "znver4_sse_shuf" 1 + (eq_attr "memory" "none")))) + "znver4-direct,znver4-fpu1|znver4-fpu2") + ++(define_insn_reservation "znver5_sse_shuf" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseshuf") ++ (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fpu1|znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_sse_shuf_load" 6 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseshuf") +@@ -803,8 +1259,15 @@ (define_insn_reservation "znver4_sse_shuf_load" 6 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu") + ++(define_insn_reservation "znver5_sse_shuf_load" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseshuf") ++ (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu") ++ + (define_insn_reservation "znver4_sse_ishuf" 3 +- (and (eq_attr "cpu" "znver4") ++ (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "sseshuf") + (and (eq_attr "mode" "OI") + (eq_attr "memory" "none")))) +@@ -817,6 +1280,13 @@ (define_insn_reservation "znver4_sse_ishuf_load" 8 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu1|znver4-fpu2") + ++(define_insn_reservation "znver5_sse_ishuf_load" 8 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseshuf") ++ (and (eq_attr "mode" "OI") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu1|znver4-fpu2") ++ + ;; AVX512 instructions + (define_insn_reservation "znver4_sse_log_evex" 1 + (and (eq_attr "cpu" "znver4") +@@ -825,6 +1295,13 @@ (define_insn_reservation "znver4_sse_log_evex" 1 + (eq_attr "memory" 
"none")))) + "znver4-direct,znver4-fpu0*2|znver4-fpu1*2|znver4-fpu2*2|znver4-fpu3*2") + ++(define_insn_reservation "znver5_sse_log_evex" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "mode" "V16SF,V8DF,XI") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fpu0|znver4-fpu1|znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_sse_log_evex_load" 7 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sselog") +@@ -832,6 +1309,13 @@ (define_insn_reservation "znver4_sse_log_evex_load" 7 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu0*2|znver4-fpu1*2|znver4-fpu2*2|znver4-fpu3*2") + ++(define_insn_reservation "znver5_sse_log_evex_load" 7 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "mode" "V16SF,V8DF,XI") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu0|znver4-fpu1|znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_sse_log1_evex" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sselog1") +@@ -839,6 +1323,13 @@ (define_insn_reservation "znver4_sse_log1_evex" 1 + (eq_attr "memory" "none")))) + "znver4-direct,znver4-fpu1*2|znver4-fpu2*2,znver4-fp-store") + ++(define_insn_reservation "znver5_sse_log1_evex" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sselog1") ++ (and (eq_attr "mode" "V16SF,V8DF,XI") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fpu1|znver4-fpu2,znver5-fp-store-512") ++ + (define_insn_reservation "znver4_sse_log1_evex_load" 7 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sselog1") +@@ -846,6 +1337,13 @@ (define_insn_reservation "znver4_sse_log1_evex_load" 7 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu1*2|znver4-fpu2*2,znver4-fp-store") + ++(define_insn_reservation "znver5_sse_log1_evex_load" 7 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sselog1") ++ (and (eq_attr "mode" "V16SF,V8DF,XI") ++ (eq_attr "memory" 
"load")))) ++ "znver4-direct,znver5-load,znver4-fpu1|znver4-fpu2,znver5-fp-store-512") ++ + (define_insn_reservation "znver4_sse_mul_evex" 3 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssemul") +@@ -853,6 +1351,13 @@ (define_insn_reservation "znver4_sse_mul_evex" 3 + (eq_attr "memory" "none")))) + "znver4-direct,znver4-fpu0*2|znver4-fpu1*2") + ++(define_insn_reservation "znver5_sse_mul_evex" 3 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssemul") ++ (and (eq_attr "mode" "V16SF,V8DF") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fpu0|znver4-fpu1") ++ + (define_insn_reservation "znver4_sse_mul_evex_load" 9 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssemul") +@@ -860,6 +1365,13 @@ (define_insn_reservation "znver4_sse_mul_evex_load" 9 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu0*2|znver4-fpu1*2") + ++(define_insn_reservation "znver5_sse_mul_evex_load" 9 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssemul") ++ (and (eq_attr "mode" "V16SF,V8DF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver4-load,znver4-fpu0|znver4-fpu1") ++ + (define_insn_reservation "znver4_sse_imul_evex" 3 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseimul") +@@ -867,6 +1379,13 @@ (define_insn_reservation "znver4_sse_imul_evex" 3 + (eq_attr "memory" "none")))) + "znver4-direct,znver4-fpu0*2|znver4-fpu3*2") + ++(define_insn_reservation "znver5_sse_imul_evex" 3 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseimul") ++ (and (eq_attr "mode" "XI") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fpu0|znver4-fpu3") ++ + (define_insn_reservation "znver4_sse_imul_evex_load" 9 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseimul") +@@ -874,6 +1393,13 @@ (define_insn_reservation "znver4_sse_imul_evex_load" 9 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu0*2|znver4-fpu1*2") + ++(define_insn_reservation "znver5_sse_imul_evex_load" 9 ++ (and 
(eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseimul") ++ (and (eq_attr "mode" "XI") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver4-load,znver4-fpu0|znver4-fpu1") ++ + (define_insn_reservation "znver4_sse_mov_evex" 4 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssemov") +@@ -881,6 +1407,13 @@ (define_insn_reservation "znver4_sse_mov_evex" 4 + (eq_attr "memory" "none")))) + "znver4-direct,znver4-fpu1*2|znver4-fpu2*2") + ++(define_insn_reservation "znver5_sse_mov_evex" 2 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "mode" "XI") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fpu1|znver4-fpu2") ++ + (define_insn_reservation "znver4_sse_mov_evex_load" 10 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssemov") +@@ -888,6 +1421,13 @@ (define_insn_reservation "znver4_sse_mov_evex_load" 10 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu1*2|znver4-fpu2*2") + ++(define_insn_reservation "znver5_sse_mov_evex_load" 8 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "mode" "XI") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver4-load,znver4-fpu1|znver4-fpu2") ++ + (define_insn_reservation "znver4_sse_mov_evex_store" 5 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssemov") +@@ -895,6 +1435,13 @@ (define_insn_reservation "znver4_sse_mov_evex_store" 5 + (eq_attr "memory" "store")))) + "znver4-direct,znver4-fpu1*2|znver4-fpu2*2,znver4-fp-store") + ++(define_insn_reservation "znver5_sse_mov_evex_store" 3 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "mode" "XI") ++ (eq_attr "memory" "store")))) ++ "znver4-direct,znver4-fpu1|znver4-fpu2,znver5-fp-store-512") ++ + (define_insn_reservation "znver4_sse_add_evex" 3 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseadd") +@@ -902,6 +1449,13 @@ (define_insn_reservation "znver4_sse_add_evex" 3 + (eq_attr "memory" "none")))) + 
"znver4-direct,znver4-fpu2*2|znver4-fpu3*2") + ++(define_insn_reservation "znver5_sse_add_evex" 2 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseadd") ++ (and (eq_attr "mode" "V16SF,V8DF") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_sse_add_evex_load" 9 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseadd") +@@ -909,6 +1463,13 @@ (define_insn_reservation "znver4_sse_add_evex_load" 9 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu2*2|znver4-fpu3*2") + ++(define_insn_reservation "znver5_sse_add_evex_load" 8 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseadd") ++ (and (eq_attr "mode" "V16SF,V8DF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver4-load,znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_sse_iadd_evex" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseiadd") +@@ -916,6 +1477,13 @@ (define_insn_reservation "znver4_sse_iadd_evex" 1 + (eq_attr "memory" "none")))) + "znver4-direct,znver4-fpu0*2|znver4-fpu1*2|znver4-fpu2*2|znver4-fpu3*2") + ++(define_insn_reservation "znver5_sse_iadd_evex" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseiadd") ++ (and (eq_attr "mode" "XI") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fpu0|znver4-fpu1|znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_sse_iadd_evex_load" 7 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseiadd") +@@ -923,6 +1491,13 @@ (define_insn_reservation "znver4_sse_iadd_evex_load" 7 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu0*2|znver4-fpu1*2|znver4-fpu2*2|znver4-fpu3*2") + ++(define_insn_reservation "znver5_sse_iadd_evex_load" 7 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseiadd") ++ (and (eq_attr "mode" "XI") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver4-load,znver4-fpu0|znver4-fpu1|znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation 
"znver4_sse_div_pd_evex" 13 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssediv") +@@ -930,6 +1505,13 @@ (define_insn_reservation "znver4_sse_div_pd_evex" 13 + (eq_attr "memory" "none")))) + "znver4-direct,znver4-fdiv*9") + ++(define_insn_reservation "znver5_sse_div_pd_evex" 13 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssediv") ++ (and (eq_attr "mode" "V8DF") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fdiv*9") ++ + (define_insn_reservation "znver4_sse_div_ps_evex" 10 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssediv") +@@ -937,6 +1519,13 @@ (define_insn_reservation "znver4_sse_div_ps_evex" 10 + (eq_attr "memory" "none")))) + "znver4-direct,znver4-fdiv*6") + ++(define_insn_reservation "znver5_sse_div_ps_evex" 10 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssediv") ++ (and (eq_attr "mode" "V16SF") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fdiv*6") ++ + (define_insn_reservation "znver4_sse_div_pd_evex_load" 19 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssediv") +@@ -944,6 +1533,13 @@ (define_insn_reservation "znver4_sse_div_pd_evex_load" 19 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fdiv*9") + ++(define_insn_reservation "znver5_sse_div_pd_evex_load" 19 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssediv") ++ (and (eq_attr "mode" "V8DF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fdiv*9") ++ + (define_insn_reservation "znver4_sse_div_ps_evex_load" 16 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssediv") +@@ -951,6 +1547,13 @@ (define_insn_reservation "znver4_sse_div_ps_evex_load" 16 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fdiv*6") + ++(define_insn_reservation "znver5_sse_div_ps_evex_load" 16 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssediv") ++ (and (eq_attr "mode" "V16SF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fdiv*6") ++ + 
(define_insn_reservation "znver4_sse_cmp_avx128" 3 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssecmp") +@@ -959,6 +1562,14 @@ (define_insn_reservation "znver4_sse_cmp_avx128" 3 + (eq_attr "memory" "none"))))) + "znver4-direct,znver4-fpu0*2|znver4-fpu1*2") + ++(define_insn_reservation "znver5_sse_cmp_avx128" 3 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "mode" "V4SF,V2DF,V2SF,V1DF,SF") ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "none"))))) ++ "znver4-direct,znver4-fpu1|znver4-fpu2") ++ + (define_insn_reservation "znver4_sse_cmp_avx128_load" 9 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssecmp") +@@ -967,6 +1578,14 @@ (define_insn_reservation "znver4_sse_cmp_avx128_load" 9 + (eq_attr "memory" "load"))))) + "znver4-direct,znver4-load,znver4-fpu0*2|znver4-fpu1*2") + ++(define_insn_reservation "znver5_sse_cmp_avx128_load" 9 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "mode" "V4SF,V2DF,V2SF,V1DF,SF") ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "load"))))) ++ "znver4-direct,znver5-load,znver4-fpu1|znver4-fpu2") ++ + (define_insn_reservation "znver4_sse_cmp_avx256" 4 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssecmp") +@@ -975,6 +1594,14 @@ (define_insn_reservation "znver4_sse_cmp_avx256" 4 + (eq_attr "memory" "none"))))) + "znver4-direct,znver4-fpu0*2|znver4-fpu1*2") + ++(define_insn_reservation "znver5_sse_cmp_avx256" 4 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "mode" "V8SF,V4DF") ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "none"))))) ++ "znver4-direct,znver4-fpu1|znver4-fpu2") ++ + (define_insn_reservation "znver4_sse_cmp_avx256_load" 10 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssecmp") +@@ -983,6 +1610,14 @@ (define_insn_reservation "znver4_sse_cmp_avx256_load" 10 + (eq_attr "memory" "load"))))) + "znver4-direct,znver4-load,znver4-fpu0*2|znver4-fpu1*2") + 
++(define_insn_reservation "znver5_sse_cmp_avx256_load" 10 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "mode" "V8SF,V4DF") ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "load"))))) ++ "znver4-direct,znver5-load,znver4-fpu1|znver4-fpu2") ++ + (define_insn_reservation "znver4_sse_cmp_avx512" 5 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssecmp") +@@ -991,6 +1626,14 @@ (define_insn_reservation "znver4_sse_cmp_avx512" 5 + (eq_attr "memory" "none"))))) + "znver4-direct,znver4-fpu0*2|znver4-fpu1*2") + ++(define_insn_reservation "znver5_sse_cmp_avx512" 5 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "mode" "V16SF,V8DF") ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "none"))))) ++ "znver4-direct,znver4-fpu1|znver4-fpu2") ++ + (define_insn_reservation "znver4_sse_cmp_avx512_load" 11 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssecmp") +@@ -999,6 +1642,14 @@ (define_insn_reservation "znver4_sse_cmp_avx512_load" 11 + (eq_attr "memory" "load"))))) + "znver4-direct,znver4-load,znver4-fpu0*2|znver4-fpu1*2") + ++(define_insn_reservation "znver5_sse_cmp_avx512_load" 11 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecmp") ++ (and (eq_attr "mode" "V16SF,V8DF") ++ (and (eq_attr "prefix" "evex") ++ (eq_attr "memory" "load"))))) ++ "znver4-direct,znver5-load,znver4-fpu1|znver4-fpu2") ++ + (define_insn_reservation "znver4_sse_cvt_evex" 6 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssecvt") +@@ -1006,6 +1657,13 @@ (define_insn_reservation "znver4_sse_cvt_evex" 6 + (eq_attr "memory" "none")))) + "znver4-direct,znver4-fpu1*2|znver4-fpu2*2,znver4-fpu2*2|znver4-fpu3*2") + ++(define_insn_reservation "znver5_sse_cvt_evex" 6 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecvt") ++ (and (eq_attr "mode" "V16SF,V8DF") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fpu1|znver4-fpu2,znver4-fpu2|znver4-fpu3") ++ + 
(define_insn_reservation "znver4_sse_cvt_evex_load" 12 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssecvt") +@@ -1013,6 +1671,13 @@ (define_insn_reservation "znver4_sse_cvt_evex_load" 12 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu1*2|znver4-fpu2*2,znver4-fpu2*2|znver4-fpu3*2") + ++(define_insn_reservation "znver5_sse_cvt_evex_load" 12 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssecvt") ++ (and (eq_attr "mode" "V16SF,V8DF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu1|znver4-fpu2,znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_sse_shuf_evex" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseshuf") +@@ -1020,6 +1685,13 @@ (define_insn_reservation "znver4_sse_shuf_evex" 1 + (eq_attr "memory" "none")))) + "znver4-direct,znver4-fpu0*2|znver4-fpu1*2|znver4-fpu2*2|znver4-fpu3*2") + ++(define_insn_reservation "znver5_sse_shuf_evex" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseshuf") ++ (and (eq_attr "mode" "V16SF,V8DF") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fpu0|znver4-fpu1|znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_sse_shuf_evex_load" 7 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseshuf") +@@ -1027,6 +1699,13 @@ (define_insn_reservation "znver4_sse_shuf_evex_load" 7 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu0*2|znver4-fpu1*2|znver4-fpu2*2|znver4-fpu3*2") + ++(define_insn_reservation "znver5_sse_shuf_evex_load" 7 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseshuf") ++ (and (eq_attr "mode" "V16SF,V8DF") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu0|znver4-fpu1|znver4-fpu2|znver4-fpu3") ++ + (define_insn_reservation "znver4_sse_ishuf_evex" 4 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseshuf") +@@ -1034,6 +1713,13 @@ (define_insn_reservation "znver4_sse_ishuf_evex" 4 + (eq_attr "memory" "none")))) + 
"znver4-direct,znver4-fpu1*2|znver4-fpu2*2") + ++(define_insn_reservation "znver5_sse_ishuf_evex" 5 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseshuf") ++ (and (eq_attr "mode" "XI") ++ (eq_attr "memory" "none")))) ++ "znver4-direct,znver4-fpu1|znver4-fpu2") ++ + (define_insn_reservation "znver4_sse_ishuf_evex_load" 10 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseshuf") +@@ -1041,18 +1727,37 @@ (define_insn_reservation "znver4_sse_ishuf_evex_load" 10 + (eq_attr "memory" "load")))) + "znver4-direct,znver4-load,znver4-fpu1*2|znver4-fpu2*2") + ++(define_insn_reservation "znver5_sse_ishuf_evex_load" 10 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseshuf") ++ (and (eq_attr "mode" "XI") ++ (eq_attr "memory" "load")))) ++ "znver4-direct,znver5-load,znver4-fpu1|znver4-fpu2") ++ + (define_insn_reservation "znver4_sse_muladd" 4 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "ssemuladd") + (eq_attr "memory" "none"))) + "znver4-direct,znver4-fpu0*2|znver4-fpu1*2") + ++(define_insn_reservation "znver5_sse_muladd" 4 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "ssemuladd") ++ (eq_attr "memory" "none"))) ++ "znver4-direct,znver4-fpu0|znver4-fpu1") ++ + (define_insn_reservation "znver4_sse_muladd_load" 10 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sseshuf") + (eq_attr "memory" "load"))) + "znver4-direct,znver4-load,znver4-fpu0*2|znver4-fpu1*2") + ++(define_insn_reservation "znver5_sse_muladd_load" 10 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "sseshuf") ++ (eq_attr "memory" "load"))) ++ "znver4-direct,znver5-load,znver4-fpu1|znver4-fpu2") ++ + ;; AVX512 mask instructions + + (define_insn_reservation "znver4_sse_mskmov" 2 +@@ -1061,8 +1766,20 @@ (define_insn_reservation "znver4_sse_mskmov" 2 + (eq_attr "memory" "none"))) + "znver4-direct,znver4-fpu0*2|znver4-fpu1*2") + ++(define_insn_reservation "znver5_sse_mskmov" 2 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "mskmov") ++ 
(eq_attr "memory" "none"))) ++ "znver4-direct,znver4-fpu0|znver4-fpu1") ++ + (define_insn_reservation "znver4_sse_msklog" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "msklog") + (eq_attr "memory" "none"))) + "znver4-direct,znver4-fpu2*2|znver4-fpu3*2") ++ ++(define_insn_reservation "znver5_sse_msklog" 1 ++ (and (eq_attr "cpu" "znver5") ++ (and (eq_attr "type" "msklog") ++ (eq_attr "memory" "none"))) ++ "znver4-direct,znver4-fpu0|znver4-fpu3") +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index d6fcd611339..5c6ce67b7d1 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -22060,6 +22060,9 @@ AMD Family 19h Zen version 3. + + @item znver4 + AMD Family 19h Zen version 4. ++ ++@item znver5 ++AMD Family 1ah Zen version 5. + @end table + + Here is an example: +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 792ce283bb9..9bec6eb01bd 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -32660,6 +32660,16 @@ WBNOINVD, PKU, VPCLMULQDQ, VAES, AVX512F, AVX512DQ, AVX512IFMA, AVX512CD, + AVX512BW, AVX512VL, AVX512BF16, AVX512VBMI, AVX512VBMI2, AVX512VNNI, + AVX512BITALG, AVX512VPOPCNTDQ, GFNI and 64-bit instruction set extensions.) + ++@item znver5 ++AMD Family 1ah core based CPUs with x86-64 instruction set support. (This ++supersets BMI, BMI2, CLWB, F16C, FMA, FSGSBASE, AVX, AVX2, ADCX, RDSEED, ++MWAITX, SHA, CLZERO, AES, PCLMUL, CX16, MOVBE, MMX, SSE, SSE2, SSE3, SSE4A, ++SSSE3, SSE4.1, SSE4.2, ABM, XSAVEC, XSAVES, CLFLUSHOPT, POPCNT, RDPID, ++WBNOINVD, PKU, VPCLMULQDQ, VAES, AVX512F, AVX512DQ, AVX512IFMA, AVX512CD, ++AVX512BW, AVX512VL, AVX512BF16, AVX512VBMI, AVX512VBMI2, AVX512VNNI, ++AVX512BITALG, AVX512VPOPCNTDQ, GFNI, AVXVNNI, MOVDIRI, MOVDIR64B, ++AVX512VP2INTERSECT, PREFETCHI and 64-bit instruction set extensions.) ++ + @item btver1 + CPUs based on AMD Family 14h cores with x86-64 instruction set support. 
(This + supersets MMX, SSE, SSE2, SSE3, SSSE3, SSE4A, CX16, ABM and 64-bit +diff --git a/gcc/testsuite/g++.target/i386/mv29.C b/gcc/testsuite/g++.target/i386/mv29.C +index a8dd8ac4803..ab229534edd 100644 +--- a/gcc/testsuite/g++.target/i386/mv29.C ++++ b/gcc/testsuite/g++.target/i386/mv29.C +@@ -53,6 +53,10 @@ int __attribute__ ((target("arch=znver4"))) foo () { + return 10; + } + ++int __attribute__ ((target("arch=znver5"))) foo () { ++ return 11; ++} ++ + int main () + { + int val = foo (); +@@ -77,6 +81,8 @@ int main () + assert (val == 9); + else if (__builtin_cpu_is ("znver4")) + assert (val == 10); ++ else if (__builtin_cpu_is ("znver5")) ++ assert (val == 11); + else + assert (val == 0); + +diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc +index f466962c36c..6bfdcdade82 100644 +--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc ++++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc +@@ -221,6 +221,7 @@ extern void test_arch_znver1 (void) __attribute__((__target__("arch= + extern void test_arch_znver2 (void) __attribute__((__target__("arch=znver2"))); + extern void test_arch_znver3 (void) __attribute__((__target__("arch=znver3"))); + extern void test_arch_znver4 (void) __attribute__((__target__("arch=znver4"))); ++extern void test_arch_znver5 (void) __attribute__((__target__("arch=znver5"))); + + extern void test_tune_nocona (void) __attribute__((__target__("tune=nocona"))); + extern void test_tune_core2 (void) __attribute__((__target__("tune=core2"))); +@@ -245,6 +246,7 @@ extern void test_tune_znver1 (void) __attribute__((__target__("tune= + extern void test_tune_znver2 (void) __attribute__((__target__("tune=znver2"))); + extern void test_tune_znver3 (void) __attribute__((__target__("tune=znver3"))); + extern void test_tune_znver4 (void) __attribute__((__target__("tune=znver4"))); ++extern void test_tune_znver5 (void) __attribute__((__target__("tune=znver5"))); + + extern void test_fpmath_sse (void) 
__attribute__((__target__("sse2,fpmath=sse"))); + extern void test_fpmath_387 (void) __attribute__((__target__("sse2,fpmath=387"))); +-- +2.45.1 + diff --git a/sources b/sources index 9509f03..d75b15c 100644 --- a/sources +++ b/sources @@ -1,4 +1,4 @@ -SHA512 (gcc-13.2.1-20231205.tar.xz) = f132a2d35d6ea0b0d82043870f1c9f60dda119e63df6833b16d20e2c93e2978f62698d4cc72a6e2eefbaeded0683107de9ebeb93923b96c57dcd612504aa6a1b +SHA512 (gcc-13.3.1-20240522.tar.xz) = a0d7a71101af71309a5b3677cff7e0b193d06d1eac0ca642573dda24614dc8f101342d576727a1a3ea3bf0adf6b53a1a472516b1a4164391a32af735bcce9923 SHA512 (isl-0.24.tar.bz2) = aab3bddbda96b801d0f56d2869f943157aad52a6f6e6a61745edd740234c635c38231af20bc3f1a08d416a5e973a90e18249078ed8e4ae2f1d5de57658738e95 SHA512 (newlib-cygwin-9e09d6ed83cce4777a5950412647ccc603040409.tar.xz) = bef3fa04f7b1a915fc1356ebed114698b5cc835e9fa04b0becff05a9efc76c59fb376482990873d222d7acdcfee3c4f30f5a4cb7f3be1f291f1fa5f1c7a9d983 SHA512 (nvptx-tools-aa3404ad5a496cda5d79a50bedb1344fd63e8763.tar.xz) = 33a024326426375533cb5dd9b68b2508f37540be418d2506bfa19a5f5866485e9af150469064e9059b68136ad8cb080b3b12e7eb5c6b7d1288cf6bfb3f6bb5d0