import gcc-8.3.1-4.5.el8

parent 0e369741f6
commit 745403a9bb

Source checksum metadata (gcc tarball updated):
@@ -1,3 +1,3 @@
-1fe3aa7ce95faa0f4d7f08f0dfefd86ff4b43015 SOURCES/gcc-8.2.1-20180905.tar.xz
+8ee669ee60997110e6251c72dac66bf69bbe13c7 SOURCES/gcc-8.3.1-20190507.tar.xz
 3bdb3cc01fa7690a0e20ea5cfffcbe690f7665eb SOURCES/nvptx-newlib-aadc8eb0ec43b7cd0dd2dfb484bae63c8b05ef24.tar.xz
 ce8eb83be0ac37fb5d5388df455a980fe37b4f13 SOURCES/nvptx-tools-c28050f60193b3b95a18866a96f03334e874e78f.tar.xz

.gitignore (vendored, 2 lines changed):
@@ -1,3 +1,3 @@
-SOURCES/gcc-8.2.1-20180905.tar.xz
+SOURCES/gcc-8.3.1-20190507.tar.xz
 SOURCES/nvptx-newlib-aadc8eb0ec43b7cd0dd2dfb484bae63c8b05ef24.tar.xz
 SOURCES/nvptx-tools-c28050f60193b3b95a18866a96f03334e874e78f.tar.xz
SOURCES/gcc8-libgomp-20190503.patch (new file, 10060 lines)
File diff suppressed because it is too large.
SOURCES/gcc8-libgomp-testsuite.patch (new file, 41 lines)
@@ -0,0 +1,41 @@
--- libgomp/testsuite/libgomp-test-support.exp.in.jj	2018-04-25 09:40:31.323655308 +0200
+++ libgomp/testsuite/libgomp-test-support.exp.in	2019-04-25 20:01:50.028243827 +0200
@@ -2,4 +2,5 @@ set cuda_driver_include "@CUDA_DRIVER_IN
 set cuda_driver_lib "@CUDA_DRIVER_LIB@"
 set hsa_runtime_lib "@HSA_RUNTIME_LIB@"

+set offload_plugins "@offload_plugins@"
 set offload_targets "@offload_targets@"
--- libgomp/testsuite/lib/libgomp.exp.jj	2018-04-25 09:40:31.584655429 +0200
+++ libgomp/testsuite/lib/libgomp.exp	2019-05-24 11:41:51.015822702 +0200
@@ -40,7 +40,7 @@ load_file libgomp-test-support.exp
 # Populate offload_targets_s (offloading targets separated by a space), and
 # offload_targets_s_openacc (the same, but with OpenACC names; OpenACC spells
 # some of them a little differently).
-set offload_targets_s [split $offload_targets ","]
+set offload_targets_s [split $offload_plugins ","]
 set offload_targets_s_openacc {}
 foreach offload_target_openacc $offload_targets_s {
 # Translate to OpenACC names, or skip if not yet supported.
@@ -137,8 +137,8 @@ proc libgomp_init { args } {

 # Add liboffloadmic build directory in LD_LIBRARY_PATH to support
 # non-fallback testing for Intel MIC targets
- global offload_targets
- if { [string match "*,intelmic,*" ",$offload_targets,"] } {
+ global offload_plugins
+ if { [string match "*,intelmic,*" ",$offload_plugins,"] } {
 append always_ld_library_path ":${blddir}/../liboffloadmic/.libs"
 append always_ld_library_path ":${blddir}/../liboffloadmic/plugin/.libs"
 # libstdc++ is required by liboffloadmic
@@ -362,8 +362,8 @@ proc check_effective_target_offload_devi
 # Return 1 if configured for nvptx offloading.

 proc check_effective_target_openacc_nvidia_accel_configured { } {
- global offload_targets
- if { ![string match "*,nvptx,*" ",$offload_targets,"] } {
+ global offload_plugins
+ if { ![string match "*,nvptx,*" ",$offload_plugins,"] } {
 return 0
 }
 # PR libgomp/65099: Currently, we only support offloading in 64-bit
Deleted file (84 lines); former contents:
PR libgcc/60790
x86: Do not assume ELF constructors run before IFUNC resolvers.
* config/x86/host-config.h (libat_feat1_ecx, libat_feat1_edx):
Remove declarations.
(__libat_feat1, __libat_feat1_init): Declare.
(FEAT1_REGISTER): Define.
(load_feat1): New function.
(IFUNC_COND_1): Adjust.
* config/x86/init.c (libat_feat1_ecx, libat_feat1_edx)
(init_cpuid): Remove definitions.
(__libat_feat1): New variable.
(__libat_feat1_init): New function.

--- libatomic/config/x86/host-config.h	(revision 264990)
+++ libatomic/config/x86/host-config.h	(working copy)
@@ -25,13 +25,39 @@
 #if HAVE_IFUNC
 #include <cpuid.h>

-extern unsigned int libat_feat1_ecx HIDDEN;
-extern unsigned int libat_feat1_edx HIDDEN;
+#ifdef __x86_64__
+# define FEAT1_REGISTER ecx
+#else
+# define FEAT1_REGISTER edx
+#endif

+/* Value of the CPUID feature register FEAT1_REGISTER for the cmpxchg
+   bit for IFUNC_COND1 below.  */
+extern unsigned int __libat_feat1 HIDDEN;
+
+/* Initialize libat_feat1 and return its value.  */
+unsigned int __libat_feat1_init (void) HIDDEN;
+
+/* Return the value of the relevant feature register for the relevant
+   cmpxchg bit, or 0 if there is no CPUID support.  */
+static inline unsigned int
+__attribute__ ((const))
+load_feat1 (void)
+{
+  /* See the store in __libat_feat1_init.  */
+  unsigned int feat1 = __atomic_load_n (&__libat_feat1, __ATOMIC_RELAXED);
+  if (feat1 == 0)
+    /* Assume that initialization has not happened yet.  This may get
+       called repeatedly if the CPU does not have any feature bits at
+       all.  */
+    feat1 = __libat_feat1_init ();
+  return feat1;
+}
+
 #ifdef __x86_64__
-# define IFUNC_COND_1	(libat_feat1_ecx & bit_CMPXCHG16B)
+# define IFUNC_COND_1	(load_feat1 () & bit_CMPXCHG16B)
 #else
-# define IFUNC_COND_1	(libat_feat1_edx & bit_CMPXCHG8B)
+# define IFUNC_COND_1	(load_feat1 () & bit_CMPXCHG8B)
 #endif

 #ifdef __x86_64__
--- libatomic/config/x86/init.c	(revision 264990)
+++ libatomic/config/x86/init.c	(working copy)
@@ -26,13 +26,17 @@

 #if HAVE_IFUNC

-unsigned int libat_feat1_ecx, libat_feat1_edx;
+unsigned int __libat_feat1;

-static void __attribute__((constructor))
-init_cpuid (void)
+unsigned int
+__libat_feat1_init (void)
 {
-  unsigned int eax, ebx;
-  __get_cpuid (1, &eax, &ebx, &libat_feat1_ecx, &libat_feat1_edx);
+  unsigned int eax, ebx, ecx, edx;
+  FEAT1_REGISTER = 0;
+  __get_cpuid (1, &eax, &ebx, &ecx, &edx);
+  /* See the load in load_feat1.  */
+  __atomic_store_n (&__libat_feat1, FEAT1_REGISTER, __ATOMIC_RELAXED);
+  return FEAT1_REGISTER;
 }

 #endif  /* HAVE_IFUNC */
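The deleted patch above addressed the ordering hazard named in its ChangeLog: an IFUNC resolver can run before any ELF constructor, so CPU feature detection has to happen lazily on the resolver path rather than in a constructor. Below is a stand-alone sketch of that pattern, not the libatomic code itself; the function names (widest_cas_bytes, resolve_widest_cas) are made up for illustration, and it assumes GCC on an x86-64 ELF target where the ifunc attribute and <cpuid.h> are available.

#include <cpuid.h>

static unsigned int feat1;   /* cached ECX of CPUID leaf 1; 0 means "not detected yet" */

static unsigned int
load_feat1 (void)
{
  unsigned int f = __atomic_load_n (&feat1, __ATOMIC_RELAXED);
  if (f == 0)
    {
      /* Lazy init: safe even if no constructor has run yet.  */
      unsigned int a, b, c = 0, d;
      __get_cpuid (1, &a, &b, &c, &d);   /* c stays 0 if CPUID leaf 1 is unavailable */
      __atomic_store_n (&feat1, c, __ATOMIC_RELAXED);
      f = c;
    }
  return f;
}

static int impl_cx16 (void)    { return 16; }
static int impl_generic (void) { return 8; }

/* The resolver may run before any constructor, so it does its own detection.  */
static void *
resolve_widest_cas (void)
{
  return (load_feat1 () & bit_CMPXCHG16B)
         ? (void *) impl_cx16 : (void *) impl_generic;
}

int widest_cas_bytes (void) __attribute__ ((ifunc ("resolve_widest_cas")));

int
main (void)
{
  return widest_cas_bytes () > 0 ? 0 : 1;
}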
SOURCES/gcc8-pr85400.patch (new file, 94 lines)
@@ -0,0 +1,94 @@
2018-05-10  Eric Botcazou  <ebotcazou@adacore.com>

	PR c++/85400
	* c-attribs.c (handle_visibility_attribute): Do not set no_add_attrs.

	* decl2.c (adjust_var_decl_tls_model): New static function.
	(comdat_linkage): Call it on a variable.
	(maybe_make_one_only): Likewise.

--- gcc/c-family/c-attribs.c
+++ gcc/c-family/c-attribs.c
@@ -2299,14 +2299,13 @@ handle_visibility_attribute (tree *node, tree name, tree args,

 static tree
 handle_tls_model_attribute (tree *node, tree name, tree args,
-			    int ARG_UNUSED (flags), bool *no_add_attrs)
+			    int ARG_UNUSED (flags),
+			    bool *ARG_UNUSED (no_add_attrs))
 {
   tree id;
   tree decl = *node;
   enum tls_model kind;

-  *no_add_attrs = true;
-
   if (!VAR_P (decl) || !DECL_THREAD_LOCAL_P (decl))
     {
       warning (OPT_Wattributes, "%qE attribute ignored", name);
--- gcc/cp/decl2.c
+++ gcc/cp/decl2.c
@@ -1838,6 +1838,17 @@ mark_vtable_entries (tree decl)
     }
 }

+/* Adjust the TLS model on variable DECL if need be, typically after
+   the linkage of DECL has been modified.  */
+
+static void
+adjust_var_decl_tls_model (tree decl)
+{
+  if (CP_DECL_THREAD_LOCAL_P (decl)
+      && !lookup_attribute ("tls_model", DECL_ATTRIBUTES (decl)))
+    set_decl_tls_model (decl, decl_default_tls_model (decl));
+}
+
 /* Set DECL up to have the closest approximation of "initialized common"
    linkage available.  */

@@ -1888,6 +1899,9 @@ comdat_linkage (tree decl)

   if (TREE_PUBLIC (decl))
     DECL_COMDAT (decl) = 1;
+
+  if (VAR_P (decl))
+    adjust_var_decl_tls_model (decl);
 }

 /* For win32 we also want to put explicit instantiations in
@@ -1926,6 +1940,8 @@ maybe_make_one_only (tree decl)
	  /* Mark it needed so we don't forget to emit it.  */
	  node->forced_by_abi = true;
	  TREE_USED (decl) = 1;
+
+	  adjust_var_decl_tls_model (decl);
	}
     }
 }
--- /dev/null
+++ gcc/testsuite/g++.dg/tls/pr85400.C
@@ -0,0 +1,24 @@
+// PR c++/85400
+// Testcase by Brian Vandenberg <phantall@gmail.com>
+
+// { dg-do link { target c++11 } }
+// { dg-require-effective-target fpic }
+// { dg-require-effective-target shared }
+// { dg-require-effective-target tls }
+// { dg-options "-shared -fPIC -O" }
+// { dg-add-options tls }
+
+struct Test
+{
+  int blah (int y)
+  {
+    thread_local int mything = 3;
+    mything = y > 0 ? y : mything;
+    return mything;
+  }
+};
+
+int stuff (Test& test, int y)
+{
+  return test.blah(y);
+}
SOURCES/gcc8-pr86098.patch (new file, 39 lines)
@@ -0,0 +1,39 @@
2018-06-12  Jason Merrill  <jason@redhat.com>

	PR c++/86098 - ICE with template placeholder for TTP.
	* typeck.c (structural_comptypes) [TEMPLATE_TYPE_PARM]: Check
	CLASS_PLACEHOLDER_TEMPLATE.

--- gcc/cp/typeck.c
+++ gcc/cp/typeck.c
@@ -1375,6 +1375,11 @@ structural_comptypes (tree t1, tree t2, int strict)
	 template parameters set, they can't be equal.  */
       if (!comp_template_parms_position (t1, t2))
	return false;
+      /* If T1 and T2 don't represent the same class template deduction,
+	 they aren't equal.  */
+      if (CLASS_PLACEHOLDER_TEMPLATE (t1)
+	  != CLASS_PLACEHOLDER_TEMPLATE (t2))
+	return false;
       /* Constrained 'auto's are distinct from parms that don't have the same
	 constraints.  */
       if (!equivalent_placeholder_constraints (t1, t2))
--- /dev/null
+++ gcc/testsuite/g++.dg/cpp1z/class-deduction58.C
@@ -0,0 +1,16 @@
+// PR c++/86098
+// { dg-additional-options -std=c++17 }
+
+template <class _Res> class future;
+template <class T> T&& declval();
+
+template<template <class...> class T>
+struct construct_deduced {
+  template <class... AN>
+  using deduced_t = decltype(T{declval<AN>()...});
+  template<class... AN>
+  deduced_t<AN...> operator()(AN&&... an) const;
+};
+
+template<class T>
+future<T> future_from(T singleSender);
SOURCES/gcc8-pr86747.patch (new file, 30 lines)
@@ -0,0 +1,30 @@
2018-12-06  Alexandre Oliva  <aoliva@redhat.com>

	PR c++/86747
	* pt.c (tsubst_friend_class): Enter tsubsted class context.

--- gcc/cp/pt.c
+++ gcc/cp/pt.c
@@ -10558,7 +10558,10 @@ tsubst_friend_class (tree friend_tmpl, tree args)
       if (TREE_CODE (context) == NAMESPACE_DECL)
	push_nested_namespace (context);
       else
-	push_nested_class (context);
+	{
+	  context = tsubst (context, args, tf_error, NULL_TREE);
+	  push_nested_class (context);
+	}

       tmpl = lookup_name_real (DECL_NAME (friend_tmpl), /*prefer_type=*/false,
			       /*non_class=*/false, /*block_p=*/false,
--- /dev/null
+++ gcc/testsuite/g++.dg/pr86747.C
@@ -0,0 +1,8 @@
+// { dg-do compile }
+
+template <typename T> class A {
+  template <void (A::*p)()> class C; // #1
+  template <void (A::*q)()> friend class C; // #2
+};
+
+A<double> a;
SOURCES/gcc8-pr90139.patch (new file, 40 lines)
@@ -0,0 +1,40 @@
2019-04-19  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/90139
	* tree-outof-ssa.c (get_temp_reg): If reg_mode is BLKmode, return
	assign_temp instead of gen_reg_rtx.

--- /dev/null
+++ gcc/testsuite/gcc.c-torture/compile/pr90139.c
@@ -0,0 +1,20 @@
+/* PR middle-end/90139 */
+
+typedef float __attribute__((vector_size (sizeof (float)))) V;
+void bar (int, V *);
+int l;
+
+void
+foo (void)
+{
+  V n, b, o;
+  while (1)
+    switch (l)
+      {
+      case 0:
+	o = n;
+	n = b;
+	b = o;
+	bar (1, &o);
+      }
+}
--- gcc/tree-outof-ssa.c
+++ gcc/tree-outof-ssa.c
@@ -653,6 +653,8 @@ get_temp_reg (tree name)
   tree type = TREE_TYPE (name);
   int unsignedp;
   machine_mode reg_mode = promote_ssa_mode (name, &unsignedp);
+  if (reg_mode == BLKmode)
+    return assign_temp (type, 0, 0);
   rtx x = gen_reg_rtx (reg_mode);
   if (POINTER_TYPE_P (type))
     mark_reg_pointer (x, TYPE_ALIGN (TREE_TYPE (type)));
SOURCES/gcc8-pr90756.patch (new file, 55 lines)
@@ -0,0 +1,55 @@
2019-07-04  Jakub Jelinek  <jakub@redhat.com>

	PR rtl-optimization/90756
	* explow.c (promote_ssa_mode): Always use TYPE_MODE, don't bypass it
	for VECTOR_TYPE_P.

--- gcc/explow.c
+++ gcc/explow.c
@@ -892,16 +892,7 @@ promote_ssa_mode (const_tree name, int *punsignedp)

   tree type = TREE_TYPE (name);
   int unsignedp = TYPE_UNSIGNED (type);
-  machine_mode mode = TYPE_MODE (type);
-
-  /* Bypass TYPE_MODE when it maps vector modes to BLKmode.  */
-  if (mode == BLKmode)
-    {
-      gcc_assert (VECTOR_TYPE_P (type));
-      mode = type->type_common.mode;
-    }
-
-  machine_mode pmode = promote_mode (type, mode, &unsignedp);
+  machine_mode pmode = promote_mode (type, TYPE_MODE (type), &unsignedp);
   if (punsignedp)
     *punsignedp = unsignedp;

--- /dev/null
+++ gcc/testsuite/gcc.dg/pr90756.c
@@ -0,0 +1,26 @@
+/* PR rtl-optimization/90756 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -Wno-psabi" } */
+/* { dg-additional-options "-mno-sse" { target ia32 } } */
+
+typedef float B __attribute__((vector_size(4 * sizeof (float))));
+typedef unsigned long long C __attribute__((vector_size(4 * sizeof (long long))));
+typedef short D __attribute__((vector_size(4 * sizeof (short))));
+B z;
+void foo (C);
+C bar (D);
+B baz ();
+D qux (B);
+
+void
+quux (int x)
+{
+  B n = z, b = z;
+  while (1)
+    switch (x)
+      {
+      case 0: n = baz (); /* FALLTHRU */
+      case 1: { B o = n; n = b; b = o; } /* FALLTHRU */
+      case 2: { D u = qux (b); C v = bar (u); foo (v); }
+      }
+}
Deleted file (85 lines); former contents:
2018-08-03  David Malcolm  <dmalcolm@redhat.com>

	* doc/gcov.texi (-x): Remove duplicate "to".
	* doc/invoke.texi (-Wnoexcept-type): Remove duplicate "calls".
	(-Wif-not-aligned): Remove duplicate "is".
	(-flto): Remove duplicate "the".
	(MicroBlaze Options): In examples of "-mcpu=cpu-type", remove
	duplicate "v5.00.b".
	(MSP430 Options): Remove duplicate "and" from the description
	of "-mgprel-sec=regexp".
	(x86 Options): Remove duplicate copies of "vmldLog102" and
	vmlsLog104 from description of "-mveclibabi=type".

--- gcc/doc/gcov.texi
+++ gcc/doc/gcov.texi
@@ -340,7 +340,7 @@ Print verbose informations related to basic blocks and arcs.

 @item -x
 @itemx --hash-filenames
-By default, gcov uses the full pathname of the source files to to create
+By default, gcov uses the full pathname of the source files to create
 an output filename.  This can lead to long filenames that can overflow
 filesystem limits.  This option creates names of the form
 @file{@var{source-file}##@var{md5}.gcov},
--- gcc/doc/invoke.texi
+++ gcc/doc/invoke.texi
@@ -3056,7 +3056,7 @@ void h() @{ f(g); @}
 @end smallexample

 @noindent
-In C++14, @code{f} calls calls @code{f<void(*)()>}, but in
+In C++14, @code{f} calls @code{f<void(*)()>}, but in
 C++17 it calls @code{f<void(*)()noexcept>}.

 @item -Wclass-memaccess @r{(C++ and Objective-C++ only)}
@@ -4587,7 +4587,7 @@ The @option{-Wimplicit-fallthrough=3} warning is enabled by @option{-Wextra}.
 @opindex Wif-not-aligned
 @opindex Wno-if-not-aligned
 Control if warning triggered by the @code{warn_if_not_aligned} attribute
-should be issued.  This is is enabled by default.
+should be issued.  This is enabled by default.
 Use @option{-Wno-if-not-aligned} to disable it.

 @item -Wignored-qualifiers @r{(C and C++ only)}
@@ -9613,7 +9613,7 @@ for LTO, use @command{gcc-ar} and @command{gcc-ranlib} instead of @command{ar}
 and @command{ranlib};
 to show the symbols of object files with GIMPLE bytecode, use
 @command{gcc-nm}.  Those commands require that @command{ar}, @command{ranlib}
-and @command{nm} have been compiled with plugin support.  At link time, use the the
+and @command{nm} have been compiled with plugin support.  At link time, use the
 flag @option{-fuse-linker-plugin} to ensure that the library participates in
 the LTO optimization process:

@@ -20159,7 +20159,7 @@ Use features of, and schedule code for, the given CPU.
 Supported values are in the format @samp{v@var{X}.@var{YY}.@var{Z}},
 where @var{X} is a major version, @var{YY} is the minor version, and
 @var{Z} is compatibility code.  Example values are @samp{v3.00.a},
-@samp{v4.00.b}, @samp{v5.00.a}, @samp{v5.00.b}, @samp{v5.00.b}, @samp{v6.00.a}.
+@samp{v4.00.b}, @samp{v5.00.a}, @samp{v5.00.b}, @samp{v6.00.a}.

 @item -mxl-soft-mul
 @opindex mxl-soft-mul
@@ -21839,7 +21839,7 @@ GP-relative addressing.  It is most useful in conjunction with
 The @var{regexp} is a POSIX Extended Regular Expression.

 This option does not affect the behavior of the @option{-G} option, and
-and the specified sections are in addition to the standard @code{.sdata}
+the specified sections are in addition to the standard @code{.sdata}
 and @code{.sbss} small-data sections that are recognized by @option{-mgpopt}.

 @item -mr0rel-sec=@var{regexp}
@@ -27613,11 +27613,11 @@ To use this option, both @option{-ftree-vectorize} and
 ABI-compatible library must be specified at link time.

 GCC currently emits calls to @code{vmldExp2},
-@code{vmldLn2}, @code{vmldLog102}, @code{vmldLog102}, @code{vmldPow2},
+@code{vmldLn2}, @code{vmldLog102}, @code{vmldPow2},
 @code{vmldTanh2}, @code{vmldTan2}, @code{vmldAtan2}, @code{vmldAtanh2},
 @code{vmldCbrt2}, @code{vmldSinh2}, @code{vmldSin2}, @code{vmldAsinh2},
 @code{vmldAsin2}, @code{vmldCosh2}, @code{vmldCos2}, @code{vmldAcosh2},
-@code{vmldAcos2}, @code{vmlsExp4}, @code{vmlsLn4}, @code{vmlsLog104},
+@code{vmldAcos2}, @code{vmlsExp4}, @code{vmlsLn4},
 @code{vmlsLog104}, @code{vmlsPow4}, @code{vmlsTanh4}, @code{vmlsTan4},
 @code{vmlsAtan4}, @code{vmlsAtanh4}, @code{vmlsCbrt4}, @code{vmlsSinh4},
 @code{vmlsSin4}, @code{vmlsAsinh4}, @code{vmlsAsin4}, @code{vmlsCosh4},
Deleted file (124 lines); former contents:
commit e7c4d49ab27338e6bc8b0272c4036da58482bde0
Author: krebbel <krebbel@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Mon Nov 26 15:15:57 2018 +0000

    S/390: Fix flogr RTX.

    The flogr instruction uses a 64 bit register pair target operand.  In
    the RTX we model this as a write to a TImode register.  Unfortunately
    the RTX's being assigned to the two parts of the target operand were
    swapped.  This is no problem if in the end the flogr instruction will
    be emitted since the instruction still does what the clzdi expander
    expects.  However, a problem arises when the RTX is used to optimize
    CLZ for a constant input operand.  Even then it matters only if the
    expression couldn't be folded on tree level already.

    In the testcase this happened thanks to loop unrolling on RTL level.
    The iteration variable is used as an argument to the clz
    builtin.  Due to the loop unrolling it becomes a constant and after
    folding the broken RTX leads to a wrong assumption.

    gcc/ChangeLog:

    2018-11-26  Andreas Krebbel  <krebbel@linux.ibm.com>

	Backport from mainline
	2018-11-20  Andreas Krebbel  <krebbel@linux.ibm.com>

	* config/s390/s390.md ("clztidi2"): Swap the RTX's written to the
	DImode parts of the target operand.

    gcc/testsuite/ChangeLog:

    2018-11-26  Andreas Krebbel  <krebbel@linux.ibm.com>

	Backport from mainline
	2018-11-20  Andreas Krebbel  <krebbel@linux.ibm.com>

	* gcc.target/s390/flogr-1.c: New test.

    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-8-branch@266465 138bc75d-0d04-0410-961f-82ee72b054a4

diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index c4d391bc9b5..53bb1985285 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -8861,17 +8861,17 @@
   DONE;
 })

+; CLZ result is in hard reg op0 - this is the high part of the target operand
+; The source with the left-most one bit cleared is in hard reg op0 + 1 - the low part
 (define_insn "clztidi2"
   [(set (match_operand:TI 0 "register_operand" "=d")
	(ior:TI
-	  (ashift:TI
-	    (zero_extend:TI
-	      (xor:DI (match_operand:DI 1 "register_operand" "d")
-		      (lshiftrt (match_operand:DI 2 "const_int_operand" "")
-				(subreg:SI (clz:DI (match_dup 1)) 4))))
-
-	  (const_int 64))
-	  (zero_extend:TI (clz:DI (match_dup 1)))))
+	  (ashift:TI (zero_extend:TI (clz:DI (match_operand:DI 1 "register_operand" "d")))
+		     (const_int 64))
+	  (zero_extend:TI
+	    (xor:DI (match_dup 1)
+		    (lshiftrt (match_operand:DI 2 "const_int_operand" "")
+			      (subreg:SI (clz:DI (match_dup 1)) 4))))))
    (clobber (reg:CC CC_REGNUM))]
   "UINTVAL (operands[2]) == HOST_WIDE_INT_1U << 63
    && TARGET_EXTIMM && TARGET_ZARCH"
diff --git a/gcc/testsuite/gcc.target/s390/flogr-1.c b/gcc/testsuite/gcc.target/s390/flogr-1.c
new file mode 100644
index 00000000000..a3869000d62
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/flogr-1.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -funroll-loops -march=z9-109" } */
+/* { dg-require-effective-target stdint_types } */
+
+/* Folding of the FLOGR caused a wrong value to be returned by
+   __builtin_clz becuase of a problem in the RTX we emit for FLOGR.
+   The problematic folding can only be triggered with constants inputs
+   introduced on RTL level.  In this case it happens with loop
+   unrolling. */
+
+#include <stdint.h>
+#include <assert.h>
+
+static inline uint32_t pow2_ceil_u32(uint32_t x) {
+  if (x <= 1) {
+    return x;
+  }
+  int msb_on_index;
+  msb_on_index = (31 ^ __builtin_clz(x - 1));
+  assert(msb_on_index < 31);
+  return 1U << (msb_on_index + 1);
+}
+
+void __attribute__((noinline,noclone))
+die (int a)
+{
+  if (a)
+    __builtin_abort ();
+}
+
+void test_pow2_ceil_u32(void) {
+  unsigned i;
+
+  for (i = 0; i < 18; i++) {
+    uint32_t a_ = (pow2_ceil_u32(((uint32_t)1) << i));
+    if (!(a_ == (((uint32_t)1) << i))) {
+      die(1);
+    }
+  }
+}
+
+int
+main(void) {
+  test_pow2_ceil_u32();
+
+  return 0;
+}
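As the commit message above explains, flogr writes a 64-bit register pair: one half receives the leading-zero count and the other the input with its left-most 1 bit cleared, and the fix swaps which RTX half models which. A small C model of that pairing (an illustration only, not GCC or S/390 code; flogr_model is a made-up name) may make the "swapped halves" failure mode easier to follow:

#include <stdint.h>
#include <stdio.h>

struct flogr_result
{
  uint64_t clz;      /* leading-zero count (the "high" half of the pair) */
  uint64_t cleared;  /* input with its left-most 1 bit cleared (the "low" half) */
};

static struct flogr_result
flogr_model (uint64_t x)
{
  struct flogr_result r;
  r.clz = x ? (uint64_t) __builtin_clzll (x) : 64;
  /* Same computation as the xor/lshiftrt in the clztidi2 pattern:
     x ^ ((1 << 63) >> clz) clears the left-most set bit.  */
  r.cleared = x ? x ^ (UINT64_C (1) << (63 - r.clz)) : 0;
  return r;
}

int
main (void)
{
  struct flogr_result r = flogr_model (0x00f0000000000000ULL);
  printf ("clz=%llu cleared=%#llx\n",
          (unsigned long long) r.clz, (unsigned long long) r.cleared);
  return 0;
}

If the two halves are assigned the wrong way round in the RTL, emitting the real instruction still happens to work, but constant folding of the RTX (as triggered by the unrolled-loop testcase above) picks up the wrong half and __builtin_clz yields a wrong value.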
Deleted file (572 lines); former contents:
commit 87c504d3b293ebe6d36f3b50696cd307b02b0daa
Author: acsawdey <acsawdey@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Tue Jun 19 21:23:39 2018 +0000

    2018-06-19  Aaron Sawdey  <acsawdey@linux.ibm.com>

	* config/rs6000/rs6000-string.c (select_block_compare_mode): Check
	TARGET_EFFICIENT_OVERLAPPING_UNALIGNED here instead of in caller.
	(do_and3, do_and3_mask, do_compb3, do_rotl3): New functions.
	(expand_block_compare): Change select_block_compare_mode call.
	(expand_strncmp_align_check): Use new functions, fix comment.
	(emit_final_str_compare_gpr): New function.
	(expand_strn_compare): Refactor and clean up code.
	* config/rs6000/vsx.md (vsx_mov<mode>_64bit): Remove *.

    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@261769 138bc75d-0d04-0410-961f-82ee72b054a4

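The ChangeLog above introduces emit_final_str_compare_gpr, which emits the cmpb/cmpb/orc/cntlzd/addi/rldcl/subf cleanup sequence once a loaded chunk differs or may contain the terminating zero byte. A rough C model of what that sequence computes (an assumption for illustration, treating both chunks as big-endian register images; the name final_str_compare_model is made up and this is not the emitted Power code) is:

#include <stdint.h>

/* Return the strcmp-style result for two 8-byte chunks: the difference of
   the bytes at the first position where they differ or where chunk1 has a
   NUL, scanning from the most-significant byte down.  */
static int
final_str_compare_model (uint64_t chunk1, uint64_t chunk2)
{
  for (int byte = 0; byte < 8; byte++)
    {
      unsigned int b1 = (unsigned int) (chunk1 >> (56 - 8 * byte)) & 0xff;
      unsigned int b2 = (unsigned int) (chunk2 >> (56 - 8 * byte)) & 0xff;
      if (b1 != b2 || b1 == 0)          /* first difference or terminator */
        return (int) b1 - (int) b2;
    }
  return 0;                             /* chunks equal, no NUL seen */
}

The hardware sequence gets the same answer without a loop: cmpb marks the equal bytes, a second cmpb marks the zero bytes, orc combines them, cntlzd finds the first interesting byte, and rldcl/subf extract and subtract the two bytes at that position.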
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
|
||||
index 632d3359711..f9dd54eb639 100644
|
||||
--- a/gcc/config/rs6000/rs6000-string.c
|
||||
+++ b/gcc/config/rs6000/rs6000-string.c
|
||||
@@ -266,6 +266,7 @@ select_block_compare_mode (unsigned HOST_WIDE_INT offset,
|
||||
else if (bytes == GET_MODE_SIZE (QImode))
|
||||
return QImode;
|
||||
else if (bytes < GET_MODE_SIZE (SImode)
|
||||
+ && TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
|
||||
&& offset >= GET_MODE_SIZE (SImode) - bytes)
|
||||
/* This matches the case were we have SImode and 3 bytes
|
||||
and offset >= 1 and permits us to move back one and overlap
|
||||
@@ -273,6 +274,7 @@ select_block_compare_mode (unsigned HOST_WIDE_INT offset,
|
||||
unwanted bytes off of the input. */
|
||||
return SImode;
|
||||
else if (word_mode_ok && bytes < UNITS_PER_WORD
|
||||
+ && TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
|
||||
&& offset >= UNITS_PER_WORD-bytes)
|
||||
/* Similarly, if we can use DImode it will get matched here and
|
||||
can do an overlapping read that ends at the end of the block. */
|
||||
@@ -408,6 +410,54 @@ do_add3 (rtx dest, rtx src1, rtx src2)
|
||||
emit_insn (gen_addsi3 (dest, src1, src2));
|
||||
}
|
||||
|
||||
+/* Emit an and of the proper mode for DEST.
|
||||
+
|
||||
+ DEST is the destination register for the and.
|
||||
+ SRC1 is the first and input.
|
||||
+ SRC2 is the second and input.
|
||||
+
|
||||
+ Computes DEST = SRC1&SRC2. */
|
||||
+static void
|
||||
+do_and3 (rtx dest, rtx src1, rtx src2)
|
||||
+{
|
||||
+ if (GET_MODE (dest) == DImode)
|
||||
+ emit_insn (gen_anddi3 (dest, src1, src2));
|
||||
+ else
|
||||
+ emit_insn (gen_andsi3 (dest, src1, src2));
|
||||
+}
|
||||
+
|
||||
+/* Emit an cmpb of the proper mode for DEST.
|
||||
+
|
||||
+ DEST is the destination register for the cmpb.
|
||||
+ SRC1 is the first input.
|
||||
+ SRC2 is the second input.
|
||||
+
|
||||
+ Computes cmpb of SRC1, SRC2. */
|
||||
+static void
|
||||
+do_cmpb3 (rtx dest, rtx src1, rtx src2)
|
||||
+{
|
||||
+ if (GET_MODE (dest) == DImode)
|
||||
+ emit_insn (gen_cmpbdi3 (dest, src1, src2));
|
||||
+ else
|
||||
+ emit_insn (gen_cmpbsi3 (dest, src1, src2));
|
||||
+}
|
||||
+
|
||||
+/* Emit a rotl of the proper mode for DEST.
|
||||
+
|
||||
+ DEST is the destination register for the and.
|
||||
+ SRC1 is the first and input.
|
||||
+ SRC2 is the second and input.
|
||||
+
|
||||
+ Computes DEST = SRC1 rotated left by SRC2. */
|
||||
+static void
|
||||
+do_rotl3 (rtx dest, rtx src1, rtx src2)
|
||||
+{
|
||||
+ if (GET_MODE (dest) == DImode)
|
||||
+ emit_insn (gen_rotldi3 (dest, src1, src2));
|
||||
+ else
|
||||
+ emit_insn (gen_rotlsi3 (dest, src1, src2));
|
||||
+}
|
||||
+
|
||||
/* Generate rtl for a load, shift, and compare of less than a full word.
|
||||
|
||||
LOAD_MODE is the machine mode for the loads.
|
||||
@@ -1395,11 +1445,8 @@ expand_block_compare (rtx operands[])
|
||||
while (bytes > 0)
|
||||
{
|
||||
unsigned int align = compute_current_alignment (base_align, offset);
|
||||
- if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
|
||||
- load_mode = select_block_compare_mode (offset, bytes, align,
|
||||
- word_mode_ok);
|
||||
- else
|
||||
- load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok);
|
||||
+ load_mode = select_block_compare_mode (offset, bytes,
|
||||
+ align, word_mode_ok);
|
||||
load_mode_size = GET_MODE_SIZE (load_mode);
|
||||
if (bytes >= load_mode_size)
|
||||
cmp_bytes = load_mode_size;
|
||||
@@ -1627,22 +1674,19 @@ expand_block_compare (rtx operands[])
|
||||
return true;
|
||||
}
|
||||
|
||||
-/* Generate alignment check and branch code to set up for
|
||||
+/* Generate page crossing check and branch code to set up for
|
||||
strncmp when we don't have DI alignment.
|
||||
STRNCMP_LABEL is the label to branch if there is a page crossing.
|
||||
- SRC is the string pointer to be examined.
|
||||
+ SRC_ADDR is the string address to be examined.
|
||||
BYTES is the max number of bytes to compare. */
|
||||
static void
|
||||
-expand_strncmp_align_check (rtx strncmp_label, rtx src, HOST_WIDE_INT bytes)
|
||||
+expand_strncmp_align_check (rtx strncmp_label, rtx src_addr, HOST_WIDE_INT bytes)
|
||||
{
|
||||
rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, strncmp_label);
|
||||
- rtx src_check = copy_addr_to_reg (XEXP (src, 0));
|
||||
- if (GET_MODE (src_check) == SImode)
|
||||
- emit_insn (gen_andsi3 (src_check, src_check, GEN_INT (0xfff)));
|
||||
- else
|
||||
- emit_insn (gen_anddi3 (src_check, src_check, GEN_INT (0xfff)));
|
||||
+ rtx src_pgoff = gen_reg_rtx (GET_MODE (src_addr));
|
||||
+ do_and3 (src_pgoff, src_addr, GEN_INT (0xfff));
|
||||
rtx cond = gen_reg_rtx (CCmode);
|
||||
- emit_move_insn (cond, gen_rtx_COMPARE (CCmode, src_check,
|
||||
+ emit_move_insn (cond, gen_rtx_COMPARE (CCmode, src_pgoff,
|
||||
GEN_INT (4096 - bytes)));
|
||||
|
||||
rtx cmp_rtx = gen_rtx_GE (VOIDmode, cond, const0_rtx);
|
||||
@@ -1654,6 +1698,76 @@ expand_strncmp_align_check (rtx strncmp_label, rtx src, HOST_WIDE_INT bytes)
|
||||
LABEL_NUSES (strncmp_label) += 1;
|
||||
}
|
||||
|
||||
+/* Generate the final sequence that identifies the differing
|
||||
+ byte and generates the final result, taking into account
|
||||
+ zero bytes:
|
||||
+
|
||||
+ cmpb cmpb_result1, src1, src2
|
||||
+ cmpb cmpb_result2, src1, zero
|
||||
+ orc cmpb_result1, cmp_result1, cmpb_result2
|
||||
+ cntlzd get bit of first zero/diff byte
|
||||
+ addi convert for rldcl use
|
||||
+ rldcl rldcl extract diff/zero byte
|
||||
+ subf subtract for final result
|
||||
+
|
||||
+ STR1 is the reg rtx for data from string 1.
|
||||
+ STR2 is the reg rtx for data from string 2.
|
||||
+ RESULT is the reg rtx for the comparison result. */
|
||||
+
|
||||
+static void
|
||||
+emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
|
||||
+{
|
||||
+ machine_mode m = GET_MODE (str1);
|
||||
+ rtx cmpb_diff = gen_reg_rtx (m);
|
||||
+ rtx cmpb_zero = gen_reg_rtx (m);
|
||||
+ rtx rot_amt = gen_reg_rtx (m);
|
||||
+ rtx zero_reg = gen_reg_rtx (m);
|
||||
+
|
||||
+ rtx rot1_1 = gen_reg_rtx (m);
|
||||
+ rtx rot1_2 = gen_reg_rtx (m);
|
||||
+ rtx rot2_1 = gen_reg_rtx (m);
|
||||
+ rtx rot2_2 = gen_reg_rtx (m);
|
||||
+
|
||||
+ if (m == SImode)
|
||||
+ {
|
||||
+ emit_insn (gen_cmpbsi3 (cmpb_diff, str1, str2));
|
||||
+ emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
|
||||
+ emit_insn (gen_cmpbsi3 (cmpb_zero, str1, zero_reg));
|
||||
+ emit_insn (gen_one_cmplsi2 (cmpb_diff,cmpb_diff));
|
||||
+ emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
|
||||
+ emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
|
||||
+ emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
|
||||
+ emit_insn (gen_rotlsi3 (rot1_1, str1,
|
||||
+ gen_lowpart (SImode, rot_amt)));
|
||||
+ emit_insn (gen_andsi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
|
||||
+ emit_insn (gen_rotlsi3 (rot2_1, str2,
|
||||
+ gen_lowpart (SImode, rot_amt)));
|
||||
+ emit_insn (gen_andsi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
|
||||
+ emit_insn (gen_subsi3 (result, rot1_2, rot2_2));
|
||||
+ }
|
||||
+ else if (m == DImode)
|
||||
+ {
|
||||
+ emit_insn (gen_cmpbdi3 (cmpb_diff, str1, str2));
|
||||
+ emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
|
||||
+ emit_insn (gen_cmpbdi3 (cmpb_zero, str1, zero_reg));
|
||||
+ emit_insn (gen_one_cmpldi2 (cmpb_diff,cmpb_diff));
|
||||
+ emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
|
||||
+ emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
|
||||
+ emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
|
||||
+ emit_insn (gen_rotldi3 (rot1_1, str1,
|
||||
+ gen_lowpart (SImode, rot_amt)));
|
||||
+ emit_insn (gen_anddi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
|
||||
+ emit_insn (gen_rotldi3 (rot2_1, str2,
|
||||
+ gen_lowpart (SImode, rot_amt)));
|
||||
+ emit_insn (gen_anddi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
|
||||
+ emit_insn (gen_subdi3 (result, rot1_2, rot2_2));
|
||||
+ }
|
||||
+ else
|
||||
+ gcc_unreachable ();
|
||||
+
|
||||
+ return;
|
||||
+}
|
||||
+
|
||||
/* Expand a string compare operation with length, and return
|
||||
true if successful. Return false if we should let the
|
||||
compiler generate normal code, probably a strncmp call.
|
||||
@@ -1684,8 +1798,8 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
align_rtx = operands[4];
|
||||
}
|
||||
unsigned HOST_WIDE_INT cmp_bytes = 0;
|
||||
- rtx src1 = orig_src1;
|
||||
- rtx src2 = orig_src2;
|
||||
+ rtx src1_addr = force_reg (Pmode, XEXP (orig_src1, 0));
|
||||
+ rtx src2_addr = force_reg (Pmode, XEXP (orig_src2, 0));
|
||||
|
||||
/* If we have a length, it must be constant. This simplifies things
|
||||
a bit as we don't have to generate code to check if we've exceeded
|
||||
@@ -1698,8 +1812,8 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
return false;
|
||||
|
||||
unsigned int base_align = UINTVAL (align_rtx);
|
||||
- int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT;
|
||||
- int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT;
|
||||
+ unsigned int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT;
|
||||
+ unsigned int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT;
|
||||
|
||||
/* targetm.slow_unaligned_access -- don't do unaligned stuff. */
|
||||
if (targetm.slow_unaligned_access (word_mode, align1)
|
||||
@@ -1751,8 +1865,9 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
rtx final_move_label = gen_label_rtx ();
|
||||
rtx final_label = gen_label_rtx ();
|
||||
rtx begin_compare_label = NULL;
|
||||
+ unsigned int required_align = 8;
|
||||
|
||||
- if (base_align < 8)
|
||||
+ if (base_align < required_align)
|
||||
{
|
||||
/* Generate code that checks distance to 4k boundary for this case. */
|
||||
begin_compare_label = gen_label_rtx ();
|
||||
@@ -1775,14 +1890,14 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
}
|
||||
else
|
||||
{
|
||||
- align_test = ROUND_UP (align_test, 8);
|
||||
- base_align = 8;
|
||||
+ align_test = ROUND_UP (align_test, required_align);
|
||||
+ base_align = required_align;
|
||||
}
|
||||
|
||||
- if (align1 < 8)
|
||||
- expand_strncmp_align_check (strncmp_label, src1, align_test);
|
||||
- if (align2 < 8)
|
||||
- expand_strncmp_align_check (strncmp_label, src2, align_test);
|
||||
+ if (align1 < required_align)
|
||||
+ expand_strncmp_align_check (strncmp_label, src1_addr, align_test);
|
||||
+ if (align2 < required_align)
|
||||
+ expand_strncmp_align_check (strncmp_label, src2_addr, align_test);
|
||||
|
||||
/* Now generate the following sequence:
|
||||
- branch to begin_compare
|
||||
@@ -1799,25 +1914,13 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
|
||||
emit_label (strncmp_label);
|
||||
|
||||
- if (!REG_P (XEXP (src1, 0)))
|
||||
- {
|
||||
- rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
|
||||
- src1 = replace_equiv_address (src1, src1_reg);
|
||||
- }
|
||||
-
|
||||
- if (!REG_P (XEXP (src2, 0)))
|
||||
- {
|
||||
- rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
|
||||
- src2 = replace_equiv_address (src2, src2_reg);
|
||||
- }
|
||||
-
|
||||
if (no_length)
|
||||
{
|
||||
tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
|
||||
emit_library_call_value (XEXP (DECL_RTL (fun), 0),
|
||||
target, LCT_NORMAL, GET_MODE (target),
|
||||
- force_reg (Pmode, XEXP (src1, 0)), Pmode,
|
||||
- force_reg (Pmode, XEXP (src2, 0)), Pmode);
|
||||
+ force_reg (Pmode, src1_addr), Pmode,
|
||||
+ force_reg (Pmode, src2_addr), Pmode);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1830,8 +1933,8 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
|
||||
emit_library_call_value (XEXP (DECL_RTL (fun), 0),
|
||||
target, LCT_NORMAL, GET_MODE (target),
|
||||
- force_reg (Pmode, XEXP (src1, 0)), Pmode,
|
||||
- force_reg (Pmode, XEXP (src2, 0)), Pmode,
|
||||
+ force_reg (Pmode, src1_addr), Pmode,
|
||||
+ force_reg (Pmode, src2_addr), Pmode,
|
||||
len_rtx, Pmode);
|
||||
}
|
||||
|
||||
@@ -1847,12 +1950,12 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
|
||||
rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
|
||||
|
||||
- /* Generate sequence of ld/ldbrx, cmpb to compare out
|
||||
+ /* Generate a sequence of GPR or VEC/VSX instructions to compare out
|
||||
to the length specified. */
|
||||
unsigned HOST_WIDE_INT bytes_to_compare = compare_length;
|
||||
while (bytes_to_compare > 0)
|
||||
{
|
||||
- /* Compare sequence:
|
||||
+ /* GPR compare sequence:
|
||||
check each 8B with: ld/ld cmpd bne
|
||||
If equal, use rldicr/cmpb to check for zero byte.
|
||||
cleanup code at end:
|
||||
@@ -1866,13 +1969,10 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
|
||||
The last compare can branch around the cleanup code if the
|
||||
result is zero because the strings are exactly equal. */
|
||||
+
|
||||
unsigned int align = compute_current_alignment (base_align, offset);
|
||||
- if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
|
||||
- load_mode = select_block_compare_mode (offset, bytes_to_compare, align,
|
||||
- word_mode_ok);
|
||||
- else
|
||||
- load_mode = select_block_compare_mode (0, bytes_to_compare, align,
|
||||
- word_mode_ok);
|
||||
+ load_mode = select_block_compare_mode (offset, bytes_to_compare,
|
||||
+ align, word_mode_ok);
|
||||
load_mode_size = GET_MODE_SIZE (load_mode);
|
||||
if (bytes_to_compare >= load_mode_size)
|
||||
cmp_bytes = load_mode_size;
|
||||
@@ -1895,25 +1995,10 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
rid of the extra bytes. */
|
||||
cmp_bytes = bytes_to_compare;
|
||||
|
||||
- src1 = adjust_address (orig_src1, load_mode, offset);
|
||||
- src2 = adjust_address (orig_src2, load_mode, offset);
|
||||
-
|
||||
- if (!REG_P (XEXP (src1, 0)))
|
||||
- {
|
||||
- rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
|
||||
- src1 = replace_equiv_address (src1, src1_reg);
|
||||
- }
|
||||
- set_mem_size (src1, load_mode_size);
|
||||
-
|
||||
- if (!REG_P (XEXP (src2, 0)))
|
||||
- {
|
||||
- rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
|
||||
- src2 = replace_equiv_address (src2, src2_reg);
|
||||
- }
|
||||
- set_mem_size (src2, load_mode_size);
|
||||
-
|
||||
- do_load_for_compare (tmp_reg_src1, src1, load_mode);
|
||||
- do_load_for_compare (tmp_reg_src2, src2, load_mode);
|
||||
+ rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset));
|
||||
+ do_load_for_compare_from_addr (load_mode, tmp_reg_src1, addr1, orig_src1);
|
||||
+ rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset));
|
||||
+ do_load_for_compare_from_addr (load_mode, tmp_reg_src2, addr2, orig_src2);
|
||||
|
||||
/* We must always left-align the data we read, and
|
||||
clear any bytes to the right that are beyond the string.
|
||||
@@ -1926,16 +2011,8 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
{
|
||||
/* Rotate left first. */
|
||||
rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
|
||||
- if (word_mode == DImode)
|
||||
- {
|
||||
- emit_insn (gen_rotldi3 (tmp_reg_src1, tmp_reg_src1, sh));
|
||||
- emit_insn (gen_rotldi3 (tmp_reg_src2, tmp_reg_src2, sh));
|
||||
- }
|
||||
- else
|
||||
- {
|
||||
- emit_insn (gen_rotlsi3 (tmp_reg_src1, tmp_reg_src1, sh));
|
||||
- emit_insn (gen_rotlsi3 (tmp_reg_src2, tmp_reg_src2, sh));
|
||||
- }
|
||||
+ do_rotl3 (tmp_reg_src1, tmp_reg_src1, sh);
|
||||
+ do_rotl3 (tmp_reg_src2, tmp_reg_src2, sh);
|
||||
}
|
||||
|
||||
if (cmp_bytes < word_mode_size)
|
||||
@@ -1944,16 +2021,8 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
turned into a rldicr instruction. */
|
||||
HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
||||
rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
||||
- if (word_mode == DImode)
|
||||
- {
|
||||
- emit_insn (gen_anddi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
|
||||
- emit_insn (gen_anddi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
|
||||
- }
|
||||
- else
|
||||
- {
|
||||
- emit_insn (gen_andsi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
|
||||
- emit_insn (gen_andsi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
|
||||
- }
|
||||
+ do_and3 (tmp_reg_src1, tmp_reg_src1, mask);
|
||||
+ do_and3 (tmp_reg_src2, tmp_reg_src2, mask);
|
||||
}
|
||||
|
||||
/* Cases to handle. A and B are chunks of the two strings.
|
||||
@@ -2010,31 +2079,16 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
|
||||
rtx condz = gen_reg_rtx (CCmode);
|
||||
rtx zero_reg = gen_reg_rtx (word_mode);
|
||||
- if (word_mode == SImode)
|
||||
- {
|
||||
- emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
|
||||
- emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
|
||||
- if (cmp_bytes < word_mode_size)
|
||||
- {
|
||||
- /* Don't want to look at zero bytes past end. */
|
||||
- HOST_WIDE_INT mb =
|
||||
- BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
||||
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
||||
- emit_insn (gen_andsi3_mask (cmpb_zero, cmpb_zero, mask));
|
||||
- }
|
||||
- }
|
||||
- else
|
||||
+ emit_move_insn (zero_reg, GEN_INT (0));
|
||||
+ do_cmpb3 (cmpb_zero, tmp_reg_src1, zero_reg);
|
||||
+
|
||||
+ if (cmp_bytes < word_mode_size)
|
||||
{
|
||||
- emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
|
||||
- emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
|
||||
- if (cmp_bytes < word_mode_size)
|
||||
- {
|
||||
- /* Don't want to look at zero bytes past end. */
|
||||
- HOST_WIDE_INT mb =
|
||||
- BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
||||
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
||||
- emit_insn (gen_anddi3_mask (cmpb_zero, cmpb_zero, mask));
|
||||
- }
|
||||
+ /* Don't want to look at zero bytes past end. */
|
||||
+ HOST_WIDE_INT mb =
|
||||
+ BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
||||
+ rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
||||
+ do_and3 (cmpb_zero, cmpb_zero, mask);
|
||||
}
|
||||
|
||||
emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
|
||||
@@ -2054,22 +2108,10 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
if (equality_compare_rest)
|
||||
{
|
||||
/* Update pointers past what has been compared already. */
|
||||
- src1 = adjust_address (orig_src1, load_mode, offset);
|
||||
- src2 = adjust_address (orig_src2, load_mode, offset);
|
||||
-
|
||||
- if (!REG_P (XEXP (src1, 0)))
|
||||
- {
|
||||
- rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
|
||||
- src1 = replace_equiv_address (src1, src1_reg);
|
||||
- }
|
||||
- set_mem_size (src1, load_mode_size);
|
||||
-
|
||||
- if (!REG_P (XEXP (src2, 0)))
|
||||
- {
|
||||
- rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
|
||||
- src2 = replace_equiv_address (src2, src2_reg);
|
||||
- }
|
||||
- set_mem_size (src2, load_mode_size);
|
||||
+ rtx src1 = force_reg (Pmode,
|
||||
+ gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset)));
|
||||
+ rtx src2 = force_reg (Pmode,
|
||||
+ gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset)));
|
||||
|
||||
/* Construct call to strcmp/strncmp to compare the rest of the string. */
|
||||
if (no_length)
|
||||
@@ -2077,8 +2119,7 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
|
||||
emit_library_call_value (XEXP (DECL_RTL (fun), 0),
|
||||
target, LCT_NORMAL, GET_MODE (target),
|
||||
- force_reg (Pmode, XEXP (src1, 0)), Pmode,
|
||||
- force_reg (Pmode, XEXP (src2, 0)), Pmode);
|
||||
+ src1, Pmode, src2, Pmode);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -2087,9 +2128,7 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
|
||||
emit_library_call_value (XEXP (DECL_RTL (fun), 0),
|
||||
target, LCT_NORMAL, GET_MODE (target),
|
||||
- force_reg (Pmode, XEXP (src1, 0)), Pmode,
|
||||
- force_reg (Pmode, XEXP (src2, 0)), Pmode,
|
||||
- len_rtx, Pmode);
|
||||
+ src1, Pmode, src2, Pmode, len_rtx, Pmode);
|
||||
}
|
||||
|
||||
rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
|
||||
@@ -2102,63 +2141,7 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
if (cleanup_label)
|
||||
emit_label (cleanup_label);
|
||||
|
||||
- /* Generate the final sequence that identifies the differing
|
||||
- byte and generates the final result, taking into account
|
||||
- zero bytes:
|
||||
-
|
||||
- cmpb cmpb_result1, src1, src2
|
||||
- cmpb cmpb_result2, src1, zero
|
||||
- orc cmpb_result1, cmp_result1, cmpb_result2
|
||||
- cntlzd get bit of first zero/diff byte
|
||||
- addi convert for rldcl use
|
||||
- rldcl rldcl extract diff/zero byte
|
||||
- subf subtract for final result
|
||||
- */
|
||||
-
|
||||
- rtx cmpb_diff = gen_reg_rtx (word_mode);
|
||||
- rtx cmpb_zero = gen_reg_rtx (word_mode);
|
||||
- rtx rot_amt = gen_reg_rtx (word_mode);
|
||||
- rtx zero_reg = gen_reg_rtx (word_mode);
|
||||
-
|
||||
- rtx rot1_1 = gen_reg_rtx (word_mode);
|
||||
- rtx rot1_2 = gen_reg_rtx (word_mode);
|
||||
- rtx rot2_1 = gen_reg_rtx (word_mode);
|
||||
- rtx rot2_2 = gen_reg_rtx (word_mode);
|
||||
-
|
||||
- if (word_mode == SImode)
|
||||
- {
|
||||
- emit_insn (gen_cmpbsi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
|
||||
- emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
|
||||
- emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
|
||||
- emit_insn (gen_one_cmplsi2 (cmpb_diff,cmpb_diff));
|
||||
- emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
|
||||
- emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
|
||||
- emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
|
||||
- emit_insn (gen_rotlsi3 (rot1_1, tmp_reg_src1,
|
||||
- gen_lowpart (SImode, rot_amt)));
|
||||
- emit_insn (gen_andsi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
|
||||
- emit_insn (gen_rotlsi3 (rot2_1, tmp_reg_src2,
|
||||
- gen_lowpart (SImode, rot_amt)));
|
||||
- emit_insn (gen_andsi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
|
||||
- emit_insn (gen_subsi3 (result_reg, rot1_2, rot2_2));
|
||||
- }
|
||||
- else
|
||||
- {
|
||||
- emit_insn (gen_cmpbdi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
|
||||
- emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
|
||||
- emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
|
||||
- emit_insn (gen_one_cmpldi2 (cmpb_diff,cmpb_diff));
|
||||
- emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
|
||||
- emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
|
||||
- emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
|
||||
- emit_insn (gen_rotldi3 (rot1_1, tmp_reg_src1,
|
||||
- gen_lowpart (SImode, rot_amt)));
|
||||
- emit_insn (gen_anddi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
|
||||
- emit_insn (gen_rotldi3 (rot2_1, tmp_reg_src2,
|
||||
- gen_lowpart (SImode, rot_amt)));
|
||||
- emit_insn (gen_anddi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
|
||||
- emit_insn (gen_subdi3 (result_reg, rot1_2, rot2_2));
|
||||
- }
|
||||
+ emit_final_str_compare_gpr (tmp_reg_src1, tmp_reg_src2, result_reg);
|
||||
|
||||
emit_label (final_move_label);
|
||||
emit_insn (gen_movsi (target,
|
||||
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
|
||||
index 0fc77aa18b0..e6921e96a3d 100644
|
||||
--- a/gcc/config/rs6000/vsx.md
|
||||
+++ b/gcc/config/rs6000/vsx.md
|
||||
@@ -1210,7 +1210,7 @@
|
||||
;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
|
||||
;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
|
||||
;; VSX 0/-1 GPR 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
|
||||
-(define_insn "*vsx_mov<mode>_64bit"
|
||||
+(define_insn "vsx_mov<mode>_64bit"
|
||||
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
|
||||
"=ZwO, <VSa>, <VSa>, r, we, ?wQ,
|
||||
?&r, ??r, ??Y, <??r>, wo, v,
|
Deleted file (472 lines); former contents:
commit c7a833caa029b84ad579c3fabe006a80f718d7e1
Author: acsawdey <acsawdey@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Thu Aug 2 18:11:54 2018 +0000

    2018-07-31  Aaron Sawdey  <acsawdey@linux.ibm.com>

	* config/rs6000/rs6000-string.c (select_block_compare_mode): Move test
	for word_mode_ok here instead of passing as argument.
	(expand_block_compare): Change select_block_compare_mode() call.
	(expand_strncmp_gpr_sequence): New function.
	(expand_strn_compare): Make use of expand_strncmp_gpr_sequence.

    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@263273 138bc75d-0d04-0410-961f-82ee72b054a4

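The ChangeLog above moves the word_mode_ok test into select_block_compare_mode and factors the per-chunk loop out into expand_strncmp_gpr_sequence. The following sketch (an assumption for illustration with the made-up name pick_load_size; it is not the GCC function) shows the kind of chunk-size decision involved: take the widest load that fits the remaining bytes, or move a wider load back over already-compared bytes when the current offset allows an overlapping read that still ends inside the block.

#include <stddef.h>

/* Hypothetical model of the chunk-size choice, in bytes.  word_ok is 0 on
   targets where the full word_mode load must be avoided (e.g. LE without
   ldbrx, as the moved comment in the diff below explains).  */
static size_t
pick_load_size (size_t offset, size_t bytes, int word_ok)
{
  if (word_ok && bytes >= 8)
    return 8;                        /* a whole word_mode chunk */
  if (bytes == 4 || bytes == 2 || bytes == 1)
    return bytes;                    /* an exact SI/HI/QI-sized tail */
  if (bytes < 4 && offset >= 4 - bytes)
    return 4;                        /* 3-byte tail: back up and overlap */
  if (word_ok && bytes < 8 && offset >= 8 - bytes)
    return 8;                        /* 5..7-byte tail: back up and overlap */
  return bytes < 4 ? 2 : 4;          /* otherwise compare a smaller piece */
}

The overlapping-read branches are only profitable on cores where unaligned overlapping loads are cheap, which is why the real code keys them off TARGET_EFFICIENT_OVERLAPPING_UNALIGNED.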
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
|
||||
index f9dd54eb639..451e9ed33da 100644
|
||||
--- a/gcc/config/rs6000/rs6000-string.c
|
||||
+++ b/gcc/config/rs6000/rs6000-string.c
|
||||
@@ -238,13 +238,11 @@ do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
|
||||
|
||||
OFFSET is the current read offset from the beginning of the block.
|
||||
BYTES is the number of bytes remaining to be read.
|
||||
- ALIGN is the minimum alignment of the memory blocks being compared in bytes.
|
||||
- WORD_MODE_OK indicates using WORD_MODE is allowed, else SImode is
|
||||
- the largest allowable mode. */
|
||||
+ ALIGN is the minimum alignment of the memory blocks being compared in bytes. */
|
||||
static machine_mode
|
||||
select_block_compare_mode (unsigned HOST_WIDE_INT offset,
|
||||
unsigned HOST_WIDE_INT bytes,
|
||||
- unsigned HOST_WIDE_INT align, bool word_mode_ok)
|
||||
+ unsigned HOST_WIDE_INT align)
|
||||
{
|
||||
/* First see if we can do a whole load unit
|
||||
as that will be more efficient than a larger load + shift. */
|
||||
@@ -257,6 +255,11 @@ select_block_compare_mode (unsigned HOST_WIDE_INT offset,
|
||||
/* The most we can read without potential page crossing. */
|
||||
unsigned HOST_WIDE_INT maxread = ROUND_UP (bytes, align);
|
||||
|
||||
+ /* If we have an LE target without ldbrx and word_mode is DImode,
|
||||
+ then we must avoid using word_mode. */
|
||||
+ int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
|
||||
+ && word_mode == DImode);
|
||||
+
|
||||
if (word_mode_ok && bytes >= UNITS_PER_WORD)
|
||||
return word_mode;
|
||||
else if (bytes == GET_MODE_SIZE (SImode))
|
||||
@@ -1382,16 +1385,11 @@ expand_block_compare (rtx operands[])
|
||||
else
|
||||
cond = gen_reg_rtx (CCmode);
|
||||
|
||||
- /* If we have an LE target without ldbrx and word_mode is DImode,
|
||||
- then we must avoid using word_mode. */
|
||||
- int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
|
||||
- && word_mode == DImode);
|
||||
-
|
||||
/* Strategy phase. How many ops will this take and should we expand it? */
|
||||
|
||||
unsigned HOST_WIDE_INT offset = 0;
|
||||
machine_mode load_mode =
|
||||
- select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
|
||||
+ select_block_compare_mode (offset, bytes, base_align);
|
||||
unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
|
||||
|
||||
/* We don't want to generate too much code. The loop code can take
|
||||
@@ -1445,8 +1443,7 @@ expand_block_compare (rtx operands[])
|
||||
while (bytes > 0)
|
||||
{
|
||||
unsigned int align = compute_current_alignment (base_align, offset);
|
||||
- load_mode = select_block_compare_mode (offset, bytes,
|
||||
- align, word_mode_ok);
|
||||
+ load_mode = select_block_compare_mode (offset, bytes, align);
|
||||
load_mode_size = GET_MODE_SIZE (load_mode);
|
||||
if (bytes >= load_mode_size)
|
||||
cmp_bytes = load_mode_size;
|
||||
@@ -1698,6 +1695,189 @@ expand_strncmp_align_check (rtx strncmp_label, rtx src_addr, HOST_WIDE_INT bytes
|
||||
LABEL_NUSES (strncmp_label) += 1;
|
||||
}
|
||||
|
||||
+/* Generate the sequence of compares for strcmp/strncmp using gpr instructions.
|
||||
+ BYTES_TO_COMPARE is the number of bytes to be compared.
|
||||
+ BASE_ALIGN is the smaller of the alignment of the two strings.
|
||||
+ ORIG_SRC1 is the unmodified rtx for the first string.
|
||||
+ ORIG_SRC2 is the unmodified rtx for the second string.
|
||||
+ TMP_REG_SRC1 is the register for loading the first string.
|
||||
+ TMP_REG_SRC2 is the register for loading the second string.
|
||||
+ RESULT_REG is the rtx for the result register.
|
||||
+ EQUALITY_COMPARE_REST is a flag to indicate we need to make a cleanup call
|
||||
+ to strcmp/strncmp if we have equality at the end of the inline comparison.
|
||||
+ CLEANUP_LABEL is rtx for a label we generate if we need code to clean up
|
||||
+ and generate the final comparison result.
|
||||
+ FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
|
||||
+ set the final result. */
|
||||
+static void
|
||||
+expand_strncmp_gpr_sequence(unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
+ unsigned int base_align,
|
||||
+ rtx orig_src1, rtx orig_src2,
|
||||
+ rtx tmp_reg_src1, rtx tmp_reg_src2, rtx result_reg,
|
||||
+ bool equality_compare_rest, rtx &cleanup_label,
|
||||
+ rtx final_move_label)
|
||||
+{
|
||||
+ unsigned int word_mode_size = GET_MODE_SIZE (word_mode);
|
||||
+ machine_mode load_mode;
|
||||
+ unsigned int load_mode_size;
|
||||
+ unsigned HOST_WIDE_INT cmp_bytes = 0;
|
||||
+ unsigned HOST_WIDE_INT offset = 0;
|
||||
+ rtx src1_addr = force_reg (Pmode, XEXP (orig_src1, 0));
|
||||
+ rtx src2_addr = force_reg (Pmode, XEXP (orig_src2, 0));
|
||||
+
|
||||
+ while (bytes_to_compare > 0)
|
||||
+ {
|
||||
+ /* GPR compare sequence:
|
||||
+ check each 8B with: ld/ld cmpd bne
|
||||
+ If equal, use rldicr/cmpb to check for zero byte.
|
||||
+ cleanup code at end:
|
||||
+ cmpb get byte that differs
|
||||
+ cmpb look for zero byte
|
||||
+ orc combine
|
||||
+ cntlzd get bit of first zero/diff byte
|
||||
+ subfic convert for rldcl use
|
||||
+ rldcl rldcl extract diff/zero byte
|
||||
+ subf subtract for final result
|
||||
+
|
||||
+ The last compare can branch around the cleanup code if the
|
||||
+ result is zero because the strings are exactly equal. */
|
||||
+
|
||||
+ unsigned int align = compute_current_alignment (base_align, offset);
|
||||
+ load_mode = select_block_compare_mode (offset, bytes_to_compare, align);
|
||||
+ load_mode_size = GET_MODE_SIZE (load_mode);
|
||||
+ if (bytes_to_compare >= load_mode_size)
|
||||
+ cmp_bytes = load_mode_size;
|
||||
+ else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
|
||||
+ {
|
||||
+ /* Move this load back so it doesn't go past the end.
|
||||
+ P8/P9 can do this efficiently. */
|
||||
+ unsigned int extra_bytes = load_mode_size - bytes_to_compare;
|
||||
+ cmp_bytes = bytes_to_compare;
|
||||
+ if (extra_bytes < offset)
|
||||
+ {
|
||||
+ offset -= extra_bytes;
|
||||
+ cmp_bytes = load_mode_size;
|
||||
+ bytes_to_compare = cmp_bytes;
|
||||
+ }
|
||||
+ }
|
||||
+ else
|
||||
+ /* P7 and earlier can't do the overlapping load trick fast,
|
||||
+ so this forces a non-overlapping load and a shift to get
|
||||
+ rid of the extra bytes. */
|
||||
+ cmp_bytes = bytes_to_compare;
|
||||
+
|
||||
+ rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset));
|
||||
+ do_load_for_compare_from_addr (load_mode, tmp_reg_src1, addr1, orig_src1);
|
||||
+ rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset));
|
||||
+ do_load_for_compare_from_addr (load_mode, tmp_reg_src2, addr2, orig_src2);
|
||||
+
|
||||
+ /* We must always left-align the data we read, and
|
||||
+ clear any bytes to the right that are beyond the string.
|
||||
+ Otherwise the cmpb sequence won't produce the correct
|
||||
+ results. The beginning of the compare will be done
|
||||
+ with word_mode so will not have any extra shifts or
|
||||
+ clear rights. */
|
||||
+
|
||||
+ if (load_mode_size < word_mode_size)
|
||||
+ {
|
||||
+ /* Rotate left first. */
|
||||
+ rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
|
||||
+ do_rotl3 (tmp_reg_src1, tmp_reg_src1, sh);
|
||||
+ do_rotl3 (tmp_reg_src2, tmp_reg_src2, sh);
|
||||
+ }
|
||||
+
|
||||
+ if (cmp_bytes < word_mode_size)
|
||||
+ {
|
||||
+ /* Now clear right. This plus the rotate can be
|
||||
+ turned into a rldicr instruction. */
|
||||
+ HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
||||
+ rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
||||
+ do_and3 (tmp_reg_src1, tmp_reg_src1, mask);
|
||||
+ do_and3 (tmp_reg_src2, tmp_reg_src2, mask);
|
||||
+ }
|
||||
+
|
||||
+ /* Cases to handle. A and B are chunks of the two strings.
|
||||
+ 1: Not end of comparison:
|
||||
+ A != B: branch to cleanup code to compute result.
|
||||
+ A == B: check for 0 byte, next block if not found.
|
||||
+ 2: End of the inline comparison:
|
||||
+ A != B: branch to cleanup code to compute result.
|
||||
+ A == B: check for 0 byte, call strcmp/strncmp
|
||||
+ 3: compared requested N bytes:
|
||||
+ A == B: branch to result 0.
|
||||
+ A != B: cleanup code to compute result. */
|
||||
+
|
||||
+ unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
|
||||
+
|
||||
+ rtx dst_label;
|
||||
+ if (remain > 0 || equality_compare_rest)
|
||||
+ {
|
||||
+ /* Branch to cleanup code, otherwise fall through to do
|
||||
+ more compares. */
|
||||
+ if (!cleanup_label)
|
||||
+ cleanup_label = gen_label_rtx ();
|
||||
+ dst_label = cleanup_label;
|
||||
+ }
|
||||
+ else
|
||||
+ /* Branch to end and produce result of 0. */
|
||||
+ dst_label = final_move_label;
|
||||
+
|
||||
+ rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
|
||||
+ rtx cond = gen_reg_rtx (CCmode);
|
||||
+
|
||||
+ /* Always produce the 0 result, it is needed if
|
||||
+ cmpb finds a 0 byte in this chunk. */
|
||||
+ rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
|
||||
+ rs6000_emit_dot_insn (result_reg, tmp, 1, cond);
|
||||
+
|
||||
+ rtx cmp_rtx;
|
||||
+ if (remain == 0 && !equality_compare_rest)
|
||||
+ cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
|
||||
+ else
|
||||
+ cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
|
||||
+
|
||||
+ rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
|
||||
+ lab_ref, pc_rtx);
|
||||
+ rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
|
||||
+ JUMP_LABEL (j) = dst_label;
|
||||
+ LABEL_NUSES (dst_label) += 1;
|
||||
+
|
||||
+ if (remain > 0 || equality_compare_rest)
|
||||
+ {
|
||||
+ /* Generate a cmpb to test for a 0 byte and branch
|
||||
+ to final result if found. */
|
||||
+ rtx cmpb_zero = gen_reg_rtx (word_mode);
|
||||
+ rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
|
||||
+ rtx condz = gen_reg_rtx (CCmode);
|
||||
+ rtx zero_reg = gen_reg_rtx (word_mode);
|
||||
+ emit_move_insn (zero_reg, GEN_INT (0));
|
||||
+ do_cmpb3 (cmpb_zero, tmp_reg_src1, zero_reg);
|
||||
+
|
||||
+ if (cmp_bytes < word_mode_size)
|
||||
+ {
|
||||
+ /* Don't want to look at zero bytes past end. */
|
||||
+ HOST_WIDE_INT mb =
|
||||
+ BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
||||
+ rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
||||
+ do_and3 (cmpb_zero, cmpb_zero, mask);
|
||||
+ }
|
||||
+
|
||||
+ emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
|
||||
+ rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx);
|
||||
+ rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx,
|
||||
+ lab_ref_fin, pc_rtx);
|
||||
+ rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
|
||||
+ JUMP_LABEL (j2) = final_move_label;
|
||||
+ LABEL_NUSES (final_move_label) += 1;
|
||||
+
|
||||
+ }
|
||||
+
|
||||
+ offset += cmp_bytes;
|
||||
+ bytes_to_compare -= cmp_bytes;
|
||||
+ }
|
||||
+
|
||||
+}
|
||||
+
|
||||
/* Generate the final sequence that identifies the differing
|
||||
byte and generates the final result, taking into account
|
||||
zero bytes:
|
||||
@@ -1797,7 +1977,7 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
bytes_rtx = operands[3];
|
||||
align_rtx = operands[4];
|
||||
}
|
||||
- unsigned HOST_WIDE_INT cmp_bytes = 0;
|
||||
+
|
||||
rtx src1_addr = force_reg (Pmode, XEXP (orig_src1, 0));
|
||||
rtx src2_addr = force_reg (Pmode, XEXP (orig_src2, 0));
|
||||
|
||||
@@ -1822,11 +2002,6 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
|
||||
gcc_assert (GET_MODE (target) == SImode);
|
||||
|
||||
- /* If we have an LE target without ldbrx and word_mode is DImode,
|
||||
- then we must avoid using word_mode. */
|
||||
- int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
|
||||
- && word_mode == DImode);
|
||||
-
|
||||
unsigned int word_mode_size = GET_MODE_SIZE (word_mode);
|
||||
|
||||
unsigned HOST_WIDE_INT offset = 0;
|
||||
@@ -1839,7 +2014,7 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
bytes = UINTVAL (bytes_rtx);
|
||||
|
||||
machine_mode load_mode =
|
||||
- select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
|
||||
+ select_block_compare_mode (0, bytes, base_align);
|
||||
unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
|
||||
compare_length = rs6000_string_compare_inline_limit * load_mode_size;
|
||||
|
||||
@@ -1867,6 +2042,8 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
rtx begin_compare_label = NULL;
|
||||
unsigned int required_align = 8;
|
||||
|
||||
+ required_align = 8;
|
||||
+
|
||||
if (base_align < required_align)
|
||||
{
|
||||
/* Generate code that checks distance to 4k boundary for this case. */
|
||||
@@ -1952,159 +2129,15 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
|
||||
/* Generate a sequence of GPR or VEC/VSX instructions to compare out
|
||||
to the length specified. */
|
||||
- unsigned HOST_WIDE_INT bytes_to_compare = compare_length;
|
||||
- while (bytes_to_compare > 0)
|
||||
- {
|
||||
- /* GPR compare sequence:
|
||||
- check each 8B with: ld/ld cmpd bne
|
||||
- If equal, use rldicr/cmpb to check for zero byte.
|
||||
- cleanup code at end:
|
||||
- cmpb get byte that differs
|
||||
- cmpb look for zero byte
|
||||
- orc combine
|
||||
- cntlzd get bit of first zero/diff byte
|
||||
- subfic convert for rldcl use
|
||||
- rldcl rldcl extract diff/zero byte
|
||||
- subf subtract for final result
|
||||
-
|
||||
- The last compare can branch around the cleanup code if the
|
||||
- result is zero because the strings are exactly equal. */
|
||||
-
|
||||
- unsigned int align = compute_current_alignment (base_align, offset);
|
||||
- load_mode = select_block_compare_mode (offset, bytes_to_compare,
|
||||
- align, word_mode_ok);
|
||||
- load_mode_size = GET_MODE_SIZE (load_mode);
|
||||
- if (bytes_to_compare >= load_mode_size)
|
||||
- cmp_bytes = load_mode_size;
|
||||
- else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
|
||||
- {
|
||||
- /* Move this load back so it doesn't go past the end.
|
||||
- P8/P9 can do this efficiently. */
|
||||
- unsigned int extra_bytes = load_mode_size - bytes_to_compare;
|
||||
- cmp_bytes = bytes_to_compare;
|
||||
- if (extra_bytes < offset)
|
||||
- {
|
||||
- offset -= extra_bytes;
|
||||
- cmp_bytes = load_mode_size;
|
||||
- bytes_to_compare = cmp_bytes;
|
||||
- }
|
||||
- }
|
||||
- else
|
||||
- /* P7 and earlier can't do the overlapping load trick fast,
|
||||
- so this forces a non-overlapping load and a shift to get
|
||||
- rid of the extra bytes. */
|
||||
- cmp_bytes = bytes_to_compare;
|
||||
-
|
||||
- rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset));
|
||||
- do_load_for_compare_from_addr (load_mode, tmp_reg_src1, addr1, orig_src1);
|
||||
- rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset));
|
||||
- do_load_for_compare_from_addr (load_mode, tmp_reg_src2, addr2, orig_src2);
|
||||
-
|
||||
- /* We must always left-align the data we read, and
|
||||
- clear any bytes to the right that are beyond the string.
|
||||
- Otherwise the cmpb sequence won't produce the correct
|
||||
- results. The beginning of the compare will be done
|
||||
- with word_mode so will not have any extra shifts or
|
||||
- clear rights. */
|
||||
-
|
||||
- if (load_mode_size < word_mode_size)
|
||||
- {
|
||||
- /* Rotate left first. */
|
||||
- rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
|
||||
- do_rotl3 (tmp_reg_src1, tmp_reg_src1, sh);
|
||||
- do_rotl3 (tmp_reg_src2, tmp_reg_src2, sh);
|
||||
- }
|
||||
-
|
||||
- if (cmp_bytes < word_mode_size)
|
||||
- {
|
||||
- /* Now clear right. This plus the rotate can be
|
||||
- turned into a rldicr instruction. */
|
||||
- HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
||||
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
||||
- do_and3 (tmp_reg_src1, tmp_reg_src1, mask);
|
||||
- do_and3 (tmp_reg_src2, tmp_reg_src2, mask);
|
||||
- }
|
||||
-
|
||||
- /* Cases to handle. A and B are chunks of the two strings.
|
||||
- 1: Not end of comparison:
|
||||
- A != B: branch to cleanup code to compute result.
|
||||
- A == B: check for 0 byte, next block if not found.
|
||||
- 2: End of the inline comparison:
|
||||
- A != B: branch to cleanup code to compute result.
|
||||
- A == B: check for 0 byte, call strcmp/strncmp
|
||||
- 3: compared requested N bytes:
|
||||
- A == B: branch to result 0.
|
||||
- A != B: cleanup code to compute result. */
|
||||
-
|
||||
- unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
|
||||
-
|
||||
- rtx dst_label;
|
||||
- if (remain > 0 || equality_compare_rest)
|
||||
- {
|
||||
- /* Branch to cleanup code, otherwise fall through to do
|
||||
- more compares. */
|
||||
- if (!cleanup_label)
|
||||
- cleanup_label = gen_label_rtx ();
|
||||
- dst_label = cleanup_label;
|
||||
- }
|
||||
- else
|
||||
- /* Branch to end and produce result of 0. */
|
||||
- dst_label = final_move_label;
|
||||
-
|
||||
- rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
|
||||
- rtx cond = gen_reg_rtx (CCmode);
|
||||
-
|
||||
- /* Always produce the 0 result, it is needed if
|
||||
- cmpb finds a 0 byte in this chunk. */
|
||||
- rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
|
||||
- rs6000_emit_dot_insn (result_reg, tmp, 1, cond);
|
||||
-
|
||||
- rtx cmp_rtx;
|
||||
- if (remain == 0 && !equality_compare_rest)
|
||||
- cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
|
||||
- else
|
||||
- cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
|
||||
-
|
||||
- rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
|
||||
- lab_ref, pc_rtx);
|
||||
- rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
|
||||
- JUMP_LABEL (j) = dst_label;
|
||||
- LABEL_NUSES (dst_label) += 1;
|
||||
-
|
||||
- if (remain > 0 || equality_compare_rest)
|
||||
- {
|
||||
- /* Generate a cmpb to test for a 0 byte and branch
|
||||
- to final result if found. */
|
||||
- rtx cmpb_zero = gen_reg_rtx (word_mode);
|
||||
- rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
|
||||
- rtx condz = gen_reg_rtx (CCmode);
|
||||
- rtx zero_reg = gen_reg_rtx (word_mode);
|
||||
- emit_move_insn (zero_reg, GEN_INT (0));
|
||||
- do_cmpb3 (cmpb_zero, tmp_reg_src1, zero_reg);
|
||||
-
|
||||
- if (cmp_bytes < word_mode_size)
|
||||
- {
|
||||
- /* Don't want to look at zero bytes past end. */
|
||||
- HOST_WIDE_INT mb =
|
||||
- BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
||||
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
||||
- do_and3 (cmpb_zero, cmpb_zero, mask);
|
||||
- }
|
||||
-
|
||||
- emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
|
||||
- rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx);
|
||||
- rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx,
|
||||
- lab_ref_fin, pc_rtx);
|
||||
- rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
|
||||
- JUMP_LABEL (j2) = final_move_label;
|
||||
- LABEL_NUSES (final_move_label) += 1;
|
||||
-
|
||||
- }
|
||||
-
|
||||
- offset += cmp_bytes;
|
||||
- bytes_to_compare -= cmp_bytes;
|
||||
- }
|
||||
-
|
||||
+ expand_strncmp_gpr_sequence(compare_length, base_align,
|
||||
+ orig_src1, orig_src2,
|
||||
+ tmp_reg_src1, tmp_reg_src2,
|
||||
+ result_reg,
|
||||
+ equality_compare_rest,
|
||||
+ cleanup_label, final_move_label);
|
||||
+
|
||||
+ offset = compare_length;
|
||||
+
|
||||
if (equality_compare_rest)
|
||||
{
|
||||
/* Update pointers past what has been compared already. */
|
@ -1,613 +0,0 @@
commit e4108e7e619dcf7f21224382bc37ba2ef651eb43
Author: acsawdey <acsawdey@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Thu Aug 30 18:17:00 2018 +0000

2018-08-30 Aaron Sawdey <acsawdey@linux.ibm.com>

* config/rs6000/altivec.md (altivec_eq<mode>): Remove star.
(altivec_vcmpequ<VI_char>_p): Remove star.
* config/rs6000/rs6000-string.c (do_load_for_compare): Support
vector load modes.
(expand_strncmp_vec_sequence): New function.
(emit_final_str_compare_vec): New function.
(expand_strn_compare): Add support for vector strncmp.
* config/rs6000/rs6000.opt (-mstring-compare-inline-limit): Change
length specification to bytes.
* config/rs6000/vsx.md (vsx_ld_elemrev_v16qi_internal): Remove star.
(vcmpnezb_p): New pattern.
* doc/invoke.texi (RS/6000 and PowerPC Options): Update documentation
for option -mstring-compare-inline-limit.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@263991 138bc75d-0d04-0410-961f-82ee72b054a4
|
||||
|
||||
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
|
||||
index 13f4654db6a..db4f926bd15 100644
|
||||
--- a/gcc/config/rs6000/altivec.md
|
||||
+++ b/gcc/config/rs6000/altivec.md
|
||||
@@ -608,7 +608,7 @@
|
||||
"vcmpbfp %0,%1,%2"
|
||||
[(set_attr "type" "veccmp")])
|
||||
|
||||
-(define_insn "*altivec_eq<mode>"
|
||||
+(define_insn "altivec_eq<mode>"
|
||||
[(set (match_operand:VI2 0 "altivec_register_operand" "=v")
|
||||
(eq:VI2 (match_operand:VI2 1 "altivec_register_operand" "v")
|
||||
(match_operand:VI2 2 "altivec_register_operand" "v")))]
|
||||
@@ -2438,7 +2438,7 @@
|
||||
|
||||
;; Compare vectors producing a vector result and a predicate, setting CR6 to
|
||||
;; indicate a combined status
|
||||
-(define_insn "*altivec_vcmpequ<VI_char>_p"
|
||||
+(define_insn "altivec_vcmpequ<VI_char>_p"
|
||||
[(set (reg:CC CR6_REGNO)
|
||||
(unspec:CC [(eq:CC (match_operand:VI2 1 "register_operand" "v")
|
||||
(match_operand:VI2 2 "register_operand" "v"))]
|
||||
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
|
||||
index 451e9ed33da..ff0414586d0 100644
|
||||
--- a/gcc/config/rs6000/rs6000-string.c
|
||||
+++ b/gcc/config/rs6000/rs6000-string.c
|
||||
@@ -157,6 +157,33 @@ do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
|
||||
{
|
||||
switch (GET_MODE (reg))
|
||||
{
|
||||
+ case E_V16QImode:
|
||||
+ switch (mode)
|
||||
+ {
|
||||
+ case E_V16QImode:
|
||||
+ if (!BYTES_BIG_ENDIAN)
|
||||
+ {
|
||||
+ if (TARGET_P9_VECTOR)
|
||||
+ emit_insn (gen_vsx_ld_elemrev_v16qi_internal (reg, mem));
|
||||
+ else
|
||||
+ {
|
||||
+ rtx reg_v2di = simplify_gen_subreg (V2DImode, reg,
|
||||
+ V16QImode, 0);
|
||||
+ gcc_assert (MEM_P (mem));
|
||||
+ rtx addr = XEXP (mem, 0);
|
||||
+ rtx mem_v2di = gen_rtx_MEM (V2DImode, addr);
|
||||
+ MEM_COPY_ATTRIBUTES (mem_v2di, mem);
|
||||
+ set_mem_size (mem, GET_MODE_SIZE (V2DImode));
|
||||
+ emit_insn (gen_vsx_ld_elemrev_v2di (reg_v2di, mem_v2di));
|
||||
+ }
|
||||
+ }
|
||||
+ else
|
||||
+ emit_insn (gen_vsx_movv2di_64bit (reg, mem));
|
||||
+ break;
|
||||
+ default:
|
||||
+ gcc_unreachable ();
|
||||
+ }
|
||||
+ break;
|
||||
case E_DImode:
|
||||
switch (mode)
|
||||
{
|
||||
@@ -227,6 +254,12 @@ do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
|
||||
gcc_unreachable ();
|
||||
}
|
||||
break;
|
||||
+
|
||||
+ case E_QImode:
|
||||
+ gcc_assert (mode == E_QImode);
|
||||
+ emit_move_insn (reg, mem);
|
||||
+ break;
|
||||
+
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
break;
|
||||
@@ -1705,17 +1738,17 @@ expand_strncmp_align_check (rtx strncmp_label, rtx src_addr, HOST_WIDE_INT bytes
|
||||
RESULT_REG is the rtx for the result register.
|
||||
EQUALITY_COMPARE_REST is a flag to indicate we need to make a cleanup call
|
||||
to strcmp/strncmp if we have equality at the end of the inline comparison.
|
||||
- CLEANUP_LABEL is rtx for a label we generate if we need code to clean up
|
||||
- and generate the final comparison result.
|
||||
+ P_CLEANUP_LABEL is a pointer to rtx for a label we generate if we need code
|
||||
+ to clean up and generate the final comparison result.
|
||||
FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
|
||||
set the final result. */
|
||||
static void
|
||||
-expand_strncmp_gpr_sequence(unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
- unsigned int base_align,
|
||||
- rtx orig_src1, rtx orig_src2,
|
||||
- rtx tmp_reg_src1, rtx tmp_reg_src2, rtx result_reg,
|
||||
- bool equality_compare_rest, rtx &cleanup_label,
|
||||
- rtx final_move_label)
|
||||
+expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
+ unsigned int base_align,
|
||||
+ rtx orig_src1, rtx orig_src2,
|
||||
+ rtx tmp_reg_src1, rtx tmp_reg_src2, rtx result_reg,
|
||||
+ bool equality_compare_rest, rtx *p_cleanup_label,
|
||||
+ rtx final_move_label)
|
||||
{
|
||||
unsigned int word_mode_size = GET_MODE_SIZE (word_mode);
|
||||
machine_mode load_mode;
|
||||
@@ -1724,6 +1757,8 @@ expand_strncmp_gpr_sequence(unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
unsigned HOST_WIDE_INT offset = 0;
|
||||
rtx src1_addr = force_reg (Pmode, XEXP (orig_src1, 0));
|
||||
rtx src2_addr = force_reg (Pmode, XEXP (orig_src2, 0));
|
||||
+ gcc_assert (p_cleanup_label != NULL);
|
||||
+ rtx cleanup_label = *p_cleanup_label;
|
||||
|
||||
while (bytes_to_compare > 0)
|
||||
{
|
||||
@@ -1876,6 +1911,178 @@ expand_strncmp_gpr_sequence(unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
bytes_to_compare -= cmp_bytes;
|
||||
}
|
||||
|
||||
+ *p_cleanup_label = cleanup_label;
|
||||
+ return;
|
||||
+}
|
||||
+
|
||||
+/* Generate the sequence of compares for strcmp/strncmp using vec/vsx
+ instructions.
+
+ BYTES_TO_COMPARE is the number of bytes to be compared.
+ ORIG_SRC1 is the unmodified rtx for the first string.
+ ORIG_SRC2 is the unmodified rtx for the second string.
+ S1ADDR is the register to use for the base address of the first string.
+ S2ADDR is the register to use for the base address of the second string.
+ OFF_REG is the register to use for the string offset for loads.
+ S1DATA is the register for loading the first string.
+ S2DATA is the register for loading the second string.
+ VEC_RESULT is the rtx for the vector result indicating the byte difference.
+ EQUALITY_COMPARE_REST is a flag to indicate we need to make a cleanup call
+ to strcmp/strncmp if we have equality at the end of the inline comparison.
+ P_CLEANUP_LABEL is a pointer to rtx for a label we generate if we need code to clean up
+ and generate the final comparison result.
+ FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
+ set the final result. */
+static void
+expand_strncmp_vec_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
+ rtx orig_src1, rtx orig_src2,
+ rtx s1addr, rtx s2addr, rtx off_reg,
+ rtx s1data, rtx s2data,
+ rtx vec_result, bool equality_compare_rest,
+ rtx *p_cleanup_label, rtx final_move_label)
+{
|
||||
+ machine_mode load_mode;
|
||||
+ unsigned int load_mode_size;
|
||||
+ unsigned HOST_WIDE_INT cmp_bytes = 0;
|
||||
+ unsigned HOST_WIDE_INT offset = 0;
|
||||
+
|
||||
+ gcc_assert (p_cleanup_label != NULL);
|
||||
+ rtx cleanup_label = *p_cleanup_label;
|
||||
+
|
||||
+ emit_move_insn (s1addr, force_reg (Pmode, XEXP (orig_src1, 0)));
|
||||
+ emit_move_insn (s2addr, force_reg (Pmode, XEXP (orig_src2, 0)));
|
||||
+
|
||||
+ unsigned int i;
|
||||
+ rtx zr[16];
|
||||
+ for (i = 0; i < 16; i++)
|
||||
+ zr[i] = GEN_INT (0);
|
||||
+ rtvec zv = gen_rtvec_v (16, zr);
|
||||
+ rtx zero_reg = gen_reg_rtx (V16QImode);
|
||||
+ rs6000_expand_vector_init (zero_reg, gen_rtx_PARALLEL (V16QImode, zv));
|
||||
+
|
||||
+ while (bytes_to_compare > 0)
|
||||
+ {
|
||||
+ /* VEC/VSX compare sequence for P8:
|
||||
+ check each 16B with:
|
||||
+ lxvd2x 32,28,8
|
||||
+ lxvd2x 33,29,8
|
||||
+ vcmpequb 2,0,1 # compare strings
|
||||
+ vcmpequb 4,0,3 # compare w/ 0
|
||||
+ xxlorc 37,36,34 # first FF byte is either mismatch or end of string
|
||||
+ vcmpequb. 7,5,3 # reg 7 contains 0
|
||||
+ bnl 6,.Lmismatch
|
||||
+
|
||||
+ For the P8 LE case, we use lxvd2x and compare full 16 bytes
|
||||
+ but then use use vgbbd and a shift to get two bytes with the
|
||||
+ information we need in the correct order.
|
||||
+
|
||||
+ VEC/VSX compare sequence if TARGET_P9_VECTOR:
|
||||
+ lxvb16x/lxvb16x # load 16B of each string
|
||||
+ vcmpnezb. # produces difference location or zero byte location
|
||||
+ bne 6,.Lmismatch
|
||||
+
|
||||
+ Use the overlapping compare trick for the last block if it is
|
||||
+ less than 16 bytes.
|
||||
+ */
|
||||
+
|
||||
+ load_mode = V16QImode;
|
||||
+ load_mode_size = GET_MODE_SIZE (load_mode);
|
||||
+
|
||||
+ if (bytes_to_compare >= load_mode_size)
|
||||
+ cmp_bytes = load_mode_size;
|
||||
+ else
|
||||
+ {
|
||||
+ /* Move this load back so it doesn't go past the end. P8/P9
|
||||
+ can do this efficiently. This is never called with less
|
||||
+ than 16 bytes so we should always be able to do this. */
|
||||
+ unsigned int extra_bytes = load_mode_size - bytes_to_compare;
|
||||
+ cmp_bytes = bytes_to_compare;
|
||||
+ gcc_assert (offset > extra_bytes);
|
||||
+ offset -= extra_bytes;
|
||||
+ cmp_bytes = load_mode_size;
|
||||
+ bytes_to_compare = cmp_bytes;
|
||||
+ }
|
||||
+
|
||||
+ /* The offset currently used is always kept in off_reg so that the
|
||||
+ cleanup code on P8 can use it to extract the differing byte. */
|
||||
+ emit_move_insn (off_reg, GEN_INT (offset));
|
||||
+
|
||||
+ rtx addr1 = gen_rtx_PLUS (Pmode, s1addr, off_reg);
|
||||
+ do_load_for_compare_from_addr (load_mode, s1data, addr1, orig_src1);
|
||||
+ rtx addr2 = gen_rtx_PLUS (Pmode, s2addr, off_reg);
|
||||
+ do_load_for_compare_from_addr (load_mode, s2data, addr2, orig_src2);
|
||||
+
|
||||
+ /* Cases to handle. A and B are chunks of the two strings.
|
||||
+ 1: Not end of comparison:
|
||||
+ A != B: branch to cleanup code to compute result.
|
||||
+ A == B: next block
|
||||
+ 2: End of the inline comparison:
|
||||
+ A != B: branch to cleanup code to compute result.
|
||||
+ A == B: call strcmp/strncmp
|
||||
+ 3: compared requested N bytes:
|
||||
+ A == B: branch to result 0.
|
||||
+ A != B: cleanup code to compute result. */
|
||||
+
|
||||
+ unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
|
||||
+
|
||||
+ if (TARGET_P9_VECTOR)
|
||||
+ emit_insn (gen_vcmpnezb_p (vec_result, s1data, s2data));
|
||||
+ else
|
||||
+ {
|
||||
+ /* Emit instructions to do comparison and zero check. */
|
||||
+ rtx cmp_res = gen_reg_rtx (load_mode);
|
||||
+ rtx cmp_zero = gen_reg_rtx (load_mode);
|
||||
+ rtx cmp_combined = gen_reg_rtx (load_mode);
|
||||
+ emit_insn (gen_altivec_eqv16qi (cmp_res, s1data, s2data));
|
||||
+ emit_insn (gen_altivec_eqv16qi (cmp_zero, s1data, zero_reg));
|
||||
+ emit_insn (gen_orcv16qi3 (vec_result, cmp_zero, cmp_res));
|
||||
+ emit_insn (gen_altivec_vcmpequb_p (cmp_combined, vec_result, zero_reg));
|
||||
+ }
|
||||
+
|
||||
+ bool branch_to_cleanup = (remain > 0 || equality_compare_rest);
|
||||
+ rtx cr6 = gen_rtx_REG (CCmode, CR6_REGNO);
|
||||
+ rtx dst_label;
|
||||
+ rtx cmp_rtx;
|
||||
+ if (branch_to_cleanup)
|
||||
+ {
|
||||
+ /* Branch to cleanup code, otherwise fall through to do more
|
||||
+ compares. P8 and P9 use different CR bits because on P8
|
||||
+ we are looking at the result of a comparsion vs a
|
||||
+ register of zeroes so the all-true condition means no
|
||||
+ difference or zero was found. On P9, vcmpnezb sets a byte
|
||||
+ to 0xff if there is a mismatch or zero, so the all-false
|
||||
+ condition indicates we found no difference or zero. */
|
||||
+ if (!cleanup_label)
|
||||
+ cleanup_label = gen_label_rtx ();
|
||||
+ dst_label = cleanup_label;
|
||||
+ if (TARGET_P9_VECTOR)
|
||||
+ cmp_rtx = gen_rtx_NE (VOIDmode, cr6, const0_rtx);
|
||||
+ else
|
||||
+ cmp_rtx = gen_rtx_GE (VOIDmode, cr6, const0_rtx);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ /* Branch to final return or fall through to cleanup,
|
||||
+ result is already set to 0. */
|
||||
+ dst_label = final_move_label;
|
||||
+ if (TARGET_P9_VECTOR)
|
||||
+ cmp_rtx = gen_rtx_EQ (VOIDmode, cr6, const0_rtx);
|
||||
+ else
|
||||
+ cmp_rtx = gen_rtx_LT (VOIDmode, cr6, const0_rtx);
|
||||
+ }
|
||||
+
|
||||
+ rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
|
||||
+ rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
|
||||
+ lab_ref, pc_rtx);
|
||||
+ rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
|
||||
+ JUMP_LABEL (j2) = dst_label;
|
||||
+ LABEL_NUSES (dst_label) += 1;
|
||||
+
|
||||
+ offset += cmp_bytes;
|
||||
+ bytes_to_compare -= cmp_bytes;
|
||||
+ }
|
||||
+ *p_cleanup_label = cleanup_label;
|
||||
+ return;
|
||||
}
|
||||
|
||||
/* Generate the final sequence that identifies the differing
|
||||
@@ -1948,6 +2155,96 @@ emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
|
||||
return;
|
||||
}
|
||||
|
||||
+/* Generate the final sequence that identifies the differing
+ byte and generates the final result, taking into account
+ zero bytes:
+
+ P8:
+ vgbbd 0,0
+ vsldoi 0,0,0,9
+ mfvsrd 9,32
+ addi 10,9,-1 # count trailing zero bits
+ andc 9,10,9
+ popcntd 9,9
+ lbzx 10,28,9 # use that offset to load differing byte
+ lbzx 3,29,9
+ subf 3,3,10 # subtract for final result
+
+ P9:
+ vclzlsbb # counts trailing bytes with lsb=0
+ vextublx # extract differing byte
+
+ STR1 is the reg rtx for data from string 1.
+ STR2 is the reg rtx for data from string 2.
+ RESULT is the reg rtx for the comparison result.
+ S1ADDR is the register to use for the base address of the first string.
+ S2ADDR is the register to use for the base address of the second string.
+ ORIG_SRC1 is the unmodified rtx for the first string.
+ ORIG_SRC2 is the unmodified rtx for the second string.
+ OFF_REG is the register to use for the string offset for loads.
+ VEC_RESULT is the rtx for the vector result indicating the byte difference.
+ */
+
+static void
+emit_final_str_compare_vec (rtx str1, rtx str2, rtx result,
+ rtx s1addr, rtx s2addr,
+ rtx orig_src1, rtx orig_src2,
+ rtx off_reg, rtx vec_result)
+{
|
||||
+ if (TARGET_P9_VECTOR)
|
||||
+ {
|
||||
+ rtx diffix = gen_reg_rtx (SImode);
|
||||
+ rtx chr1 = gen_reg_rtx (SImode);
|
||||
+ rtx chr2 = gen_reg_rtx (SImode);
|
||||
+ rtx chr1_di = simplify_gen_subreg (DImode, chr1, SImode, 0);
|
||||
+ rtx chr2_di = simplify_gen_subreg (DImode, chr2, SImode, 0);
|
||||
+ emit_insn (gen_vclzlsbb_v16qi (diffix, vec_result));
|
||||
+ emit_insn (gen_vextublx (chr1, diffix, str1));
|
||||
+ emit_insn (gen_vextublx (chr2, diffix, str2));
|
||||
+ do_sub3 (result, chr1_di, chr2_di);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ rtx diffix = gen_reg_rtx (DImode);
|
||||
+ rtx result_gbbd = gen_reg_rtx (V16QImode);
|
||||
+ /* Since each byte of the input is either 00 or FF, the bytes in
|
||||
+ dw0 and dw1 after vgbbd are all identical to each other. */
|
||||
+ emit_insn (gen_p8v_vgbbd (result_gbbd, vec_result));
|
||||
+ /* For LE, we shift by 9 and get BA in the low two bytes then CTZ.
|
||||
+ For BE, we shift by 7 and get AB in the high two bytes then CLZ. */
|
||||
+ rtx result_shifted = gen_reg_rtx (V16QImode);
|
||||
+ int shift_amt = (BYTES_BIG_ENDIAN) ? 7 : 9;
|
||||
+ emit_insn (gen_altivec_vsldoi_v16qi (result_shifted,result_gbbd,result_gbbd, GEN_INT (shift_amt)));
|
||||
+
|
||||
+ rtx diffix_df = simplify_gen_subreg (DFmode, diffix, DImode, 0);
|
||||
+ emit_insn (gen_p8_mfvsrd_3_v16qi (diffix_df, result_shifted));
|
||||
+ rtx count = gen_reg_rtx (DImode);
|
||||
+
|
||||
+ if (BYTES_BIG_ENDIAN)
|
||||
+ emit_insn (gen_clzdi2 (count, diffix));
|
||||
+ else
|
||||
+ emit_insn (gen_ctzdi2 (count, diffix));
|
||||
+
|
||||
+ /* P8 doesn't have a good solution for extracting one byte from
|
||||
+ a vsx reg like vextublx on P9 so we just compute the offset
|
||||
+ of the differing byte and load it from each string. */
|
||||
+ do_add3 (off_reg, off_reg, count);
|
||||
+
|
||||
+ rtx chr1 = gen_reg_rtx (QImode);
|
||||
+ rtx chr2 = gen_reg_rtx (QImode);
|
||||
+ rtx addr1 = gen_rtx_PLUS (Pmode, s1addr, off_reg);
|
||||
+ do_load_for_compare_from_addr (QImode, chr1, addr1, orig_src1);
|
||||
+ rtx addr2 = gen_rtx_PLUS (Pmode, s2addr, off_reg);
|
||||
+ do_load_for_compare_from_addr (QImode, chr2, addr2, orig_src2);
|
||||
+ machine_mode rmode = GET_MODE (result);
|
||||
+ rtx chr1_rm = simplify_gen_subreg (rmode, chr1, QImode, 0);
|
||||
+ rtx chr2_rm = simplify_gen_subreg (rmode, chr2, QImode, 0);
|
||||
+ do_sub3 (result, chr1_rm, chr2_rm);
|
||||
+ }
|
||||
+
|
||||
+ return;
|
||||
+}
|
||||
+
|
||||
/* Expand a string compare operation with length, and return
|
||||
true if successful. Return false if we should let the
|
||||
compiler generate normal code, probably a strncmp call.
|
||||
@@ -2002,21 +2299,43 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
|
||||
gcc_assert (GET_MODE (target) == SImode);
|
||||
|
||||
- unsigned int word_mode_size = GET_MODE_SIZE (word_mode);
|
||||
+ unsigned int required_align = 8;
|
||||
|
||||
unsigned HOST_WIDE_INT offset = 0;
|
||||
unsigned HOST_WIDE_INT bytes; /* N from the strncmp args if available. */
|
||||
unsigned HOST_WIDE_INT compare_length; /* How much to compare inline. */
|
||||
+
|
||||
if (no_length)
|
||||
- /* Use this as a standin to determine the mode to use. */
|
||||
- bytes = rs6000_string_compare_inline_limit * word_mode_size;
|
||||
+ bytes = rs6000_string_compare_inline_limit;
|
||||
else
|
||||
bytes = UINTVAL (bytes_rtx);
|
||||
|
||||
- machine_mode load_mode =
|
||||
- select_block_compare_mode (0, bytes, base_align);
|
||||
- unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
|
||||
- compare_length = rs6000_string_compare_inline_limit * load_mode_size;
|
||||
+ /* Is it OK to use vec/vsx for this. TARGET_VSX means we have at
|
||||
+ least POWER7 but we use TARGET_EFFICIENT_UNALIGNED_VSX which is
|
||||
+ at least POWER8. That way we can rely on overlapping compares to
|
||||
+ do the final comparison of less than 16 bytes. Also I do not want
|
||||
+ to deal with making this work for 32 bits. */
|
||||
+ int use_vec = (bytes >= 16 && !TARGET_32BIT && TARGET_EFFICIENT_UNALIGNED_VSX);
|
||||
+
|
||||
+ if (use_vec)
|
||||
+ required_align = 16;
|
||||
+
|
||||
+ machine_mode load_mode;
|
||||
+ rtx tmp_reg_src1, tmp_reg_src2;
|
||||
+ if (use_vec)
|
||||
+ {
|
||||
+ load_mode = V16QImode;
|
||||
+ tmp_reg_src1 = gen_reg_rtx (V16QImode);
|
||||
+ tmp_reg_src2 = gen_reg_rtx (V16QImode);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ load_mode = select_block_compare_mode (0, bytes, base_align);
|
||||
+ tmp_reg_src1 = gen_reg_rtx (word_mode);
|
||||
+ tmp_reg_src2 = gen_reg_rtx (word_mode);
|
||||
+ }
|
||||
+
|
||||
+ compare_length = rs6000_string_compare_inline_limit;
|
||||
|
||||
/* If we have equality at the end of the last compare and we have not
|
||||
found the end of the string, we need to call strcmp/strncmp to
|
||||
@@ -2040,10 +2359,7 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
rtx final_move_label = gen_label_rtx ();
|
||||
rtx final_label = gen_label_rtx ();
|
||||
rtx begin_compare_label = NULL;
|
||||
- unsigned int required_align = 8;
|
||||
-
|
||||
- required_align = 8;
|
||||
-
|
||||
+
|
||||
if (base_align < required_align)
|
||||
{
|
||||
/* Generate code that checks distance to 4k boundary for this case. */
|
||||
@@ -2060,7 +2376,7 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
the subsequent code generation are in agreement so we do not
|
||||
go past the length we tested for a 4k boundary crossing. */
|
||||
unsigned HOST_WIDE_INT align_test = compare_length;
|
||||
- if (align_test < 8)
|
||||
+ if (align_test < required_align)
|
||||
{
|
||||
align_test = HOST_WIDE_INT_1U << ceil_log2 (align_test);
|
||||
base_align = align_test;
|
||||
@@ -2102,7 +2418,7 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
else
|
||||
{
|
||||
/* -m32 -mpowerpc64 results in word_mode being DImode even
|
||||
- though otherwise it is 32-bit. The length arg to strncmp
|
||||
+ though otherwise it is 32-bit. The length arg to strncmp
|
||||
is a size_t which will be the same size as pointers. */
|
||||
rtx len_rtx = gen_reg_rtx (Pmode);
|
||||
emit_move_insn (len_rtx, gen_int_mode (bytes, Pmode));
|
||||
@@ -2124,17 +2440,32 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
}
|
||||
|
||||
rtx cleanup_label = NULL;
|
||||
- rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
|
||||
- rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
|
||||
+ rtx s1addr = NULL, s2addr = NULL, off_reg = NULL, vec_result = NULL;
|
||||
|
||||
/* Generate a sequence of GPR or VEC/VSX instructions to compare out
|
||||
to the length specified. */
|
||||
- expand_strncmp_gpr_sequence(compare_length, base_align,
|
||||
- orig_src1, orig_src2,
|
||||
- tmp_reg_src1, tmp_reg_src2,
|
||||
- result_reg,
|
||||
- equality_compare_rest,
|
||||
- cleanup_label, final_move_label);
|
||||
+ if (use_vec)
|
||||
+ {
|
||||
+ s1addr = gen_reg_rtx (Pmode);
|
||||
+ s2addr = gen_reg_rtx (Pmode);
|
||||
+ off_reg = gen_reg_rtx (Pmode);
|
||||
+ vec_result = gen_reg_rtx (load_mode);
|
||||
+ emit_move_insn (result_reg, GEN_INT (0));
|
||||
+ expand_strncmp_vec_sequence (compare_length,
|
||||
+ orig_src1, orig_src2,
|
||||
+ s1addr, s2addr, off_reg,
|
||||
+ tmp_reg_src1, tmp_reg_src2,
|
||||
+ vec_result,
|
||||
+ equality_compare_rest,
|
||||
+ &cleanup_label, final_move_label);
|
||||
+ }
|
||||
+ else
|
||||
+ expand_strncmp_gpr_sequence (compare_length, base_align,
|
||||
+ orig_src1, orig_src2,
|
||||
+ tmp_reg_src1, tmp_reg_src2,
|
||||
+ result_reg,
|
||||
+ equality_compare_rest,
|
||||
+ &cleanup_label, final_move_label);
|
||||
|
||||
offset = compare_length;
|
||||
|
||||
@@ -2174,7 +2505,12 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
if (cleanup_label)
|
||||
emit_label (cleanup_label);
|
||||
|
||||
- emit_final_str_compare_gpr (tmp_reg_src1, tmp_reg_src2, result_reg);
|
||||
+ if (use_vec)
|
||||
+ emit_final_str_compare_vec (tmp_reg_src1, tmp_reg_src2, result_reg,
|
||||
+ s1addr, s2addr, orig_src1, orig_src2,
|
||||
+ off_reg, vec_result);
|
||||
+ else
|
||||
+ emit_final_str_compare_gpr (tmp_reg_src1, tmp_reg_src2, result_reg);
|
||||
|
||||
emit_label (final_move_label);
|
||||
emit_insn (gen_movsi (target,
|
||||
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index ace8a477550..ad1b8a29ac6 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -342,8 +342,8 @@ Target Report Var(rs6000_block_compare_inline_loop_limit) Init(-1) RejectNegativ
Max number of bytes to compare with loops.

mstring-compare-inline-limit=
-Target Report Var(rs6000_string_compare_inline_limit) Init(8) RejectNegative Joined UInteger Save
-Max number of pairs of load insns for compare.
+Target Report Var(rs6000_string_compare_inline_limit) Init(64) RejectNegative Joined UInteger Save
+Max number of bytes to compare.

misel
Target Report Mask(ISEL) Var(rs6000_isa_flags)
||||
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
|
||||
index e6921e96a3d..01fb4213001 100644
|
||||
--- a/gcc/config/rs6000/vsx.md
|
||||
+++ b/gcc/config/rs6000/vsx.md
|
||||
@@ -1429,7 +1429,7 @@
|
||||
}
|
||||
})
|
||||
|
||||
-(define_insn "*vsx_ld_elemrev_v16qi_internal"
|
||||
+(define_insn "vsx_ld_elemrev_v16qi_internal"
|
||||
[(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
|
||||
(vec_select:V16QI
|
||||
(match_operand:V16QI 1 "memory_operand" "Z")
|
||||
@@ -5107,6 +5107,22 @@
|
||||
"vcmpnezb %0,%1,%2"
|
||||
[(set_attr "type" "vecsimple")])
|
||||
|
||||
+;; Vector Compare Not Equal or Zero Byte predicate or record-form
|
||||
+(define_insn "vcmpnezb_p"
|
||||
+ [(set (reg:CC CR6_REGNO)
|
||||
+ (unspec:CC
|
||||
+ [(match_operand:V16QI 1 "altivec_register_operand" "v")
|
||||
+ (match_operand:V16QI 2 "altivec_register_operand" "v")]
|
||||
+ UNSPEC_VCMPNEZB))
|
||||
+ (set (match_operand:V16QI 0 "altivec_register_operand" "=v")
|
||||
+ (unspec:V16QI
|
||||
+ [(match_dup 1)
|
||||
+ (match_dup 2)]
|
||||
+ UNSPEC_VCMPNEZB))]
|
||||
+ "TARGET_P9_VECTOR"
|
||||
+ "vcmpnezb. %0,%1,%2"
|
||||
+ [(set_attr "type" "vecsimple")])
|
||||
+
|
||||
;; Vector Compare Not Equal Half Word (specified/not+eq:)
|
||||
(define_insn "vcmpneh"
|
||||
[(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
|
||||
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index f2dd12b3d73..291e414fea2 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -24165,12 +24165,10 @@ target-specific.

@item -mstring-compare-inline-limit=@var{num}
@opindex mstring-compare-inline-limit
-Generate at most @var{num} pairs of load instructions to compare the
-string inline. If the difference or end of string is not found at the
+Compare at most @var{num} string bytes with inline code.
+If the difference or end of string is not found at the
end of the inline compare a call to @code{strcmp} or @code{strncmp} will
-take care of the rest of the comparison. The default is 8 pairs of
-loads, which will compare 64 bytes on a 64-bit target and 32 bytes on a
-32-bit target.
+take care of the rest of the comparison. The default is 64 bytes.

@item -G @var{num}
@opindex G
@ -1,40 +0,0 @@
commit 6f1a7440d9aac59fba0f2e2d8d0a9a0b82f480cb
Author: acsawdey <acsawdey@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Tue Oct 2 17:31:53 2018 +0000

2018-10-02 Aaron Sawdey <acsawdey@linux.ibm.com>

PR target/87474
* config/rs6000/rs6000-string.c (expand_strn_compare): Check that both
P8_VECTOR and VSX are enabled.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@264799 138bc75d-0d04-0410-961f-82ee72b054a4

diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index ff0414586d0..9c25bad97a1 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -2205,6 +2205,7 @@ emit_final_str_compare_vec (rtx str1, rtx str2, rtx result,
}
else
{
+ gcc_assert (TARGET_P8_VECTOR);
rtx diffix = gen_reg_rtx (DImode);
rtx result_gbbd = gen_reg_rtx (V16QImode);
/* Since each byte of the input is either 00 or FF, the bytes in
@@ -2313,9 +2314,12 @@ expand_strn_compare (rtx operands[], int no_length)
/* Is it OK to use vec/vsx for this. TARGET_VSX means we have at
least POWER7 but we use TARGET_EFFICIENT_UNALIGNED_VSX which is
at least POWER8. That way we can rely on overlapping compares to
- do the final comparison of less than 16 bytes. Also I do not want
- to deal with making this work for 32 bits. */
- int use_vec = (bytes >= 16 && !TARGET_32BIT && TARGET_EFFICIENT_UNALIGNED_VSX);
+ do the final comparison of less than 16 bytes. Also I do not
+ want to deal with making this work for 32 bits. In addition, we
+ have to make sure that we have at least P8_VECTOR (we don't allow
+ P9_VECTOR without P8_VECTOR). */
+ int use_vec = (bytes >= 16 && !TARGET_32BIT
+ && TARGET_EFFICIENT_UNALIGNED_VSX && TARGET_P8_VECTOR);

if (use_vec)
required_align = 16;
@ -1,510 +0,0 @@
commit 08869d85bd2a7ec5468b3bb3f01a930eb93e6381
Author: acsawdey <acsawdey@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Fri Oct 26 19:33:31 2018 +0000

2018-10-26 Aaron Sawdey <acsawdey@linux.ibm.com>

* config/rs6000/rs6000-string.c (expand_strncmp_gpr_sequence): Change to
a shorter sequence with fewer branches.
(emit_final_str_compare_gpr): Ditto.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@265546 138bc75d-0d04-0410-961f-82ee72b054a4
|
||||
|
||||
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
|
||||
index 9c25bad97a1..96729d9663c 100644
|
||||
--- a/gcc/config/rs6000/rs6000-string.c
|
||||
+++ b/gcc/config/rs6000/rs6000-string.c
|
||||
@@ -259,7 +259,7 @@ do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
|
||||
gcc_assert (mode == E_QImode);
|
||||
emit_move_insn (reg, mem);
|
||||
break;
|
||||
-
|
||||
+
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
break;
|
||||
@@ -726,7 +726,7 @@ expand_compare_loop (rtx operands[])
|
||||
{
|
||||
if (GET_MODE_SIZE (GET_MODE (bytes_rtx)) > GET_MODE_SIZE (word_mode))
|
||||
/* Do not expect length longer than word_mode. */
|
||||
- return false;
|
||||
+ return false;
|
||||
else if (GET_MODE_SIZE (GET_MODE (bytes_rtx)) < GET_MODE_SIZE (word_mode))
|
||||
{
|
||||
bytes_rtx = force_reg (GET_MODE (bytes_rtx), bytes_rtx);
|
||||
@@ -770,7 +770,7 @@ expand_compare_loop (rtx operands[])
|
||||
rtx j;
|
||||
|
||||
/* Example of generated code for 35 bytes aligned 1 byte.
|
||||
-
|
||||
+
|
||||
mtctr 8
|
||||
li 6,0
|
||||
li 5,8
|
||||
@@ -798,7 +798,7 @@ expand_compare_loop (rtx operands[])
|
||||
popcntd 9,9
|
||||
subfe 10,10,10
|
||||
or 9,9,10
|
||||
-
|
||||
+
|
||||
Compiled with -fno-reorder-blocks for clarity. */
|
||||
|
||||
/* Structure of what we're going to do:
|
||||
@@ -1041,7 +1041,7 @@ expand_compare_loop (rtx operands[])
|
||||
if (!bytes_is_const)
|
||||
{
|
||||
/* If we're dealing with runtime length, we have to check if
|
||||
- it's zero after the loop. When length is known at compile
|
||||
+ it's zero after the loop. When length is known at compile
|
||||
time the no-remainder condition is dealt with above. By
|
||||
doing this after cleanup_label, we also deal with the
|
||||
case where length is 0 at the start and we bypass the
|
||||
@@ -1411,7 +1411,7 @@ expand_block_compare (rtx operands[])
|
||||
rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
|
||||
rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
|
||||
/* P7/P8 code uses cond for subfc. but P9 uses
|
||||
- it for cmpld which needs CCUNSmode. */
|
||||
+ it for cmpld which needs CCUNSmode. */
|
||||
rtx cond;
|
||||
if (TARGET_P9_MISC)
|
||||
cond = gen_reg_rtx (CCUNSmode);
|
||||
@@ -1655,7 +1655,7 @@ expand_block_compare (rtx operands[])
|
||||
emit_label (convert_label);
|
||||
|
||||
/* We need to produce DI result from sub, then convert to target SI
|
||||
- while maintaining <0 / ==0 / >0 properties. This sequence works:
|
||||
+ while maintaining <0 / ==0 / >0 properties. This sequence works:
|
||||
subfc L,A,B
|
||||
subfe H,H,H
|
||||
popcntd L,L
|
||||
@@ -1740,7 +1740,7 @@ expand_strncmp_align_check (rtx strncmp_label, rtx src_addr, HOST_WIDE_INT bytes
|
||||
to strcmp/strncmp if we have equality at the end of the inline comparison.
|
||||
P_CLEANUP_LABEL is a pointer to rtx for a label we generate if we need code
|
||||
to clean up and generate the final comparison result.
|
||||
- FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
|
||||
+ FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
|
||||
set the final result. */
|
||||
static void
|
||||
expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
@@ -1763,12 +1763,9 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
while (bytes_to_compare > 0)
|
||||
{
|
||||
/* GPR compare sequence:
|
||||
- check each 8B with: ld/ld cmpd bne
|
||||
- If equal, use rldicr/cmpb to check for zero byte.
|
||||
+ check each 8B with: ld/ld/cmpb/cmpb/orc./bne
|
||||
+
|
||||
cleanup code at end:
|
||||
- cmpb get byte that differs
|
||||
- cmpb look for zero byte
|
||||
- orc combine
|
||||
cntlzd get bit of first zero/diff byte
|
||||
subfic convert for rldcl use
|
||||
rldcl rldcl extract diff/zero byte
|
||||
@@ -1776,7 +1773,7 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
|
||||
The last compare can branch around the cleanup code if the
|
||||
result is zero because the strings are exactly equal. */
|
||||
-
|
||||
+
|
||||
unsigned int align = compute_current_alignment (base_align, offset);
|
||||
load_mode = select_block_compare_mode (offset, bytes_to_compare, align);
|
||||
load_mode_size = GET_MODE_SIZE (load_mode);
|
||||
@@ -1801,34 +1798,49 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
rid of the extra bytes. */
|
||||
cmp_bytes = bytes_to_compare;
|
||||
|
||||
- rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset));
|
||||
+ rtx offset_reg = gen_reg_rtx (Pmode);
|
||||
+ emit_move_insn (offset_reg, GEN_INT (offset));
|
||||
+
|
||||
+ rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, offset_reg);
|
||||
do_load_for_compare_from_addr (load_mode, tmp_reg_src1, addr1, orig_src1);
|
||||
- rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset));
|
||||
+ rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, offset_reg);
|
||||
do_load_for_compare_from_addr (load_mode, tmp_reg_src2, addr2, orig_src2);
|
||||
|
||||
/* We must always left-align the data we read, and
|
||||
clear any bytes to the right that are beyond the string.
|
||||
Otherwise the cmpb sequence won't produce the correct
|
||||
- results. The beginning of the compare will be done
|
||||
- with word_mode so will not have any extra shifts or
|
||||
- clear rights. */
|
||||
+ results. However if there is only one byte left, we
|
||||
+ can just subtract to get the final result so the shifts
|
||||
+ and clears are not needed. */
|
||||
|
||||
- if (load_mode_size < word_mode_size)
|
||||
- {
|
||||
- /* Rotate left first. */
|
||||
- rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
|
||||
- do_rotl3 (tmp_reg_src1, tmp_reg_src1, sh);
|
||||
- do_rotl3 (tmp_reg_src2, tmp_reg_src2, sh);
|
||||
- }
|
||||
+ unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
|
||||
|
||||
- if (cmp_bytes < word_mode_size)
|
||||
+ /* Loading just a single byte is a special case. If we are
|
||||
+ loading more than that, we have to check whether we are
|
||||
+ looking at the entire chunk of data. If not, rotate left and
|
||||
+ clear right so that bytes we aren't supposed to look at are
|
||||
+ zeroed, and the first byte we are supposed to compare is
|
||||
+ leftmost. */
|
||||
+ if (load_mode_size != 1)
|
||||
{
|
||||
- /* Now clear right. This plus the rotate can be
|
||||
- turned into a rldicr instruction. */
|
||||
- HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
||||
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
||||
- do_and3 (tmp_reg_src1, tmp_reg_src1, mask);
|
||||
- do_and3 (tmp_reg_src2, tmp_reg_src2, mask);
|
||||
+ if (load_mode_size < word_mode_size)
|
||||
+ {
|
||||
+ /* Rotate left first. */
|
||||
+ rtx sh = GEN_INT (BITS_PER_UNIT
|
||||
+ * (word_mode_size - load_mode_size));
|
||||
+ do_rotl3 (tmp_reg_src1, tmp_reg_src1, sh);
|
||||
+ do_rotl3 (tmp_reg_src2, tmp_reg_src2, sh);
|
||||
+ }
|
||||
+
|
||||
+ if (cmp_bytes < word_mode_size)
|
||||
+ {
|
||||
+ /* Now clear right. This plus the rotate can be
|
||||
+ turned into a rldicr instruction. */
|
||||
+ HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
||||
+ rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
||||
+ do_and3 (tmp_reg_src1, tmp_reg_src1, mask);
|
||||
+ do_and3 (tmp_reg_src2, tmp_reg_src2, mask);
|
||||
+ }
|
||||
}
|
||||
|
||||
/* Cases to handle. A and B are chunks of the two strings.
|
||||
@@ -1842,8 +1854,6 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
A == B: branch to result 0.
|
||||
A != B: cleanup code to compute result. */
|
||||
|
||||
- unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
|
||||
-
|
||||
rtx dst_label;
|
||||
if (remain > 0 || equality_compare_rest)
|
||||
{
|
||||
@@ -1857,54 +1867,89 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
/* Branch to end and produce result of 0. */
|
||||
dst_label = final_move_label;
|
||||
|
||||
- rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
|
||||
- rtx cond = gen_reg_rtx (CCmode);
|
||||
+ if (load_mode_size == 1)
|
||||
+ {
|
||||
+ /* Special case for comparing just single byte. */
|
||||
+ if (equality_compare_rest)
|
||||
+ {
|
||||
+ /* Use subf./bne to branch to final_move_label if the
|
||||
+ byte differs, otherwise fall through to the strncmp
|
||||
+ call. We must also check for a zero byte here as we
|
||||
+ must not make the library call if this is the end of
|
||||
+ the string. */
|
||||
+
|
||||
+ rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
|
||||
+ rtx cond = gen_reg_rtx (CCmode);
|
||||
+ rtx diff_rtx = gen_rtx_MINUS (word_mode,
|
||||
+ tmp_reg_src1, tmp_reg_src2);
|
||||
+ rs6000_emit_dot_insn (result_reg, diff_rtx, 2, cond);
|
||||
+ rtx cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
|
||||
+
|
||||
+ rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
|
||||
+ lab_ref, pc_rtx);
|
||||
+ rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
|
||||
+ JUMP_LABEL (j) = final_move_label;
|
||||
+ LABEL_NUSES (final_move_label) += 1;
|
||||
|
||||
- /* Always produce the 0 result, it is needed if
|
||||
- cmpb finds a 0 byte in this chunk. */
|
||||
- rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
|
||||
- rs6000_emit_dot_insn (result_reg, tmp, 1, cond);
|
||||
+ /* Check for zero byte here before fall through to
|
||||
+ library call. This catches the case where the
|
||||
+ strings are equal and end in a zero byte at this
|
||||
+ position. */
|
||||
|
||||
- rtx cmp_rtx;
|
||||
- if (remain == 0 && !equality_compare_rest)
|
||||
- cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
|
||||
- else
|
||||
- cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
|
||||
+ rtx cond0 = gen_reg_rtx (CCmode);
|
||||
+ emit_move_insn (cond0, gen_rtx_COMPARE (CCmode, tmp_reg_src1,
|
||||
+ const0_rtx));
|
||||
|
||||
- rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
|
||||
- lab_ref, pc_rtx);
|
||||
- rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
|
||||
- JUMP_LABEL (j) = dst_label;
|
||||
- LABEL_NUSES (dst_label) += 1;
|
||||
+ rtx cmp0eq_rtx = gen_rtx_EQ (VOIDmode, cond0, const0_rtx);
|
||||
|
||||
- if (remain > 0 || equality_compare_rest)
|
||||
+ rtx ifelse0 = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp0eq_rtx,
|
||||
+ lab_ref, pc_rtx);
|
||||
+ rtx j0 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse0));
|
||||
+ JUMP_LABEL (j0) = final_move_label;
|
||||
+ LABEL_NUSES (final_move_label) += 1;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ /* This is the last byte to be compared so we can use
|
||||
+ subf to compute the final result and branch
|
||||
+ unconditionally to final_move_label. */
|
||||
+
|
||||
+ do_sub3 (result_reg, tmp_reg_src1, tmp_reg_src2);
|
||||
+
|
||||
+ rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
|
||||
+ rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
|
||||
+ JUMP_LABEL (j) = final_move_label;
|
||||
+ LABEL_NUSES (final_move_label) += 1;
|
||||
+ emit_barrier ();
|
||||
+ }
|
||||
+ }
|
||||
+ else
|
||||
{
|
||||
- /* Generate a cmpb to test for a 0 byte and branch
|
||||
- to final result if found. */
|
||||
rtx cmpb_zero = gen_reg_rtx (word_mode);
|
||||
- rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
|
||||
- rtx condz = gen_reg_rtx (CCmode);
|
||||
+ rtx cmpb_diff = gen_reg_rtx (word_mode);
|
||||
rtx zero_reg = gen_reg_rtx (word_mode);
|
||||
+ rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
|
||||
+ rtx cond = gen_reg_rtx (CCmode);
|
||||
+
|
||||
emit_move_insn (zero_reg, GEN_INT (0));
|
||||
+ do_cmpb3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2);
|
||||
do_cmpb3 (cmpb_zero, tmp_reg_src1, zero_reg);
|
||||
+ rtx not_diff = gen_rtx_NOT (word_mode, cmpb_diff);
|
||||
+ rtx orc_rtx = gen_rtx_IOR (word_mode, not_diff, cmpb_zero);
|
||||
|
||||
- if (cmp_bytes < word_mode_size)
|
||||
- {
|
||||
- /* Don't want to look at zero bytes past end. */
|
||||
- HOST_WIDE_INT mb =
|
||||
- BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
||||
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
||||
- do_and3 (cmpb_zero, cmpb_zero, mask);
|
||||
- }
|
||||
+ rs6000_emit_dot_insn (result_reg, orc_rtx, 2, cond);
|
||||
|
||||
- emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
|
||||
- rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx);
|
||||
- rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx,
|
||||
- lab_ref_fin, pc_rtx);
|
||||
- rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
|
||||
- JUMP_LABEL (j2) = final_move_label;
|
||||
- LABEL_NUSES (final_move_label) += 1;
|
||||
+ rtx cmp_rtx;
|
||||
+ if (remain == 0 && !equality_compare_rest)
|
||||
+ cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
|
||||
+ else
|
||||
+ cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
|
||||
|
||||
+ rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
|
||||
+ lab_ref, pc_rtx);
|
||||
+ rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
|
||||
+ JUMP_LABEL (j) = dst_label;
|
||||
+ LABEL_NUSES (dst_label) += 1;
|
||||
}
|
||||
|
||||
offset += cmp_bytes;
|
||||
@@ -1915,7 +1960,7 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
return;
|
||||
}
|
||||
|
||||
-/* Generate the sequence of compares for strcmp/strncmp using vec/vsx
|
||||
+/* Generate the sequence of compares for strcmp/strncmp using vec/vsx
|
||||
instructions.
|
||||
|
||||
BYTES_TO_COMPARE is the number of bytes to be compared.
|
||||
@@ -1931,7 +1976,7 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
to strcmp/strncmp if we have equality at the end of the inline comparison.
|
||||
P_CLEANUP_LABEL is a pointer to rtx for a label we generate if we need code to clean up
|
||||
and generate the final comparison result.
|
||||
- FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
|
||||
+ FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
|
||||
set the final result. */
|
||||
static void
|
||||
expand_strncmp_vec_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
@@ -1982,12 +2027,12 @@ expand_strncmp_vec_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
bne 6,.Lmismatch
|
||||
|
||||
Use the overlapping compare trick for the last block if it is
|
||||
- less than 16 bytes.
|
||||
+ less than 16 bytes.
|
||||
*/
|
||||
|
||||
load_mode = V16QImode;
|
||||
load_mode_size = GET_MODE_SIZE (load_mode);
|
||||
-
|
||||
+
|
||||
if (bytes_to_compare >= load_mode_size)
|
||||
cmp_bytes = load_mode_size;
|
||||
else
|
||||
@@ -2046,10 +2091,10 @@ expand_strncmp_vec_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
if (branch_to_cleanup)
|
||||
{
|
||||
/* Branch to cleanup code, otherwise fall through to do more
|
||||
- compares. P8 and P9 use different CR bits because on P8
|
||||
+ compares. P8 and P9 use different CR bits because on P8
|
||||
we are looking at the result of a comparsion vs a
|
||||
register of zeroes so the all-true condition means no
|
||||
- difference or zero was found. On P9, vcmpnezb sets a byte
|
||||
+ difference or zero was found. On P9, vcmpnezb sets a byte
|
||||
to 0xff if there is a mismatch or zero, so the all-false
|
||||
condition indicates we found no difference or zero. */
|
||||
if (!cleanup_label)
|
||||
@@ -2062,7 +2107,7 @@ expand_strncmp_vec_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
}
|
||||
else
|
||||
{
|
||||
- /* Branch to final return or fall through to cleanup,
|
||||
+ /* Branch to final return or fall through to cleanup,
|
||||
result is already set to 0. */
|
||||
dst_label = final_move_label;
|
||||
if (TARGET_P9_VECTOR)
|
||||
@@ -2088,10 +2133,7 @@ expand_strncmp_vec_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
/* Generate the final sequence that identifies the differing
|
||||
byte and generates the final result, taking into account
|
||||
zero bytes:
|
||||
-
|
||||
- cmpb cmpb_result1, src1, src2
|
||||
- cmpb cmpb_result2, src1, zero
|
||||
- orc cmpb_result1, cmp_result1, cmpb_result2
|
||||
+
|
||||
cntlzd get bit of first zero/diff byte
|
||||
addi convert for rldcl use
|
||||
rldcl rldcl extract diff/zero byte
|
||||
@@ -2105,10 +2147,7 @@ static void
|
||||
emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
|
||||
{
|
||||
machine_mode m = GET_MODE (str1);
|
||||
- rtx cmpb_diff = gen_reg_rtx (m);
|
||||
- rtx cmpb_zero = gen_reg_rtx (m);
|
||||
rtx rot_amt = gen_reg_rtx (m);
|
||||
- rtx zero_reg = gen_reg_rtx (m);
|
||||
|
||||
rtx rot1_1 = gen_reg_rtx (m);
|
||||
rtx rot1_2 = gen_reg_rtx (m);
|
||||
@@ -2117,12 +2156,7 @@ emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
|
||||
|
||||
if (m == SImode)
|
||||
{
|
||||
- emit_insn (gen_cmpbsi3 (cmpb_diff, str1, str2));
|
||||
- emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
|
||||
- emit_insn (gen_cmpbsi3 (cmpb_zero, str1, zero_reg));
|
||||
- emit_insn (gen_one_cmplsi2 (cmpb_diff,cmpb_diff));
|
||||
- emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
|
||||
- emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
|
||||
+ emit_insn (gen_clzsi2 (rot_amt, result));
|
||||
emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
|
||||
emit_insn (gen_rotlsi3 (rot1_1, str1,
|
||||
gen_lowpart (SImode, rot_amt)));
|
||||
@@ -2134,12 +2168,7 @@ emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
|
||||
}
|
||||
else if (m == DImode)
|
||||
{
|
||||
- emit_insn (gen_cmpbdi3 (cmpb_diff, str1, str2));
|
||||
- emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
|
||||
- emit_insn (gen_cmpbdi3 (cmpb_zero, str1, zero_reg));
|
||||
- emit_insn (gen_one_cmpldi2 (cmpb_diff,cmpb_diff));
|
||||
- emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
|
||||
- emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
|
||||
+ emit_insn (gen_clzdi2 (rot_amt, result));
|
||||
emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
|
||||
emit_insn (gen_rotldi3 (rot1_1, str1,
|
||||
gen_lowpart (SImode, rot_amt)));
|
||||
@@ -2151,7 +2180,7 @@ emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
|
||||
}
|
||||
else
|
||||
gcc_unreachable ();
|
||||
-
|
||||
+
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -2169,10 +2198,10 @@ emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
|
||||
lbzx 10,28,9 # use that offset to load differing byte
|
||||
lbzx 3,29,9
|
||||
subf 3,3,10 # subtract for final result
|
||||
-
|
||||
+
|
||||
P9:
|
||||
vclzlsbb # counts trailing bytes with lsb=0
|
||||
- vextublx # extract differing byte
|
||||
+ vextublx # extract differing byte
|
||||
|
||||
STR1 is the reg rtx for data from string 1.
|
||||
STR2 is the reg rtx for data from string 2.
|
||||
@@ -2208,7 +2237,7 @@ emit_final_str_compare_vec (rtx str1, rtx str2, rtx result,
|
||||
gcc_assert (TARGET_P8_VECTOR);
|
||||
rtx diffix = gen_reg_rtx (DImode);
|
||||
rtx result_gbbd = gen_reg_rtx (V16QImode);
|
||||
- /* Since each byte of the input is either 00 or FF, the bytes in
|
||||
+ /* Since each byte of the input is either 00 or FF, the bytes in
|
||||
dw0 and dw1 after vgbbd are all identical to each other. */
|
||||
emit_insn (gen_p8v_vgbbd (result_gbbd, vec_result));
|
||||
/* For LE, we shift by 9 and get BA in the low two bytes then CTZ.
|
||||
@@ -2226,7 +2255,7 @@ emit_final_str_compare_vec (rtx str1, rtx str2, rtx result,
|
||||
else
|
||||
emit_insn (gen_ctzdi2 (count, diffix));
|
||||
|
||||
- /* P8 doesn't have a good solution for extracting one byte from
|
||||
+ /* P8 doesn't have a good solution for extracting one byte from
|
||||
a vsx reg like vextublx on P9 so we just compute the offset
|
||||
of the differing byte and load it from each string. */
|
||||
do_add3 (off_reg, off_reg, count);
|
||||
@@ -2247,7 +2276,7 @@ emit_final_str_compare_vec (rtx str1, rtx str2, rtx result,
|
||||
}
|
||||
|
||||
/* Expand a string compare operation with length, and return
|
||||
- true if successful. Return false if we should let the
|
||||
+ true if successful. Return false if we should let the
|
||||
compiler generate normal code, probably a strncmp call.
|
||||
|
||||
OPERANDS[0] is the target (result).
|
||||
@@ -2279,9 +2308,9 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
rtx src1_addr = force_reg (Pmode, XEXP (orig_src1, 0));
|
||||
rtx src2_addr = force_reg (Pmode, XEXP (orig_src2, 0));
|
||||
|
||||
- /* If we have a length, it must be constant. This simplifies things
|
||||
+ /* If we have a length, it must be constant. This simplifies things
|
||||
a bit as we don't have to generate code to check if we've exceeded
|
||||
- the length. Later this could be expanded to handle this case. */
|
||||
+ the length. Later this could be expanded to handle this case. */
|
||||
if (!no_length && !CONST_INT_P (bytes_rtx))
|
||||
return false;
|
||||
|
||||
@@ -2311,7 +2340,7 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
else
|
||||
bytes = UINTVAL (bytes_rtx);
|
||||
|
||||
- /* Is it OK to use vec/vsx for this. TARGET_VSX means we have at
|
||||
+ /* Is it OK to use vec/vsx for this. TARGET_VSX means we have at
|
||||
least POWER7 but we use TARGET_EFFICIENT_UNALIGNED_VSX which is
|
||||
at least POWER8. That way we can rely on overlapping compares to
|
||||
do the final comparison of less than 16 bytes. Also I do not
|
||||
@@ -2363,7 +2392,7 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
rtx final_move_label = gen_label_rtx ();
|
||||
rtx final_label = gen_label_rtx ();
|
||||
rtx begin_compare_label = NULL;
|
||||
-
|
||||
+
|
||||
if (base_align < required_align)
|
||||
{
|
||||
/* Generate code that checks distance to 4k boundary for this case. */
|
||||
@@ -2472,7 +2501,7 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
&cleanup_label, final_move_label);
|
||||
|
||||
offset = compare_length;
|
||||
-
|
||||
+
|
||||
if (equality_compare_rest)
|
||||
{
|
||||
/* Update pointers past what has been compared already. */
|
406
SOURCES/gcc8-rh1668903-1.patch
Normal file
@ -0,0 +1,406 @@
|
||||
commit 126dab7c9d84294f256b1f7bf91c24a9e7103249
|
||||
Author: qinzhao <qinzhao@138bc75d-0d04-0410-961f-82ee72b054a4>
|
||||
Date: Thu Nov 29 16:06:03 2018 +0000
|
||||
|
||||
Add a new option -flive-patching={inline-only-static|inline-clone}
|
||||
to support live patching in GCC.
|
||||
|
||||
2018-11-29 qing zhao <qing.zhao@oracle.com>
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* cif-code.def (EXTERN_LIVE_ONLY_STATIC): New CIF code.
|
||||
* common.opt: Add -flive-patching flag.
|
||||
* doc/invoke.texi: Document -flive-patching.
|
||||
* flag-types.h (enum live_patching_level): New enum.
|
||||
* ipa-inline.c (can_inline_edge_p): Disable external functions from
|
||||
inlining when flag_live_patching is LIVE_PATCHING_INLINE_ONLY_STATIC.
|
||||
* opts.c (control_options_for_live_patching): New function.
|
||||
(finish_options): Make flag_live_patching incompatible with flag_lto.
|
||||
Control IPA optimizations based on different levels of
|
||||
flag_live_patching.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.dg/live-patching-1.c: New test.
|
||||
* gcc.dg/live-patching-2.c: New test.
|
||||
* gcc.dg/live-patching-3.c: New test.
|
||||
* gcc.dg/tree-ssa/writeonly-3.c: New test.
|
||||
* gcc.target/i386/ipa-stack-alignment-2.c: New test.
|
||||
|
||||
|
||||
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@266627 138bc75d-0d04-0410-961f-82ee72b054a4
|
||||
|
||||
--- gcc/cif-code.def
|
||||
+++ gcc/cif-code.def
|
||||
@@ -132,6 +132,12 @@ DEFCIFCODE(USES_COMDAT_LOCAL, CIF_FINAL_ERROR,
|
||||
DEFCIFCODE(ATTRIBUTE_MISMATCH, CIF_FINAL_ERROR,
|
||||
N_("function attribute mismatch"))
|
||||
|
||||
+/* We can't inline because the user requests only static functions
|
||||
+ but the function has external linkage for live patching purpose. */
|
||||
+DEFCIFCODE(EXTERN_LIVE_ONLY_STATIC, CIF_FINAL_ERROR,
|
||||
+ N_("function has external linkage when the user requests only"
|
||||
+ " inlining static for live patching"))
|
||||
+
|
||||
/* We proved that the call is unreachable. */
|
||||
DEFCIFCODE(UNREACHABLE, CIF_FINAL_ERROR,
|
||||
N_("unreachable"))
|
||||
--- gcc/common.opt
|
||||
+++ gcc/common.opt
|
||||
@@ -2181,6 +2181,24 @@ starts and when the destructor finishes.
|
||||
flifetime-dse=
|
||||
Common Joined RejectNegative UInteger Var(flag_lifetime_dse) Optimization IntegerRange(0, 2)
|
||||
|
||||
+flive-patching
|
||||
+Common RejectNegative Alias(flive-patching=,inline-clone) Optimization
|
||||
+
|
||||
+flive-patching=
|
||||
+Common Report Joined RejectNegative Enum(live_patching_level) Var(flag_live_patching) Init(LIVE_PATCHING_NONE) Optimization
|
||||
+-flive-patching=[inline-only-static|inline-clone] Control IPA
|
||||
+optimizations to provide a safe compilation for live-patching. At the same
|
||||
+time, provides multiple-level control on the enabled IPA optimizations.
|
||||
+
|
||||
+Enum
|
||||
+Name(live_patching_level) Type(enum live_patching_level) UnknownError(unknown Live-Patching Level %qs)
|
||||
+
|
||||
+EnumValue
|
||||
+Enum(live_patching_level) String(inline-only-static) Value(LIVE_PATCHING_INLINE_ONLY_STATIC)
|
||||
+
|
||||
+EnumValue
|
||||
+Enum(live_patching_level) String(inline-clone) Value(LIVE_PATCHING_INLINE_CLONE)
|
||||
+
|
||||
flive-range-shrinkage
|
||||
Common Report Var(flag_live_range_shrinkage) Init(0) Optimization
|
||||
Relief of register pressure through live range shrinkage.
|
||||
--- gcc/doc/invoke.texi
|
||||
+++ gcc/doc/invoke.texi
|
||||
@@ -389,6 +389,7 @@ Objective-C and Objective-C++ Dialects}.
|
||||
-fipa-bit-cp -fipa-vrp @gol
|
||||
-fipa-pta -fipa-profile -fipa-pure-const -fipa-reference -fipa-icf @gol
|
||||
-fira-algorithm=@var{algorithm} @gol
|
||||
+-flive-patching=@var{level} @gol
|
||||
-fira-region=@var{region} -fira-hoist-pressure @gol
|
||||
-fira-loop-pressure -fno-ira-share-save-slots @gol
|
||||
-fno-ira-share-spill-slots @gol
|
||||
@@ -9291,6 +9292,65 @@ equivalences that are found only by GCC and equivalences found only by Gold.
|
||||
|
||||
This flag is enabled by default at @option{-O2} and @option{-Os}.
|
||||
|
||||
+@item -flive-patching=@var{level}
|
||||
+@opindex flive-patching
|
||||
+Control GCC's optimizations to provide a safe compilation for live-patching.
|
||||
+
|
||||
+If the compiler's optimization uses a function's body or information extracted
|
||||
+from its body to optimize/change another function, the latter is called an
|
||||
+impacted function of the former. If a function is patched, its impacted
|
||||
+functions should be patched too.
|
||||
+
|
||||
+The impacted functions are decided by the compiler's interprocedural
|
||||
+optimizations. For example, inlining a function into its caller, cloning
|
||||
+a function and changing its caller to call this new clone, or extracting
|
||||
+a function's pureness/constness information to optimize its direct or
|
||||
+indirect callers, etc.
|
||||
+
|
||||
+Usually, the more IPA optimizations enabled, the larger the number of
|
||||
+impacted functions for each function. In order to control the number of
|
||||
+impacted functions and computed the list of impacted function easily,
|
||||
+we provide control to partially enable IPA optimizations on two different
|
||||
+levels.
|
||||
+
|
||||
+The @var{level} argument should be one of the following:
|
||||
+
|
||||
+@table @samp
|
||||
+
|
||||
+@item inline-clone
|
||||
+
|
||||
+Only enable inlining and cloning optimizations, which includes inlining,
|
||||
+cloning, interprocedural scalar replacement of aggregates and partial inlining.
|
||||
+As a result, when patching a function, all its callers and its clones'
|
||||
+callers need to be patched as well.
|
||||
+
|
||||
+@option{-flive-patching=inline-clone} disables the following optimization flags:
|
||||
+@gccoptlist{-fwhole-program -fipa-pta -fipa-reference -fipa-ra @gol
|
||||
+-fipa-icf -fipa-icf-functions -fipa-icf-variables @gol
|
||||
+-fipa-bit-cp -fipa-vrp -fipa-pure-const -fipa-reference-addressable @gol
|
||||
+-fipa-stack-alignment}
|
||||
+
|
||||
+@item inline-only-static
|
||||
+
|
||||
+Only enable inlining of static functions.
|
||||
+As a result, when patching a static function, all its callers need to be
|
||||
+patches as well.
|
||||
+
|
||||
+In addition to all the flags that -flive-patching=inline-clone disables,
|
||||
+@option{-flive-patching=inline-only-static} disables the following additional
|
||||
+optimization flags:
|
||||
+@gccoptlist{-fipa-cp-clone -fipa-sra -fpartial-inlining -fipa-cp}
|
||||
+
|
||||
+@end table
|
||||
+
|
||||
+When -flive-patching specified without any value, the default value
|
||||
+is "inline-clone".
|
||||
+
|
||||
+This flag is disabled by default.
|
||||
+
|
||||
+Note that -flive-patching is not supported with link-time optimizer.
|
||||
+(@option{-flto}).
|
||||
+
|
||||
@item -fisolate-erroneous-paths-dereference
|
||||
@opindex fisolate-erroneous-paths-dereference
|
||||
Detect paths that trigger erroneous or undefined behavior due to
|
||||
--- gcc/flag-types.h
|
||||
+++ gcc/flag-types.h
|
||||
@@ -123,6 +123,14 @@ enum stack_reuse_level
|
||||
SR_ALL
|
||||
};
|
||||
|
||||
+/* The live patching level. */
|
||||
+enum live_patching_level
|
||||
+{
|
||||
+ LIVE_PATCHING_NONE = 0,
|
||||
+ LIVE_PATCHING_INLINE_ONLY_STATIC,
|
||||
+ LIVE_PATCHING_INLINE_CLONE
|
||||
+};
|
||||
+
|
||||
/* The algorithm used for basic block reordering. */
|
||||
enum reorder_blocks_algorithm
|
||||
{
|
||||
--- gcc/ipa-inline.c
|
||||
+++ gcc/ipa-inline.c
|
||||
@@ -379,6 +379,12 @@ can_inline_edge_p (struct cgraph_edge *e, bool report,
|
||||
e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
|
||||
inlinable = false;
|
||||
}
|
||||
+ else if (callee->externally_visible
|
||||
+ && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
|
||||
+ {
|
||||
+ e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
|
||||
+ inlinable = false;
|
||||
+ }
|
||||
if (!inlinable && report)
|
||||
report_inline_failed_reason (e);
|
||||
return inlinable;
|
||||
--- gcc/opts.c
|
||||
+++ gcc/opts.c
|
||||
@@ -699,6 +699,152 @@ default_options_optimization (struct gcc
|
||||
lang_mask, handlers, loc, dc);
|
||||
}
|
||||
|
||||
+/* Control IPA optimizations based on different live patching LEVEL. */
|
||||
+static void
|
||||
+control_options_for_live_patching (struct gcc_options *opts,
|
||||
+ struct gcc_options *opts_set,
|
||||
+ enum live_patching_level level,
|
||||
+ location_t loc)
|
||||
+{
|
||||
+ gcc_assert (level > LIVE_PATCHING_NONE);
|
||||
+
|
||||
+ switch (level)
|
||||
+ {
|
||||
+ case LIVE_PATCHING_INLINE_ONLY_STATIC:
|
||||
+ if (opts_set->x_flag_ipa_cp_clone && opts->x_flag_ipa_cp_clone)
|
||||
+ error_at (loc,
|
||||
+ "%<-fipa-cp-clone%> is incompatible with "
|
||||
+ "%<-flive-patching=inline-only-static%>");
|
||||
+ else
|
||||
+ opts->x_flag_ipa_cp_clone = 0;
|
||||
+
|
||||
+ if (opts_set->x_flag_ipa_sra && opts->x_flag_ipa_sra)
|
||||
+ error_at (loc,
|
||||
+ "%<-fipa-sra%> is incompatible with "
|
||||
+ "%<-flive-patching=inline-only-static%>");
|
||||
+ else
|
||||
+ opts->x_flag_ipa_sra = 0;
|
||||
+
|
||||
+ if (opts_set->x_flag_partial_inlining && opts->x_flag_partial_inlining)
|
||||
+ error_at (loc,
|
||||
+ "%<-fpartial-inlining%> is incompatible with "
|
||||
+ "%<-flive-patching=inline-only-static%>");
|
||||
+ else
|
||||
+ opts->x_flag_partial_inlining = 0;
|
||||
+
|
||||
+ if (opts_set->x_flag_ipa_cp && opts->x_flag_ipa_cp)
|
||||
+ error_at (loc,
|
||||
+ "%<-fipa-cp%> is incompatible with "
|
||||
+ "%<-flive-patching=inline-only-static%>");
|
||||
+ else
|
||||
+ opts->x_flag_ipa_cp = 0;
|
||||
+
|
||||
+ /* FALLTHROUGH. */
|
||||
+ case LIVE_PATCHING_INLINE_CLONE:
|
||||
+ /* live patching should disable whole-program optimization. */
|
||||
+ if (opts_set->x_flag_whole_program && opts->x_flag_whole_program)
|
||||
+ error_at (loc,
|
||||
+ "%<-fwhole-program%> is incompatible with "
|
||||
+ "%<-flive-patching=inline-only-static|inline-clone%>");
|
||||
+ else
|
||||
+ opts->x_flag_whole_program = 0;
|
||||
+
|
||||
+ /* visibility change should be excluded by !flag_whole_program
|
||||
+ && !in_lto_p && !flag_ipa_cp_clone && !flag_ipa_sra
|
||||
+ && !flag_partial_inlining. */
|
||||
+
|
||||
+ if (opts_set->x_flag_ipa_pta && opts->x_flag_ipa_pta)
|
||||
+ error_at (loc,
|
||||
+ "%<-fipa-pta%> is incompatible with "
|
||||
+ "%<-flive-patching=inline-only-static|inline-clone%>");
|
||||
+ else
|
||||
+ opts->x_flag_ipa_pta = 0;
|
||||
+
|
||||
+ if (opts_set->x_flag_ipa_reference && opts->x_flag_ipa_reference)
|
||||
+ error_at (loc,
|
||||
+ "%<-fipa-reference%> is incompatible with "
|
||||
+ "%<-flive-patching=inline-only-static|inline-clone%>");
|
||||
+ else
|
||||
+ opts->x_flag_ipa_reference = 0;
|
||||
+
|
||||
+ if (opts_set->x_flag_ipa_ra && opts->x_flag_ipa_ra)
|
||||
+ error_at (loc,
|
||||
+ "%<-fipa-ra%> is incompatible with "
|
||||
+ "%<-flive-patching=inline-only-static|inline-clone%>");
|
||||
+ else
|
||||
+ opts->x_flag_ipa_ra = 0;
|
||||
+
|
||||
+ if (opts_set->x_flag_ipa_icf && opts->x_flag_ipa_icf)
|
||||
+ error_at (loc,
|
||||
+ "%<-fipa-icf%> is incompatible with "
|
||||
+ "%<-flive-patching=inline-only-static|inline-clone%>");
|
||||
+ else
|
||||
+ opts->x_flag_ipa_icf = 0;
|
||||
+
|
||||
+ if (opts_set->x_flag_ipa_icf_functions && opts->x_flag_ipa_icf_functions)
|
||||
+ error_at (loc,
|
||||
+ "%<-fipa-icf-functions%> is incompatible with "
|
||||
+ "%<-flive-patching=inline-only-static|inline-clone%>");
|
||||
+ else
|
||||
+ opts->x_flag_ipa_icf_functions = 0;
|
||||
+
|
||||
+ if (opts_set->x_flag_ipa_icf_variables && opts->x_flag_ipa_icf_variables)
|
||||
+ error_at (loc,
|
||||
+ "%<-fipa-icf-variables%> is incompatible with "
|
||||
+ "%<-flive-patching=inline-only-static|inline-clone%>");
|
||||
+ else
|
||||
+ opts->x_flag_ipa_icf_variables = 0;
|
||||
+
|
||||
+ if (opts_set->x_flag_ipa_bit_cp && opts->x_flag_ipa_bit_cp)
|
||||
+ error_at (loc,
|
||||
+ "%<-fipa-bit-cp%> is incompatible with "
|
||||
+ "%<-flive-patching=inline-only-static|inline-clone%>");
|
||||
+ else
|
||||
+ opts->x_flag_ipa_bit_cp = 0;
|
||||
+
|
||||
+ if (opts_set->x_flag_ipa_vrp && opts->x_flag_ipa_vrp)
|
||||
+ error_at (loc,
|
||||
+ "%<-fipa-vrp%> is incompatible with "
|
||||
+ "%<-flive-patching=inline-only-static|inline-clone%>");
|
||||
+ else
|
||||
+ opts->x_flag_ipa_vrp = 0;
|
||||
+
|
||||
+ if (opts_set->x_flag_ipa_pure_const && opts->x_flag_ipa_pure_const)
|
||||
+ error_at (loc,
|
||||
+ "%<-fipa-pure-const%> is incompatible with "
|
||||
+ "%<-flive-patching=inline-only-static|inline-clone%>");
|
||||
+ else
|
||||
+ opts->x_flag_ipa_pure_const = 0;
|
||||
+
|
||||
+ /* FIXME: disable unreachable code removal. */
|
||||
+
|
||||
+ /* discovery of functions/variables with no address taken. */
|
||||
+// GCC 8 doesn't have these options.
|
||||
+#if 0
|
||||
+ if (opts_set->x_flag_ipa_reference_addressable
|
||||
+ && opts->x_flag_ipa_reference_addressable)
|
||||
+ error_at (loc,
|
||||
+ "%<-fipa-reference-addressable%> is incompatible with "
|
||||
+ "%<-flive-patching=inline-only-static|inline-clone%>");
|
||||
+ else
|
||||
+ opts->x_flag_ipa_reference_addressable = 0;
|
||||
+
|
||||
+ /* ipa stack alignment propagation. */
|
||||
+ if (opts_set->x_flag_ipa_stack_alignment
|
||||
+ && opts->x_flag_ipa_stack_alignment)
|
||||
+ error_at (loc,
|
||||
+ "%<-fipa-stack-alignment%> is incompatible with "
|
||||
+ "%<-flive-patching=inline-only-static|inline-clone%>");
|
||||
+ else
|
||||
+ opts->x_flag_ipa_stack_alignment = 0;
|
||||
+#endif
|
||||
+
|
||||
+ break;
|
||||
+ default:
|
||||
+ gcc_unreachable ();
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
/* After all options at LOC have been read into OPTS and OPTS_SET,
|
||||
finalize settings of those options and diagnose incompatible
|
||||
combinations. */
|
||||
@@ -1057,6 +1203,18 @@ finish_options (struct gcc_options *opts
|
||||
sorry ("transactional memory is not supported with "
|
||||
"%<-fsanitize=kernel-address%>");
|
||||
|
||||
+ /* Currently live patching is not support for LTO. */
|
||||
+ if (opts->x_flag_live_patching && opts->x_flag_lto)
|
||||
+ sorry ("live patching is not supported with LTO");
|
||||
+
|
||||
+ /* Control IPA optimizations based on different -flive-patching level. */
|
||||
+ if (opts->x_flag_live_patching)
|
||||
+ {
|
||||
+ control_options_for_live_patching (opts, opts_set,
|
||||
+ opts->x_flag_live_patching,
|
||||
+ loc);
|
||||
+ }
|
||||
+
|
||||
/* Comes from final.c -- no real reason to change it. */
|
||||
#define MAX_CODE_ALIGN 16
|
||||
#define MAX_CODE_ALIGN_VALUE (1 << MAX_CODE_ALIGN)
|
||||
--- /dev/null
|
||||
+++ gcc/testsuite/gcc.dg/live-patching-1.c
|
||||
@@ -0,0 +1,22 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -flive-patching=inline-only-static -fdump-ipa-inline" } */
|
||||
+
|
||||
+extern int sum, n, m;
|
||||
+
|
||||
+int foo (int a)
|
||||
+{
|
||||
+ return a + n;
|
||||
+}
|
||||
+
|
||||
+static int bar (int b)
|
||||
+{
|
||||
+ return b * m;
|
||||
+}
|
||||
+
|
||||
+int main()
|
||||
+{
|
||||
+ sum = foo (m) + bar (n);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-ipa-dump "foo/0 function has external linkage when the user requests only inlining static for live patching" "inline" } } */
|
||||
--- /dev/null
|
||||
+++ gcc/testsuite/gcc.dg/live-patching-2.c
|
||||
@@ -0,0 +1,9 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -flive-patching -flto" } */
|
||||
+
|
||||
+int main()
|
||||
+{
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-message "sorry, unimplemented: live patching is not supported with LTO" "-flive-patching and -flto together" { target *-*-* } 0 } */
|
||||
--- /dev/null
|
||||
+++ gcc/testsuite/gcc.dg/live-patching-3.c
|
||||
@@ -0,0 +1,9 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O1 -flive-patching -fwhole-program" } */
|
||||
+
|
||||
+int main()
|
||||
+{
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-message "'-fwhole-program' is incompatible with '-flive-patching=inline-only-static|inline-clone’" "" {target "*-*-*"} 0 } */
|
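For orientation, a minimal sketch of the behaviour the -flive-patching=inline-only-static level documented above enforces, adapted from the new live-patching-1.c test (the function and variable names come from that test; the compile command and file name in the comment are assumptions, not part of the patch):

/* Build with something like: gcc -O2 -flive-patching=inline-only-static demo.c
   Under this level only static functions remain inlining candidates, so
   bar() may still be inlined into main() (patching bar then also means
   patching main), while foo() keeps external linkage and is rejected with
   the new EXTERN_LIVE_ONLY_STATIC reason, so patching foo does not drag in
   its callers.  */
int sum, n, m;

int foo (int a)                /* external linkage: not inlined */
{
  return a + n;
}

static int bar (int b)         /* static: still eligible for inlining */
{
  return b * m;
}

int main (void)
{
  sum = foo (m) + bar (n);
  return 0;
}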
73
SOURCES/gcc8-rh1668903-2.patch
Normal file
@ -0,0 +1,73 @@
|
||||
commit 9939b2f79bd9b75b99080a17f3d6f1214d543477
|
||||
Author: qinzhao <qinzhao@138bc75d-0d04-0410-961f-82ee72b054a4>
|
||||
Date: Wed Apr 3 19:00:25 2019 +0000
|
||||
|
||||
2019-04-03 qing zhao <qing.zhao@oracle.com>
|
||||
|
||||
PR tree-optimization/89730
|
||||
* ipa-inline.c (can_inline_edge_p): Delete the checking for
|
||||
-flive-patching=inline-only-static.
|
||||
(can_inline_edge_by_limits_p): Add the checking for
|
||||
-flive-patching=inline-only-static and grant always_inline
|
||||
even when -flive-patching=inline-only-static is specified.
|
||||
|
||||
* gcc.dg/live-patching-4.c: New test.
|
||||
|
||||
|
||||
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@270134 138bc75d-0d04-0410-961f-82ee72b054a4
|
||||
|
||||
--- gcc/ipa-inline.c
|
||||
+++ gcc/ipa-inline.c
|
||||
@@ -385,12 +385,6 @@ can_inline_edge_p (struct cgraph_edge *e, bool report,
|
||||
e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
|
||||
inlinable = false;
|
||||
}
|
||||
- else if (callee->externally_visible
|
||||
- && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
|
||||
- {
|
||||
- e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
|
||||
- inlinable = false;
|
||||
- }
|
||||
if (!inlinable && report)
|
||||
report_inline_failed_reason (e);
|
||||
return inlinable;
|
||||
@@ -433,6 +427,13 @@ can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
|
||||
DECL_ATTRIBUTES (caller->decl))
|
||||
&& !caller_growth_limits (e))
|
||||
inlinable = false;
|
||||
+ else if (callee->externally_visible
|
||||
+ && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
|
||||
+ && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
|
||||
+ {
|
||||
+ e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
|
||||
+ inlinable = false;
|
||||
+ }
|
||||
/* Don't inline a function with a higher optimization level than the
|
||||
caller. FIXME: this is really just tip of iceberg of handling
|
||||
optimization attribute. */
|
||||
--- /dev/null
|
||||
+++ gcc/testsuite/gcc.dg/live-patching-4.c
|
||||
@@ -0,0 +1,23 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -flive-patching=inline-only-static -fdump-tree-einline-optimized" } */
|
||||
+
|
||||
+extern int sum, n, m;
|
||||
+
|
||||
+extern inline __attribute__((always_inline)) int foo (int a);
|
||||
+inline __attribute__((always_inline)) int foo (int a)
|
||||
+{
|
||||
+ return a + n;
|
||||
+}
|
||||
+
|
||||
+static int bar (int b)
|
||||
+{
|
||||
+ return b * m;
|
||||
+}
|
||||
+
|
||||
+int main()
|
||||
+{
|
||||
+ sum = foo (m) + bar (n);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump "Inlining foo into main" "einline" } } */
|
85
SOURCES/gcc8-rh1668903-3.patch
Normal file
@ -0,0 +1,85 @@
|
||||
commit 77e6311332590004c5aec82ceeb45e4d4d93f690
|
||||
Author: redi <redi@138bc75d-0d04-0410-961f-82ee72b054a4>
|
||||
Date: Thu Apr 11 08:52:22 2019 +0000
|
||||
|
||||
Clarify documentation for -flive-patching
|
||||
|
||||
* doc/invoke.texi (Optimize Options): Clarify -flive-patching docs.
|
||||
|
||||
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@270276 138bc75d-0d04-0410-961f-82ee72b054a4
|
||||
|
||||
--- gcc/doc/invoke.texi
|
||||
+++ gcc/doc/invoke.texi
|
||||
@@ -9367,24 +9367,24 @@ This flag is enabled by default at @option{-O2} and @option{-Os}.
|
||||
|
||||
@item -flive-patching=@var{level}
|
||||
@opindex flive-patching
|
||||
-Control GCC's optimizations to provide a safe compilation for live-patching.
|
||||
+Control GCC's optimizations to produce output suitable for live-patching.
|
||||
|
||||
If the compiler's optimization uses a function's body or information extracted
|
||||
from its body to optimize/change another function, the latter is called an
|
||||
impacted function of the former. If a function is patched, its impacted
|
||||
functions should be patched too.
|
||||
|
||||
-The impacted functions are decided by the compiler's interprocedural
|
||||
-optimizations. For example, inlining a function into its caller, cloning
|
||||
-a function and changing its caller to call this new clone, or extracting
|
||||
-a function's pureness/constness information to optimize its direct or
|
||||
-indirect callers, etc.
|
||||
+The impacted functions are determined by the compiler's interprocedural
|
||||
+optimizations. For example, a caller is impacted when inlining a function
|
||||
+into its caller,
|
||||
+cloning a function and changing its caller to call this new clone,
|
||||
+or extracting a function's pureness/constness information to optimize
|
||||
+its direct or indirect callers, etc.
|
||||
|
||||
Usually, the more IPA optimizations enabled, the larger the number of
|
||||
impacted functions for each function. In order to control the number of
|
||||
-impacted functions and computed the list of impacted function easily,
|
||||
-we provide control to partially enable IPA optimizations on two different
|
||||
-levels.
|
||||
+impacted functions and more easily compute the list of impacted function,
|
||||
+IPA optimizations can be partially enabled at two different levels.
|
||||
|
||||
The @var{level} argument should be one of the following:
|
||||
|
||||
@@ -9395,7 +9395,7 @@ The @var{level} argument should be one of the following:
|
||||
Only enable inlining and cloning optimizations, which includes inlining,
|
||||
cloning, interprocedural scalar replacement of aggregates and partial inlining.
|
||||
As a result, when patching a function, all its callers and its clones'
|
||||
-callers need to be patched as well.
|
||||
+callers are impacted, therefore need to be patched as well.
|
||||
|
||||
@option{-flive-patching=inline-clone} disables the following optimization flags:
|
||||
@gccoptlist{-fwhole-program -fipa-pta -fipa-reference -fipa-ra @gol
|
||||
@@ -9406,22 +9406,23 @@ callers need to be patched as well.
|
||||
@item inline-only-static
|
||||
|
||||
Only enable inlining of static functions.
|
||||
-As a result, when patching a static function, all its callers need to be
|
||||
-patches as well.
|
||||
+As a result, when patching a static function, all its callers are impacted
|
||||
+and so need to be patched as well.
|
||||
|
||||
-In addition to all the flags that -flive-patching=inline-clone disables,
|
||||
+In addition to all the flags that @option{-flive-patching=inline-clone}
|
||||
+disables,
|
||||
@option{-flive-patching=inline-only-static} disables the following additional
|
||||
optimization flags:
|
||||
@gccoptlist{-fipa-cp-clone -fipa-sra -fpartial-inlining -fipa-cp}
|
||||
|
||||
@end table
|
||||
|
||||
-When -flive-patching specified without any value, the default value
|
||||
-is "inline-clone".
|
||||
+When @option{-flive-patching} is specified without any value, the default value
|
||||
+is @var{inline-clone}.
|
||||
|
||||
This flag is disabled by default.
|
||||
|
||||
-Note that -flive-patching is not supported with link-time optimizer.
|
||||
+Note that @option{-flive-patching} is not supported with link-time optimization
|
||||
(@option{-flto}).
|
||||
|
||||
@item -fisolate-erroneous-paths-dereference
|
93
SOURCES/gcc8-rh1670535.patch
Normal file
@ -0,0 +1,93 @@
|
||||
2018-11-08 Roman Geissler <roman.geissler@amadeus.com>
|
||||
|
||||
* collect2.c (linker_select): Add USE_LLD_LD.
|
||||
(ld_suffixes): Add ld.lld.
|
||||
(main): Handle -fuse-ld=lld.
|
||||
* common.opt (-fuse-ld=lld): New option.
|
||||
* doc/invoke.texi (-fuse-ld=lld): Document.
|
||||
* opts.c (common_handle_option): Handle OPT_fuse_ld_lld.
|
||||
|
||||
--- gcc/collect2.c
|
||||
+++ gcc/collect2.c
|
||||
@@ -831,6 +831,7 @@ main (int argc, char **argv)
|
||||
USE_PLUGIN_LD,
|
||||
USE_GOLD_LD,
|
||||
USE_BFD_LD,
|
||||
+ USE_LLD_LD,
|
||||
USE_LD_MAX
|
||||
} selected_linker = USE_DEFAULT_LD;
|
||||
static const char *const ld_suffixes[USE_LD_MAX] =
|
||||
@@ -838,7 +839,8 @@ main (int argc, char **argv)
|
||||
"ld",
|
||||
PLUGIN_LD_SUFFIX,
|
||||
"ld.gold",
|
||||
- "ld.bfd"
|
||||
+ "ld.bfd",
|
||||
+ "ld.lld"
|
||||
};
|
||||
static const char *const real_ld_suffix = "real-ld";
|
||||
static const char *const collect_ld_suffix = "collect-ld";
|
||||
@@ -1007,6 +1009,8 @@ main (int argc, char **argv)
|
||||
selected_linker = USE_BFD_LD;
|
||||
else if (strcmp (argv[i], "-fuse-ld=gold") == 0)
|
||||
selected_linker = USE_GOLD_LD;
|
||||
+ else if (strcmp (argv[i], "-fuse-ld=lld") == 0)
|
||||
+ selected_linker = USE_LLD_LD;
|
||||
|
||||
#ifdef COLLECT_EXPORT_LIST
|
||||
/* These flags are position independent, although their order
|
||||
@@ -1096,7 +1100,8 @@ main (int argc, char **argv)
|
||||
/* Maybe we know the right file to use (if not cross). */
|
||||
ld_file_name = 0;
|
||||
#ifdef DEFAULT_LINKER
|
||||
- if (selected_linker == USE_BFD_LD || selected_linker == USE_GOLD_LD)
|
||||
+ if (selected_linker == USE_BFD_LD || selected_linker == USE_GOLD_LD ||
|
||||
+ selected_linker == USE_LLD_LD)
|
||||
{
|
||||
char *linker_name;
|
||||
# ifdef HOST_EXECUTABLE_SUFFIX
|
||||
@@ -1315,7 +1320,7 @@ main (int argc, char **argv)
|
||||
else if (!use_collect_ld
|
||||
&& strncmp (arg, "-fuse-ld=", 9) == 0)
|
||||
{
|
||||
- /* Do not pass -fuse-ld={bfd|gold} to the linker. */
|
||||
+ /* Do not pass -fuse-ld={bfd|gold|lld} to the linker. */
|
||||
ld1--;
|
||||
ld2--;
|
||||
}
|
||||
--- gcc/common.opt
|
||||
+++ gcc/common.opt
|
||||
@@ -2732,6 +2732,10 @@ fuse-ld=gold
|
||||
Common Driver Negative(fuse-ld=bfd)
|
||||
Use the gold linker instead of the default linker.
|
||||
|
||||
+fuse-ld=lld
|
||||
+Common Driver Negative(fuse-ld=lld)
|
||||
+Use the lld LLVM linker instead of the default linker.
|
||||
+
|
||||
fuse-linker-plugin
|
||||
Common Undocumented Var(flag_use_linker_plugin)
|
||||
|
||||
--- gcc/doc/invoke.texi
|
||||
+++ gcc/doc/invoke.texi
|
||||
@@ -12610,6 +12610,10 @@ Use the @command{bfd} linker instead of the default linker.
|
||||
@opindex fuse-ld=gold
|
||||
Use the @command{gold} linker instead of the default linker.
|
||||
|
||||
+@item -fuse-ld=lld
|
||||
+@opindex fuse-ld=lld
|
||||
+Use the LLVM @command{lld} linker instead of the default linker.
|
||||
+
|
||||
@cindex Libraries
|
||||
@item -l@var{library}
|
||||
@itemx -l @var{library}
|
||||
--- gcc/opts.c
|
||||
+++ gcc/opts.c
|
||||
@@ -2557,6 +2557,7 @@ common_handle_option (struct gcc_options *opts,
|
||||
|
||||
case OPT_fuse_ld_bfd:
|
||||
case OPT_fuse_ld_gold:
|
||||
+ case OPT_fuse_ld_lld:
|
||||
case OPT_fuse_linker_plugin:
|
||||
/* No-op. Used by the driver and passed to us because it starts with f.*/
|
||||
break;
|
279
SOURCES/gcc8-rh1711346.patch
Normal file
@ -0,0 +1,279 @@
|
||||
2019-05-29 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR fortran/90329
|
||||
* lto-streamer.h (LTO_minor_version): Bump to 2.
|
||||
|
||||
Backported from mainline
|
||||
2019-05-16 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR fortran/90329
|
||||
* tree-core.h (struct tree_decl_common): Document
|
||||
decl_nonshareable_flag for PARM_DECLs.
|
||||
* tree.h (DECL_HIDDEN_STRING_LENGTH): Define.
|
||||
* calls.c (expand_call): Don't try tail call if caller
|
||||
has any DECL_HIDDEN_STRING_LENGTH PARM_DECLs that are or might be
|
||||
passed on the stack and callee needs to pass any arguments on the
|
||||
stack.
|
||||
* tree-streamer-in.c (unpack_ts_decl_common_value_fields): Use
|
||||
else if instead of series of mutually exclusive ifs. Handle
|
||||
DECL_HIDDEN_STRING_LENGTH for PARM_DECLs.
|
||||
* tree-streamer-out.c (pack_ts_decl_common_value_fields): Likewise.
|
||||
|
||||
* lang.opt (fbroken-callers): Remove.
|
||||
(ftail-call-workaround, ftail-call-workaround=): New options.
|
||||
* gfortran.h (struct gfc_namespace): Add implicit_interface_calls.
|
||||
* interface.c (gfc_procedure_use): Set implicit_interface_calls
|
||||
for calls to implicit interface procedures.
|
||||
* trans-decl.c (create_function_arglist): Use flag_tail_call_workaround
|
||||
instead of flag_broken_callers. If it is not 2, also require
|
||||
sym->ns->implicit_interface_calls.
|
||||
* invoke.texi (fbroken-callers): Remove documentation.
|
||||
(ftail-call-workaround, ftail-call-workaround=): Document.
|
||||
|
||||
2019-05-19 Thomas Koenig <tkoenig@gcc.gnu.org>
|
||||
|
||||
PR fortran/90329
|
||||
* invoke.texi: Document -fbroken-callers.
|
||||
* lang.opt: Add -fbroken-callers.
|
||||
* trans-decl.c (create_function_arglist): Only set
|
||||
DECL_HIDDEN_STRING_LENGTH if flag_broken_callers is set.
|
||||
|
||||
2019-05-16 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR fortran/90329
|
||||
* trans-decl.c (create_function_arglist): Set
|
||||
DECL_HIDDEN_STRING_LENGTH on hidden string length PARM_DECLs if
|
||||
len is constant.
|
||||
|
||||
--- gcc/calls.c
|
||||
+++ gcc/calls.c
|
||||
@@ -3754,6 +3754,28 @@ expand_call (tree exp, rtx target, int ignore)
|
||||
|| dbg_cnt (tail_call) == false)
|
||||
try_tail_call = 0;
|
||||
|
||||
+ /* Workaround buggy C/C++ wrappers around Fortran routines with
|
||||
+ character(len=constant) arguments if the hidden string length arguments
|
||||
+ are passed on the stack; if the callers forget to pass those arguments,
|
||||
+ attempting to tail call in such routines leads to stack corruption.
|
||||
+ Avoid tail calls in functions where at least one such hidden string
|
||||
+ length argument is passed (partially or fully) on the stack in the
|
||||
+ caller and the callee needs to pass any arguments on the stack.
|
||||
+ See PR90329. */
|
||||
+ if (try_tail_call && maybe_ne (args_size.constant, 0))
|
||||
+ for (tree arg = DECL_ARGUMENTS (current_function_decl);
|
||||
+ arg; arg = DECL_CHAIN (arg))
|
||||
+ if (DECL_HIDDEN_STRING_LENGTH (arg) && DECL_INCOMING_RTL (arg))
|
||||
+ {
|
||||
+ subrtx_iterator::array_type array;
|
||||
+ FOR_EACH_SUBRTX (iter, array, DECL_INCOMING_RTL (arg), NONCONST)
|
||||
+ if (MEM_P (*iter))
|
||||
+ {
|
||||
+ try_tail_call = 0;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
/* If the user has marked the function as requiring tail-call
|
||||
optimization, attempt it. */
|
||||
if (must_tail_call)
|
||||
--- gcc/fortran/gfortran.h
|
||||
+++ gcc/fortran/gfortran.h
|
||||
@@ -1857,6 +1857,9 @@ typedef struct gfc_namespace
|
||||
|
||||
/* Set to 1 for !$ACC ROUTINE namespaces. */
|
||||
unsigned oacc_routine:1;
|
||||
+
|
||||
+ /* Set to 1 if there are any calls to procedures with implicit interface. */
|
||||
+ unsigned implicit_interface_calls:1;
|
||||
}
|
||||
gfc_namespace;
|
||||
|
||||
--- gcc/fortran/interface.c
|
||||
+++ gcc/fortran/interface.c
|
||||
@@ -3657,6 +3657,7 @@ gfc_procedure_use (gfc_symbol *sym, gfc_actual_arglist **ap, locus *where)
|
||||
gfc_warning (OPT_Wimplicit_procedure,
|
||||
"Procedure %qs called at %L is not explicitly declared",
|
||||
sym->name, where);
|
||||
+ gfc_find_proc_namespace (sym->ns)->implicit_interface_calls = 1;
|
||||
}
|
||||
|
||||
if (sym->attr.if_source == IFSRC_UNKNOWN)
|
||||
--- gcc/fortran/invoke.texi
|
||||
+++ gcc/fortran/invoke.texi
|
||||
@@ -181,7 +181,8 @@ and warnings}.
|
||||
@item Code Generation Options
|
||||
@xref{Code Gen Options,,Options for code generation conventions}.
|
||||
@gccoptlist{-faggressive-function-elimination -fblas-matmul-limit=@var{n} @gol
|
||||
--fbounds-check -fcheck-array-temporaries @gol
|
||||
+-fbounds-check -ftail-call-workaround -ftail-call-workaround=@var{n} @gol
|
||||
+-fcheck-array-temporaries @gol
|
||||
-fcheck=@var{<all|array-temps|bounds|do|mem|pointer|recursion>} @gol
|
||||
-fcoarray=@var{<none|single|lib>} -fexternal-blas -ff2c
|
||||
-ffrontend-loop-interchange @gol
|
||||
@@ -1580,6 +1581,39 @@ warnings for generated array temporaries.
|
||||
@c Note: This option is also referred in gcc's manpage
|
||||
Deprecated alias for @option{-fcheck=bounds}.
|
||||
|
||||
+@item -ftail-call-workaround
|
||||
+@itemx -ftail-call-workaround=@var{n}
|
||||
+@opindex @code{tail-call-workaround}
|
||||
+Some C interfaces to Fortran codes violate the gfortran ABI by
|
||||
+omitting the hidden character length arguments as described in
|
||||
+@xref{Argument passing conventions}. This can lead to crashes
|
||||
+because pushing arguments for tail calls can overflow the stack.
|
||||
+
|
||||
+To provide a workaround for existing binary packages, this option
|
||||
+disables tail call optimization for gfortran procedures with character
|
||||
+arguments. With @option{-ftail-call-workaround=2} tail call optimization
|
||||
+is disabled in all gfortran procedures with character arguments,
|
||||
+with @option{-ftail-call-workaround=1} or equivalent
|
||||
+@option{-ftail-call-workaround} only in gfortran procedures with character
|
||||
+arguments that call implicitly prototyped procedures.
|
||||
+
|
||||
+Using this option can lead to problems including crashes due to
|
||||
+insufficient stack space.
|
||||
+
|
||||
+It is @emph{very strongly} recommended to fix the code in question.
|
||||
+The @option{-fc-prototypes-external} option can be used to generate
|
||||
+prototypes which conform to gfortran's ABI, for inclusion in the
|
||||
+source code.
|
||||
+
|
||||
+Support for this option will likely be withdrawn in a future release
|
||||
+of gfortran.
|
||||
+
|
||||
+The negative form, @option{-fno-tail-call-workaround} or equivalent
|
||||
+@option{-ftail-call-workaround=0}, can be used to disable this option.
|
||||
+
|
||||
+Default is currently @option{-ftail-call-workaround}, this will change
|
||||
+in future releases.
|
||||
+
|
||||
@item -fcheck-array-temporaries
|
||||
@opindex @code{fcheck-array-temporaries}
|
||||
Deprecated alias for @option{-fcheck=array-temps}.
|
||||
--- gcc/fortran/lang.opt
|
||||
+++ gcc/fortran/lang.opt
|
||||
@@ -742,6 +742,13 @@ fsign-zero
|
||||
Fortran Var(flag_sign_zero) Init(1)
|
||||
Apply negative sign to zero values.
|
||||
|
||||
+ftail-call-workaround
|
||||
+Fortran Alias(ftail-call-workaround=,1,0)
|
||||
+
|
||||
+ftail-call-workaround=
|
||||
+Fortran RejectNegative Joined UInteger IntegerRange(0, 2) Var(flag_tail_call_workaround) Init(1)
|
||||
+Disallow tail call optimization when a calling routine may have omitted character lengths.
|
||||
+
|
||||
funderscoring
|
||||
Fortran Var(flag_underscoring) Init(1)
|
||||
Append underscores to externally visible names.
|
||||
--- gcc/fortran/trans-decl.c
|
||||
+++ gcc/fortran/trans-decl.c
|
||||
@@ -2513,6 +2513,17 @@ create_function_arglist (gfc_symbol * sym)
|
||||
TREE_READONLY (length) = 1;
|
||||
gfc_finish_decl (length);
|
||||
|
||||
+ /* Marking the length DECL_HIDDEN_STRING_LENGTH will lead
|
||||
+ to tail calls being disabled. Only do that if we
|
||||
+ potentially have broken callers. */
|
||||
+ if (flag_tail_call_workaround
|
||||
+ && f->sym->ts.u.cl
|
||||
+ && f->sym->ts.u.cl->length
|
||||
+ && f->sym->ts.u.cl->length->expr_type == EXPR_CONSTANT
|
||||
+ && (flag_tail_call_workaround == 2
|
||||
+ || f->sym->ns->implicit_interface_calls))
|
||||
+ DECL_HIDDEN_STRING_LENGTH (length) = 1;
|
||||
+
|
||||
/* Remember the passed value. */
|
||||
if (!f->sym->ts.u.cl || f->sym->ts.u.cl->passed_length)
|
||||
{
|
||||
--- gcc/lto-streamer.h
|
||||
+++ gcc/lto-streamer.h
|
||||
@@ -121,7 +121,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
form followed by the data for the string. */
|
||||
|
||||
#define LTO_major_version 7
|
||||
-#define LTO_minor_version 1
|
||||
+#define LTO_minor_version 2
|
||||
|
||||
typedef unsigned char lto_decl_flags_t;
|
||||
|
||||
--- gcc/tree-core.h
|
||||
+++ gcc/tree-core.h
|
||||
@@ -1644,6 +1644,7 @@ struct GTY(()) tree_decl_common {
|
||||
/* In a VAR_DECL and PARM_DECL, this is DECL_READ_P. */
|
||||
unsigned decl_read_flag : 1;
|
||||
/* In a VAR_DECL or RESULT_DECL, this is DECL_NONSHAREABLE. */
|
||||
+ /* In a PARM_DECL, this is DECL_HIDDEN_STRING_LENGTH. */
|
||||
unsigned decl_nonshareable_flag : 1;
|
||||
|
||||
/* DECL_OFFSET_ALIGN, used only for FIELD_DECLs. */
|
||||
--- gcc/tree-streamer-in.c
|
||||
+++ gcc/tree-streamer-in.c
|
||||
@@ -252,7 +252,7 @@ unpack_ts_decl_common_value_fields (struct bitpack_d *bp, tree expr)
|
||||
LABEL_DECL_UID (expr) = -1;
|
||||
}
|
||||
|
||||
- if (TREE_CODE (expr) == FIELD_DECL)
|
||||
+ else if (TREE_CODE (expr) == FIELD_DECL)
|
||||
{
|
||||
DECL_PACKED (expr) = (unsigned) bp_unpack_value (bp, 1);
|
||||
DECL_NONADDRESSABLE_P (expr) = (unsigned) bp_unpack_value (bp, 1);
|
||||
@@ -260,12 +260,15 @@ unpack_ts_decl_common_value_fields (struct bitpack_d *bp, tree expr)
|
||||
expr->decl_common.off_align = bp_unpack_value (bp, 8);
|
||||
}
|
||||
|
||||
- if (VAR_P (expr))
|
||||
+ else if (VAR_P (expr))
|
||||
{
|
||||
DECL_HAS_DEBUG_EXPR_P (expr) = (unsigned) bp_unpack_value (bp, 1);
|
||||
DECL_NONLOCAL_FRAME (expr) = (unsigned) bp_unpack_value (bp, 1);
|
||||
}
|
||||
|
||||
+ else if (TREE_CODE (expr) == PARM_DECL)
|
||||
+ DECL_HIDDEN_STRING_LENGTH (expr) = (unsigned) bp_unpack_value (bp, 1);
|
||||
+
|
||||
if (TREE_CODE (expr) == RESULT_DECL
|
||||
|| TREE_CODE (expr) == PARM_DECL
|
||||
|| VAR_P (expr))
|
||||
--- gcc/tree-streamer-out.c
|
||||
+++ gcc/tree-streamer-out.c
|
||||
@@ -212,7 +212,7 @@ pack_ts_decl_common_value_fields (struct bitpack_d *bp, tree expr)
|
||||
bp_pack_var_len_unsigned (bp, EH_LANDING_PAD_NR (expr));
|
||||
}
|
||||
|
||||
- if (TREE_CODE (expr) == FIELD_DECL)
|
||||
+ else if (TREE_CODE (expr) == FIELD_DECL)
|
||||
{
|
||||
bp_pack_value (bp, DECL_PACKED (expr), 1);
|
||||
bp_pack_value (bp, DECL_NONADDRESSABLE_P (expr), 1);
|
||||
@@ -220,12 +220,15 @@ pack_ts_decl_common_value_fields (struct bitpack_d *bp, tree expr)
|
||||
bp_pack_value (bp, expr->decl_common.off_align, 8);
|
||||
}
|
||||
|
||||
- if (VAR_P (expr))
|
||||
+ else if (VAR_P (expr))
|
||||
{
|
||||
bp_pack_value (bp, DECL_HAS_DEBUG_EXPR_P (expr), 1);
|
||||
bp_pack_value (bp, DECL_NONLOCAL_FRAME (expr), 1);
|
||||
}
|
||||
|
||||
+ else if (TREE_CODE (expr) == PARM_DECL)
|
||||
+ bp_pack_value (bp, DECL_HIDDEN_STRING_LENGTH (expr), 1);
|
||||
+
|
||||
if (TREE_CODE (expr) == RESULT_DECL
|
||||
|| TREE_CODE (expr) == PARM_DECL
|
||||
|| VAR_P (expr))
|
||||
--- gcc/tree.h
|
||||
+++ gcc/tree.h
|
||||
@@ -909,6 +909,11 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
|
||||
(TREE_CHECK2 (NODE, VAR_DECL, \
|
||||
RESULT_DECL)->decl_common.decl_nonshareable_flag)
|
||||
|
||||
+/* In a PARM_DECL, set for Fortran hidden string length arguments that some
|
||||
+ buggy callers don't pass to the callee. */
|
||||
+#define DECL_HIDDEN_STRING_LENGTH(NODE) \
|
||||
+ (TREE_CHECK (NODE, PARM_DECL)->decl_common.decl_nonshareable_flag)
|
||||
+
|
||||
/* In a CALL_EXPR, means that the call is the jump from a thunk to the
|
||||
thunked-to function. */
|
||||
#define CALL_FROM_THUNK_P(NODE) (CALL_EXPR_CHECK (NODE)->base.protected_flag)
|
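As background for the workaround above, a hedged sketch in C of the convention the "broken callers" violate: gfortran passes a hidden length for character(len=...) dummy arguments as a trailing argument, and a wrapper that omits it leaves the callee's view of its own argument area wrong, which is why tail calls made from such a callee can corrupt the stack. The routine name and the use of size_t for the length type are illustrative assumptions, not taken from the patch.

/* Stand-in for a Fortran routine  subroutine report(msg)  with a
   character(len=*) dummy argument, written in C so the example is
   self-contained.  A conforming C caller passes the hidden length.  */
#include <stdio.h>
#include <string.h>
#include <stddef.h>

static void report_ (const char *msg, size_t msg_len)
{
  printf ("%.*s\n", (int) msg_len, msg);   /* print exactly msg_len bytes */
}

int main (void)
{
  const char *msg = "conforming call with hidden length";

  /* Correct: the trailing length is passed.  The buggy wrappers of
     PR fortran/90329 omit it; -ftail-call-workaround then keeps the
     affected callees from tail calling, so the slot the callee expects
     for the missing argument cannot be overwritten in the caller's
     frame.  */
  report_ (msg, strlen (msg));
  return 0;
}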
45
SOURCES/gcc8-rh1730380.patch
Normal file
@ -0,0 +1,45 @@
|
||||
2019-07-15 Andreas Krebbel <krebbel@linux.ibm.com>
|
||||
|
||||
Backport from mainline
|
||||
2019-07-01 Andreas Krebbel <krebbel@linux.ibm.com>
|
||||
|
||||
* config/s390/vector.md: Fix shift count operand printing.
|
||||
|
||||
--- gcc/config/s390/vector.md
|
||||
+++ gcc/config/s390/vector.md
|
||||
@@ -944,7 +944,7 @@
|
||||
(VEC_SHIFTS:VI (match_operand:VI 1 "register_operand" "v")
|
||||
(match_operand:SI 2 "nonmemory_operand" "an")))]
|
||||
"TARGET_VX"
|
||||
- "<vec_shifts_mnem><bhfgq>\t%v0,%v1,%Y2"
|
||||
+ "<vec_shifts_mnem><bhfgq>\t%v0,%v1,<addr_style_op_ops>"
|
||||
[(set_attr "op_type" "VRS")])
|
||||
|
||||
; Shift each element by corresponding vector element
|
||||
--- /dev/null
|
||||
+++ gcc/testsuite/gcc.target/s390/vector/vec-shift-2.c
|
||||
@@ -0,0 +1,24 @@
|
||||
+/* { dg-do run } */
|
||||
+/* { dg-options "-O3 -mzarch -march=z13 --save-temps" } */
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times "veslf" 1 } } */
|
||||
+
|
||||
+typedef __attribute__((vector_size(16))) signed int v4si;
|
||||
+
|
||||
+v4si __attribute__((noinline,noclone))
|
||||
+shift_left_by_scalar (v4si in, int shift_count)
|
||||
+{
|
||||
+ return in << (3 + shift_count);
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+main ()
|
||||
+{
|
||||
+ v4si a = { 1, 2, 3, 4 };
|
||||
+ v4si result = shift_left_by_scalar (a, 1);
|
||||
+
|
||||
+ if (result[1] != 32)
|
||||
+ __builtin_abort ();
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
157
SPECS/gcc.spec
@ -1,10 +1,10 @@
|
||||
%global DATE 20180905
|
||||
%global SVNREV 264110
|
||||
%global gcc_version 8.2.1
|
||||
%global DATE 20190507
|
||||
%global SVNREV 270976
|
||||
%global gcc_version 8.3.1
|
||||
%global gcc_major 8
|
||||
# Note, gcc_release must be integer, if you want to add suffixes to
|
||||
# %%{release}, append them after %%{gcc_release} on Release: line.
|
||||
%global gcc_release 3
|
||||
%global gcc_release 4
|
||||
%global nvptx_tools_gitrev c28050f60193b3b95a18866a96f03334e874e78f
|
||||
%global nvptx_newlib_gitrev aadc8eb0ec43b7cd0dd2dfb484bae63c8b05ef24
|
||||
%global _unpackaged_files_terminate_build 0
|
||||
@ -268,16 +268,20 @@ Patch11: gcc8-rh1512529-aarch64.patch
|
||||
Patch12: gcc8-mcet.patch
|
||||
Patch13: gcc8-rh1574936.patch
|
||||
Patch14: gcc8-libgcc-hardened.patch
|
||||
Patch15: gcc8-rh1612514.patch
|
||||
Patch16: gcc8-pr60790.patch
|
||||
Patch17: gcc8-rh1652016.patch
|
||||
|
||||
Patch21: gcc8-rh1652929-1.patch
|
||||
Patch22: gcc8-rh1652929-2.patch
|
||||
Patch23: gcc8-rh1652929-3.patch
|
||||
Patch24: gcc8-rh1652929-4.patch
|
||||
Patch25: gcc8-rh1652929-5.patch
|
||||
Patch15: gcc8-rh1670535.patch
|
||||
Patch16: gcc8-pr85400.patch
|
||||
Patch17: gcc8-libgomp-20190503.patch
|
||||
Patch18: gcc8-pr86747.patch
|
||||
Patch19: gcc8-libgomp-testsuite.patch
|
||||
Patch20: gcc8-rh1711346.patch
|
||||
Patch21: gcc8-rh1730380.patch
|
||||
Patch22: gcc8-pr86098.patch
|
||||
Patch23: gcc8-pr90139.patch
|
||||
Patch24: gcc8-pr90756.patch
|
||||
|
||||
Patch30: gcc8-rh1668903-1.patch
|
||||
Patch31: gcc8-rh1668903-2.patch
|
||||
Patch32: gcc8-rh1668903-3.patch
|
||||
|
||||
Patch1000: nvptx-tools-no-ptxas.patch
|
||||
Patch1001: nvptx-tools-build.patch
|
||||
@ -847,15 +851,20 @@ to NVidia PTX capable devices if available.
|
||||
%patch13 -p0 -b .rh1574936~
|
||||
%patch14 -p0 -b .libgcc-hardened~
|
||||
%endif
|
||||
%patch15 -p0 -b .rh1612514~
|
||||
%patch16 -p0 -b .pr60790~
|
||||
%patch17 -p1 -b .rh1652016~
|
||||
%patch15 -p0 -b .rh1670535~
|
||||
%patch16 -p0 -b .pr85400~
|
||||
%patch17 -p0 -b .libgomp-20190503~
|
||||
%patch18 -p0 -b .pr86747~
|
||||
%patch19 -p0 -b .libgomp-testsuite~
|
||||
%patch20 -p0 -b .rh1711346~
|
||||
%patch21 -p0 -b .rh1730380~
|
||||
%patch22 -p0 -b .pr86098~
|
||||
%patch23 -p0 -b .pr90139~
|
||||
%patch24 -p0 -b .pr90756~
|
||||
|
||||
%patch21 -p1 -b .rh1652929-1~
|
||||
%patch22 -p1 -b .rh1652929-2~
|
||||
%patch23 -p1 -b .rh1652929-3~
|
||||
%patch24 -p1 -b .rh1652929-4~
|
||||
%patch25 -p1 -b .rh1652929-5~
|
||||
%patch30 -p0 -b .rh1668903-1~
|
||||
%patch31 -p0 -b .rh1668903-2~
|
||||
%patch32 -p0 -b .rh1668903-3~
|
||||
|
||||
cd nvptx-tools-%{nvptx_tools_gitrev}
|
||||
%patch1000 -p1 -b .nvptx-tools-no-ptxas~
|
||||
@ -1350,36 +1359,39 @@ mkdir -p %{buildroot}/%{_lib}
|
||||
mv -f %{buildroot}%{_prefix}/%{_lib}/libgcc_s.so.1 %{buildroot}/%{_lib}/libgcc_s-%{gcc_major}-%{DATE}.so.1
|
||||
chmod 755 %{buildroot}/%{_lib}/libgcc_s-%{gcc_major}-%{DATE}.so.1
|
||||
ln -sf libgcc_s-%{gcc_major}-%{DATE}.so.1 %{buildroot}/%{_lib}/libgcc_s.so.1
|
||||
%ifarch %{ix86} x86_64 ppc ppc64 ppc64p7 ppc64le %{arm}
|
||||
rm -f $FULLPATH/libgcc_s.so
|
||||
echo '/* GNU ld script
|
||||
Use the shared library, but some functions are only in
|
||||
the static library, so try that secondarily. */
|
||||
OUTPUT_FORMAT('`gcc -Wl,--print-output-format -nostdlib -r -o /dev/null`')
|
||||
GROUP ( /%{_lib}/libgcc_s.so.1 libgcc.a )' > $FULLPATH/libgcc_s.so
|
||||
%else
|
||||
ln -sf /%{_lib}/libgcc_s.so.1 $FULLPATH/libgcc_s.so
|
||||
%endif
|
||||
%ifarch sparcv9 ppc
|
||||
%ifarch ppc
|
||||
rm -f $FULLPATH/64/libgcc_s.so
|
||||
echo '/* GNU ld script
|
||||
Use the shared library, but some functions are only in
|
||||
the static library, so try that secondarily. */
|
||||
OUTPUT_FORMAT('`gcc -m64 -Wl,--print-output-format -nostdlib -r -o /dev/null`')
|
||||
GROUP ( /lib64/libgcc_s.so.1 libgcc.a )' > $FULLPATH/64/libgcc_s.so
|
||||
%else
|
||||
ln -sf /lib64/libgcc_s.so.1 $FULLPATH/64/libgcc_s.so
|
||||
%endif
|
||||
%endif
|
||||
%ifarch %{multilib_64_archs}
|
||||
%ifarch x86_64 ppc64 ppc64p7
|
||||
rm -f $FULLPATH/64/libgcc_s.so
|
||||
echo '/* GNU ld script
|
||||
Use the shared library, but some functions are only in
|
||||
the static library, so try that secondarily. */
|
||||
OUTPUT_FORMAT('`gcc -m32 -Wl,--print-output-format -nostdlib -r -o /dev/null`')
|
||||
GROUP ( /lib/libgcc_s.so.1 libgcc.a )' > $FULLPATH/32/libgcc_s.so
|
||||
%else
|
||||
ln -sf /lib/libgcc_s.so.1 $FULLPATH/32/libgcc_s.so
|
||||
%endif
|
||||
%ifarch ppc
|
||||
rm -f $FULLPATH/libgcc_s.so
|
||||
echo '/* GNU ld script
|
||||
Use the shared library, but some functions are only in
|
||||
the static library, so try that secondarily. */
|
||||
OUTPUT_FORMAT(elf32-powerpc)
|
||||
GROUP ( /lib/libgcc_s.so.1 libgcc.a )' > $FULLPATH/libgcc_s.so
|
||||
%endif
|
||||
%ifarch ppc64 ppc64p7
|
||||
rm -f $FULLPATH/32/libgcc_s.so
|
||||
echo '/* GNU ld script
|
||||
Use the shared library, but some functions are only in
|
||||
the static library, so try that secondarily. */
|
||||
OUTPUT_FORMAT(elf32-powerpc)
|
||||
GROUP ( /lib/libgcc_s.so.1 libgcc.a )' > $FULLPATH/32/libgcc_s.so
|
||||
%endif
|
||||
%ifarch %{arm}
|
||||
rm -f $FULLPATH/libgcc_s.so
|
||||
echo '/* GNU ld script
|
||||
Use the shared library, but some functions are only in
|
||||
the static library, so try that secondarily. */
|
||||
OUTPUT_FORMAT(elf32-littlearm)
|
||||
GROUP ( /lib/libgcc_s.so.1 libgcc.a )' > $FULLPATH/libgcc_s.so
|
||||
%endif
|
||||
|
||||
mv -f %{buildroot}%{_prefix}/%{_lib}/libgomp.spec $FULLPATH/
|
||||
@ -2335,6 +2347,14 @@ fi
|
||||
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/vec_types.h
|
||||
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/htmintrin.h
|
||||
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/htmxlintrin.h
|
||||
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/bmi2intrin.h
|
||||
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/bmiintrin.h
|
||||
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/xmmintrin.h
|
||||
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/mm_malloc.h
|
||||
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/emmintrin.h
|
||||
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/mmintrin.h
|
||||
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/x86intrin.h
|
||||
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/amo.h
|
||||
%endif
|
||||
%ifarch %{arm}
|
||||
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/unwind-arm-common.h
|
||||
@ -3157,21 +3177,60 @@ fi
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
* Mon Dec 10 2018 Marek Polacek <polacek@redhat.com 8.2.1-3.5
|
||||
* Tue Jul 16 2019 Marek Polacek <polacek@redhat.com> 8.3.1-4.5
|
||||
- fix shift count operand printing (#1730380)
|
||||
- fix tree-outof-ssa.c ICE with vector types (PR middle-end/90139, #1730454)
|
||||
- fix out-of-ssa with unsupported vector types (PR rtl-optimization/90756,
|
||||
#1730454)
|
||||
- fix ICE with template placeholder for TTP (PR c++/86098, #1730454)
|
||||
|
||||
* Mon Jun 3 2019 Marek Polacek <polacek@redhat.com> 8.3.1-4.4
|
||||
- backport workaround for broken C/C++ wrappers to LAPACK (#1711346)
|
||||
|
||||
* Fri May 24 2019 Marek Polacek <polacek@redhat.com> 8.3.1-4.3
|
||||
- additional fix for the libgomp testsuite (#1707568)
|
||||
|
||||
* Tue May 21 2019 Marek Polacek <polacek@redhat.com> 8.3.1-4.2
|
||||
- backport the -fuse-ld=lld option (#1670535)
|
||||
- TLS model fix (#1678555, PR c++/85400)
|
||||
- two small autoFDO fixes (#1686082)
|
||||
- libgomp update (#1707568)
|
||||
- member template redeclaration fix (#1652704, PR c++/86747)
|
||||
- turn libgcc_s.so into a linker script on i?86, x86_64, ppc64le and also on
|
||||
ppc and ppc64 for 64-bit multilib (#1708309)
|
||||
- avoid using unaligned vsx or lxvd2x/stxvd2x for memcpy/memmove inline
|
||||
expansion (#1666977)
|
||||
|
||||
* Wed May 8 2019 Marek Polacek <polacek@redhat.com> 8.3.1-4.1
|
||||
- tweak gcc8-rh1668903-1.patch and gcc8-rh1668903-2.patch patches
|
||||
|
||||
* Tue May 7 2019 Marek Polacek <polacek@redhat.com> 8.3.1-4
|
||||
- update from Fedora 8.3.1-4 (#1680182)
|
||||
- drop gcc8-pr60790.patch, gcc8-pr89629.patch, gcc8-rh1668903-4.patch
|
||||
|
||||
* Tue May 7 2019 Marek Polacek <polacek@redhat.com> 8.3.1-3
|
||||
- update from Fedora 8.3.1-3 (#1680182)
|
||||
- remove load and test FP splitter (#1673116)
|
||||
- fix *movsi_from_df (#1677652)
|
||||
- add missing headers
|
||||
- add support for live patching (#1668903)
|
||||
- retire gcc8-rh1612514.patch, gcc8-rh1652016.patch, gcc8-rh1652929-?.patch
|
||||
|
||||
* Mon Dec 10 2018 Marek Polacek <polacek@redhat.com> 8.2.1-3.5
|
||||
- remove python2 dependecy (#1595385)
|
||||
|
||||
* Tue Nov 27 2018 Jeff Law <law@redhat.com 8.2.1-3.4
|
||||
* Tue Nov 27 2018 Jeff Law <law@redhat.com> 8.2.1-3.4
|
||||
- Backport PPC string inlines from trunk which allow for valgrind's
|
||||
memcheck to work properly (#1652929)
|
||||
- Backport bugfix for clz pattern on s390 affecting jemalloc (#1652016)
|
||||
|
||||
* Mon Oct 15 2018 Marek Polacek <polacek@redhat.com 8.2.1-3.3
|
||||
* Mon Oct 15 2018 Marek Polacek <polacek@redhat.com> 8.2.1-3.3
|
||||
- avoid IFUNC resolver access to uninitialized data (#1559350, PR libgcc/60790)
|
||||
|
||||
* Thu Oct 11 2018 Marek Polacek <polacek@redhat.com 8.2.1-3.2
|
||||
* Thu Oct 11 2018 Marek Polacek <polacek@redhat.com> 8.2.1-3.2
|
||||
- fix typos in manual (#1612514)
|
||||
|
||||
* Mon Oct 8 2018 Marek Polacek <polacek@redhat.com 8.2.1-3.1
|
||||
* Mon Oct 8 2018 Marek Polacek <polacek@redhat.com> 8.2.1-3.1
|
||||
- enable hardening of binaries (#1624114)
|
||||
- disable libgccjit on RHEL
|
||||