import gcc-8.3.1-4.5.el8

Commit 745403a9bb (parent 0e369741f6)
Author: CentOS Sources
Date: 2019-11-05 15:22:38 -05:00

24 changed files with 11450 additions and 2551 deletions


@@ -1,3 +1,3 @@
-1fe3aa7ce95faa0f4d7f08f0dfefd86ff4b43015 SOURCES/gcc-8.2.1-20180905.tar.xz
+8ee669ee60997110e6251c72dac66bf69bbe13c7 SOURCES/gcc-8.3.1-20190507.tar.xz
3bdb3cc01fa7690a0e20ea5cfffcbe690f7665eb SOURCES/nvptx-newlib-aadc8eb0ec43b7cd0dd2dfb484bae63c8b05ef24.tar.xz
ce8eb83be0ac37fb5d5388df455a980fe37b4f13 SOURCES/nvptx-tools-c28050f60193b3b95a18866a96f03334e874e78f.tar.xz

.gitignore

@@ -1,3 +1,3 @@
-SOURCES/gcc-8.2.1-20180905.tar.xz
+SOURCES/gcc-8.3.1-20190507.tar.xz
SOURCES/nvptx-newlib-aadc8eb0ec43b7cd0dd2dfb484bae63c8b05ef24.tar.xz
SOURCES/nvptx-tools-c28050f60193b3b95a18866a96f03334e874e78f.tar.xz

File diff suppressed because it is too large


@@ -0,0 +1,41 @@
--- libgomp/testsuite/libgomp-test-support.exp.in.jj 2018-04-25 09:40:31.323655308 +0200
+++ libgomp/testsuite/libgomp-test-support.exp.in 2019-04-25 20:01:50.028243827 +0200
@@ -2,4 +2,5 @@ set cuda_driver_include "@CUDA_DRIVER_IN
set cuda_driver_lib "@CUDA_DRIVER_LIB@"
set hsa_runtime_lib "@HSA_RUNTIME_LIB@"
+set offload_plugins "@offload_plugins@"
set offload_targets "@offload_targets@"
--- libgomp/testsuite/lib/libgomp.exp.jj 2018-04-25 09:40:31.584655429 +0200
+++ libgomp/testsuite/lib/libgomp.exp 2019-05-24 11:41:51.015822702 +0200
@@ -40,7 +40,7 @@ load_file libgomp-test-support.exp
# Populate offload_targets_s (offloading targets separated by a space), and
# offload_targets_s_openacc (the same, but with OpenACC names; OpenACC spells
# some of them a little differently).
-set offload_targets_s [split $offload_targets ","]
+set offload_targets_s [split $offload_plugins ","]
set offload_targets_s_openacc {}
foreach offload_target_openacc $offload_targets_s {
# Translate to OpenACC names, or skip if not yet supported.
@@ -137,8 +137,8 @@ proc libgomp_init { args } {
# Add liboffloadmic build directory in LD_LIBRARY_PATH to support
# non-fallback testing for Intel MIC targets
- global offload_targets
- if { [string match "*,intelmic,*" ",$offload_targets,"] } {
+ global offload_plugins
+ if { [string match "*,intelmic,*" ",$offload_plugins,"] } {
append always_ld_library_path ":${blddir}/../liboffloadmic/.libs"
append always_ld_library_path ":${blddir}/../liboffloadmic/plugin/.libs"
# libstdc++ is required by liboffloadmic
@@ -362,8 +362,8 @@ proc check_effective_target_offload_devi
# Return 1 if configured for nvptx offloading.
proc check_effective_target_openacc_nvidia_accel_configured { } {
- global offload_targets
- if { ![string match "*,nvptx,*" ",$offload_targets,"] } {
+ global offload_plugins
+ if { ![string match "*,nvptx,*" ",$offload_plugins,"] } {
return 0
}
# PR libgomp/65099: Currently, we only support offloading in 64-bit


@@ -1,84 +0,0 @@
PR libgcc/60790
x86: Do not assume ELF constructors run before IFUNC resolvers.
* config/x86/host-config.h (libat_feat1_ecx, libat_feat1_edx):
Remove declarations.
(__libat_feat1, __libat_feat1_init): Declare.
(FEAT1_REGISTER): Define.
(load_feat1): New function.
(IFUNC_COND_1): Adjust.
* config/x86/init.c (libat_feat1_ecx, libat_feat1_edx)
(init_cpuid): Remove definitions.
(__libat_feat1): New variable.
(__libat_feat1_init): New function.
--- libatomic/config/x86/host-config.h (revision 264990)
+++ libatomic/config/x86/host-config.h (working copy)
@@ -25,13 +25,39 @@
#if HAVE_IFUNC
#include <cpuid.h>
-extern unsigned int libat_feat1_ecx HIDDEN;
-extern unsigned int libat_feat1_edx HIDDEN;
+#ifdef __x86_64__
+# define FEAT1_REGISTER ecx
+#else
+# define FEAT1_REGISTER edx
+#endif
+/* Value of the CPUID feature register FEAT1_REGISTER for the cmpxchg
+ bit for IFUNC_COND_1 below. */
+extern unsigned int __libat_feat1 HIDDEN;
+
+/* Initialize __libat_feat1 and return its value. */
+unsigned int __libat_feat1_init (void) HIDDEN;
+
+/* Return the value of the relevant feature register for the relevant
+ cmpxchg bit, or 0 if there is no CPUID support. */
+static inline unsigned int
+__attribute__ ((const))
+load_feat1 (void)
+{
+ /* See the store in __libat_feat1_init. */
+ unsigned int feat1 = __atomic_load_n (&__libat_feat1, __ATOMIC_RELAXED);
+ if (feat1 == 0)
+ /* Assume that initialization has not happened yet. This may get
+ called repeatedly if the CPU does not have any feature bits at
+ all. */
+ feat1 = __libat_feat1_init ();
+ return feat1;
+}
+
#ifdef __x86_64__
-# define IFUNC_COND_1 (libat_feat1_ecx & bit_CMPXCHG16B)
+# define IFUNC_COND_1 (load_feat1 () & bit_CMPXCHG16B)
#else
-# define IFUNC_COND_1 (libat_feat1_edx & bit_CMPXCHG8B)
+# define IFUNC_COND_1 (load_feat1 () & bit_CMPXCHG8B)
#endif
#ifdef __x86_64__
--- libatomic/config/x86/init.c (revision 264990)
+++ libatomic/config/x86/init.c (working copy)
@@ -26,13 +26,17 @@
#if HAVE_IFUNC
-unsigned int libat_feat1_ecx, libat_feat1_edx;
+unsigned int __libat_feat1;
-static void __attribute__((constructor))
-init_cpuid (void)
+unsigned int
+__libat_feat1_init (void)
{
- unsigned int eax, ebx;
- __get_cpuid (1, &eax, &ebx, &libat_feat1_ecx, &libat_feat1_edx);
+ unsigned int eax, ebx, ecx, edx;
+ FEAT1_REGISTER = 0;
+ __get_cpuid (1, &eax, &ebx, &ecx, &edx);
+ /* See the load in load_feat1. */
+ __atomic_store_n (&__libat_feat1, FEAT1_REGISTER, __ATOMIC_RELAXED);
+ return FEAT1_REGISTER;
}
#endif /* HAVE_IFUNC */
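
For context, the pattern this patch implements is what an IFUNC resolver
needs in order to be safe before ELF constructors run. A minimal
self-contained sketch, not part of the patch (the names my_op,
resolve_my_op, impl_cx16 and impl_fallback are invented for
illustration; bit_CMPXCHG16B and __get_cpuid come from <cpuid.h>):

#include <cpuid.h>

static unsigned int feat1;  /* 0 means "not initialized yet" */

static unsigned int
load_feat1 (void)
{
  unsigned int f = __atomic_load_n (&feat1, __ATOMIC_RELAXED);
  if (f == 0)
    {
      /* Lazy initialization, as in the patch: safe even when the
         resolver runs before any ELF constructor.  */
      unsigned int eax, ebx, ecx = 0, edx = 0;
      __get_cpuid (1, &eax, &ebx, &ecx, &edx);
      f = ecx;  /* x86-64 case; the patch picks ecx or edx per target */
      __atomic_store_n (&feat1, f, __ATOMIC_RELAXED);
    }
  return f;
}

static void impl_cx16 (void) { /* cmpxchg16b-based implementation */ }
static void impl_fallback (void) { /* generic implementation */ }

/* The resolver must not assume constructors have run, which is
   exactly why the patch drops the __attribute__((constructor)).  */
static void (*resolve_my_op (void)) (void)
{
  return (load_feat1 () & bit_CMPXCHG16B) ? impl_cx16 : impl_fallback;
}

void my_op (void) __attribute__ ((ifunc ("resolve_my_op")));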


@@ -0,0 +1,94 @@
2018-05-10 Eric Botcazou <ebotcazou@adacore.com>
PR c++/85400
* c-attribs.c (handle_tls_model_attribute): Do not set no_add_attrs.
* decl2.c (adjust_var_decl_tls_model): New static function.
(comdat_linkage): Call it on a variable.
(maybe_make_one_only): Likewise.
--- gcc/c-family/c-attribs.c
+++ gcc/c-family/c-attribs.c
@@ -2299,14 +2299,13 @@ handle_visibility_attribute (tree *node, tree name, tree args,
static tree
handle_tls_model_attribute (tree *node, tree name, tree args,
- int ARG_UNUSED (flags), bool *no_add_attrs)
+ int ARG_UNUSED (flags),
+ bool *ARG_UNUSED (no_add_attrs))
{
tree id;
tree decl = *node;
enum tls_model kind;
- *no_add_attrs = true;
-
if (!VAR_P (decl) || !DECL_THREAD_LOCAL_P (decl))
{
warning (OPT_Wattributes, "%qE attribute ignored", name);
--- gcc/cp/decl2.c
+++ gcc/cp/decl2.c
@@ -1838,6 +1838,17 @@ mark_vtable_entries (tree decl)
}
}
+/* Adjust the TLS model on variable DECL if need be, typically after
+ the linkage of DECL has been modified. */
+
+static void
+adjust_var_decl_tls_model (tree decl)
+{
+ if (CP_DECL_THREAD_LOCAL_P (decl)
+ && !lookup_attribute ("tls_model", DECL_ATTRIBUTES (decl)))
+ set_decl_tls_model (decl, decl_default_tls_model (decl));
+}
+
/* Set DECL up to have the closest approximation of "initialized common"
linkage available. */
@@ -1888,6 +1899,9 @@ comdat_linkage (tree decl)
if (TREE_PUBLIC (decl))
DECL_COMDAT (decl) = 1;
+
+ if (VAR_P (decl))
+ adjust_var_decl_tls_model (decl);
}
/* For win32 we also want to put explicit instantiations in
@@ -1926,6 +1940,8 @@ maybe_make_one_only (tree decl)
/* Mark it needed so we don't forget to emit it. */
node->forced_by_abi = true;
TREE_USED (decl) = 1;
+
+ adjust_var_decl_tls_model (decl);
}
}
}
--- /dev/null
+++ gcc/testsuite/g++.dg/tls/pr85400.C
@@ -0,0 +1,24 @@
+// PR c++/85400
+// Testcase by Brian Vandenberg <phantall@gmail.com>
+
+// { dg-do link { target c++11 } }
+// { dg-require-effective-target fpic }
+// { dg-require-effective-target shared }
+// { dg-require-effective-target tls }
+// { dg-options "-shared -fPIC -O" }
+// { dg-add-options tls }
+
+struct Test
+{
+ int blah (int y)
+ {
+ thread_local int mything = 3;
+ mything = y > 0 ? y : mything;
+ return mything;
+ }
+};
+
+int stuff (Test& test, int y)
+{
+ return test.blah(y);
+}
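
As a hedged illustration of the distinction the fix relies on
(hypothetical variable names): the first variable below has no explicit
model, so adjust_var_decl_tls_model recomputes it when the linkage
changes; the explicit attribute on the second, now kept on
DECL_ATTRIBUTES by handle_tls_model_attribute, pins its model.

/* Model may be recomputed (e.g. when the decl becomes comdat).  */
static __thread int auto_model;

/* Explicit model: adjust_var_decl_tls_model leaves this alone.  */
static __thread int pinned __attribute__ ((tls_model ("initial-exec")));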


@@ -0,0 +1,39 @@
2018-06-12 Jason Merrill <jason@redhat.com>
PR c++/86098 - ICE with template placeholder for TTP.
* typeck.c (structural_comptypes) [TEMPLATE_TYPE_PARM]: Check
CLASS_PLACEHOLDER_TEMPLATE.
--- gcc/cp/typeck.c
+++ gcc/cp/typeck.c
@@ -1375,6 +1375,11 @@ structural_comptypes (tree t1, tree t2, int strict)
template parameters set, they can't be equal. */
if (!comp_template_parms_position (t1, t2))
return false;
+ /* If T1 and T2 don't represent the same class template deduction,
+ they aren't equal. */
+ if (CLASS_PLACEHOLDER_TEMPLATE (t1)
+ != CLASS_PLACEHOLDER_TEMPLATE (t2))
+ return false;
/* Constrained 'auto's are distinct from parms that don't have the same
constraints. */
if (!equivalent_placeholder_constraints (t1, t2))
--- /dev/null
+++ gcc/testsuite/g++.dg/cpp1z/class-deduction58.C
@@ -0,0 +1,16 @@
+// PR c++/86098
+// { dg-additional-options -std=c++17 }
+
+template <class _Res> class future;
+template <class T> T&& declval();
+
+template<template <class...> class T>
+struct construct_deduced {
+ template <class... AN>
+ using deduced_t = decltype(T{declval<AN>()...});
+ template<class... AN>
+ deduced_t<AN...> operator()(AN&&... an) const;
+};
+
+template<class T>
+future<T> future_from(T singleSender);


@@ -0,0 +1,30 @@
2018-12-06 Alexandre Oliva <aoliva@redhat.com>
PR c++/86747
* pt.c (tsubst_friend_class): Enter tsubsted class context.
--- gcc/cp/pt.c
+++ gcc/cp/pt.c
@@ -10558,7 +10558,10 @@ tsubst_friend_class (tree friend_tmpl, tree args)
if (TREE_CODE (context) == NAMESPACE_DECL)
push_nested_namespace (context);
else
- push_nested_class (context);
+ {
+ context = tsubst (context, args, tf_error, NULL_TREE);
+ push_nested_class (context);
+ }
tmpl = lookup_name_real (DECL_NAME (friend_tmpl), /*prefer_type=*/false,
/*non_class=*/false, /*block_p=*/false,
--- /dev/null
+++ gcc/testsuite/g++.dg/pr86747.C
@@ -0,0 +1,8 @@
+// { dg-do compile }
+
+template <typename T> class A {
+ template <void (A::*p)()> class C; // #1
+ template <void (A::*q)()> friend class C; // #2
+};
+
+A<double> a;


@@ -0,0 +1,40 @@
2019-04-19 Jakub Jelinek <jakub@redhat.com>
PR middle-end/90139
* tree-outof-ssa.c (get_temp_reg): If reg_mode is BLKmode, return
assign_temp instead of gen_reg_rtx.
--- /dev/null
+++ gcc/testsuite/gcc.c-torture/compile/pr90139.c
@@ -0,0 +1,20 @@
+/* PR middle-end/90139 */
+
+typedef float __attribute__((vector_size (sizeof (float)))) V;
+void bar (int, V *);
+int l;
+
+void
+foo (void)
+{
+ V n, b, o;
+ while (1)
+ switch (l)
+ {
+ case 0:
+ o = n;
+ n = b;
+ b = o;
+ bar (1, &o);
+ }
+}
--- gcc/tree-outof-ssa.c
+++ gcc/tree-outof-ssa.c
@@ -653,6 +653,8 @@ get_temp_reg (tree name)
tree type = TREE_TYPE (name);
int unsignedp;
machine_mode reg_mode = promote_ssa_mode (name, &unsignedp);
+ if (reg_mode == BLKmode)
+ return assign_temp (type, 0, 0);
rtx x = gen_reg_rtx (reg_mode);
if (POINTER_TYPE_P (type))
mark_reg_pointer (x, TYPE_ALIGN (TREE_TYPE (type)));


@@ -0,0 +1,55 @@
2019-07-04 Jakub Jelinek <jakub@redhat.com>
PR rtl-optimization/90756
* explow.c (promote_ssa_mode): Always use TYPE_MODE, don't bypass it
for VECTOR_TYPE_P.
--- gcc/explow.c
+++ gcc/explow.c
@@ -892,16 +892,7 @@ promote_ssa_mode (const_tree name, int *punsignedp)
tree type = TREE_TYPE (name);
int unsignedp = TYPE_UNSIGNED (type);
- machine_mode mode = TYPE_MODE (type);
-
- /* Bypass TYPE_MODE when it maps vector modes to BLKmode. */
- if (mode == BLKmode)
- {
- gcc_assert (VECTOR_TYPE_P (type));
- mode = type->type_common.mode;
- }
-
- machine_mode pmode = promote_mode (type, mode, &unsignedp);
+ machine_mode pmode = promote_mode (type, TYPE_MODE (type), &unsignedp);
if (punsignedp)
*punsignedp = unsignedp;
--- /dev/null
+++ gcc/testsuite/gcc.dg/pr90756.c
@@ -0,0 +1,26 @@
+/* PR rtl-optimization/90756 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -Wno-psabi" } */
+/* { dg-additional-options "-mno-sse" { target ia32 } } */
+
+typedef float B __attribute__((vector_size(4 * sizeof (float))));
+typedef unsigned long long C __attribute__((vector_size(4 * sizeof (long long))));
+typedef short D __attribute__((vector_size(4 * sizeof (short))));
+B z;
+void foo (C);
+C bar (D);
+B baz ();
+D qux (B);
+
+void
+quux (int x)
+{
+ B n = z, b = z;
+ while (1)
+ switch (x)
+ {
+ case 0: n = baz (); /* FALLTHRU */
+ case 1: { B o = n; n = b; b = o; } /* FALLTHRU */
+ case 2: { D u = qux (b); C v = bar (u); foo (v); }
+ }
+}


@@ -1,85 +0,0 @@
2018-08-03 David Malcolm <dmalcolm@redhat.com>
* doc/gcov.texi (-x): Remove duplicate "to".
* doc/invoke.texi (-Wnoexcept-type): Remove duplicate "calls".
(-Wif-not-aligned): Remove duplicate "is".
(-flto): Remove duplicate "the".
(MicroBlaze Options): In examples of "-mcpu=cpu-type", remove
duplicate "v5.00.b".
(MSP430 Options): Remove duplicate "and" from the description
of "-mgprel-sec=regexp".
(x86 Options): Remove duplicate copies of "vmldLog102" and
vmlsLog104 from description of "-mveclibabi=type".
--- gcc/doc/gcov.texi
+++ gcc/doc/gcov.texi
@@ -340,7 +340,7 @@ Print verbose informations related to basic blocks and arcs.
@item -x
@itemx --hash-filenames
-By default, gcov uses the full pathname of the source files to to create
+By default, gcov uses the full pathname of the source files to create
an output filename. This can lead to long filenames that can overflow
filesystem limits. This option creates names of the form
@file{@var{source-file}##@var{md5}.gcov},
--- gcc/doc/invoke.texi
+++ gcc/doc/invoke.texi
@@ -3056,7 +3056,7 @@ void h() @{ f(g); @}
@end smallexample
@noindent
-In C++14, @code{f} calls calls @code{f<void(*)()>}, but in
+In C++14, @code{f} calls @code{f<void(*)()>}, but in
C++17 it calls @code{f<void(*)()noexcept>}.
@item -Wclass-memaccess @r{(C++ and Objective-C++ only)}
@@ -4587,7 +4587,7 @@ The @option{-Wimplicit-fallthrough=3} warning is enabled by @option{-Wextra}.
@opindex Wif-not-aligned
@opindex Wno-if-not-aligned
Control if warning triggered by the @code{warn_if_not_aligned} attribute
-should be issued. This is is enabled by default.
+should be issued. This is enabled by default.
Use @option{-Wno-if-not-aligned} to disable it.
@item -Wignored-qualifiers @r{(C and C++ only)}
@@ -9613,7 +9613,7 @@ for LTO, use @command{gcc-ar} and @command{gcc-ranlib} instead of @command{ar}
and @command{ranlib};
to show the symbols of object files with GIMPLE bytecode, use
@command{gcc-nm}. Those commands require that @command{ar}, @command{ranlib}
-and @command{nm} have been compiled with plugin support. At link time, use the the
+and @command{nm} have been compiled with plugin support. At link time, use the
flag @option{-fuse-linker-plugin} to ensure that the library participates in
the LTO optimization process:
@@ -20159,7 +20159,7 @@ Use features of, and schedule code for, the given CPU.
Supported values are in the format @samp{v@var{X}.@var{YY}.@var{Z}},
where @var{X} is a major version, @var{YY} is the minor version, and
@var{Z} is compatibility code. Example values are @samp{v3.00.a},
-@samp{v4.00.b}, @samp{v5.00.a}, @samp{v5.00.b}, @samp{v5.00.b}, @samp{v6.00.a}.
+@samp{v4.00.b}, @samp{v5.00.a}, @samp{v5.00.b}, @samp{v6.00.a}.
@item -mxl-soft-mul
@opindex mxl-soft-mul
@@ -21839,7 +21839,7 @@ GP-relative addressing. It is most useful in conjunction with
The @var{regexp} is a POSIX Extended Regular Expression.
This option does not affect the behavior of the @option{-G} option, and
-and the specified sections are in addition to the standard @code{.sdata}
+the specified sections are in addition to the standard @code{.sdata}
and @code{.sbss} small-data sections that are recognized by @option{-mgpopt}.
@item -mr0rel-sec=@var{regexp}
@@ -27613,11 +27613,11 @@ To use this option, both @option{-ftree-vectorize} and
ABI-compatible library must be specified at link time.
GCC currently emits calls to @code{vmldExp2},
-@code{vmldLn2}, @code{vmldLog102}, @code{vmldLog102}, @code{vmldPow2},
+@code{vmldLn2}, @code{vmldLog102}, @code{vmldPow2},
@code{vmldTanh2}, @code{vmldTan2}, @code{vmldAtan2}, @code{vmldAtanh2},
@code{vmldCbrt2}, @code{vmldSinh2}, @code{vmldSin2}, @code{vmldAsinh2},
@code{vmldAsin2}, @code{vmldCosh2}, @code{vmldCos2}, @code{vmldAcosh2},
-@code{vmldAcos2}, @code{vmlsExp4}, @code{vmlsLn4}, @code{vmlsLog104},
+@code{vmldAcos2}, @code{vmlsExp4}, @code{vmlsLn4},
@code{vmlsLog104}, @code{vmlsPow4}, @code{vmlsTanh4}, @code{vmlsTan4},
@code{vmlsAtan4}, @code{vmlsAtanh4}, @code{vmlsCbrt4}, @code{vmlsSinh4},
@code{vmlsSin4}, @code{vmlsAsinh4}, @code{vmlsAsin4}, @code{vmlsCosh4},


@@ -1,124 +0,0 @@
commit e7c4d49ab27338e6bc8b0272c4036da58482bde0
Author: krebbel <krebbel@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Mon Nov 26 15:15:57 2018 +0000
S/390: Fix flogr RTX.
The flogr instruction uses a 64-bit register pair target operand. In
the RTX we model this as a write to a TImode register. Unfortunately
the RTXs assigned to the two parts of the target operand were
swapped. This is not a problem if the flogr instruction is emitted in
the end, since the instruction still does what the clzdi expander
expects. However, a problem arises when the RTX is used to optimize
CLZ for a constant input operand. Even then it matters only if the
expression couldn't be folded at the tree level already.
In the testcase this happened thanks to loop unrolling at the RTL
level. The iteration variable is used as an argument to the clz
builtin. Due to the loop unrolling it becomes a constant, and after
folding, the broken RTX leads to a wrong result.
gcc/ChangeLog:
2018-11-26 Andreas Krebbel <krebbel@linux.ibm.com>
Backport from mainline
2018-11-20 Andreas Krebbel <krebbel@linux.ibm.com>
* config/s390/s390.md ("clztidi2"): Swap the RTX's written to the
DImode parts of the target operand.
gcc/testsuite/ChangeLog:
2018-11-26 Andreas Krebbel <krebbel@linux.ibm.com>
Backport from mainline
2018-11-20 Andreas Krebbel <krebbel@linux.ibm.com>
* gcc.target/s390/flogr-1.c: New test.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-8-branch@266465 138bc75d-0d04-0410-961f-82ee72b054a4
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index c4d391bc9b5..53bb1985285 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -8861,17 +8861,17 @@
DONE;
})
+; CLZ result is in hard reg op0 - this is the high part of the target operand
+; The source with the left-most one bit cleared is in hard reg op0 + 1 - the low part
(define_insn "clztidi2"
[(set (match_operand:TI 0 "register_operand" "=d")
(ior:TI
- (ashift:TI
- (zero_extend:TI
- (xor:DI (match_operand:DI 1 "register_operand" "d")
- (lshiftrt (match_operand:DI 2 "const_int_operand" "")
- (subreg:SI (clz:DI (match_dup 1)) 4))))
-
- (const_int 64))
- (zero_extend:TI (clz:DI (match_dup 1)))))
+ (ashift:TI (zero_extend:TI (clz:DI (match_operand:DI 1 "register_operand" "d")))
+ (const_int 64))
+ (zero_extend:TI
+ (xor:DI (match_dup 1)
+ (lshiftrt (match_operand:DI 2 "const_int_operand" "")
+ (subreg:SI (clz:DI (match_dup 1)) 4))))))
(clobber (reg:CC CC_REGNUM))]
"UINTVAL (operands[2]) == HOST_WIDE_INT_1U << 63
&& TARGET_EXTIMM && TARGET_ZARCH"
diff --git a/gcc/testsuite/gcc.target/s390/flogr-1.c b/gcc/testsuite/gcc.target/s390/flogr-1.c
new file mode 100644
index 00000000000..a3869000d62
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/flogr-1.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -funroll-loops -march=z9-109" } */
+/* { dg-require-effective-target stdint_types } */
+
+/* Folding of the FLOGR caused a wrong value to be returned by
+ __builtin_clz because of a problem in the RTX we emit for FLOGR.
+ The problematic folding can only be triggered with constants inputs
+ introduced on RTL level. In this case it happens with loop
+ unrolling. */
+
+#include <stdint.h>
+#include <assert.h>
+
+static inline uint32_t pow2_ceil_u32(uint32_t x) {
+ if (x <= 1) {
+ return x;
+ }
+ int msb_on_index;
+ msb_on_index = (31 ^ __builtin_clz(x - 1));
+ assert(msb_on_index < 31);
+ return 1U << (msb_on_index + 1);
+}
+
+void __attribute__((noinline,noclone))
+die (int a)
+{
+ if (a)
+ __builtin_abort ();
+}
+
+void test_pow2_ceil_u32(void) {
+ unsigned i;
+
+ for (i = 0; i < 18; i++) {
+ uint32_t a_ = (pow2_ceil_u32(((uint32_t)1) << i));
+ if (!(a_ == (((uint32_t)1) << i))) {
+ die(1);
+ }
+ }
+}
+
+int
+main(void) {
+ test_pow2_ceil_u32();
+
+ return 0;
+}
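
To make the fixed RTX concrete, here is a plain-C model of what the
clztidi2 pattern computes (a sketch only; flogr_model is an invented
name, and the x == 0 case mirrors what the flogr instruction returns):
the high DImode part of the TImode result is the leading-zero count,
the low part is the source with its left-most one bit cleared. Before
the fix the two parts were swapped.

unsigned __int128
flogr_model (unsigned long long x)
{
  if (x == 0)
    return (unsigned __int128) 64 << 64;  /* clz = 64, low part = 0 */
  unsigned long long clz = __builtin_clzll (x);
  /* Clear the left-most one bit, as in the xor/lshiftrt RTX.  */
  unsigned long long low = x ^ (0x8000000000000000ULL >> clz);
  return ((unsigned __int128) clz << 64) | low;
}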


@@ -1,572 +0,0 @@
commit 87c504d3b293ebe6d36f3b50696cd307b02b0daa
Author: acsawdey <acsawdey@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Tue Jun 19 21:23:39 2018 +0000
2018-06-19 Aaron Sawdey <acsawdey@linux.ibm.com>
* config/rs6000/rs6000-string.c (select_block_compare_mode): Check
TARGET_EFFICIENT_OVERLAPPING_UNALIGNED here instead of in caller.
(do_and3, do_and3_mask, do_compb3, do_rotl3): New functions.
(expand_block_compare): Change select_block_compare_mode call.
(expand_strncmp_align_check): Use new functions, fix comment.
(emit_final_str_compare_gpr): New function.
(expand_strn_compare): Refactor and clean up code.
* config/rs6000/vsx.md (vsx_mov<mode>_64bit): Remove *.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@261769 138bc75d-0d04-0410-961f-82ee72b054a4
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index 632d3359711..f9dd54eb639 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -266,6 +266,7 @@ select_block_compare_mode (unsigned HOST_WIDE_INT offset,
else if (bytes == GET_MODE_SIZE (QImode))
return QImode;
else if (bytes < GET_MODE_SIZE (SImode)
+ && TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
&& offset >= GET_MODE_SIZE (SImode) - bytes)
/* This matches the case where we have SImode and 3 bytes
and offset >= 1 and permits us to move back one and overlap
@@ -273,6 +274,7 @@ select_block_compare_mode (unsigned HOST_WIDE_INT offset,
unwanted bytes off of the input. */
return SImode;
else if (word_mode_ok && bytes < UNITS_PER_WORD
+ && TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
&& offset >= UNITS_PER_WORD-bytes)
/* Similarly, if we can use DImode it will get matched here and
can do an overlapping read that ends at the end of the block. */
@@ -408,6 +410,54 @@ do_add3 (rtx dest, rtx src1, rtx src2)
emit_insn (gen_addsi3 (dest, src1, src2));
}
+/* Emit an and of the proper mode for DEST.
+
+ DEST is the destination register for the and.
+ SRC1 is the first and input.
+ SRC2 is the second and input.
+
+ Computes DEST = SRC1&SRC2. */
+static void
+do_and3 (rtx dest, rtx src1, rtx src2)
+{
+ if (GET_MODE (dest) == DImode)
+ emit_insn (gen_anddi3 (dest, src1, src2));
+ else
+ emit_insn (gen_andsi3 (dest, src1, src2));
+}
+
+/* Emit a cmpb of the proper mode for DEST.
+
+ DEST is the destination register for the cmpb.
+ SRC1 is the first input.
+ SRC2 is the second input.
+
+ Computes cmpb of SRC1, SRC2. */
+static void
+do_cmpb3 (rtx dest, rtx src1, rtx src2)
+{
+ if (GET_MODE (dest) == DImode)
+ emit_insn (gen_cmpbdi3 (dest, src1, src2));
+ else
+ emit_insn (gen_cmpbsi3 (dest, src1, src2));
+}
+
+/* Emit a rotl of the proper mode for DEST.
+
+ DEST is the destination register for the rotate.
+ SRC1 is the rotate input.
+ SRC2 is the rotate amount.
+
+ Computes DEST = SRC1 rotated left by SRC2. */
+static void
+do_rotl3 (rtx dest, rtx src1, rtx src2)
+{
+ if (GET_MODE (dest) == DImode)
+ emit_insn (gen_rotldi3 (dest, src1, src2));
+ else
+ emit_insn (gen_rotlsi3 (dest, src1, src2));
+}
+
/* Generate rtl for a load, shift, and compare of less than a full word.
LOAD_MODE is the machine mode for the loads.
@@ -1395,11 +1445,8 @@ expand_block_compare (rtx operands[])
while (bytes > 0)
{
unsigned int align = compute_current_alignment (base_align, offset);
- if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
- load_mode = select_block_compare_mode (offset, bytes, align,
- word_mode_ok);
- else
- load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok);
+ load_mode = select_block_compare_mode (offset, bytes,
+ align, word_mode_ok);
load_mode_size = GET_MODE_SIZE (load_mode);
if (bytes >= load_mode_size)
cmp_bytes = load_mode_size;
@@ -1627,22 +1674,19 @@ expand_block_compare (rtx operands[])
return true;
}
-/* Generate alignment check and branch code to set up for
+/* Generate page crossing check and branch code to set up for
strncmp when we don't have DI alignment.
STRNCMP_LABEL is the label to branch if there is a page crossing.
- SRC is the string pointer to be examined.
+ SRC_ADDR is the string address to be examined.
BYTES is the max number of bytes to compare. */
static void
-expand_strncmp_align_check (rtx strncmp_label, rtx src, HOST_WIDE_INT bytes)
+expand_strncmp_align_check (rtx strncmp_label, rtx src_addr, HOST_WIDE_INT bytes)
{
rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, strncmp_label);
- rtx src_check = copy_addr_to_reg (XEXP (src, 0));
- if (GET_MODE (src_check) == SImode)
- emit_insn (gen_andsi3 (src_check, src_check, GEN_INT (0xfff)));
- else
- emit_insn (gen_anddi3 (src_check, src_check, GEN_INT (0xfff)));
+ rtx src_pgoff = gen_reg_rtx (GET_MODE (src_addr));
+ do_and3 (src_pgoff, src_addr, GEN_INT (0xfff));
rtx cond = gen_reg_rtx (CCmode);
- emit_move_insn (cond, gen_rtx_COMPARE (CCmode, src_check,
+ emit_move_insn (cond, gen_rtx_COMPARE (CCmode, src_pgoff,
GEN_INT (4096 - bytes)));
rtx cmp_rtx = gen_rtx_GE (VOIDmode, cond, const0_rtx);
@@ -1654,6 +1698,76 @@ expand_strncmp_align_check (rtx strncmp_label, rtx src, HOST_WIDE_INT bytes)
LABEL_NUSES (strncmp_label) += 1;
}
+/* Generate the final sequence that identifies the differing
+ byte and generates the final result, taking into account
+ zero bytes:
+
+ cmpb cmpb_result1, src1, src2
+ cmpb cmpb_result2, src1, zero
+ orc cmpb_result1, cmp_result1, cmpb_result2
+ cntlzd get bit of first zero/diff byte
+ addi convert for rldcl use
+ rldcl rldcl extract diff/zero byte
+ subf subtract for final result
+
+ STR1 is the reg rtx for data from string 1.
+ STR2 is the reg rtx for data from string 2.
+ RESULT is the reg rtx for the comparison result. */
+
+static void
+emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
+{
+ machine_mode m = GET_MODE (str1);
+ rtx cmpb_diff = gen_reg_rtx (m);
+ rtx cmpb_zero = gen_reg_rtx (m);
+ rtx rot_amt = gen_reg_rtx (m);
+ rtx zero_reg = gen_reg_rtx (m);
+
+ rtx rot1_1 = gen_reg_rtx (m);
+ rtx rot1_2 = gen_reg_rtx (m);
+ rtx rot2_1 = gen_reg_rtx (m);
+ rtx rot2_2 = gen_reg_rtx (m);
+
+ if (m == SImode)
+ {
+ emit_insn (gen_cmpbsi3 (cmpb_diff, str1, str2));
+ emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
+ emit_insn (gen_cmpbsi3 (cmpb_zero, str1, zero_reg));
+ emit_insn (gen_one_cmplsi2 (cmpb_diff,cmpb_diff));
+ emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
+ emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
+ emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
+ emit_insn (gen_rotlsi3 (rot1_1, str1,
+ gen_lowpart (SImode, rot_amt)));
+ emit_insn (gen_andsi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
+ emit_insn (gen_rotlsi3 (rot2_1, str2,
+ gen_lowpart (SImode, rot_amt)));
+ emit_insn (gen_andsi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
+ emit_insn (gen_subsi3 (result, rot1_2, rot2_2));
+ }
+ else if (m == DImode)
+ {
+ emit_insn (gen_cmpbdi3 (cmpb_diff, str1, str2));
+ emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
+ emit_insn (gen_cmpbdi3 (cmpb_zero, str1, zero_reg));
+ emit_insn (gen_one_cmpldi2 (cmpb_diff,cmpb_diff));
+ emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
+ emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
+ emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
+ emit_insn (gen_rotldi3 (rot1_1, str1,
+ gen_lowpart (SImode, rot_amt)));
+ emit_insn (gen_anddi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
+ emit_insn (gen_rotldi3 (rot2_1, str2,
+ gen_lowpart (SImode, rot_amt)));
+ emit_insn (gen_anddi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
+ emit_insn (gen_subdi3 (result, rot1_2, rot2_2));
+ }
+ else
+ gcc_unreachable ();
+
+ return;
+}
+
/* Expand a string compare operation with length, and return
true if successful. Return false if we should let the
compiler generate normal code, probably a strncmp call.
@@ -1684,8 +1798,8 @@ expand_strn_compare (rtx operands[], int no_length)
align_rtx = operands[4];
}
unsigned HOST_WIDE_INT cmp_bytes = 0;
- rtx src1 = orig_src1;
- rtx src2 = orig_src2;
+ rtx src1_addr = force_reg (Pmode, XEXP (orig_src1, 0));
+ rtx src2_addr = force_reg (Pmode, XEXP (orig_src2, 0));
/* If we have a length, it must be constant. This simplifies things
a bit as we don't have to generate code to check if we've exceeded
@@ -1698,8 +1812,8 @@ expand_strn_compare (rtx operands[], int no_length)
return false;
unsigned int base_align = UINTVAL (align_rtx);
- int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT;
- int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT;
+ unsigned int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT;
+ unsigned int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT;
/* targetm.slow_unaligned_access -- don't do unaligned stuff. */
if (targetm.slow_unaligned_access (word_mode, align1)
@@ -1751,8 +1865,9 @@ expand_strn_compare (rtx operands[], int no_length)
rtx final_move_label = gen_label_rtx ();
rtx final_label = gen_label_rtx ();
rtx begin_compare_label = NULL;
+ unsigned int required_align = 8;
- if (base_align < 8)
+ if (base_align < required_align)
{
/* Generate code that checks distance to 4k boundary for this case. */
begin_compare_label = gen_label_rtx ();
@@ -1775,14 +1890,14 @@ expand_strn_compare (rtx operands[], int no_length)
}
else
{
- align_test = ROUND_UP (align_test, 8);
- base_align = 8;
+ align_test = ROUND_UP (align_test, required_align);
+ base_align = required_align;
}
- if (align1 < 8)
- expand_strncmp_align_check (strncmp_label, src1, align_test);
- if (align2 < 8)
- expand_strncmp_align_check (strncmp_label, src2, align_test);
+ if (align1 < required_align)
+ expand_strncmp_align_check (strncmp_label, src1_addr, align_test);
+ if (align2 < required_align)
+ expand_strncmp_align_check (strncmp_label, src2_addr, align_test);
/* Now generate the following sequence:
- branch to begin_compare
@@ -1799,25 +1914,13 @@ expand_strn_compare (rtx operands[], int no_length)
emit_label (strncmp_label);
- if (!REG_P (XEXP (src1, 0)))
- {
- rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
- src1 = replace_equiv_address (src1, src1_reg);
- }
-
- if (!REG_P (XEXP (src2, 0)))
- {
- rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
- src2 = replace_equiv_address (src2, src2_reg);
- }
-
if (no_length)
{
tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
emit_library_call_value (XEXP (DECL_RTL (fun), 0),
target, LCT_NORMAL, GET_MODE (target),
- force_reg (Pmode, XEXP (src1, 0)), Pmode,
- force_reg (Pmode, XEXP (src2, 0)), Pmode);
+ force_reg (Pmode, src1_addr), Pmode,
+ force_reg (Pmode, src2_addr), Pmode);
}
else
{
@@ -1830,8 +1933,8 @@ expand_strn_compare (rtx operands[], int no_length)
tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
emit_library_call_value (XEXP (DECL_RTL (fun), 0),
target, LCT_NORMAL, GET_MODE (target),
- force_reg (Pmode, XEXP (src1, 0)), Pmode,
- force_reg (Pmode, XEXP (src2, 0)), Pmode,
+ force_reg (Pmode, src1_addr), Pmode,
+ force_reg (Pmode, src2_addr), Pmode,
len_rtx, Pmode);
}
@@ -1847,12 +1950,12 @@ expand_strn_compare (rtx operands[], int no_length)
rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
- /* Generate sequence of ld/ldbrx, cmpb to compare out
+ /* Generate a sequence of GPR or VEC/VSX instructions to compare out
to the length specified. */
unsigned HOST_WIDE_INT bytes_to_compare = compare_length;
while (bytes_to_compare > 0)
{
- /* Compare sequence:
+ /* GPR compare sequence:
check each 8B with: ld/ld cmpd bne
If equal, use rldicr/cmpb to check for zero byte.
cleanup code at end:
@@ -1866,13 +1969,10 @@ expand_strn_compare (rtx operands[], int no_length)
The last compare can branch around the cleanup code if the
result is zero because the strings are exactly equal. */
+
unsigned int align = compute_current_alignment (base_align, offset);
- if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
- load_mode = select_block_compare_mode (offset, bytes_to_compare, align,
- word_mode_ok);
- else
- load_mode = select_block_compare_mode (0, bytes_to_compare, align,
- word_mode_ok);
+ load_mode = select_block_compare_mode (offset, bytes_to_compare,
+ align, word_mode_ok);
load_mode_size = GET_MODE_SIZE (load_mode);
if (bytes_to_compare >= load_mode_size)
cmp_bytes = load_mode_size;
@@ -1895,25 +1995,10 @@ expand_strn_compare (rtx operands[], int no_length)
rid of the extra bytes. */
cmp_bytes = bytes_to_compare;
- src1 = adjust_address (orig_src1, load_mode, offset);
- src2 = adjust_address (orig_src2, load_mode, offset);
-
- if (!REG_P (XEXP (src1, 0)))
- {
- rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
- src1 = replace_equiv_address (src1, src1_reg);
- }
- set_mem_size (src1, load_mode_size);
-
- if (!REG_P (XEXP (src2, 0)))
- {
- rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
- src2 = replace_equiv_address (src2, src2_reg);
- }
- set_mem_size (src2, load_mode_size);
-
- do_load_for_compare (tmp_reg_src1, src1, load_mode);
- do_load_for_compare (tmp_reg_src2, src2, load_mode);
+ rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset));
+ do_load_for_compare_from_addr (load_mode, tmp_reg_src1, addr1, orig_src1);
+ rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset));
+ do_load_for_compare_from_addr (load_mode, tmp_reg_src2, addr2, orig_src2);
/* We must always left-align the data we read, and
clear any bytes to the right that are beyond the string.
@@ -1926,16 +2011,8 @@ expand_strn_compare (rtx operands[], int no_length)
{
/* Rotate left first. */
rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
- if (word_mode == DImode)
- {
- emit_insn (gen_rotldi3 (tmp_reg_src1, tmp_reg_src1, sh));
- emit_insn (gen_rotldi3 (tmp_reg_src2, tmp_reg_src2, sh));
- }
- else
- {
- emit_insn (gen_rotlsi3 (tmp_reg_src1, tmp_reg_src1, sh));
- emit_insn (gen_rotlsi3 (tmp_reg_src2, tmp_reg_src2, sh));
- }
+ do_rotl3 (tmp_reg_src1, tmp_reg_src1, sh);
+ do_rotl3 (tmp_reg_src2, tmp_reg_src2, sh);
}
if (cmp_bytes < word_mode_size)
@@ -1944,16 +2021,8 @@ expand_strn_compare (rtx operands[], int no_length)
turned into a rldicr instruction. */
HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
- if (word_mode == DImode)
- {
- emit_insn (gen_anddi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
- emit_insn (gen_anddi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
- }
- else
- {
- emit_insn (gen_andsi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
- emit_insn (gen_andsi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
- }
+ do_and3 (tmp_reg_src1, tmp_reg_src1, mask);
+ do_and3 (tmp_reg_src2, tmp_reg_src2, mask);
}
/* Cases to handle. A and B are chunks of the two strings.
@@ -2010,31 +2079,16 @@ expand_strn_compare (rtx operands[], int no_length)
rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
rtx condz = gen_reg_rtx (CCmode);
rtx zero_reg = gen_reg_rtx (word_mode);
- if (word_mode == SImode)
- {
- emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
- emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
- if (cmp_bytes < word_mode_size)
- {
- /* Don't want to look at zero bytes past end. */
- HOST_WIDE_INT mb =
- BITS_PER_UNIT * (word_mode_size - cmp_bytes);
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
- emit_insn (gen_andsi3_mask (cmpb_zero, cmpb_zero, mask));
- }
- }
- else
+ emit_move_insn (zero_reg, GEN_INT (0));
+ do_cmpb3 (cmpb_zero, tmp_reg_src1, zero_reg);
+
+ if (cmp_bytes < word_mode_size)
{
- emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
- emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
- if (cmp_bytes < word_mode_size)
- {
- /* Don't want to look at zero bytes past end. */
- HOST_WIDE_INT mb =
- BITS_PER_UNIT * (word_mode_size - cmp_bytes);
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
- emit_insn (gen_anddi3_mask (cmpb_zero, cmpb_zero, mask));
- }
+ /* Don't want to look at zero bytes past end. */
+ HOST_WIDE_INT mb =
+ BITS_PER_UNIT * (word_mode_size - cmp_bytes);
+ rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
+ do_and3 (cmpb_zero, cmpb_zero, mask);
}
emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
@@ -2054,22 +2108,10 @@ expand_strn_compare (rtx operands[], int no_length)
if (equality_compare_rest)
{
/* Update pointers past what has been compared already. */
- src1 = adjust_address (orig_src1, load_mode, offset);
- src2 = adjust_address (orig_src2, load_mode, offset);
-
- if (!REG_P (XEXP (src1, 0)))
- {
- rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
- src1 = replace_equiv_address (src1, src1_reg);
- }
- set_mem_size (src1, load_mode_size);
-
- if (!REG_P (XEXP (src2, 0)))
- {
- rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
- src2 = replace_equiv_address (src2, src2_reg);
- }
- set_mem_size (src2, load_mode_size);
+ rtx src1 = force_reg (Pmode,
+ gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset)));
+ rtx src2 = force_reg (Pmode,
+ gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset)));
/* Construct call to strcmp/strncmp to compare the rest of the string. */
if (no_length)
@@ -2077,8 +2119,7 @@ expand_strn_compare (rtx operands[], int no_length)
tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
emit_library_call_value (XEXP (DECL_RTL (fun), 0),
target, LCT_NORMAL, GET_MODE (target),
- force_reg (Pmode, XEXP (src1, 0)), Pmode,
- force_reg (Pmode, XEXP (src2, 0)), Pmode);
+ src1, Pmode, src2, Pmode);
}
else
{
@@ -2087,9 +2128,7 @@ expand_strn_compare (rtx operands[], int no_length)
tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
emit_library_call_value (XEXP (DECL_RTL (fun), 0),
target, LCT_NORMAL, GET_MODE (target),
- force_reg (Pmode, XEXP (src1, 0)), Pmode,
- force_reg (Pmode, XEXP (src2, 0)), Pmode,
- len_rtx, Pmode);
+ src1, Pmode, src2, Pmode, len_rtx, Pmode);
}
rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
@@ -2102,63 +2141,7 @@ expand_strn_compare (rtx operands[], int no_length)
if (cleanup_label)
emit_label (cleanup_label);
- /* Generate the final sequence that identifies the differing
- byte and generates the final result, taking into account
- zero bytes:
-
- cmpb cmpb_result1, src1, src2
- cmpb cmpb_result2, src1, zero
- orc cmpb_result1, cmp_result1, cmpb_result2
- cntlzd get bit of first zero/diff byte
- addi convert for rldcl use
- rldcl rldcl extract diff/zero byte
- subf subtract for final result
- */
-
- rtx cmpb_diff = gen_reg_rtx (word_mode);
- rtx cmpb_zero = gen_reg_rtx (word_mode);
- rtx rot_amt = gen_reg_rtx (word_mode);
- rtx zero_reg = gen_reg_rtx (word_mode);
-
- rtx rot1_1 = gen_reg_rtx (word_mode);
- rtx rot1_2 = gen_reg_rtx (word_mode);
- rtx rot2_1 = gen_reg_rtx (word_mode);
- rtx rot2_2 = gen_reg_rtx (word_mode);
-
- if (word_mode == SImode)
- {
- emit_insn (gen_cmpbsi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
- emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
- emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
- emit_insn (gen_one_cmplsi2 (cmpb_diff,cmpb_diff));
- emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
- emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
- emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
- emit_insn (gen_rotlsi3 (rot1_1, tmp_reg_src1,
- gen_lowpart (SImode, rot_amt)));
- emit_insn (gen_andsi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
- emit_insn (gen_rotlsi3 (rot2_1, tmp_reg_src2,
- gen_lowpart (SImode, rot_amt)));
- emit_insn (gen_andsi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
- emit_insn (gen_subsi3 (result_reg, rot1_2, rot2_2));
- }
- else
- {
- emit_insn (gen_cmpbdi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
- emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
- emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
- emit_insn (gen_one_cmpldi2 (cmpb_diff,cmpb_diff));
- emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
- emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
- emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
- emit_insn (gen_rotldi3 (rot1_1, tmp_reg_src1,
- gen_lowpart (SImode, rot_amt)));
- emit_insn (gen_anddi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
- emit_insn (gen_rotldi3 (rot2_1, tmp_reg_src2,
- gen_lowpart (SImode, rot_amt)));
- emit_insn (gen_anddi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
- emit_insn (gen_subdi3 (result_reg, rot1_2, rot2_2));
- }
+ emit_final_str_compare_gpr (tmp_reg_src1, tmp_reg_src2, result_reg);
emit_label (final_move_label);
emit_insn (gen_movsi (target,
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0fc77aa18b0..e6921e96a3d 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1210,7 +1210,7 @@
;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
;; VSX 0/-1 GPR 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
-(define_insn "*vsx_mov<mode>_64bit"
+(define_insn "vsx_mov<mode>_64bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, <VSa>, <VSa>, r, we, ?wQ,
?&r, ??r, ??Y, <??r>, wo, v,
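
The cleanup sequence documented in emit_final_str_compare_gpr above
(cmpb/orc/cntlzd/addi/rldcl/subf) is easier to follow as plain C. A
sketch, assuming 64-bit chunks in big-endian byte order (which the
expander arranges, using ldbrx on little-endian targets); cmpb64,
rotl64 and final_str_compare are invented names:

#include <stdint.h>

/* Model of cmpb: each result byte is 0xff where the corresponding
   bytes of A and B are equal, 0x00 where they differ.  */
static uint64_t
cmpb64 (uint64_t a, uint64_t b)
{
  uint64_t r = 0;
  for (int i = 0; i < 8; i++)
    {
      uint64_t m = 0xffULL << (8 * i);
      if ((a & m) == (b & m))
        r |= m;
    }
  return r;
}

static uint64_t
rotl64 (uint64_t x, unsigned r)
{
  return (x << (r & 63)) | (x >> (-r & 63));
}

static int
final_str_compare (uint64_t s1, uint64_t s2)
{
  /* 0xff marks bytes that differ or are zero in s1 (cmpb/orc).  */
  uint64_t interesting = ~cmpb64 (s1, s2) | cmpb64 (s1, 0);
  /* The caller reaches this only when a difference or NUL exists,
     so interesting is nonzero (cntlzd/addi).  */
  unsigned rot = __builtin_clzll (interesting) + 8;
  /* Rotate the byte of interest into the low 8 bits (rldcl).  */
  uint64_t b1 = rotl64 (s1, rot) & 0xff;
  uint64_t b2 = rotl64 (s2, rot) & 0xff;
  return (int) b1 - (int) b2;  /* subf */
}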


@@ -1,472 +0,0 @@
commit c7a833caa029b84ad579c3fabe006a80f718d7e1
Author: acsawdey <acsawdey@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Thu Aug 2 18:11:54 2018 +0000
2018-07-31 Aaron Sawdey <acsawdey@linux.ibm.com>
* config/rs6000/rs6000-string.c (select_block_compare_mode): Move test
for word_mode_ok here instead of passing as argument.
(expand_block_compare): Change select_block_compare_mode() call.
(expand_strncmp_gpr_sequence): New function.
(expand_strn_compare): Make use of expand_strncmp_gpr_sequence.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@263273 138bc75d-0d04-0410-961f-82ee72b054a4
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index f9dd54eb639..451e9ed33da 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -238,13 +238,11 @@ do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
OFFSET is the current read offset from the beginning of the block.
BYTES is the number of bytes remaining to be read.
- ALIGN is the minimum alignment of the memory blocks being compared in bytes.
- WORD_MODE_OK indicates using WORD_MODE is allowed, else SImode is
- the largest allowable mode. */
+ ALIGN is the minimum alignment of the memory blocks being compared in bytes. */
static machine_mode
select_block_compare_mode (unsigned HOST_WIDE_INT offset,
unsigned HOST_WIDE_INT bytes,
- unsigned HOST_WIDE_INT align, bool word_mode_ok)
+ unsigned HOST_WIDE_INT align)
{
/* First see if we can do a whole load unit
as that will be more efficient than a larger load + shift. */
@@ -257,6 +255,11 @@ select_block_compare_mode (unsigned HOST_WIDE_INT offset,
/* The most we can read without potential page crossing. */
unsigned HOST_WIDE_INT maxread = ROUND_UP (bytes, align);
+ /* If we have an LE target without ldbrx and word_mode is DImode,
+ then we must avoid using word_mode. */
+ int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
+ && word_mode == DImode);
+
if (word_mode_ok && bytes >= UNITS_PER_WORD)
return word_mode;
else if (bytes == GET_MODE_SIZE (SImode))
@@ -1382,16 +1385,11 @@ expand_block_compare (rtx operands[])
else
cond = gen_reg_rtx (CCmode);
- /* If we have an LE target without ldbrx and word_mode is DImode,
- then we must avoid using word_mode. */
- int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
- && word_mode == DImode);
-
/* Strategy phase. How many ops will this take and should we expand it? */
unsigned HOST_WIDE_INT offset = 0;
machine_mode load_mode =
- select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
+ select_block_compare_mode (offset, bytes, base_align);
unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
/* We don't want to generate too much code. The loop code can take
@@ -1445,8 +1443,7 @@ expand_block_compare (rtx operands[])
while (bytes > 0)
{
unsigned int align = compute_current_alignment (base_align, offset);
- load_mode = select_block_compare_mode (offset, bytes,
- align, word_mode_ok);
+ load_mode = select_block_compare_mode (offset, bytes, align);
load_mode_size = GET_MODE_SIZE (load_mode);
if (bytes >= load_mode_size)
cmp_bytes = load_mode_size;
@@ -1698,6 +1695,189 @@ expand_strncmp_align_check (rtx strncmp_label, rtx src_addr, HOST_WIDE_INT bytes
LABEL_NUSES (strncmp_label) += 1;
}
+/* Generate the sequence of compares for strcmp/strncmp using gpr instructions.
+ BYTES_TO_COMPARE is the number of bytes to be compared.
+ BASE_ALIGN is the smaller of the alignment of the two strings.
+ ORIG_SRC1 is the unmodified rtx for the first string.
+ ORIG_SRC2 is the unmodified rtx for the second string.
+ TMP_REG_SRC1 is the register for loading the first string.
+ TMP_REG_SRC2 is the register for loading the second string.
+ RESULT_REG is the rtx for the result register.
+ EQUALITY_COMPARE_REST is a flag to indicate we need to make a cleanup call
+ to strcmp/strncmp if we have equality at the end of the inline comparison.
+ CLEANUP_LABEL is rtx for a label we generate if we need code to clean up
+ and generate the final comparison result.
+ FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
+ set the final result. */
+static void
+expand_strncmp_gpr_sequence(unsigned HOST_WIDE_INT bytes_to_compare,
+ unsigned int base_align,
+ rtx orig_src1, rtx orig_src2,
+ rtx tmp_reg_src1, rtx tmp_reg_src2, rtx result_reg,
+ bool equality_compare_rest, rtx &cleanup_label,
+ rtx final_move_label)
+{
+ unsigned int word_mode_size = GET_MODE_SIZE (word_mode);
+ machine_mode load_mode;
+ unsigned int load_mode_size;
+ unsigned HOST_WIDE_INT cmp_bytes = 0;
+ unsigned HOST_WIDE_INT offset = 0;
+ rtx src1_addr = force_reg (Pmode, XEXP (orig_src1, 0));
+ rtx src2_addr = force_reg (Pmode, XEXP (orig_src2, 0));
+
+ while (bytes_to_compare > 0)
+ {
+ /* GPR compare sequence:
+ check each 8B with: ld/ld cmpd bne
+ If equal, use rldicr/cmpb to check for zero byte.
+ cleanup code at end:
+ cmpb get byte that differs
+ cmpb look for zero byte
+ orc combine
+ cntlzd get bit of first zero/diff byte
+ subfic convert for rldcl use
+ rldcl rldcl extract diff/zero byte
+ subf subtract for final result
+
+ The last compare can branch around the cleanup code if the
+ result is zero because the strings are exactly equal. */
+
+ unsigned int align = compute_current_alignment (base_align, offset);
+ load_mode = select_block_compare_mode (offset, bytes_to_compare, align);
+ load_mode_size = GET_MODE_SIZE (load_mode);
+ if (bytes_to_compare >= load_mode_size)
+ cmp_bytes = load_mode_size;
+ else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
+ {
+ /* Move this load back so it doesn't go past the end.
+ P8/P9 can do this efficiently. */
+ unsigned int extra_bytes = load_mode_size - bytes_to_compare;
+ cmp_bytes = bytes_to_compare;
+ if (extra_bytes < offset)
+ {
+ offset -= extra_bytes;
+ cmp_bytes = load_mode_size;
+ bytes_to_compare = cmp_bytes;
+ }
+ }
+ else
+ /* P7 and earlier can't do the overlapping load trick fast,
+ so this forces a non-overlapping load and a shift to get
+ rid of the extra bytes. */
+ cmp_bytes = bytes_to_compare;
+
+ rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset));
+ do_load_for_compare_from_addr (load_mode, tmp_reg_src1, addr1, orig_src1);
+ rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset));
+ do_load_for_compare_from_addr (load_mode, tmp_reg_src2, addr2, orig_src2);
+
+ /* We must always left-align the data we read, and
+ clear any bytes to the right that are beyond the string.
+ Otherwise the cmpb sequence won't produce the correct
+ results. The beginning of the compare will be done
+ with word_mode so will not have any extra shifts or
+ clear rights. */
+
+ if (load_mode_size < word_mode_size)
+ {
+ /* Rotate left first. */
+ rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
+ do_rotl3 (tmp_reg_src1, tmp_reg_src1, sh);
+ do_rotl3 (tmp_reg_src2, tmp_reg_src2, sh);
+ }
+
+ if (cmp_bytes < word_mode_size)
+ {
+ /* Now clear right. This plus the rotate can be
+ turned into a rldicr instruction. */
+ HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
+ rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
+ do_and3 (tmp_reg_src1, tmp_reg_src1, mask);
+ do_and3 (tmp_reg_src2, tmp_reg_src2, mask);
+ }
+
+ /* Cases to handle. A and B are chunks of the two strings.
+ 1: Not end of comparison:
+ A != B: branch to cleanup code to compute result.
+ A == B: check for 0 byte, next block if not found.
+ 2: End of the inline comparison:
+ A != B: branch to cleanup code to compute result.
+ A == B: check for 0 byte, call strcmp/strncmp
+ 3: compared requested N bytes:
+ A == B: branch to result 0.
+ A != B: cleanup code to compute result. */
+
+ unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
+
+ rtx dst_label;
+ if (remain > 0 || equality_compare_rest)
+ {
+ /* Branch to cleanup code, otherwise fall through to do
+ more compares. */
+ if (!cleanup_label)
+ cleanup_label = gen_label_rtx ();
+ dst_label = cleanup_label;
+ }
+ else
+ /* Branch to end and produce result of 0. */
+ dst_label = final_move_label;
+
+ rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
+ rtx cond = gen_reg_rtx (CCmode);
+
+ /* Always produce the 0 result, it is needed if
+ cmpb finds a 0 byte in this chunk. */
+ rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
+ rs6000_emit_dot_insn (result_reg, tmp, 1, cond);
+
+ rtx cmp_rtx;
+ if (remain == 0 && !equality_compare_rest)
+ cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
+ else
+ cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+
+ rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
+ lab_ref, pc_rtx);
+ rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+ JUMP_LABEL (j) = dst_label;
+ LABEL_NUSES (dst_label) += 1;
+
+ if (remain > 0 || equality_compare_rest)
+ {
+ /* Generate a cmpb to test for a 0 byte and branch
+ to final result if found. */
+ rtx cmpb_zero = gen_reg_rtx (word_mode);
+ rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
+ rtx condz = gen_reg_rtx (CCmode);
+ rtx zero_reg = gen_reg_rtx (word_mode);
+ emit_move_insn (zero_reg, GEN_INT (0));
+ do_cmpb3 (cmpb_zero, tmp_reg_src1, zero_reg);
+
+ if (cmp_bytes < word_mode_size)
+ {
+ /* Don't want to look at zero bytes past end. */
+ HOST_WIDE_INT mb =
+ BITS_PER_UNIT * (word_mode_size - cmp_bytes);
+ rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
+ do_and3 (cmpb_zero, cmpb_zero, mask);
+ }
+
+ emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
+ rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx);
+ rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx,
+ lab_ref_fin, pc_rtx);
+ rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+ JUMP_LABEL (j2) = final_move_label;
+ LABEL_NUSES (final_move_label) += 1;
+
+ }
+
+ offset += cmp_bytes;
+ bytes_to_compare -= cmp_bytes;
+ }
+
+}
+
/* Generate the final sequence that identifies the differing
byte and generates the final result, taking into account
zero bytes:
@@ -1797,7 +1977,7 @@ expand_strn_compare (rtx operands[], int no_length)
bytes_rtx = operands[3];
align_rtx = operands[4];
}
- unsigned HOST_WIDE_INT cmp_bytes = 0;
+
rtx src1_addr = force_reg (Pmode, XEXP (orig_src1, 0));
rtx src2_addr = force_reg (Pmode, XEXP (orig_src2, 0));
@@ -1822,11 +2002,6 @@ expand_strn_compare (rtx operands[], int no_length)
gcc_assert (GET_MODE (target) == SImode);
- /* If we have an LE target without ldbrx and word_mode is DImode,
- then we must avoid using word_mode. */
- int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
- && word_mode == DImode);
-
unsigned int word_mode_size = GET_MODE_SIZE (word_mode);
unsigned HOST_WIDE_INT offset = 0;
@@ -1839,7 +2014,7 @@ expand_strn_compare (rtx operands[], int no_length)
bytes = UINTVAL (bytes_rtx);
machine_mode load_mode =
- select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
+ select_block_compare_mode (0, bytes, base_align);
unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
compare_length = rs6000_string_compare_inline_limit * load_mode_size;
@@ -1867,6 +2042,8 @@ expand_strn_compare (rtx operands[], int no_length)
rtx begin_compare_label = NULL;
unsigned int required_align = 8;
+ required_align = 8;
+
if (base_align < required_align)
{
/* Generate code that checks distance to 4k boundary for this case. */
@@ -1952,159 +2129,15 @@ expand_strn_compare (rtx operands[], int no_length)
/* Generate a sequence of GPR or VEC/VSX instructions to compare out
to the length specified. */
- unsigned HOST_WIDE_INT bytes_to_compare = compare_length;
- while (bytes_to_compare > 0)
- {
- /* GPR compare sequence:
- check each 8B with: ld/ld cmpd bne
- If equal, use rldicr/cmpb to check for zero byte.
- cleanup code at end:
- cmpb get byte that differs
- cmpb look for zero byte
- orc combine
- cntlzd get bit of first zero/diff byte
- subfic convert for rldcl use
- rldcl rldcl extract diff/zero byte
- subf subtract for final result
-
- The last compare can branch around the cleanup code if the
- result is zero because the strings are exactly equal. */
-
- unsigned int align = compute_current_alignment (base_align, offset);
- load_mode = select_block_compare_mode (offset, bytes_to_compare,
- align, word_mode_ok);
- load_mode_size = GET_MODE_SIZE (load_mode);
- if (bytes_to_compare >= load_mode_size)
- cmp_bytes = load_mode_size;
- else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
- {
- /* Move this load back so it doesn't go past the end.
- P8/P9 can do this efficiently. */
- unsigned int extra_bytes = load_mode_size - bytes_to_compare;
- cmp_bytes = bytes_to_compare;
- if (extra_bytes < offset)
- {
- offset -= extra_bytes;
- cmp_bytes = load_mode_size;
- bytes_to_compare = cmp_bytes;
- }
- }
- else
- /* P7 and earlier can't do the overlapping load trick fast,
- so this forces a non-overlapping load and a shift to get
- rid of the extra bytes. */
- cmp_bytes = bytes_to_compare;
-
- rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset));
- do_load_for_compare_from_addr (load_mode, tmp_reg_src1, addr1, orig_src1);
- rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset));
- do_load_for_compare_from_addr (load_mode, tmp_reg_src2, addr2, orig_src2);
-
- /* We must always left-align the data we read, and
- clear any bytes to the right that are beyond the string.
- Otherwise the cmpb sequence won't produce the correct
- results. The beginning of the compare will be done
- with word_mode so will not have any extra shifts or
- clear rights. */
-
- if (load_mode_size < word_mode_size)
- {
- /* Rotate left first. */
- rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
- do_rotl3 (tmp_reg_src1, tmp_reg_src1, sh);
- do_rotl3 (tmp_reg_src2, tmp_reg_src2, sh);
- }
-
- if (cmp_bytes < word_mode_size)
- {
- /* Now clear right. This plus the rotate can be
- turned into a rldicr instruction. */
- HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
- do_and3 (tmp_reg_src1, tmp_reg_src1, mask);
- do_and3 (tmp_reg_src2, tmp_reg_src2, mask);
- }
-
- /* Cases to handle. A and B are chunks of the two strings.
- 1: Not end of comparison:
- A != B: branch to cleanup code to compute result.
- A == B: check for 0 byte, next block if not found.
- 2: End of the inline comparison:
- A != B: branch to cleanup code to compute result.
- A == B: check for 0 byte, call strcmp/strncmp
- 3: compared requested N bytes:
- A == B: branch to result 0.
- A != B: cleanup code to compute result. */
-
- unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
-
- rtx dst_label;
- if (remain > 0 || equality_compare_rest)
- {
- /* Branch to cleanup code, otherwise fall through to do
- more compares. */
- if (!cleanup_label)
- cleanup_label = gen_label_rtx ();
- dst_label = cleanup_label;
- }
- else
- /* Branch to end and produce result of 0. */
- dst_label = final_move_label;
-
- rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
- rtx cond = gen_reg_rtx (CCmode);
-
- /* Always produce the 0 result, it is needed if
- cmpb finds a 0 byte in this chunk. */
- rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
- rs6000_emit_dot_insn (result_reg, tmp, 1, cond);
-
- rtx cmp_rtx;
- if (remain == 0 && !equality_compare_rest)
- cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
- else
- cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
-
- rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
- lab_ref, pc_rtx);
- rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
- JUMP_LABEL (j) = dst_label;
- LABEL_NUSES (dst_label) += 1;
-
- if (remain > 0 || equality_compare_rest)
- {
- /* Generate a cmpb to test for a 0 byte and branch
- to final result if found. */
- rtx cmpb_zero = gen_reg_rtx (word_mode);
- rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
- rtx condz = gen_reg_rtx (CCmode);
- rtx zero_reg = gen_reg_rtx (word_mode);
- emit_move_insn (zero_reg, GEN_INT (0));
- do_cmpb3 (cmpb_zero, tmp_reg_src1, zero_reg);
-
- if (cmp_bytes < word_mode_size)
- {
- /* Don't want to look at zero bytes past end. */
- HOST_WIDE_INT mb =
- BITS_PER_UNIT * (word_mode_size - cmp_bytes);
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
- do_and3 (cmpb_zero, cmpb_zero, mask);
- }
-
- emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
- rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx);
- rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx,
- lab_ref_fin, pc_rtx);
- rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
- JUMP_LABEL (j2) = final_move_label;
- LABEL_NUSES (final_move_label) += 1;
-
- }
-
- offset += cmp_bytes;
- bytes_to_compare -= cmp_bytes;
- }
-
+ expand_strncmp_gpr_sequence(compare_length, base_align,
+ orig_src1, orig_src2,
+ tmp_reg_src1, tmp_reg_src2,
+ result_reg,
+ equality_compare_rest,
+ cleanup_label, final_move_label);
+
+ offset = compare_length;
+
if (equality_compare_rest)
{
/* Update pointers past what has been compared already. */
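The rotate-then-mask trick in the removed block above is easier to follow in C. The sketch below is an illustration only, with made-up names; it assumes a 64-bit word_mode, the loaded bytes sitting at the low end of the register, and 0 < keep <= loaded < 8 so neither shift amount is undefined. On rs6000 the rotate and the mask can be fused into a single rldicr, which is what the removed comment points at.

/* Hypothetical C model of the rotate-left + clear-right step.  LOADED
   is the number of bytes the load brought in; KEEP is how many of them
   take part in the compare.  Assumes 64-bit unsigned long and
   0 < KEEP <= LOADED < 8.  */
static unsigned long
left_align_chunk (unsigned long v, unsigned int loaded, unsigned int keep)
{
  unsigned int rot = 8u * (8u - loaded);            /* rotate-left amount */
  unsigned int mb = 8u * (8u - keep);               /* clear-right boundary */
  unsigned long r = (v << rot) | (v >> (64 - rot)); /* rotate left */
  return r & (~0ul << mb);                          /* clear bytes past the string */
}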


@@ -1,613 +0,0 @@
commit e4108e7e619dcf7f21224382bc37ba2ef651eb43
Author: acsawdey <acsawdey@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Thu Aug 30 18:17:00 2018 +0000
2018-08-30 Aaron Sawdey <acsawdey@linux.ibm.com>
* config/rs6000/altivec.md (altivec_eq<mode>): Remove star.
(altivec_vcmpequ<VI_char>_p): Remove star.
* config/rs6000/rs6000-string.c (do_load_for_compare): Support
vector load modes.
(expand_strncmp_vec_sequence): New function.
(emit_final_str_compare_vec): New function.
(expand_strn_compare): Add support for vector strncmp.
* config/rs6000/rs6000.opt (-mstring-compare-inline-limit): Change
length specification to bytes.
* config/rs6000/vsx.md (vsx_ld_elemrev_v16qi_internal): Remove star.
(vcmpnezb_p): New pattern.
* doc/invoke.texi (RS/6000 and PowerPC Options): Update documentation
for option -mstring-compare-inline-limit.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@263991 138bc75d-0d04-0410-961f-82ee72b054a4
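The new expansion is built around the ISA 3.0 vcmpnezb instruction, so a byte-level model of it helps when reading the sequences below. This is a sketch of the documented semantics, not code from the patch:

/* Model of vcmpnezb (Vector Compare Not Equal or Zero Byte): a result
   byte is 0xff where the inputs differ or where either input byte is
   zero, i.e. "difference location or zero byte location".  */
static void
vcmpnezb_model (const unsigned char a[16], const unsigned char b[16],
                unsigned char r[16])
{
  for (int i = 0; i < 16; i++)
    r[i] = (a[i] != b[i] || a[i] == 0 || b[i] == 0) ? 0xff : 0x00;
}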
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 13f4654db6a..db4f926bd15 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -608,7 +608,7 @@
"vcmpbfp %0,%1,%2"
[(set_attr "type" "veccmp")])
-(define_insn "*altivec_eq<mode>"
+(define_insn "altivec_eq<mode>"
[(set (match_operand:VI2 0 "altivec_register_operand" "=v")
(eq:VI2 (match_operand:VI2 1 "altivec_register_operand" "v")
(match_operand:VI2 2 "altivec_register_operand" "v")))]
@@ -2438,7 +2438,7 @@
;; Compare vectors producing a vector result and a predicate, setting CR6 to
;; indicate a combined status
-(define_insn "*altivec_vcmpequ<VI_char>_p"
+(define_insn "altivec_vcmpequ<VI_char>_p"
[(set (reg:CC CR6_REGNO)
(unspec:CC [(eq:CC (match_operand:VI2 1 "register_operand" "v")
(match_operand:VI2 2 "register_operand" "v"))]
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index 451e9ed33da..ff0414586d0 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -157,6 +157,33 @@ do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
{
switch (GET_MODE (reg))
{
+ case E_V16QImode:
+ switch (mode)
+ {
+ case E_V16QImode:
+ if (!BYTES_BIG_ENDIAN)
+ {
+ if (TARGET_P9_VECTOR)
+ emit_insn (gen_vsx_ld_elemrev_v16qi_internal (reg, mem));
+ else
+ {
+ rtx reg_v2di = simplify_gen_subreg (V2DImode, reg,
+ V16QImode, 0);
+ gcc_assert (MEM_P (mem));
+ rtx addr = XEXP (mem, 0);
+ rtx mem_v2di = gen_rtx_MEM (V2DImode, addr);
+ MEM_COPY_ATTRIBUTES (mem_v2di, mem);
+ set_mem_size (mem, GET_MODE_SIZE (V2DImode));
+ emit_insn (gen_vsx_ld_elemrev_v2di (reg_v2di, mem_v2di));
+ }
+ }
+ else
+ emit_insn (gen_vsx_movv2di_64bit (reg, mem));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
case E_DImode:
switch (mode)
{
@@ -227,6 +254,12 @@ do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
gcc_unreachable ();
}
break;
+
+ case E_QImode:
+ gcc_assert (mode == E_QImode);
+ emit_move_insn (reg, mem);
+ break;
+
default:
gcc_unreachable ();
break;
@@ -1705,17 +1738,17 @@ expand_strncmp_align_check (rtx strncmp_label, rtx src_addr, HOST_WIDE_INT bytes
RESULT_REG is the rtx for the result register.
EQUALITY_COMPARE_REST is a flag to indicate we need to make a cleanup call
to strcmp/strncmp if we have equality at the end of the inline comparison.
- CLEANUP_LABEL is rtx for a label we generate if we need code to clean up
- and generate the final comparison result.
+ P_CLEANUP_LABEL is a pointer to rtx for a label we generate if we need code
+ to clean up and generate the final comparison result.
FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
set the final result. */
static void
-expand_strncmp_gpr_sequence(unsigned HOST_WIDE_INT bytes_to_compare,
- unsigned int base_align,
- rtx orig_src1, rtx orig_src2,
- rtx tmp_reg_src1, rtx tmp_reg_src2, rtx result_reg,
- bool equality_compare_rest, rtx &cleanup_label,
- rtx final_move_label)
+expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
+ unsigned int base_align,
+ rtx orig_src1, rtx orig_src2,
+ rtx tmp_reg_src1, rtx tmp_reg_src2, rtx result_reg,
+ bool equality_compare_rest, rtx *p_cleanup_label,
+ rtx final_move_label)
{
unsigned int word_mode_size = GET_MODE_SIZE (word_mode);
machine_mode load_mode;
@@ -1724,6 +1757,8 @@ expand_strncmp_gpr_sequence(unsigned HOST_WIDE_INT bytes_to_compare,
unsigned HOST_WIDE_INT offset = 0;
rtx src1_addr = force_reg (Pmode, XEXP (orig_src1, 0));
rtx src2_addr = force_reg (Pmode, XEXP (orig_src2, 0));
+ gcc_assert (p_cleanup_label != NULL);
+ rtx cleanup_label = *p_cleanup_label;
while (bytes_to_compare > 0)
{
@@ -1876,6 +1911,178 @@ expand_strncmp_gpr_sequence(unsigned HOST_WIDE_INT bytes_to_compare,
bytes_to_compare -= cmp_bytes;
}
+ *p_cleanup_label = cleanup_label;
+ return;
+}
+
+/* Generate the sequence of compares for strcmp/strncmp using vec/vsx
+ instructions.
+
+ BYTES_TO_COMPARE is the number of bytes to be compared.
+ ORIG_SRC1 is the unmodified rtx for the first string.
+ ORIG_SRC2 is the unmodified rtx for the second string.
+ S1ADDR is the register to use for the base address of the first string.
+ S2ADDR is the register to use for the base address of the second string.
+ OFF_REG is the register to use for the string offset for loads.
+ S1DATA is the register for loading the first string.
+ S2DATA is the register for loading the second string.
+ VEC_RESULT is the rtx for the vector result indicating the byte difference.
+ EQUALITY_COMPARE_REST is a flag to indicate we need to make a cleanup call
+ to strcmp/strncmp if we have equality at the end of the inline comparison.
+ P_CLEANUP_LABEL is a pointer to rtx for a label we generate if we need code to clean up
+ and generate the final comparison result.
+ FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
+ set the final result. */
+static void
+expand_strncmp_vec_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
+ rtx orig_src1, rtx orig_src2,
+ rtx s1addr, rtx s2addr, rtx off_reg,
+ rtx s1data, rtx s2data,
+ rtx vec_result, bool equality_compare_rest,
+ rtx *p_cleanup_label, rtx final_move_label)
+{
+ machine_mode load_mode;
+ unsigned int load_mode_size;
+ unsigned HOST_WIDE_INT cmp_bytes = 0;
+ unsigned HOST_WIDE_INT offset = 0;
+
+ gcc_assert (p_cleanup_label != NULL);
+ rtx cleanup_label = *p_cleanup_label;
+
+ emit_move_insn (s1addr, force_reg (Pmode, XEXP (orig_src1, 0)));
+ emit_move_insn (s2addr, force_reg (Pmode, XEXP (orig_src2, 0)));
+
+ unsigned int i;
+ rtx zr[16];
+ for (i = 0; i < 16; i++)
+ zr[i] = GEN_INT (0);
+ rtvec zv = gen_rtvec_v (16, zr);
+ rtx zero_reg = gen_reg_rtx (V16QImode);
+ rs6000_expand_vector_init (zero_reg, gen_rtx_PARALLEL (V16QImode, zv));
+
+ while (bytes_to_compare > 0)
+ {
+ /* VEC/VSX compare sequence for P8:
+ check each 16B with:
+ lxvd2x 32,28,8
+ lxvd2x 33,29,8
+ vcmpequb 2,0,1 # compare strings
+ vcmpequb 4,0,3 # compare w/ 0
+ xxlorc 37,36,34 # first FF byte is either mismatch or end of string
+ vcmpequb. 7,5,3 # reg 7 contains 0
+ bnl 6,.Lmismatch
+
+ For the P8 LE case, we use lxvd2x and compare full 16 bytes
+ but then use vgbbd and a shift to get two bytes with the
+ information we need in the correct order.
+
+ VEC/VSX compare sequence if TARGET_P9_VECTOR:
+ lxvb16x/lxvb16x # load 16B of each string
+ vcmpnezb. # produces difference location or zero byte location
+ bne 6,.Lmismatch
+
+ Use the overlapping compare trick for the last block if it is
+ less than 16 bytes.
+ */
+
+ load_mode = V16QImode;
+ load_mode_size = GET_MODE_SIZE (load_mode);
+
+ if (bytes_to_compare >= load_mode_size)
+ cmp_bytes = load_mode_size;
+ else
+ {
+ /* Move this load back so it doesn't go past the end. P8/P9
+ can do this efficiently. This is never called with less
+ than 16 bytes so we should always be able to do this. */
+ unsigned int extra_bytes = load_mode_size - bytes_to_compare;
+ cmp_bytes = bytes_to_compare;
+ gcc_assert (offset > extra_bytes);
+ offset -= extra_bytes;
+ cmp_bytes = load_mode_size;
+ bytes_to_compare = cmp_bytes;
+ }
+
+ /* The offset currently used is always kept in off_reg so that the
+ cleanup code on P8 can use it to extract the differing byte. */
+ emit_move_insn (off_reg, GEN_INT (offset));
+
+ rtx addr1 = gen_rtx_PLUS (Pmode, s1addr, off_reg);
+ do_load_for_compare_from_addr (load_mode, s1data, addr1, orig_src1);
+ rtx addr2 = gen_rtx_PLUS (Pmode, s2addr, off_reg);
+ do_load_for_compare_from_addr (load_mode, s2data, addr2, orig_src2);
+
+ /* Cases to handle. A and B are chunks of the two strings.
+ 1: Not end of comparison:
+ A != B: branch to cleanup code to compute result.
+ A == B: next block
+ 2: End of the inline comparison:
+ A != B: branch to cleanup code to compute result.
+ A == B: call strcmp/strncmp
+ 3: compared requested N bytes:
+ A == B: branch to result 0.
+ A != B: cleanup code to compute result. */
+
+ unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
+
+ if (TARGET_P9_VECTOR)
+ emit_insn (gen_vcmpnezb_p (vec_result, s1data, s2data));
+ else
+ {
+ /* Emit instructions to do comparison and zero check. */
+ rtx cmp_res = gen_reg_rtx (load_mode);
+ rtx cmp_zero = gen_reg_rtx (load_mode);
+ rtx cmp_combined = gen_reg_rtx (load_mode);
+ emit_insn (gen_altivec_eqv16qi (cmp_res, s1data, s2data));
+ emit_insn (gen_altivec_eqv16qi (cmp_zero, s1data, zero_reg));
+ emit_insn (gen_orcv16qi3 (vec_result, cmp_zero, cmp_res));
+ emit_insn (gen_altivec_vcmpequb_p (cmp_combined, vec_result, zero_reg));
+ }
+
+ bool branch_to_cleanup = (remain > 0 || equality_compare_rest);
+ rtx cr6 = gen_rtx_REG (CCmode, CR6_REGNO);
+ rtx dst_label;
+ rtx cmp_rtx;
+ if (branch_to_cleanup)
+ {
+ /* Branch to cleanup code, otherwise fall through to do more
+ compares. P8 and P9 use different CR bits because on P8
+ we are looking at the result of a comparison vs a
+ register of zeroes so the all-true condition means no
+ difference or zero was found. On P9, vcmpnezb sets a byte
+ to 0xff if there is a mismatch or zero, so the all-false
+ condition indicates we found no difference or zero. */
+ if (!cleanup_label)
+ cleanup_label = gen_label_rtx ();
+ dst_label = cleanup_label;
+ if (TARGET_P9_VECTOR)
+ cmp_rtx = gen_rtx_NE (VOIDmode, cr6, const0_rtx);
+ else
+ cmp_rtx = gen_rtx_GE (VOIDmode, cr6, const0_rtx);
+ }
+ else
+ {
+ /* Branch to final return or fall through to cleanup,
+ result is already set to 0. */
+ dst_label = final_move_label;
+ if (TARGET_P9_VECTOR)
+ cmp_rtx = gen_rtx_EQ (VOIDmode, cr6, const0_rtx);
+ else
+ cmp_rtx = gen_rtx_LT (VOIDmode, cr6, const0_rtx);
+ }
+
+ rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
+ rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
+ lab_ref, pc_rtx);
+ rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+ JUMP_LABEL (j2) = dst_label;
+ LABEL_NUSES (dst_label) += 1;
+
+ offset += cmp_bytes;
+ bytes_to_compare -= cmp_bytes;
+ }
+ *p_cleanup_label = cleanup_label;
+ return;
}
/* Generate the final sequence that identifies the differing
@@ -1948,6 +2155,96 @@ emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
return;
}
+/* Generate the final sequence that identifies the differing
+ byte and generates the final result, taking into account
+ zero bytes:
+
+ P8:
+ vgbbd 0,0
+ vsldoi 0,0,0,9
+ mfvsrd 9,32
+ addi 10,9,-1 # count trailing zero bits
+ andc 9,10,9
+ popcntd 9,9
+ lbzx 10,28,9 # use that offset to load differing byte
+ lbzx 3,29,9
+ subf 3,3,10 # subtract for final result
+
+ P9:
+ vclzlsbb # counts trailing bytes with lsb=0
+ vextublx # extract differing byte
+
+ STR1 is the reg rtx for data from string 1.
+ STR2 is the reg rtx for data from string 2.
+ RESULT is the reg rtx for the comparison result.
+ S1ADDR is the register to use for the base address of the first string.
+ S2ADDR is the register to use for the base address of the second string.
+ ORIG_SRC1 is the unmodified rtx for the first string.
+ ORIG_SRC2 is the unmodified rtx for the second string.
+ OFF_REG is the register to use for the string offset for loads.
+ VEC_RESULT is the rtx for the vector result indicating the byte difference.
+ */
+
+static void
+emit_final_str_compare_vec (rtx str1, rtx str2, rtx result,
+ rtx s1addr, rtx s2addr,
+ rtx orig_src1, rtx orig_src2,
+ rtx off_reg, rtx vec_result)
+{
+ if (TARGET_P9_VECTOR)
+ {
+ rtx diffix = gen_reg_rtx (SImode);
+ rtx chr1 = gen_reg_rtx (SImode);
+ rtx chr2 = gen_reg_rtx (SImode);
+ rtx chr1_di = simplify_gen_subreg (DImode, chr1, SImode, 0);
+ rtx chr2_di = simplify_gen_subreg (DImode, chr2, SImode, 0);
+ emit_insn (gen_vclzlsbb_v16qi (diffix, vec_result));
+ emit_insn (gen_vextublx (chr1, diffix, str1));
+ emit_insn (gen_vextublx (chr2, diffix, str2));
+ do_sub3 (result, chr1_di, chr2_di);
+ }
+ else
+ {
+ rtx diffix = gen_reg_rtx (DImode);
+ rtx result_gbbd = gen_reg_rtx (V16QImode);
+ /* Since each byte of the input is either 00 or FF, the bytes in
+ dw0 and dw1 after vgbbd are all identical to each other. */
+ emit_insn (gen_p8v_vgbbd (result_gbbd, vec_result));
+ /* For LE, we shift by 9 and get BA in the low two bytes then CTZ.
+ For BE, we shift by 7 and get AB in the high two bytes then CLZ. */
+ rtx result_shifted = gen_reg_rtx (V16QImode);
+ int shift_amt = (BYTES_BIG_ENDIAN) ? 7 : 9;
+ emit_insn (gen_altivec_vsldoi_v16qi (result_shifted, result_gbbd, result_gbbd, GEN_INT (shift_amt)));
+
+ rtx diffix_df = simplify_gen_subreg (DFmode, diffix, DImode, 0);
+ emit_insn (gen_p8_mfvsrd_3_v16qi (diffix_df, result_shifted));
+ rtx count = gen_reg_rtx (DImode);
+
+ if (BYTES_BIG_ENDIAN)
+ emit_insn (gen_clzdi2 (count, diffix));
+ else
+ emit_insn (gen_ctzdi2 (count, diffix));
+
+ /* P8 doesn't have a good solution for extracting one byte from
+ a vsx reg like vextublx on P9 so we just compute the offset
+ of the differing byte and load it from each string. */
+ do_add3 (off_reg, off_reg, count);
+
+ rtx chr1 = gen_reg_rtx (QImode);
+ rtx chr2 = gen_reg_rtx (QImode);
+ rtx addr1 = gen_rtx_PLUS (Pmode, s1addr, off_reg);
+ do_load_for_compare_from_addr (QImode, chr1, addr1, orig_src1);
+ rtx addr2 = gen_rtx_PLUS (Pmode, s2addr, off_reg);
+ do_load_for_compare_from_addr (QImode, chr2, addr2, orig_src2);
+ machine_mode rmode = GET_MODE (result);
+ rtx chr1_rm = simplify_gen_subreg (rmode, chr1, QImode, 0);
+ rtx chr2_rm = simplify_gen_subreg (rmode, chr2, QImode, 0);
+ do_sub3 (result, chr1_rm, chr2_rm);
+ }
+
+ return;
+}
+
/* Expand a string compare operation with length, and return
true if successful. Return false if we should let the
compiler generate normal code, probably a strncmp call.
@@ -2002,21 +2299,43 @@ expand_strn_compare (rtx operands[], int no_length)
gcc_assert (GET_MODE (target) == SImode);
- unsigned int word_mode_size = GET_MODE_SIZE (word_mode);
+ unsigned int required_align = 8;
unsigned HOST_WIDE_INT offset = 0;
unsigned HOST_WIDE_INT bytes; /* N from the strncmp args if available. */
unsigned HOST_WIDE_INT compare_length; /* How much to compare inline. */
+
if (no_length)
- /* Use this as a standin to determine the mode to use. */
- bytes = rs6000_string_compare_inline_limit * word_mode_size;
+ bytes = rs6000_string_compare_inline_limit;
else
bytes = UINTVAL (bytes_rtx);
- machine_mode load_mode =
- select_block_compare_mode (0, bytes, base_align);
- unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
- compare_length = rs6000_string_compare_inline_limit * load_mode_size;
+ /* Is it OK to use vec/vsx for this. TARGET_VSX means we have at
+ least POWER7 but we use TARGET_EFFICIENT_UNALIGNED_VSX which is
+ at least POWER8. That way we can rely on overlapping compares to
+ do the final comparison of less than 16 bytes. Also I do not want
+ to deal with making this work for 32 bits. */
+ int use_vec = (bytes >= 16 && !TARGET_32BIT && TARGET_EFFICIENT_UNALIGNED_VSX);
+
+ if (use_vec)
+ required_align = 16;
+
+ machine_mode load_mode;
+ rtx tmp_reg_src1, tmp_reg_src2;
+ if (use_vec)
+ {
+ load_mode = V16QImode;
+ tmp_reg_src1 = gen_reg_rtx (V16QImode);
+ tmp_reg_src2 = gen_reg_rtx (V16QImode);
+ }
+ else
+ {
+ load_mode = select_block_compare_mode (0, bytes, base_align);
+ tmp_reg_src1 = gen_reg_rtx (word_mode);
+ tmp_reg_src2 = gen_reg_rtx (word_mode);
+ }
+
+ compare_length = rs6000_string_compare_inline_limit;
/* If we have equality at the end of the last compare and we have not
found the end of the string, we need to call strcmp/strncmp to
@@ -2040,10 +2359,7 @@ expand_strn_compare (rtx operands[], int no_length)
rtx final_move_label = gen_label_rtx ();
rtx final_label = gen_label_rtx ();
rtx begin_compare_label = NULL;
- unsigned int required_align = 8;
-
- required_align = 8;
-
+
if (base_align < required_align)
{
/* Generate code that checks distance to 4k boundary for this case. */
@@ -2060,7 +2376,7 @@ expand_strn_compare (rtx operands[], int no_length)
the subsequent code generation are in agreement so we do not
go past the length we tested for a 4k boundary crossing. */
unsigned HOST_WIDE_INT align_test = compare_length;
- if (align_test < 8)
+ if (align_test < required_align)
{
align_test = HOST_WIDE_INT_1U << ceil_log2 (align_test);
base_align = align_test;
@@ -2102,7 +2418,7 @@ expand_strn_compare (rtx operands[], int no_length)
else
{
/* -m32 -mpowerpc64 results in word_mode being DImode even
- though otherwise it is 32-bit. The length arg to strncmp
+ though otherwise it is 32-bit. The length arg to strncmp
is a size_t which will be the same size as pointers. */
rtx len_rtx = gen_reg_rtx (Pmode);
emit_move_insn (len_rtx, gen_int_mode (bytes, Pmode));
@@ -2124,17 +2440,32 @@ expand_strn_compare (rtx operands[], int no_length)
}
rtx cleanup_label = NULL;
- rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
- rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
+ rtx s1addr = NULL, s2addr = NULL, off_reg = NULL, vec_result = NULL;
/* Generate a sequence of GPR or VEC/VSX instructions to compare out
to the length specified. */
- expand_strncmp_gpr_sequence(compare_length, base_align,
- orig_src1, orig_src2,
- tmp_reg_src1, tmp_reg_src2,
- result_reg,
- equality_compare_rest,
- cleanup_label, final_move_label);
+ if (use_vec)
+ {
+ s1addr = gen_reg_rtx (Pmode);
+ s2addr = gen_reg_rtx (Pmode);
+ off_reg = gen_reg_rtx (Pmode);
+ vec_result = gen_reg_rtx (load_mode);
+ emit_move_insn (result_reg, GEN_INT (0));
+ expand_strncmp_vec_sequence (compare_length,
+ orig_src1, orig_src2,
+ s1addr, s2addr, off_reg,
+ tmp_reg_src1, tmp_reg_src2,
+ vec_result,
+ equality_compare_rest,
+ &cleanup_label, final_move_label);
+ }
+ else
+ expand_strncmp_gpr_sequence (compare_length, base_align,
+ orig_src1, orig_src2,
+ tmp_reg_src1, tmp_reg_src2,
+ result_reg,
+ equality_compare_rest,
+ &cleanup_label, final_move_label);
offset = compare_length;
@@ -2174,7 +2505,12 @@ expand_strn_compare (rtx operands[], int no_length)
if (cleanup_label)
emit_label (cleanup_label);
- emit_final_str_compare_gpr (tmp_reg_src1, tmp_reg_src2, result_reg);
+ if (use_vec)
+ emit_final_str_compare_vec (tmp_reg_src1, tmp_reg_src2, result_reg,
+ s1addr, s2addr, orig_src1, orig_src2,
+ off_reg, vec_result);
+ else
+ emit_final_str_compare_gpr (tmp_reg_src1, tmp_reg_src2, result_reg);
emit_label (final_move_label);
emit_insn (gen_movsi (target,
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index ace8a477550..ad1b8a29ac6 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -342,8 +342,8 @@ Target Report Var(rs6000_block_compare_inline_loop_limit) Init(-1) RejectNegativ
Max number of bytes to compare with loops.
mstring-compare-inline-limit=
-Target Report Var(rs6000_string_compare_inline_limit) Init(8) RejectNegative Joined UInteger Save
-Max number of pairs of load insns for compare.
+Target Report Var(rs6000_string_compare_inline_limit) Init(64) RejectNegative Joined UInteger Save
+Max number of bytes to compare.
misel
Target Report Mask(ISEL) Var(rs6000_isa_flags)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index e6921e96a3d..01fb4213001 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1429,7 +1429,7 @@
}
})
-(define_insn "*vsx_ld_elemrev_v16qi_internal"
+(define_insn "vsx_ld_elemrev_v16qi_internal"
[(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
(vec_select:V16QI
(match_operand:V16QI 1 "memory_operand" "Z")
@@ -5107,6 +5107,22 @@
"vcmpnezb %0,%1,%2"
[(set_attr "type" "vecsimple")])
+;; Vector Compare Not Equal or Zero Byte predicate or record-form
+(define_insn "vcmpnezb_p"
+ [(set (reg:CC CR6_REGNO)
+ (unspec:CC
+ [(match_operand:V16QI 1 "altivec_register_operand" "v")
+ (match_operand:V16QI 2 "altivec_register_operand" "v")]
+ UNSPEC_VCMPNEZB))
+ (set (match_operand:V16QI 0 "altivec_register_operand" "=v")
+ (unspec:V16QI
+ [(match_dup 1)
+ (match_dup 2)]
+ UNSPEC_VCMPNEZB))]
+ "TARGET_P9_VECTOR"
+ "vcmpnezb. %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
;; Vector Compare Not Equal Half Word (specified/not+eq:)
(define_insn "vcmpneh"
[(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index f2dd12b3d73..291e414fea2 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -24165,12 +24165,10 @@ target-specific.
@item -mstring-compare-inline-limit=@var{num}
@opindex mstring-compare-inline-limit
-Generate at most @var{num} pairs of load instructions to compare the
-string inline. If the difference or end of string is not found at the
+Compare at most @var{num} string bytes with inline code.
+If the difference or end of string is not found at the
end of the inline compare a call to @code{strcmp} or @code{strncmp} will
-take care of the rest of the comparison. The default is 8 pairs of
-loads, which will compare 64 bytes on a 64-bit target and 32 bytes on a
-32-bit target.
+take care of the rest of the comparison. The default is 64 bytes.
@item -G @var{num}
@opindex G
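A usage sketch for the retuned option (illustrative; the function name is made up, and actual code generation depends on -mcpu, alignment, and the limit in effect):

/* With, e.g., `gcc -O2 -mcpu=power9 -mstring-compare-inline-limit=64`,
   the limit now counts bytes (default 64), so the 32-byte compare
   below is a candidate for the inline vcmpnezb sequence instead of a
   call to strncmp.  */
#include <string.h>

int
same_prefix (const char *a, const char *b)
{
  return strncmp (a, b, 32) == 0;
}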


@@ -1,40 +0,0 @@
commit 6f1a7440d9aac59fba0f2e2d8d0a9a0b82f480cb
Author: acsawdey <acsawdey@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Tue Oct 2 17:31:53 2018 +0000
2018-10-02 Aaron Sawdey <acsawdey@linux.ibm.com>
PR target/87474
* config/rs6000/rs6000-string.c (expand_strn_compare): Check that both
P8_VECTOR and VSX are enabled.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@264799 138bc75d-0d04-0410-961f-82ee72b054a4
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index ff0414586d0..9c25bad97a1 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -2205,6 +2205,7 @@ emit_final_str_compare_vec (rtx str1, rtx str2, rtx result,
}
else
{
+ gcc_assert (TARGET_P8_VECTOR);
rtx diffix = gen_reg_rtx (DImode);
rtx result_gbbd = gen_reg_rtx (V16QImode);
/* Since each byte of the input is either 00 or FF, the bytes in
@@ -2313,9 +2314,12 @@ expand_strn_compare (rtx operands[], int no_length)
/* Is it OK to use vec/vsx for this. TARGET_VSX means we have at
least POWER7 but we use TARGET_EFFICIENT_UNALIGNED_VSX which is
at least POWER8. That way we can rely on overlapping compares to
- do the final comparison of less than 16 bytes. Also I do not want
- to deal with making this work for 32 bits. */
- int use_vec = (bytes >= 16 && !TARGET_32BIT && TARGET_EFFICIENT_UNALIGNED_VSX);
+ do the final comparison of less than 16 bytes. Also I do not
+ want to deal with making this work for 32 bits. In addition, we
+ have to make sure that we have at least P8_VECTOR (we don't allow
+ P9_VECTOR without P8_VECTOR). */
+ int use_vec = (bytes >= 16 && !TARGET_32BIT
+ && TARGET_EFFICIENT_UNALIGNED_VSX && TARGET_P8_VECTOR);
if (use_vec)
required_align = 16;
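The extra check matters for option combinations that make unaligned VSX efficient while the P8 vector instructions are switched off. A hypothetical reproducer shape (the PR's actual testcase and flags may differ):

/* Before this fix, a >= 16 byte compare built with flags along the
   lines of `gcc -O2 -mvsx -mno-power8-vector` could select the vector
   path and then fail in the P8-only cleanup code.  Illustration only.  */
#include <string.h>

int
cmp20 (const char *a, const char *b)
{
  return strncmp (a, b, 20);
}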


@@ -1,510 +0,0 @@
commit 08869d85bd2a7ec5468b3bb3f01a930eb93e6381
Author: acsawdey <acsawdey@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Fri Oct 26 19:33:31 2018 +0000
2018-10-26 Aaron Sawdey <acsawdey@linux.ibm.com>
* config/rs6000/rs6000-string.c (expand_strncmp_gpr_sequence): Change to
a shorter sequence with fewer branches.
(emit_final_str_compare_gpr): Ditto.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@265546 138bc75d-0d04-0410-961f-82ee72b054a4
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index 9c25bad97a1..96729d9663c 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -259,7 +259,7 @@ do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
gcc_assert (mode == E_QImode);
emit_move_insn (reg, mem);
break;
-
+
default:
gcc_unreachable ();
break;
@@ -726,7 +726,7 @@ expand_compare_loop (rtx operands[])
{
if (GET_MODE_SIZE (GET_MODE (bytes_rtx)) > GET_MODE_SIZE (word_mode))
/* Do not expect length longer than word_mode. */
- return false;
+ return false;
else if (GET_MODE_SIZE (GET_MODE (bytes_rtx)) < GET_MODE_SIZE (word_mode))
{
bytes_rtx = force_reg (GET_MODE (bytes_rtx), bytes_rtx);
@@ -770,7 +770,7 @@ expand_compare_loop (rtx operands[])
rtx j;
/* Example of generated code for 35 bytes aligned 1 byte.
-
+
mtctr 8
li 6,0
li 5,8
@@ -798,7 +798,7 @@ expand_compare_loop (rtx operands[])
popcntd 9,9
subfe 10,10,10
or 9,9,10
-
+
Compiled with -fno-reorder-blocks for clarity. */
/* Structure of what we're going to do:
@@ -1041,7 +1041,7 @@ expand_compare_loop (rtx operands[])
if (!bytes_is_const)
{
/* If we're dealing with runtime length, we have to check if
- it's zero after the loop. When length is known at compile
+ it's zero after the loop. When length is known at compile
time the no-remainder condition is dealt with above. By
doing this after cleanup_label, we also deal with the
case where length is 0 at the start and we bypass the
@@ -1411,7 +1411,7 @@ expand_block_compare (rtx operands[])
rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
/* P7/P8 code uses cond for subfc. but P9 uses
- it for cmpld which needs CCUNSmode. */
+ it for cmpld which needs CCUNSmode. */
rtx cond;
if (TARGET_P9_MISC)
cond = gen_reg_rtx (CCUNSmode);
@@ -1655,7 +1655,7 @@ expand_block_compare (rtx operands[])
emit_label (convert_label);
/* We need to produce DI result from sub, then convert to target SI
- while maintaining <0 / ==0 / >0 properties. This sequence works:
+ while maintaining <0 / ==0 / >0 properties. This sequence works:
subfc L,A,B
subfe H,H,H
popcntd L,L
@@ -1740,7 +1740,7 @@ expand_strncmp_align_check (rtx strncmp_label, rtx src_addr, HOST_WIDE_INT bytes
to strcmp/strncmp if we have equality at the end of the inline comparison.
P_CLEANUP_LABEL is a pointer to rtx for a label we generate if we need code
to clean up and generate the final comparison result.
- FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
+ FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
set the final result. */
static void
expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
@@ -1763,12 +1763,9 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
while (bytes_to_compare > 0)
{
/* GPR compare sequence:
- check each 8B with: ld/ld cmpd bne
- If equal, use rldicr/cmpb to check for zero byte.
+ check each 8B with: ld/ld/cmpb/cmpb/orc./bne
+
cleanup code at end:
- cmpb get byte that differs
- cmpb look for zero byte
- orc combine
cntlzd get bit of first zero/diff byte
subfic convert for rldcl use
rldcl rldcl extract diff/zero byte
@@ -1776,7 +1773,7 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
The last compare can branch around the cleanup code if the
result is zero because the strings are exactly equal. */
-
+
unsigned int align = compute_current_alignment (base_align, offset);
load_mode = select_block_compare_mode (offset, bytes_to_compare, align);
load_mode_size = GET_MODE_SIZE (load_mode);
@@ -1801,34 +1798,49 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
rid of the extra bytes. */
cmp_bytes = bytes_to_compare;
- rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset));
+ rtx offset_reg = gen_reg_rtx (Pmode);
+ emit_move_insn (offset_reg, GEN_INT (offset));
+
+ rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, offset_reg);
do_load_for_compare_from_addr (load_mode, tmp_reg_src1, addr1, orig_src1);
- rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset));
+ rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, offset_reg);
do_load_for_compare_from_addr (load_mode, tmp_reg_src2, addr2, orig_src2);
/* We must always left-align the data we read, and
clear any bytes to the right that are beyond the string.
Otherwise the cmpb sequence won't produce the correct
- results. The beginning of the compare will be done
- with word_mode so will not have any extra shifts or
- clear rights. */
+ results. However if there is only one byte left, we
+ can just subtract to get the final result so the shifts
+ and clears are not needed. */
- if (load_mode_size < word_mode_size)
- {
- /* Rotate left first. */
- rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
- do_rotl3 (tmp_reg_src1, tmp_reg_src1, sh);
- do_rotl3 (tmp_reg_src2, tmp_reg_src2, sh);
- }
+ unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
- if (cmp_bytes < word_mode_size)
+ /* Loading just a single byte is a special case. If we are
+ loading more than that, we have to check whether we are
+ looking at the entire chunk of data. If not, rotate left and
+ clear right so that bytes we aren't supposed to look at are
+ zeroed, and the first byte we are supposed to compare is
+ leftmost. */
+ if (load_mode_size != 1)
{
- /* Now clear right. This plus the rotate can be
- turned into a rldicr instruction. */
- HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
- do_and3 (tmp_reg_src1, tmp_reg_src1, mask);
- do_and3 (tmp_reg_src2, tmp_reg_src2, mask);
+ if (load_mode_size < word_mode_size)
+ {
+ /* Rotate left first. */
+ rtx sh = GEN_INT (BITS_PER_UNIT
+ * (word_mode_size - load_mode_size));
+ do_rotl3 (tmp_reg_src1, tmp_reg_src1, sh);
+ do_rotl3 (tmp_reg_src2, tmp_reg_src2, sh);
+ }
+
+ if (cmp_bytes < word_mode_size)
+ {
+ /* Now clear right. This plus the rotate can be
+ turned into a rldicr instruction. */
+ HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
+ rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
+ do_and3 (tmp_reg_src1, tmp_reg_src1, mask);
+ do_and3 (tmp_reg_src2, tmp_reg_src2, mask);
+ }
}
/* Cases to handle. A and B are chunks of the two strings.
@@ -1842,8 +1854,6 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
A == B: branch to result 0.
A != B: cleanup code to compute result. */
- unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
-
rtx dst_label;
if (remain > 0 || equality_compare_rest)
{
@@ -1857,54 +1867,89 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
/* Branch to end and produce result of 0. */
dst_label = final_move_label;
- rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
- rtx cond = gen_reg_rtx (CCmode);
+ if (load_mode_size == 1)
+ {
+ /* Special case for comparing just single byte. */
+ if (equality_compare_rest)
+ {
+ /* Use subf./bne to branch to final_move_label if the
+ byte differs, otherwise fall through to the strncmp
+ call. We must also check for a zero byte here as we
+ must not make the library call if this is the end of
+ the string. */
+
+ rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
+ rtx cond = gen_reg_rtx (CCmode);
+ rtx diff_rtx = gen_rtx_MINUS (word_mode,
+ tmp_reg_src1, tmp_reg_src2);
+ rs6000_emit_dot_insn (result_reg, diff_rtx, 2, cond);
+ rtx cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+
+ rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
+ lab_ref, pc_rtx);
+ rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+ JUMP_LABEL (j) = final_move_label;
+ LABEL_NUSES (final_move_label) += 1;
- /* Always produce the 0 result, it is needed if
- cmpb finds a 0 byte in this chunk. */
- rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
- rs6000_emit_dot_insn (result_reg, tmp, 1, cond);
+ /* Check for zero byte here before fall through to
+ library call. This catches the case where the
+ strings are equal and end in a zero byte at this
+ position. */
- rtx cmp_rtx;
- if (remain == 0 && !equality_compare_rest)
- cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
- else
- cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+ rtx cond0 = gen_reg_rtx (CCmode);
+ emit_move_insn (cond0, gen_rtx_COMPARE (CCmode, tmp_reg_src1,
+ const0_rtx));
- rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
- lab_ref, pc_rtx);
- rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
- JUMP_LABEL (j) = dst_label;
- LABEL_NUSES (dst_label) += 1;
+ rtx cmp0eq_rtx = gen_rtx_EQ (VOIDmode, cond0, const0_rtx);
- if (remain > 0 || equality_compare_rest)
+ rtx ifelse0 = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp0eq_rtx,
+ lab_ref, pc_rtx);
+ rtx j0 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse0));
+ JUMP_LABEL (j0) = final_move_label;
+ LABEL_NUSES (final_move_label) += 1;
+ }
+ else
+ {
+ /* This is the last byte to be compared so we can use
+ subf to compute the final result and branch
+ unconditionally to final_move_label. */
+
+ do_sub3 (result_reg, tmp_reg_src1, tmp_reg_src2);
+
+ rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
+ rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
+ JUMP_LABEL (j) = final_move_label;
+ LABEL_NUSES (final_move_label) += 1;
+ emit_barrier ();
+ }
+ }
+ else
{
- /* Generate a cmpb to test for a 0 byte and branch
- to final result if found. */
rtx cmpb_zero = gen_reg_rtx (word_mode);
- rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
- rtx condz = gen_reg_rtx (CCmode);
+ rtx cmpb_diff = gen_reg_rtx (word_mode);
rtx zero_reg = gen_reg_rtx (word_mode);
+ rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
+ rtx cond = gen_reg_rtx (CCmode);
+
emit_move_insn (zero_reg, GEN_INT (0));
+ do_cmpb3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2);
do_cmpb3 (cmpb_zero, tmp_reg_src1, zero_reg);
+ rtx not_diff = gen_rtx_NOT (word_mode, cmpb_diff);
+ rtx orc_rtx = gen_rtx_IOR (word_mode, not_diff, cmpb_zero);
- if (cmp_bytes < word_mode_size)
- {
- /* Don't want to look at zero bytes past end. */
- HOST_WIDE_INT mb =
- BITS_PER_UNIT * (word_mode_size - cmp_bytes);
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
- do_and3 (cmpb_zero, cmpb_zero, mask);
- }
+ rs6000_emit_dot_insn (result_reg, orc_rtx, 2, cond);
- emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
- rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx);
- rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx,
- lab_ref_fin, pc_rtx);
- rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
- JUMP_LABEL (j2) = final_move_label;
- LABEL_NUSES (final_move_label) += 1;
+ rtx cmp_rtx;
+ if (remain == 0 && !equality_compare_rest)
+ cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
+ else
+ cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+ rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
+ lab_ref, pc_rtx);
+ rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+ JUMP_LABEL (j) = dst_label;
+ LABEL_NUSES (dst_label) += 1;
}
offset += cmp_bytes;
@@ -1915,7 +1960,7 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
return;
}
-/* Generate the sequence of compares for strcmp/strncmp using vec/vsx
+/* Generate the sequence of compares for strcmp/strncmp using vec/vsx
instructions.
BYTES_TO_COMPARE is the number of bytes to be compared.
@@ -1931,7 +1976,7 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
to strcmp/strncmp if we have equality at the end of the inline comparison.
P_CLEANUP_LABEL is a pointer to rtx for a label we generate if we need code to clean up
and generate the final comparison result.
- FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
+ FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
set the final result. */
static void
expand_strncmp_vec_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
@@ -1982,12 +2027,12 @@ expand_strncmp_vec_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
bne 6,.Lmismatch
Use the overlapping compare trick for the last block if it is
- less than 16 bytes.
+ less than 16 bytes.
*/
load_mode = V16QImode;
load_mode_size = GET_MODE_SIZE (load_mode);
-
+
if (bytes_to_compare >= load_mode_size)
cmp_bytes = load_mode_size;
else
@@ -2046,10 +2091,10 @@ expand_strncmp_vec_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
if (branch_to_cleanup)
{
/* Branch to cleanup code, otherwise fall through to do more
- compares. P8 and P9 use different CR bits because on P8
+ compares. P8 and P9 use different CR bits because on P8
we are looking at the result of a comparison vs a
register of zeroes so the all-true condition means no
- difference or zero was found. On P9, vcmpnezb sets a byte
+ difference or zero was found. On P9, vcmpnezb sets a byte
to 0xff if there is a mismatch or zero, so the all-false
condition indicates we found no difference or zero. */
if (!cleanup_label)
@@ -2062,7 +2107,7 @@ expand_strncmp_vec_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
}
else
{
- /* Branch to final return or fall through to cleanup,
+ /* Branch to final return or fall through to cleanup,
result is already set to 0. */
dst_label = final_move_label;
if (TARGET_P9_VECTOR)
@@ -2088,10 +2133,7 @@ expand_strncmp_vec_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
/* Generate the final sequence that identifies the differing
byte and generates the final result, taking into account
zero bytes:
-
- cmpb cmpb_result1, src1, src2
- cmpb cmpb_result2, src1, zero
- orc cmpb_result1, cmp_result1, cmpb_result2
+
cntlzd get bit of first zero/diff byte
addi convert for rldcl use
rldcl rldcl extract diff/zero byte
@@ -2105,10 +2147,7 @@ static void
emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
{
machine_mode m = GET_MODE (str1);
- rtx cmpb_diff = gen_reg_rtx (m);
- rtx cmpb_zero = gen_reg_rtx (m);
rtx rot_amt = gen_reg_rtx (m);
- rtx zero_reg = gen_reg_rtx (m);
rtx rot1_1 = gen_reg_rtx (m);
rtx rot1_2 = gen_reg_rtx (m);
@@ -2117,12 +2156,7 @@ emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
if (m == SImode)
{
- emit_insn (gen_cmpbsi3 (cmpb_diff, str1, str2));
- emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
- emit_insn (gen_cmpbsi3 (cmpb_zero, str1, zero_reg));
- emit_insn (gen_one_cmplsi2 (cmpb_diff,cmpb_diff));
- emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
- emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
+ emit_insn (gen_clzsi2 (rot_amt, result));
emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
emit_insn (gen_rotlsi3 (rot1_1, str1,
gen_lowpart (SImode, rot_amt)));
@@ -2134,12 +2168,7 @@ emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
}
else if (m == DImode)
{
- emit_insn (gen_cmpbdi3 (cmpb_diff, str1, str2));
- emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
- emit_insn (gen_cmpbdi3 (cmpb_zero, str1, zero_reg));
- emit_insn (gen_one_cmpldi2 (cmpb_diff,cmpb_diff));
- emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
- emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
+ emit_insn (gen_clzdi2 (rot_amt, result));
emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
emit_insn (gen_rotldi3 (rot1_1, str1,
gen_lowpart (SImode, rot_amt)));
@@ -2151,7 +2180,7 @@ emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
}
else
gcc_unreachable ();
-
+
return;
}
@@ -2169,10 +2198,10 @@ emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
lbzx 10,28,9 # use that offset to load differing byte
lbzx 3,29,9
subf 3,3,10 # subtract for final result
-
+
P9:
vclzlsbb # counts trailing bytes with lsb=0
- vextublx # extract differing byte
+ vextublx # extract differing byte
STR1 is the reg rtx for data from string 1.
STR2 is the reg rtx for data from string 2.
@@ -2208,7 +2237,7 @@ emit_final_str_compare_vec (rtx str1, rtx str2, rtx result,
gcc_assert (TARGET_P8_VECTOR);
rtx diffix = gen_reg_rtx (DImode);
rtx result_gbbd = gen_reg_rtx (V16QImode);
- /* Since each byte of the input is either 00 or FF, the bytes in
+ /* Since each byte of the input is either 00 or FF, the bytes in
dw0 and dw1 after vgbbd are all identical to each other. */
emit_insn (gen_p8v_vgbbd (result_gbbd, vec_result));
/* For LE, we shift by 9 and get BA in the low two bytes then CTZ.
@@ -2226,7 +2255,7 @@ emit_final_str_compare_vec (rtx str1, rtx str2, rtx result,
else
emit_insn (gen_ctzdi2 (count, diffix));
- /* P8 doesn't have a good solution for extracting one byte from
+ /* P8 doesn't have a good solution for extracting one byte from
a vsx reg like vextublx on P9 so we just compute the offset
of the differing byte and load it from each string. */
do_add3 (off_reg, off_reg, count);
@@ -2247,7 +2276,7 @@ emit_final_str_compare_vec (rtx str1, rtx str2, rtx result,
}
/* Expand a string compare operation with length, and return
- true if successful. Return false if we should let the
+ true if successful. Return false if we should let the
compiler generate normal code, probably a strncmp call.
OPERANDS[0] is the target (result).
@@ -2279,9 +2308,9 @@ expand_strn_compare (rtx operands[], int no_length)
rtx src1_addr = force_reg (Pmode, XEXP (orig_src1, 0));
rtx src2_addr = force_reg (Pmode, XEXP (orig_src2, 0));
- /* If we have a length, it must be constant. This simplifies things
+ /* If we have a length, it must be constant. This simplifies things
a bit as we don't have to generate code to check if we've exceeded
- the length. Later this could be expanded to handle this case. */
+ the length. Later this could be expanded to handle this case. */
if (!no_length && !CONST_INT_P (bytes_rtx))
return false;
@@ -2311,7 +2340,7 @@ expand_strn_compare (rtx operands[], int no_length)
else
bytes = UINTVAL (bytes_rtx);
- /* Is it OK to use vec/vsx for this. TARGET_VSX means we have at
+ /* Is it OK to use vec/vsx for this. TARGET_VSX means we have at
least POWER7 but we use TARGET_EFFICIENT_UNALIGNED_VSX which is
at least POWER8. That way we can rely on overlapping compares to
do the final comparison of less than 16 bytes. Also I do not
@@ -2363,7 +2392,7 @@ expand_strn_compare (rtx operands[], int no_length)
rtx final_move_label = gen_label_rtx ();
rtx final_label = gen_label_rtx ();
rtx begin_compare_label = NULL;
-
+
if (base_align < required_align)
{
/* Generate code that checks distance to 4k boundary for this case. */
@@ -2472,7 +2501,7 @@ expand_strn_compare (rtx operands[], int no_length)
&cleanup_label, final_move_label);
offset = compare_length;
-
+
if (equality_compare_rest)
{
/* Update pointers past what has been compared already. */
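The shortened per-chunk test reads naturally in C. The helpers below are made-up names for illustration (assuming 64-bit unsigned long); cmpb is the real Power instruction the new sequence uses:

/* cmpb: each result byte is 0xff if the corresponding bytes of X and Y
   are equal, else 0x00.  */
static unsigned long
cmpb_model (unsigned long x, unsigned long y)
{
  unsigned long r = 0;
  for (int i = 0; i < 8; i++)
    if (((x >> (8 * i)) & 0xff) == ((y >> (8 * i)) & 0xff))
      r |= 0xffUL << (8 * i);
  return r;
}

/* The orc. combine: nonzero exactly when this chunk holds a differing
   byte or a zero byte in S1, which is when the record-form compare in
   the new sequence branches to the cleanup code.  */
static unsigned long
chunk_needs_cleanup (unsigned long s1, unsigned long s2)
{
  return ~cmpb_model (s1, s2) | cmpb_model (s1, 0);
}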


@@ -0,0 +1,406 @@
commit 126dab7c9d84294f256b1f7bf91c24a9e7103249
Author: qinzhao <qinzhao@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Thu Nov 29 16:06:03 2018 +0000
Add a new option -flive-patching={inline-only-static|inline-clone}
to support live patching in GCC.
2018-11-29 qing zhao <qing.zhao@oracle.com>
gcc/ChangeLog:
* cif-code.def (EXTERN_LIVE_ONLY_STATIC): New CIF code.
* common.opt: Add -flive-patching flag.
* doc/invoke.texi: Document -flive-patching.
* flag-types.h (enum live_patching_level): New enum.
* ipa-inline.c (can_inline_edge_p): Disable external functions from
inlining when flag_live_patching is LIVE_PATCHING_INLINE_ONLY_STATIC.
* opts.c (control_options_for_live_patching): New function.
(finish_options): Make flag_live_patching incompatible with flag_lto.
Control IPA optimizations based on different levels of
flag_live_patching.
gcc/testsuite/ChangeLog:
* gcc.dg/live-patching-1.c: New test.
* gcc.dg/live-patching-2.c: New test.
* gcc.dg/live-patching-3.c: New test.
* gcc.dg/tree-ssa/writeonly-3.c: New test.
* gcc.target/i386/ipa-stack-alignment-2.c: New test.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@266627 138bc75d-0d04-0410-961f-82ee72b054a4
--- gcc/cif-code.def
+++ gcc/cif-code.def
@@ -132,6 +132,12 @@ DEFCIFCODE(USES_COMDAT_LOCAL, CIF_FINAL_ERROR,
DEFCIFCODE(ATTRIBUTE_MISMATCH, CIF_FINAL_ERROR,
N_("function attribute mismatch"))
+/* We can't inline because the user requests only static functions
+ but the function has external linkage for live patching purpose. */
+DEFCIFCODE(EXTERN_LIVE_ONLY_STATIC, CIF_FINAL_ERROR,
+ N_("function has external linkage when the user requests only"
+ " inlining static for live patching"))
+
/* We proved that the call is unreachable. */
DEFCIFCODE(UNREACHABLE, CIF_FINAL_ERROR,
N_("unreachable"))
--- gcc/common.opt
+++ gcc/common.opt
@@ -2181,6 +2181,24 @@ starts and when the destructor finishes.
flifetime-dse=
Common Joined RejectNegative UInteger Var(flag_lifetime_dse) Optimization IntegerRange(0, 2)
+flive-patching
+Common RejectNegative Alias(flive-patching=,inline-clone) Optimization
+
+flive-patching=
+Common Report Joined RejectNegative Enum(live_patching_level) Var(flag_live_patching) Init(LIVE_PATCHING_NONE) Optimization
+-flive-patching=[inline-only-static|inline-clone] Control IPA
+optimizations to provide a safe compilation for live-patching. At the same
+time, it provides multiple-level control over the enabled IPA optimizations.
+
+Enum
+Name(live_patching_level) Type(enum live_patching_level) UnknownError(unknown Live-Patching Level %qs)
+
+EnumValue
+Enum(live_patching_level) String(inline-only-static) Value(LIVE_PATCHING_INLINE_ONLY_STATIC)
+
+EnumValue
+Enum(live_patching_level) String(inline-clone) Value(LIVE_PATCHING_INLINE_CLONE)
+
flive-range-shrinkage
Common Report Var(flag_live_range_shrinkage) Init(0) Optimization
Relief of register pressure through live range shrinkage.
--- gcc/doc/invoke.texi
+++ gcc/doc/invoke.texi
@@ -389,6 +389,7 @@ Objective-C and Objective-C++ Dialects}.
-fipa-bit-cp -fipa-vrp @gol
-fipa-pta -fipa-profile -fipa-pure-const -fipa-reference -fipa-icf @gol
-fira-algorithm=@var{algorithm} @gol
+-flive-patching=@var{level} @gol
-fira-region=@var{region} -fira-hoist-pressure @gol
-fira-loop-pressure -fno-ira-share-save-slots @gol
-fno-ira-share-spill-slots @gol
@@ -9291,6 +9292,65 @@ equivalences that are found only by GCC and equivalences found only by Gold.
This flag is enabled by default at @option{-O2} and @option{-Os}.
+@item -flive-patching=@var{level}
+@opindex flive-patching
+Control GCC's optimizations to provide a safe compilation for live-patching.
+
+If the compiler's optimization uses a function's body or information extracted
+from its body to optimize/change another function, the latter is called an
+impacted function of the former. If a function is patched, its impacted
+functions should be patched too.
+
+The impacted functions are decided by the compiler's interprocedural
+optimizations. For example, inlining a function into its caller, cloning
+a function and changing its caller to call this new clone, or extracting
+a function's pureness/constness information to optimize its direct or
+indirect callers, etc.
+
+Usually, the more IPA optimizations enabled, the larger the number of
+impacted functions for each function. In order to control the number of
+impacted functions and computed the list of impacted function easily,
+we provide control to partially enable IPA optimizations on two different
+levels.
+
+The @var{level} argument should be one of the following:
+
+@table @samp
+
+@item inline-clone
+
+Only enable inlining and cloning optimizations, which includes inlining,
+cloning, interprocedural scalar replacement of aggregates and partial inlining.
+As a result, when patching a function, all its callers and its clones'
+callers need to be patched as well.
+
+@option{-flive-patching=inline-clone} disables the following optimization flags:
+@gccoptlist{-fwhole-program -fipa-pta -fipa-reference -fipa-ra @gol
+-fipa-icf -fipa-icf-functions -fipa-icf-variables @gol
+-fipa-bit-cp -fipa-vrp -fipa-pure-const -fipa-reference-addressable @gol
+-fipa-stack-alignment}
+
+@item inline-only-static
+
+Only enable inlining of static functions.
+As a result, when patching a static function, all its callers need to be
+patched as well.
+
+In addition to all the flags that -flive-patching=inline-clone disables,
+@option{-flive-patching=inline-only-static} disables the following additional
+optimization flags:
+@gccoptlist{-fipa-cp-clone -fipa-sra -fpartial-inlining -fipa-cp}
+
+@end table
+
+When -flive-patching is specified without any value, the default value
+is "inline-clone".
+
+This flag is disabled by default.
+
+Note that -flive-patching is not supported with the link-time optimizer
+(@option{-flto}).
+
@item -fisolate-erroneous-paths-dereference
@opindex fisolate-erroneous-paths-dereference
Detect paths that trigger erroneous or undefined behavior due to
--- gcc/flag-types.h
+++ gcc/flag-types.h
@@ -123,6 +123,14 @@ enum stack_reuse_level
SR_ALL
};
+/* The live patching level. */
+enum live_patching_level
+{
+ LIVE_PATCHING_NONE = 0,
+ LIVE_PATCHING_INLINE_ONLY_STATIC,
+ LIVE_PATCHING_INLINE_CLONE
+};
+
/* The algorithm used for basic block reordering. */
enum reorder_blocks_algorithm
{
--- gcc/ipa-inline.c
+++ gcc/ipa-inline.c
@@ -379,6 +379,12 @@ can_inline_edge_p (struct cgraph_edge *e, bool report,
e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
inlinable = false;
}
+ else if (callee->externally_visible
+ && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
+ {
+ e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
+ inlinable = false;
+ }
if (!inlinable && report)
report_inline_failed_reason (e);
return inlinable;
--- gcc/opts.c
+++ gcc/opts.c
@@ -699,6 +699,152 @@ default_options_optimization (struct gcc
lang_mask, handlers, loc, dc);
}
+/* Control IPA optimizations based on different live patching LEVEL. */
+static void
+control_options_for_live_patching (struct gcc_options *opts,
+ struct gcc_options *opts_set,
+ enum live_patching_level level,
+ location_t loc)
+{
+ gcc_assert (level > LIVE_PATCHING_NONE);
+
+ switch (level)
+ {
+ case LIVE_PATCHING_INLINE_ONLY_STATIC:
+ if (opts_set->x_flag_ipa_cp_clone && opts->x_flag_ipa_cp_clone)
+ error_at (loc,
+ "%<-fipa-cp-clone%> is incompatible with "
+ "%<-flive-patching=inline-only-static%>");
+ else
+ opts->x_flag_ipa_cp_clone = 0;
+
+ if (opts_set->x_flag_ipa_sra && opts->x_flag_ipa_sra)
+ error_at (loc,
+ "%<-fipa-sra%> is incompatible with "
+ "%<-flive-patching=inline-only-static%>");
+ else
+ opts->x_flag_ipa_sra = 0;
+
+ if (opts_set->x_flag_partial_inlining && opts->x_flag_partial_inlining)
+ error_at (loc,
+ "%<-fpartial-inlining%> is incompatible with "
+ "%<-flive-patching=inline-only-static%>");
+ else
+ opts->x_flag_partial_inlining = 0;
+
+ if (opts_set->x_flag_ipa_cp && opts->x_flag_ipa_cp)
+ error_at (loc,
+ "%<-fipa-cp%> is incompatible with "
+ "%<-flive-patching=inline-only-static%>");
+ else
+ opts->x_flag_ipa_cp = 0;
+
+ /* FALLTHROUGH. */
+ case LIVE_PATCHING_INLINE_CLONE:
+ /* live patching should disable whole-program optimization. */
+ if (opts_set->x_flag_whole_program && opts->x_flag_whole_program)
+ error_at (loc,
+ "%<-fwhole-program%> is incompatible with "
+ "%<-flive-patching=inline-only-static|inline-clone%>");
+ else
+ opts->x_flag_whole_program = 0;
+
+ /* visibility change should be excluded by !flag_whole_program
+ && !in_lto_p && !flag_ipa_cp_clone && !flag_ipa_sra
+ && !flag_partial_inlining. */
+
+ if (opts_set->x_flag_ipa_pta && opts->x_flag_ipa_pta)
+ error_at (loc,
+ "%<-fipa-pta%> is incompatible with "
+ "%<-flive-patching=inline-only-static|inline-clone%>");
+ else
+ opts->x_flag_ipa_pta = 0;
+
+ if (opts_set->x_flag_ipa_reference && opts->x_flag_ipa_reference)
+ error_at (loc,
+ "%<-fipa-reference%> is incompatible with "
+ "%<-flive-patching=inline-only-static|inline-clone%>");
+ else
+ opts->x_flag_ipa_reference = 0;
+
+ if (opts_set->x_flag_ipa_ra && opts->x_flag_ipa_ra)
+ error_at (loc,
+ "%<-fipa-ra%> is incompatible with "
+ "%<-flive-patching=inline-only-static|inline-clone%>");
+ else
+ opts->x_flag_ipa_ra = 0;
+
+ if (opts_set->x_flag_ipa_icf && opts->x_flag_ipa_icf)
+ error_at (loc,
+ "%<-fipa-icf%> is incompatible with "
+ "%<-flive-patching=inline-only-static|inline-clone%>");
+ else
+ opts->x_flag_ipa_icf = 0;
+
+ if (opts_set->x_flag_ipa_icf_functions && opts->x_flag_ipa_icf_functions)
+ error_at (loc,
+ "%<-fipa-icf-functions%> is incompatible with "
+ "%<-flive-patching=inline-only-static|inline-clone%>");
+ else
+ opts->x_flag_ipa_icf_functions = 0;
+
+ if (opts_set->x_flag_ipa_icf_variables && opts->x_flag_ipa_icf_variables)
+ error_at (loc,
+ "%<-fipa-icf-variables%> is incompatible with "
+ "%<-flive-patching=inline-only-static|inline-clone%>");
+ else
+ opts->x_flag_ipa_icf_variables = 0;
+
+ if (opts_set->x_flag_ipa_bit_cp && opts->x_flag_ipa_bit_cp)
+ error_at (loc,
+ "%<-fipa-bit-cp%> is incompatible with "
+ "%<-flive-patching=inline-only-static|inline-clone%>");
+ else
+ opts->x_flag_ipa_bit_cp = 0;
+
+ if (opts_set->x_flag_ipa_vrp && opts->x_flag_ipa_vrp)
+ error_at (loc,
+ "%<-fipa-vrp%> is incompatible with "
+ "%<-flive-patching=inline-only-static|inline-clone%>");
+ else
+ opts->x_flag_ipa_vrp = 0;
+
+ if (opts_set->x_flag_ipa_pure_const && opts->x_flag_ipa_pure_const)
+ error_at (loc,
+ "%<-fipa-pure-const%> is incompatible with "
+ "%<-flive-patching=inline-only-static|inline-clone%>");
+ else
+ opts->x_flag_ipa_pure_const = 0;
+
+ /* FIXME: disable unreachable code removal. */
+
+ /* discovery of functions/variables with no address taken. */
+// GCC 8 doesn't have these options.
+#if 0
+ if (opts_set->x_flag_ipa_reference_addressable
+ && opts->x_flag_ipa_reference_addressable)
+ error_at (loc,
+ "%<-fipa-reference-addressable%> is incompatible with "
+ "%<-flive-patching=inline-only-static|inline-clone%>");
+ else
+ opts->x_flag_ipa_reference_addressable = 0;
+
+ /* ipa stack alignment propagation. */
+ if (opts_set->x_flag_ipa_stack_alignment
+ && opts->x_flag_ipa_stack_alignment)
+ error_at (loc,
+ "%<-fipa-stack-alignment%> is incompatible with "
+ "%<-flive-patching=inline-only-static|inline-clone%>");
+ else
+ opts->x_flag_ipa_stack_alignment = 0;
+#endif
+
+ break;
+ default:
+ gcc_unreachable ();
+ }
+}
+
/* After all options at LOC have been read into OPTS and OPTS_SET,
finalize settings of those options and diagnose incompatible
combinations. */
@@ -1057,6 +1203,18 @@ finish_options (struct gcc_options *opts
sorry ("transactional memory is not supported with "
"%<-fsanitize=kernel-address%>");
+ /* Currently live patching is not supported with LTO. */
+ if (opts->x_flag_live_patching && opts->x_flag_lto)
+ sorry ("live patching is not supported with LTO");
+
+ /* Control IPA optimizations based on the -flive-patching level. */
+ if (opts->x_flag_live_patching)
+ {
+ control_options_for_live_patching (opts, opts_set,
+ opts->x_flag_live_patching,
+ loc);
+ }
+
/* Comes from final.c -- no real reason to change it. */
#define MAX_CODE_ALIGN 16
#define MAX_CODE_ALIGN_VALUE (1 << MAX_CODE_ALIGN)
--- /dev/null
+++ gcc/testsuite/gcc.dg/live-patching-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -flive-patching=inline-only-static -fdump-ipa-inline" } */
+
+extern int sum, n, m;
+
+int foo (int a)
+{
+ return a + n;
+}
+
+static int bar (int b)
+{
+ return b * m;
+}
+
+int main()
+{
+ sum = foo (m) + bar (n);
+ return 0;
+}
+
+/* { dg-final { scan-ipa-dump "foo/0 function has external linkage when the user requests only inlining static for live patching" "inline" } } */
--- /dev/null
+++ gcc/testsuite/gcc.dg/live-patching-2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -flive-patching -flto" } */
+
+int main()
+{
+ return 0;
+}
+
+/* { dg-message "sorry, unimplemented: live patching is not supported with LTO" "-flive-patching and -flto together" { target *-*-* } 0 } */
--- /dev/null
+++ gcc/testsuite/gcc.dg/live-patching-3.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -flive-patching -fwhole-program" } */
+
+int main()
+{
+ return 0;
+}
+
+/* { dg-message "'-fwhole-program' is incompatible with '-flive-patching=inline-only-static|inline-clone" "" {target "*-*-*"} 0 } */
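A sketch of what the new option buys in practice (hypothetical code; only the flag itself comes from this patch):

/* Compiled with `gcc -O2 -flive-patching=inline-only-static -c`, the
   externally visible function below stays out of inlining, so a live
   patch replacing it does not also have to patch its callers; the
   static helper may still be inlined as usual.  */
extern int counter;

static int scale (int v) { return 2 * v; }   /* may still be inlined */

int
hook (int v)                                 /* kept out-of-line */
{
  return scale (v) + counter;
}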


@@ -0,0 +1,73 @@
commit 9939b2f79bd9b75b99080a17f3d6f1214d543477
Author: qinzhao <qinzhao@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Wed Apr 3 19:00:25 2019 +0000
2019-04-03 qing zhao <qing.zhao@oracle.com>
PR tree-optimization/89730
* ipa-inline.c (can_inline_edge_p): Delete the checking for
-flive-patching=inline-only-static.
(can_inline_edge_by_limits_p): Add the checking for
-flive-patching=inline-only-static and grant always_inline
even when -flive-patching=inline-only-static is specified.
* gcc.dg/live-patching-4.c: New test.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@270134 138bc75d-0d04-0410-961f-82ee72b054a4
--- gcc/ipa-inline.c
+++ gcc/ipa-inline.c
@@ -385,12 +385,6 @@ can_inline_edge_p (struct cgraph_edge *e, bool report,
e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
inlinable = false;
}
- else if (callee->externally_visible
- && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
- {
- e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
- inlinable = false;
- }
if (!inlinable && report)
report_inline_failed_reason (e);
return inlinable;
@@ -433,6 +427,13 @@ can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
DECL_ATTRIBUTES (caller->decl))
&& !caller_growth_limits (e))
inlinable = false;
+ else if (callee->externally_visible
+ && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
+ && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
+ {
+ e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
+ inlinable = false;
+ }
/* Don't inline a function with a higher optimization level than the
caller. FIXME: this is really just tip of iceberg of handling
optimization attribute. */
--- /dev/null
+++ gcc/testsuite/gcc.dg/live-patching-4.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -flive-patching=inline-only-static -fdump-tree-einline-optimized" } */
+
+extern int sum, n, m;
+
+extern inline __attribute__((always_inline)) int foo (int a);
+inline __attribute__((always_inline)) int foo (int a)
+{
+ return a + n;
+}
+
+static int bar (int b)
+{
+ return b * m;
+}
+
+int main()
+{
+ sum = foo (m) + bar (n);
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "Inlining foo into main" "einline" } } */

@@ -0,0 +1,85 @@
commit 77e6311332590004c5aec82ceeb45e4d4d93f690
Author: redi <redi@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Thu Apr 11 08:52:22 2019 +0000
Clarify documentation for -flive-patching
* doc/invoke.texi (Optimize Options): Clarify -flive-patching docs.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@270276 138bc75d-0d04-0410-961f-82ee72b054a4
--- gcc/doc/invoke.texi
+++ gcc/doc/invoke.texi
@@ -9367,24 +9367,24 @@ This flag is enabled by default at @option{-O2} and @option{-Os}.
@item -flive-patching=@var{level}
@opindex flive-patching
-Control GCC's optimizations to provide a safe compilation for live-patching.
+Control GCC's optimizations to produce output suitable for live-patching.
If the compiler's optimization uses a function's body or information extracted
from its body to optimize/change another function, the latter is called an
impacted function of the former. If a function is patched, its impacted
functions should be patched too.
-The impacted functions are decided by the compiler's interprocedural
-optimizations. For example, inlining a function into its caller, cloning
-a function and changing its caller to call this new clone, or extracting
-a function's pureness/constness information to optimize its direct or
-indirect callers, etc.
+The impacted functions are determined by the compiler's interprocedural
+optimizations. For example, a caller is impacted when inlining a function
+into its caller,
+cloning a function and changing its caller to call this new clone,
+or extracting a function's pureness/constness information to optimize
+its direct or indirect callers, etc.
Usually, the more IPA optimizations enabled, the larger the number of
impacted functions for each function. In order to control the number of
-impacted functions and computed the list of impacted function easily,
-we provide control to partially enable IPA optimizations on two different
-levels.
+impacted functions and more easily compute the list of impacted function,
+IPA optimizations can be partially enabled at two different levels.
The @var{level} argument should be one of the following:
@@ -9395,7 +9395,7 @@ The @var{level} argument should be one of the following:
Only enable inlining and cloning optimizations, which includes inlining,
cloning, interprocedural scalar replacement of aggregates and partial inlining.
As a result, when patching a function, all its callers and its clones'
-callers need to be patched as well.
+callers are impacted, therefore need to be patched as well.
@option{-flive-patching=inline-clone} disables the following optimization flags:
@gccoptlist{-fwhole-program -fipa-pta -fipa-reference -fipa-ra @gol
@@ -9406,22 +9406,23 @@ callers need to be patched as well.
@item inline-only-static
Only enable inlining of static functions.
-As a result, when patching a static function, all its callers need to be
-patches as well.
+As a result, when patching a static function, all its callers are impacted
+and so need to be patched as well.
-In addition to all the flags that -flive-patching=inline-clone disables,
+In addition to all the flags that @option{-flive-patching=inline-clone}
+disables,
@option{-flive-patching=inline-only-static} disables the following additional
optimization flags:
@gccoptlist{-fipa-cp-clone -fipa-sra -fpartial-inlining -fipa-cp}
@end table
-When -flive-patching specified without any value, the default value
-is "inline-clone".
+When @option{-flive-patching} is specified without any value, the default value
+is @var{inline-clone}.
This flag is disabled by default.
-Note that -flive-patching is not supported with link-time optimizer.
+Note that @option{-flive-patching} is not supported with link-time optimization
(@option{-flto}).
@item -fisolate-erroneous-paths-dereference
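
As a usage sketch of the documented behavior (hypothetical file and function names): under -flive-patching=inline-only-static, functions with external linkage are no longer inlined into their callers, while static functions remain inlining candidates.

/* live-patch-example.c -- hypothetical illustration.
   Compile with: gcc -O2 -flive-patching=inline-only-static -c live-patch-example.c  */

static int
helper (int x)   /* static: may still be inlined into api */
{
  return 2 * x;
}

int
api (int x)      /* external linkage: kept out of line in its callers */
{
  return helper (x) + 1;
}

In this sketch, patching api later means shipping one new function body; no caller carries an inlined copy of it.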

@@ -0,0 +1,93 @@
2018-11-08 Roman Geissler <roman.geissler@amadeus.com>
* collect2.c (linker_select): Add USE_LLD_LD.
(ld_suffixes): Add ld.lld.
(main): Handle -fuse-ld=lld.
* common.opt (-fuse-ld=lld): New option.
* doc/invoke.texi (-fuse-ld=lld): Document.
* opts.c (common_handle_option): Handle OPT_fuse_ld_lld.
--- gcc/collect2.c
+++ gcc/collect2.c
@@ -831,6 +831,7 @@ main (int argc, char **argv)
USE_PLUGIN_LD,
USE_GOLD_LD,
USE_BFD_LD,
+ USE_LLD_LD,
USE_LD_MAX
} selected_linker = USE_DEFAULT_LD;
static const char *const ld_suffixes[USE_LD_MAX] =
@@ -838,7 +839,8 @@ main (int argc, char **argv)
"ld",
PLUGIN_LD_SUFFIX,
"ld.gold",
- "ld.bfd"
+ "ld.bfd",
+ "ld.lld"
};
static const char *const real_ld_suffix = "real-ld";
static const char *const collect_ld_suffix = "collect-ld";
@@ -1007,6 +1009,8 @@ main (int argc, char **argv)
selected_linker = USE_BFD_LD;
else if (strcmp (argv[i], "-fuse-ld=gold") == 0)
selected_linker = USE_GOLD_LD;
+ else if (strcmp (argv[i], "-fuse-ld=lld") == 0)
+ selected_linker = USE_LLD_LD;
#ifdef COLLECT_EXPORT_LIST
/* These flags are position independent, although their order
@@ -1096,7 +1100,8 @@ main (int argc, char **argv)
/* Maybe we know the right file to use (if not cross). */
ld_file_name = 0;
#ifdef DEFAULT_LINKER
- if (selected_linker == USE_BFD_LD || selected_linker == USE_GOLD_LD)
+ if (selected_linker == USE_BFD_LD || selected_linker == USE_GOLD_LD ||
+ selected_linker == USE_LLD_LD)
{
char *linker_name;
# ifdef HOST_EXECUTABLE_SUFFIX
@@ -1315,7 +1320,7 @@ main (int argc, char **argv)
else if (!use_collect_ld
&& strncmp (arg, "-fuse-ld=", 9) == 0)
{
- /* Do not pass -fuse-ld={bfd|gold} to the linker. */
+ /* Do not pass -fuse-ld={bfd|gold|lld} to the linker. */
ld1--;
ld2--;
}
--- gcc/common.opt
+++ gcc/common.opt
@@ -2732,6 +2732,10 @@ fuse-ld=gold
Common Driver Negative(fuse-ld=bfd)
Use the gold linker instead of the default linker.
+fuse-ld=lld
+Common Driver Negative(fuse-ld=lld)
+Use the lld LLVM linker instead of the default linker.
+
fuse-linker-plugin
Common Undocumented Var(flag_use_linker_plugin)
--- gcc/doc/invoke.texi
+++ gcc/doc/invoke.texi
@@ -12610,6 +12610,10 @@ Use the @command{bfd} linker instead of the default linker.
@opindex fuse-ld=gold
Use the @command{gold} linker instead of the default linker.
+@item -fuse-ld=lld
+@opindex fuse-ld=lld
+Use the LLVM @command{lld} linker instead of the default linker.
+
@cindex Libraries
@item -l@var{library}
@itemx -l @var{library}
--- gcc/opts.c
+++ gcc/opts.c
@@ -2557,6 +2557,7 @@ common_handle_option (struct gcc_options *opts,
case OPT_fuse_ld_bfd:
case OPT_fuse_ld_gold:
+ case OPT_fuse_ld_lld:
case OPT_fuse_linker_plugin:
/* No-op. Used by the driver and passed to us because it starts with f.*/
break;
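
With these hooks in place, linker selection stays a driver-level switch; collect2 merely maps the option to an ld.lld executable name. A minimal smoke test, assuming an ld.lld binary is installed on the driver's search path:

/* hello.c -- any translation unit will do.
   Build with: gcc -fuse-ld=lld -o hello hello.c  */
#include <stdio.h>

int
main (void)
{
  printf ("linked with lld\n");
  return 0;
}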

@@ -0,0 +1,279 @@
2019-05-29 Jakub Jelinek <jakub@redhat.com>
PR fortran/90329
* lto-streamer.h (LTO_minor_version): Bump to 2.
Backported from mainline
2019-05-16 Jakub Jelinek <jakub@redhat.com>
PR fortran/90329
* tree-core.h (struct tree_decl_common): Document
decl_nonshareable_flag for PARM_DECLs.
* tree.h (DECL_HIDDEN_STRING_LENGTH): Define.
* calls.c (expand_call): Don't try tail call if caller
has any DECL_HIDDEN_STRING_LENGTH PARM_DECLs that are or might be
passed on the stack and callee needs to pass any arguments on the
stack.
* tree-streamer-in.c (unpack_ts_decl_common_value_fields): Use
else if instead of series of mutually exclusive ifs. Handle
DECL_HIDDEN_STRING_LENGTH for PARM_DECLs.
* tree-streamer-out.c (pack_ts_decl_common_value_fields): Likewise.
* lang.opt (fbroken-callers): Remove.
(ftail-call-workaround, ftail-call-workaround=): New options.
* gfortran.h (struct gfc_namespace): Add implicit_interface_calls.
* interface.c (gfc_procedure_use): Set implicit_interface_calls
for calls to implicit interface procedures.
* trans-decl.c (create_function_arglist): Use flag_tail_call_workaround
instead of flag_broken_callers. If it is not 2, also require
sym->ns->implicit_interface_calls.
* invoke.texi (fbroken-callers): Remove documentation.
(ftail-call-workaround, ftail-call-workaround=): Document.
2019-05-19 Thomas Koenig <tkoenig@gcc.gnu.org>
PR fortran/90329
* invoke.texi: Document -fbroken-callers.
* lang.opt: Add -fbroken-callers.
* trans-decl.c (create_function_arglist): Only set
DECL_HIDDEN_STRING_LENGTH if flag_broken_callers is set.
2019-05-16 Jakub Jelinek <jakub@redhat.com>
PR fortran/90329
* trans-decl.c (create_function_arglist): Set
DECL_HIDDEN_STRING_LENGTH on hidden string length PARM_DECLs if
len is constant.
--- gcc/calls.c
+++ gcc/calls.c
@@ -3754,6 +3754,28 @@ expand_call (tree exp, rtx target, int ignore)
|| dbg_cnt (tail_call) == false)
try_tail_call = 0;
+ /* Workaround buggy C/C++ wrappers around Fortran routines with
+ character(len=constant) arguments if the hidden string length arguments
+ are passed on the stack; if the callers forget to pass those arguments,
+ attempting to tail call in such routines leads to stack corruption.
+ Avoid tail calls in functions where at least one such hidden string
+ length argument is passed (partially or fully) on the stack in the
+ caller and the callee needs to pass any arguments on the stack.
+ See PR90329. */
+ if (try_tail_call && maybe_ne (args_size.constant, 0))
+ for (tree arg = DECL_ARGUMENTS (current_function_decl);
+ arg; arg = DECL_CHAIN (arg))
+ if (DECL_HIDDEN_STRING_LENGTH (arg) && DECL_INCOMING_RTL (arg))
+ {
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, DECL_INCOMING_RTL (arg), NONCONST)
+ if (MEM_P (*iter))
+ {
+ try_tail_call = 0;
+ break;
+ }
+ }
+
/* If the user has marked the function as requiring tail-call
optimization, attempt it. */
if (must_tail_call)
--- gcc/fortran/gfortran.h
+++ gcc/fortran/gfortran.h
@@ -1857,6 +1857,9 @@ typedef struct gfc_namespace
/* Set to 1 for !$ACC ROUTINE namespaces. */
unsigned oacc_routine:1;
+
+ /* Set to 1 if there are any calls to procedures with implicit interface. */
+ unsigned implicit_interface_calls:1;
}
gfc_namespace;
--- gcc/fortran/interface.c
+++ gcc/fortran/interface.c
@@ -3657,6 +3657,7 @@ gfc_procedure_use (gfc_symbol *sym, gfc_actual_arglist **ap, locus *where)
gfc_warning (OPT_Wimplicit_procedure,
"Procedure %qs called at %L is not explicitly declared",
sym->name, where);
+ gfc_find_proc_namespace (sym->ns)->implicit_interface_calls = 1;
}
if (sym->attr.if_source == IFSRC_UNKNOWN)
--- gcc/fortran/invoke.texi
+++ gcc/fortran/invoke.texi
@@ -181,7 +181,8 @@ and warnings}.
@item Code Generation Options
@xref{Code Gen Options,,Options for code generation conventions}.
@gccoptlist{-faggressive-function-elimination -fblas-matmul-limit=@var{n} @gol
--fbounds-check -fcheck-array-temporaries @gol
+-fbounds-check -ftail-call-workaround -ftail-call-workaround=@var{n} @gol
+-fcheck-array-temporaries @gol
-fcheck=@var{<all|array-temps|bounds|do|mem|pointer|recursion>} @gol
-fcoarray=@var{<none|single|lib>} -fexternal-blas -ff2c
-ffrontend-loop-interchange @gol
@@ -1580,6 +1581,39 @@ warnings for generated array temporaries.
@c Note: This option is also referred in gcc's manpage
Deprecated alias for @option{-fcheck=bounds}.
+@item -ftail-call-workaround
+@itemx -ftail-call-workaround=@var{n}
+@opindex @code{tail-call-workaround}
+Some C interfaces to Fortran codes violate the gfortran ABI by
+omitting the hidden character length arguments as described in
+@xref{Argument passing conventions}. This can lead to crashes
+because pushing arguments for tail calls can overflow the stack.
+
+To provide a workaround for existing binary packages, this option
+disables tail call optimization for gfortran procedures with character
+arguments. With @option{-ftail-call-workaround=2} tail call optimization
+is disabled in all gfortran procedures with character arguments,
+with @option{-ftail-call-workaround=1} or equivalent
+@option{-ftail-call-workaround} only in gfortran procedures with character
+arguments that call implicitly prototyped procedures.
+
+Using this option can lead to problems including crashes due to
+insufficient stack space.
+
+It is @emph{very strongly} recommended to fix the code in question.
+The @option{-fc-prototypes-external} option can be used to generate
+prototypes which conform to gfortran's ABI, for inclusion in the
+source code.
+
+Support for this option will likely be withdrawn in a future release
+of gfortran.
+
+The negative form, @option{-fno-tail-call-workaround} or equivalent
+@option{-ftail-call-workaround=0}, can be used to disable this option.
+
+Default is currently @option{-ftail-call-workaround}, this will change
+in future releases.
+
@item -fcheck-array-temporaries
@opindex @code{fcheck-array-temporaries}
Deprecated alias for @option{-fcheck=array-temps}.
--- gcc/fortran/lang.opt
+++ gcc/fortran/lang.opt
@@ -742,6 +742,13 @@ fsign-zero
Fortran Var(flag_sign_zero) Init(1)
Apply negative sign to zero values.
+ftail-call-workaround
+Fortran Alias(ftail-call-workaround=,1,0)
+
+ftail-call-workaround=
+Fortran RejectNegative Joined UInteger IntegerRange(0, 2) Var(flag_tail_call_workaround) Init(1)
+Disallow tail call optimization when a calling routine may have omitted character lengths.
+
funderscoring
Fortran Var(flag_underscoring) Init(1)
Append underscores to externally visible names.
--- gcc/fortran/trans-decl.c
+++ gcc/fortran/trans-decl.c
@@ -2513,6 +2513,17 @@ create_function_arglist (gfc_symbol * sym)
TREE_READONLY (length) = 1;
gfc_finish_decl (length);
+ /* Marking the length DECL_HIDDEN_STRING_LENGTH will lead
+ to tail calls being disabled. Only do that if we
+ potentially have broken callers. */
+ if (flag_tail_call_workaround
+ && f->sym->ts.u.cl
+ && f->sym->ts.u.cl->length
+ && f->sym->ts.u.cl->length->expr_type == EXPR_CONSTANT
+ && (flag_tail_call_workaround == 2
+ || f->sym->ns->implicit_interface_calls))
+ DECL_HIDDEN_STRING_LENGTH (length) = 1;
+
/* Remember the passed value. */
if (!f->sym->ts.u.cl || f->sym->ts.u.cl->passed_length)
{
--- gcc/lto-streamer.h
+++ gcc/lto-streamer.h
@@ -121,7 +121,7 @@ along with GCC; see the file COPYING3. If not see
form followed by the data for the string. */
#define LTO_major_version 7
-#define LTO_minor_version 1
+#define LTO_minor_version 2
typedef unsigned char lto_decl_flags_t;
--- gcc/tree-core.h
+++ gcc/tree-core.h
@@ -1644,6 +1644,7 @@ struct GTY(()) tree_decl_common {
/* In a VAR_DECL and PARM_DECL, this is DECL_READ_P. */
unsigned decl_read_flag : 1;
/* In a VAR_DECL or RESULT_DECL, this is DECL_NONSHAREABLE. */
+ /* In a PARM_DECL, this is DECL_HIDDEN_STRING_LENGTH. */
unsigned decl_nonshareable_flag : 1;
/* DECL_OFFSET_ALIGN, used only for FIELD_DECLs. */
--- gcc/tree-streamer-in.c
+++ gcc/tree-streamer-in.c
@@ -252,7 +252,7 @@ unpack_ts_decl_common_value_fields (struct bitpack_d *bp, tree expr)
LABEL_DECL_UID (expr) = -1;
}
- if (TREE_CODE (expr) == FIELD_DECL)
+ else if (TREE_CODE (expr) == FIELD_DECL)
{
DECL_PACKED (expr) = (unsigned) bp_unpack_value (bp, 1);
DECL_NONADDRESSABLE_P (expr) = (unsigned) bp_unpack_value (bp, 1);
@@ -260,12 +260,15 @@ unpack_ts_decl_common_value_fields (struct bitpack_d *bp, tree expr)
expr->decl_common.off_align = bp_unpack_value (bp, 8);
}
- if (VAR_P (expr))
+ else if (VAR_P (expr))
{
DECL_HAS_DEBUG_EXPR_P (expr) = (unsigned) bp_unpack_value (bp, 1);
DECL_NONLOCAL_FRAME (expr) = (unsigned) bp_unpack_value (bp, 1);
}
+ else if (TREE_CODE (expr) == PARM_DECL)
+ DECL_HIDDEN_STRING_LENGTH (expr) = (unsigned) bp_unpack_value (bp, 1);
+
if (TREE_CODE (expr) == RESULT_DECL
|| TREE_CODE (expr) == PARM_DECL
|| VAR_P (expr))
--- gcc/tree-streamer-out.c
+++ gcc/tree-streamer-out.c
@@ -212,7 +212,7 @@ pack_ts_decl_common_value_fields (struct bitpack_d *bp, tree expr)
bp_pack_var_len_unsigned (bp, EH_LANDING_PAD_NR (expr));
}
- if (TREE_CODE (expr) == FIELD_DECL)
+ else if (TREE_CODE (expr) == FIELD_DECL)
{
bp_pack_value (bp, DECL_PACKED (expr), 1);
bp_pack_value (bp, DECL_NONADDRESSABLE_P (expr), 1);
@@ -220,12 +220,15 @@ pack_ts_decl_common_value_fields (struct bitpack_d *bp, tree expr)
bp_pack_value (bp, expr->decl_common.off_align, 8);
}
- if (VAR_P (expr))
+ else if (VAR_P (expr))
{
bp_pack_value (bp, DECL_HAS_DEBUG_EXPR_P (expr), 1);
bp_pack_value (bp, DECL_NONLOCAL_FRAME (expr), 1);
}
+ else if (TREE_CODE (expr) == PARM_DECL)
+ bp_pack_value (bp, DECL_HIDDEN_STRING_LENGTH (expr), 1);
+
if (TREE_CODE (expr) == RESULT_DECL
|| TREE_CODE (expr) == PARM_DECL
|| VAR_P (expr))
--- gcc/tree.h
+++ gcc/tree.h
@@ -909,6 +909,11 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
(TREE_CHECK2 (NODE, VAR_DECL, \
RESULT_DECL)->decl_common.decl_nonshareable_flag)
+/* In a PARM_DECL, set for Fortran hidden string length arguments that some
+ buggy callers don't pass to the callee. */
+#define DECL_HIDDEN_STRING_LENGTH(NODE) \
+ (TREE_CHECK (NODE, PARM_DECL)->decl_common.decl_nonshareable_flag)
+
/* In a CALL_EXPR, means that the call is the jump from a thunk to the
thunked-to function. */
#define CALL_FROM_THUNK_P(NODE) (CALL_EXPR_CHECK (NODE)->base.protected_flag)
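
The bug class this works around is easiest to see from the C side. A hedged sketch with hypothetical names (the hidden argument's exact type follows gfortran's ABI; size_t is assumed here):

/* The wrapped Fortran procedure would look something like:

       subroutine fill (s)
         character(len=32) :: s
         s = 'filled'
       end subroutine fill

   gfortran appends a hidden length argument for S, so only the second
   prototype below matches the ABI.  */
#include <stddef.h>

extern void fill_ (char *s);                   /* buggy wrapper: length omitted */
extern void fill_ok_ (char *s, size_t s_len);  /* conforming prototype */

When a buggy caller never pushes the hidden length, a tail call inside the callee that lays out its own outgoing stack arguments can corrupt memory beyond what the caller allocated; marking the length parameter DECL_HIDDEN_STRING_LENGTH lets expand_call (first hunk above) veto the tail call instead.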

@@ -0,0 +1,45 @@
2019-07-15 Andreas Krebbel <krebbel@linux.ibm.com>
Backport from mainline
2019-07-01 Andreas Krebbel <krebbel@linux.ibm.com>
* config/s390/vector.md: Fix shift count operand printing.
--- gcc/config/s390/vector.md
+++ gcc/config/s390/vector.md
@@ -944,7 +944,7 @@
(VEC_SHIFTS:VI (match_operand:VI 1 "register_operand" "v")
(match_operand:SI 2 "nonmemory_operand" "an")))]
"TARGET_VX"
- "<vec_shifts_mnem><bhfgq>\t%v0,%v1,%Y2"
+ "<vec_shifts_mnem><bhfgq>\t%v0,%v1,<addr_style_op_ops>"
[(set_attr "op_type" "VRS")])
; Shift each element by corresponding vector element
--- /dev/null
+++ gcc/testsuite/gcc.target/s390/vector/vec-shift-2.c
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -mzarch -march=z13 --save-temps" } */
+
+/* { dg-final { scan-assembler-times "veslf" 1 } } */
+
+typedef __attribute__((vector_size(16))) signed int v4si;
+
+v4si __attribute__((noinline,noclone))
+shift_left_by_scalar (v4si in, int shift_count)
+{
+ return in << (3 + shift_count);
+}
+
+int
+main ()
+{
+ v4si a = { 1, 2, 3, 4 };
+ v4si result = shift_left_by_scalar (a, 1);
+
+ if (result[1] != 32)
+ __builtin_abort ();
+
+ return 0;
+}

@@ -1,10 +1,10 @@
%global DATE 20180905
%global SVNREV 264110
%global gcc_version 8.2.1
%global DATE 20190507
%global SVNREV 270976
%global gcc_version 8.3.1
%global gcc_major 8
# Note, gcc_release must be integer, if you want to add suffixes to
# %%{release}, append them after %%{gcc_release} on Release: line.
%global gcc_release 3
%global gcc_release 4
%global nvptx_tools_gitrev c28050f60193b3b95a18866a96f03334e874e78f
%global nvptx_newlib_gitrev aadc8eb0ec43b7cd0dd2dfb484bae63c8b05ef24
%global _unpackaged_files_terminate_build 0
@@ -268,16 +268,20 @@ Patch11: gcc8-rh1512529-aarch64.patch
Patch12: gcc8-mcet.patch
Patch13: gcc8-rh1574936.patch
Patch14: gcc8-libgcc-hardened.patch
Patch15: gcc8-rh1612514.patch
Patch16: gcc8-pr60790.patch
Patch17: gcc8-rh1652016.patch
Patch21: gcc8-rh1652929-1.patch
Patch22: gcc8-rh1652929-2.patch
Patch23: gcc8-rh1652929-3.patch
Patch24: gcc8-rh1652929-4.patch
Patch25: gcc8-rh1652929-5.patch
Patch15: gcc8-rh1670535.patch
Patch16: gcc8-pr85400.patch
Patch17: gcc8-libgomp-20190503.patch
Patch18: gcc8-pr86747.patch
Patch19: gcc8-libgomp-testsuite.patch
Patch20: gcc8-rh1711346.patch
Patch21: gcc8-rh1730380.patch
Patch22: gcc8-pr86098.patch
Patch23: gcc8-pr90139.patch
Patch24: gcc8-pr90756.patch
Patch30: gcc8-rh1668903-1.patch
Patch31: gcc8-rh1668903-2.patch
Patch32: gcc8-rh1668903-3.patch
Patch1000: nvptx-tools-no-ptxas.patch
Patch1001: nvptx-tools-build.patch
@@ -847,15 +851,20 @@ to NVidia PTX capable devices if available.
%patch13 -p0 -b .rh1574936~
%patch14 -p0 -b .libgcc-hardened~
%endif
%patch15 -p0 -b .rh1612514~
%patch16 -p0 -b .pr60790~
%patch17 -p1 -b .rh1652016~
%patch15 -p0 -b .rh1670535~
%patch16 -p0 -b .pr85400~
%patch17 -p0 -b .libgomp-20190503~
%patch18 -p0 -b .pr86747~
%patch19 -p0 -b .libgomp-testsuite~
%patch20 -p0 -b .rh1711346~
%patch21 -p0 -b .rh1730380~
%patch22 -p0 -b .pr86098~
%patch23 -p0 -b .pr90139~
%patch24 -p0 -b .pr90756~
%patch21 -p1 -b .rh1652929-1~
%patch22 -p1 -b .rh1652929-2~
%patch23 -p1 -b .rh1652929-3~
%patch24 -p1 -b .rh1652929-4~
%patch25 -p1 -b .rh1652929-5~
%patch30 -p0 -b .rh1668903-1~
%patch31 -p0 -b .rh1668903-2~
%patch32 -p0 -b .rh1668903-3~
cd nvptx-tools-%{nvptx_tools_gitrev}
%patch1000 -p1 -b .nvptx-tools-no-ptxas~
@@ -1350,36 +1359,39 @@ mkdir -p %{buildroot}/%{_lib}
mv -f %{buildroot}%{_prefix}/%{_lib}/libgcc_s.so.1 %{buildroot}/%{_lib}/libgcc_s-%{gcc_major}-%{DATE}.so.1
chmod 755 %{buildroot}/%{_lib}/libgcc_s-%{gcc_major}-%{DATE}.so.1
ln -sf libgcc_s-%{gcc_major}-%{DATE}.so.1 %{buildroot}/%{_lib}/libgcc_s.so.1
%ifarch %{ix86} x86_64 ppc ppc64 ppc64p7 ppc64le %{arm}
rm -f $FULLPATH/libgcc_s.so
echo '/* GNU ld script
Use the shared library, but some functions are only in
the static library, so try that secondarily. */
OUTPUT_FORMAT('`gcc -Wl,--print-output-format -nostdlib -r -o /dev/null`')
GROUP ( /%{_lib}/libgcc_s.so.1 libgcc.a )' > $FULLPATH/libgcc_s.so
%else
ln -sf /%{_lib}/libgcc_s.so.1 $FULLPATH/libgcc_s.so
%endif
%ifarch sparcv9 ppc
%ifarch ppc
rm -f $FULLPATH/64/libgcc_s.so
echo '/* GNU ld script
Use the shared library, but some functions are only in
the static library, so try that secondarily. */
OUTPUT_FORMAT('`gcc -m64 -Wl,--print-output-format -nostdlib -r -o /dev/null`')
GROUP ( /lib64/libgcc_s.so.1 libgcc.a )' > $FULLPATH/64/libgcc_s.so
%else
ln -sf /lib64/libgcc_s.so.1 $FULLPATH/64/libgcc_s.so
%endif
%endif
%ifarch %{multilib_64_archs}
%ifarch x86_64 ppc64 ppc64p7
rm -f $FULLPATH/64/libgcc_s.so
echo '/* GNU ld script
Use the shared library, but some functions are only in
the static library, so try that secondarily. */
OUTPUT_FORMAT('`gcc -m32 -Wl,--print-output-format -nostdlib -r -o /dev/null`')
GROUP ( /lib/libgcc_s.so.1 libgcc.a )' > $FULLPATH/32/libgcc_s.so
%else
ln -sf /lib/libgcc_s.so.1 $FULLPATH/32/libgcc_s.so
%endif
%ifarch ppc
rm -f $FULLPATH/libgcc_s.so
echo '/* GNU ld script
Use the shared library, but some functions are only in
the static library, so try that secondarily. */
OUTPUT_FORMAT(elf32-powerpc)
GROUP ( /lib/libgcc_s.so.1 libgcc.a )' > $FULLPATH/libgcc_s.so
%endif
%ifarch ppc64 ppc64p7
rm -f $FULLPATH/32/libgcc_s.so
echo '/* GNU ld script
Use the shared library, but some functions are only in
the static library, so try that secondarily. */
OUTPUT_FORMAT(elf32-powerpc)
GROUP ( /lib/libgcc_s.so.1 libgcc.a )' > $FULLPATH/32/libgcc_s.so
%endif
%ifarch %{arm}
rm -f $FULLPATH/libgcc_s.so
echo '/* GNU ld script
Use the shared library, but some functions are only in
the static library, so try that secondarily. */
OUTPUT_FORMAT(elf32-littlearm)
GROUP ( /lib/libgcc_s.so.1 libgcc.a )' > $FULLPATH/libgcc_s.so
%endif
mv -f %{buildroot}%{_prefix}/%{_lib}/libgomp.spec $FULLPATH/
@@ -2335,6 +2347,14 @@ fi
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/vec_types.h
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/htmintrin.h
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/htmxlintrin.h
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/bmi2intrin.h
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/bmiintrin.h
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/xmmintrin.h
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/mm_malloc.h
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/emmintrin.h
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/mmintrin.h
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/x86intrin.h
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/amo.h
%endif
%ifarch %{arm}
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/unwind-arm-common.h
@@ -3157,21 +3177,60 @@ fi
%endif
%changelog
* Mon Dec 10 2018 Marek Polacek <polacek@redhat.com 8.2.1-3.5
* Tue Jul 16 2019 Marek Polacek <polacek@redhat.com> 8.3.1-4.5
- fix shift count operand printing (#1730380)
- fix tree-outof-ssa.c ICE with vector types (PR middle-end/90139, #1730454)
- fix out-of-ssa with unsupported vector types (PR rtl-optimization/90756,
#1730454)
- fix ICE with template placeholder for TTP (PR c++/86098, #1730454)
* Mon Jun 3 2019 Marek Polacek <polacek@redhat.com> 8.3.1-4.4
- backport workaround for broken C/C++ wrappers to LAPACK (#1711346)
* Fri May 24 2019 Marek Polacek <polacek@redhat.com> 8.3.1-4.3
- additional fix for the libgomp testsuite (#1707568)
* Tue May 21 2019 Marek Polacek <polacek@redhat.com> 8.3.1-4.2
- backport the -fuse-ld=lld option (#1670535)
- TLS model fix (#1678555, PR c++/85400)
- two small autoFDO fixes (#1686082)
- libgomp update (#1707568)
- member template redeclaration fix (#1652704, PR c++/86747)
- turn libgcc_s.so into a linker script on i?86, x86_64, ppc64le and also on
ppc and ppc64 for 64-bit multilib (#1708309)
- avoid using unaligned vsx or lxvd2x/stxvd2x for memcpy/memmove inline
expansion (#1666977)
* Wed May 8 2019 Marek Polacek <polacek@redhat.com> 8.3.1-4.1
- tweak gcc8-rh1668903-1.patch and gcc8-rh1668903-2.patch patches
* Tue May 7 2019 Marek Polacek <polacek@redhat.com> 8.3.1-4
- update from Fedora 8.3.1-4 (#1680182)
- drop gcc8-pr60790.patch, gcc8-pr89629.patch, gcc8-rh1668903-4.patch
* Tue May 7 2019 Marek Polacek <polacek@redhat.com> 8.3.1-3
- update from Fedora 8.3.1-3 (#1680182)
- remove load and test FP splitter (#1673116)
- fix *movsi_from_df (#1677652)
- add missing headers
- add support for live patching (#1668903)
- retire gcc8-rh1612514.patch, gcc8-rh1652016.patch, gcc8-rh1652929-?.patch
* Mon Dec 10 2018 Marek Polacek <polacek@redhat.com> 8.2.1-3.5
- remove python2 dependecy (#1595385)
* Tue Nov 27 2018 Jeff Law <law@redhat.com 8.2.1-3.4
* Tue Nov 27 2018 Jeff Law <law@redhat.com> 8.2.1-3.4
- Backport PPC string inlines from trunk which allow for valgrind's
memcheck to work properly (#1652929)
- Backport bugfix for clz pattern on s390 affecting jemalloc (#1652016)
* Mon Oct 15 2018 Marek Polacek <polacek@redhat.com 8.2.1-3.3
* Mon Oct 15 2018 Marek Polacek <polacek@redhat.com> 8.2.1-3.3
- avoid IFUNC resolver access to uninitialized data (#1559350, PR libgcc/60790)
* Thu Oct 11 2018 Marek Polacek <polacek@redhat.com 8.2.1-3.2
* Thu Oct 11 2018 Marek Polacek <polacek@redhat.com> 8.2.1-3.2
- fix typos in manual (#1612514)
* Mon Oct 8 2018 Marek Polacek <polacek@redhat.com 8.2.1-3.1
* Mon Oct 8 2018 Marek Polacek <polacek@redhat.com> 8.2.1-3.1
- enable hardening of binaries (#1624114)
- disable libgccjit on RHEL