import gcc-8.2.1-3.5.el8
This commit is contained in:
commit
0e369741f6
3
.gcc.metadata
Normal file
3
.gcc.metadata
Normal file
@ -0,0 +1,3 @@
|
||||
1fe3aa7ce95faa0f4d7f08f0dfefd86ff4b43015 SOURCES/gcc-8.2.1-20180905.tar.xz
|
||||
3bdb3cc01fa7690a0e20ea5cfffcbe690f7665eb SOURCES/nvptx-newlib-aadc8eb0ec43b7cd0dd2dfb484bae63c8b05ef24.tar.xz
|
||||
ce8eb83be0ac37fb5d5388df455a980fe37b4f13 SOURCES/nvptx-tools-c28050f60193b3b95a18866a96f03334e874e78f.tar.xz
|
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
SOURCES/gcc-8.2.1-20180905.tar.xz
|
||||
SOURCES/nvptx-newlib-aadc8eb0ec43b7cd0dd2dfb484bae63c8b05ef24.tar.xz
|
||||
SOURCES/nvptx-tools-c28050f60193b3b95a18866a96f03334e874e78f.tar.xz
|
27
SOURCES/gcc8-Wno-format-security.patch
Normal file
27
SOURCES/gcc8-Wno-format-security.patch
Normal file
@ -0,0 +1,27 @@
|
||||
2017-02-25 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* configure.ac: When adding -Wno-format, also add -Wno-format-security.
|
||||
* configure: Regenerated.
|
||||
|
||||
--- gcc/configure.ac.jj 2017-02-13 12:20:53.000000000 +0100
|
||||
+++ gcc/configure.ac 2017-02-25 12:42:32.859175403 +0100
|
||||
@@ -481,7 +481,7 @@ AC_ARG_ENABLE(build-format-warnings,
|
||||
AS_HELP_STRING([--disable-build-format-warnings],[don't use -Wformat while building GCC]),
|
||||
[],[enable_build_format_warnings=yes])
|
||||
AS_IF([test $enable_build_format_warnings = no],
|
||||
- [wf_opt=-Wno-format],[wf_opt=])
|
||||
+ [wf_opt="-Wno-format -Wno-format-security"],[wf_opt=])
|
||||
ACX_PROG_CXX_WARNING_OPTS(
|
||||
m4_quote(m4_do([-W -Wall -Wno-narrowing -Wwrite-strings ],
|
||||
[-Wcast-qual $wf_opt])), [loose_warn])
|
||||
--- gcc/configure.jj 2017-02-13 12:20:52.000000000 +0100
|
||||
+++ gcc/configure 2017-02-25 12:42:50.041946391 +0100
|
||||
@@ -6647,7 +6647,7 @@ else
|
||||
fi
|
||||
|
||||
if test $enable_build_format_warnings = no; then :
|
||||
- wf_opt=-Wno-format
|
||||
+ wf_opt="-Wno-format -Wno-format-security"
|
||||
else
|
||||
wf_opt=
|
||||
fi
|
117
SOURCES/gcc8-foffload-default.patch
Normal file
117
SOURCES/gcc8-foffload-default.patch
Normal file
@ -0,0 +1,117 @@
|
||||
2017-01-20 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* gcc.c (offload_targets_default): New variable.
|
||||
(process_command): Set it if -foffload is defaulted.
|
||||
(driver::maybe_putenv_OFFLOAD_TARGETS): Add OFFLOAD_TARGET_DEFAULT=1
|
||||
into environment if -foffload has been defaulted.
|
||||
* lto-wrapper.c (OFFLOAD_TARGET_DEFAULT_ENV): Define.
|
||||
(compile_images_for_offload_targets): If OFFLOAD_TARGET_DEFAULT
|
||||
is in the environment, don't fail if corresponding mkoffload
|
||||
can't be found. Free and clear offload_names if no valid offload
|
||||
is found.
|
||||
libgomp/
|
||||
* target.c (gomp_load_plugin_for_device): If a plugin can't be
|
||||
dlopened, silently assume it has no devices.
|
||||
|
||||
--- gcc/gcc.c.jj 2017-01-17 10:28:40.000000000 +0100
|
||||
+++ gcc/gcc.c 2017-01-20 16:26:29.649962902 +0100
|
||||
@@ -290,6 +290,10 @@ static const char *spec_host_machine = D
|
||||
|
||||
static char *offload_targets = NULL;
|
||||
|
||||
+/* Set to true if -foffload has not been used and offload_targets
|
||||
+ is set to the configured-in default. */
|
||||
+static bool offload_targets_default;
|
||||
+
|
||||
/* Nonzero if cross-compiling.
|
||||
When -b is used, the value comes from the `specs' file. */
|
||||
|
||||
@@ -4457,7 +4461,10 @@ process_command (unsigned int decoded_op
|
||||
/* If the user didn't specify any, default to all configured offload
|
||||
targets. */
|
||||
if (ENABLE_OFFLOADING && offload_targets == NULL)
|
||||
- handle_foffload_option (OFFLOAD_TARGETS);
|
||||
+ {
|
||||
+ handle_foffload_option (OFFLOAD_TARGETS);
|
||||
+ offload_targets_default = true;
|
||||
+ }
|
||||
|
||||
if (output_file
|
||||
&& strcmp (output_file, "-") != 0
|
||||
@@ -7693,6 +7700,8 @@ driver::maybe_putenv_OFFLOAD_TARGETS ()
|
||||
obstack_grow (&collect_obstack, offload_targets,
|
||||
strlen (offload_targets) + 1);
|
||||
xputenv (XOBFINISH (&collect_obstack, char *));
|
||||
+ if (offload_targets_default)
|
||||
+ xputenv ("OFFLOAD_TARGET_DEFAULT=1");
|
||||
}
|
||||
|
||||
free (offload_targets);
|
||||
--- gcc/lto-wrapper.c.jj 2017-01-01 12:45:34.000000000 +0100
|
||||
+++ gcc/lto-wrapper.c 2017-01-20 16:34:18.294016997 +0100
|
||||
@@ -52,6 +52,7 @@ along with GCC; see the file COPYING3.
|
||||
/* Environment variable, used for passing the names of offload targets from GCC
|
||||
driver to lto-wrapper. */
|
||||
#define OFFLOAD_TARGET_NAMES_ENV "OFFLOAD_TARGET_NAMES"
|
||||
+#define OFFLOAD_TARGET_DEFAULT_ENV "OFFLOAD_TARGET_DEFAULT"
|
||||
|
||||
enum lto_mode_d {
|
||||
LTO_MODE_NONE, /* Not doing LTO. */
|
||||
@@ -790,8 +791,10 @@ compile_images_for_offload_targets (unsi
|
||||
if (!target_names)
|
||||
return;
|
||||
unsigned num_targets = parse_env_var (target_names, &names, NULL);
|
||||
+ const char *target_names_default = getenv (OFFLOAD_TARGET_DEFAULT_ENV);
|
||||
|
||||
int next_name_entry = 0;
|
||||
+ bool hsa_seen = false;
|
||||
const char *compiler_path = getenv ("COMPILER_PATH");
|
||||
if (!compiler_path)
|
||||
goto out;
|
||||
@@ -804,18 +807,32 @@ compile_images_for_offload_targets (unsi
|
||||
/* HSA does not use LTO-like streaming and a different compiler, skip
|
||||
it. */
|
||||
if (strcmp (names[i], "hsa") == 0)
|
||||
- continue;
|
||||
+ {
|
||||
+ hsa_seen = true;
|
||||
+ continue;
|
||||
+ }
|
||||
|
||||
offload_names[next_name_entry]
|
||||
= compile_offload_image (names[i], compiler_path, in_argc, in_argv,
|
||||
compiler_opts, compiler_opt_count,
|
||||
linker_opts, linker_opt_count);
|
||||
if (!offload_names[next_name_entry])
|
||||
- fatal_error (input_location,
|
||||
- "problem with building target image for %s\n", names[i]);
|
||||
+ {
|
||||
+ if (target_names_default != NULL)
|
||||
+ continue;
|
||||
+ fatal_error (input_location,
|
||||
+ "problem with building target image for %s\n",
|
||||
+ names[i]);
|
||||
+ }
|
||||
next_name_entry++;
|
||||
}
|
||||
|
||||
+ if (next_name_entry == 0 && !hsa_seen)
|
||||
+ {
|
||||
+ free (offload_names);
|
||||
+ offload_names = NULL;
|
||||
+ }
|
||||
+
|
||||
out:
|
||||
free_array_of_ptrs ((void **) names, num_targets);
|
||||
}
|
||||
--- libgomp/target.c.jj 2017-01-01 12:45:52.000000000 +0100
|
||||
+++ libgomp/target.c 2017-01-20 20:12:13.756710875 +0100
|
||||
@@ -2356,7 +2356,7 @@ gomp_load_plugin_for_device (struct gomp
|
||||
|
||||
void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
|
||||
if (!plugin_handle)
|
||||
- goto dl_fail;
|
||||
+ return 0;
|
||||
|
||||
/* Check if all required functions are available in the plugin and store
|
||||
their handlers. None of the symbols can legitimately be NULL,
|
124
SOURCES/gcc8-hack.patch
Normal file
124
SOURCES/gcc8-hack.patch
Normal file
@ -0,0 +1,124 @@
|
||||
--- libada/Makefile.in.jj 2009-01-14 12:07:35.000000000 +0100
|
||||
+++ libada/Makefile.in 2009-01-15 14:25:33.000000000 +0100
|
||||
@@ -66,18 +66,40 @@ libsubdir := $(libdir)/gcc/$(target_nonc
|
||||
ADA_RTS_DIR=$(GCC_DIR)/ada/rts$(subst /,_,$(MULTISUBDIR))
|
||||
ADA_RTS_SUBDIR=./rts$(subst /,_,$(MULTISUBDIR))
|
||||
|
||||
+DEFAULTMULTIFLAGS :=
|
||||
+ifeq ($(MULTISUBDIR),)
|
||||
+targ:=$(subst -, ,$(target))
|
||||
+arch:=$(word 1,$(targ))
|
||||
+ifeq ($(words $(targ)),2)
|
||||
+osys:=$(word 2,$(targ))
|
||||
+else
|
||||
+osys:=$(word 3,$(targ))
|
||||
+endif
|
||||
+ifeq ($(strip $(filter-out i%86 x86_64 powerpc% ppc% s390% sparc% linux%, $(arch) $(osys))),)
|
||||
+ifeq ($(shell $(CC) $(CFLAGS) -print-multi-os-directory),../lib64)
|
||||
+DEFAULTMULTIFLAGS := -m64
|
||||
+else
|
||||
+ifeq ($(strip $(filter-out s390%, $(arch))),)
|
||||
+DEFAULTMULTIFLAGS := -m31
|
||||
+else
|
||||
+DEFAULTMULTIFLAGS := -m32
|
||||
+endif
|
||||
+endif
|
||||
+endif
|
||||
+endif
|
||||
+
|
||||
# exeext should not be used because it's the *host* exeext. We're building
|
||||
# a *target* library, aren't we?!? Likewise for CC. Still, provide bogus
|
||||
# definitions just in case something slips through the safety net provided
|
||||
# by recursive make invocations in gcc/ada/Makefile.in
|
||||
LIBADA_FLAGS_TO_PASS = \
|
||||
"MAKEOVERRIDES=" \
|
||||
- "LDFLAGS=$(LDFLAGS)" \
|
||||
+ "LDFLAGS=$(LDFLAGS) $(DEFAULTMULTIFLAGS)" \
|
||||
"LN_S=$(LN_S)" \
|
||||
"SHELL=$(SHELL)" \
|
||||
- "GNATLIBFLAGS=$(GNATLIBFLAGS) $(MULTIFLAGS)" \
|
||||
- "GNATLIBCFLAGS=$(GNATLIBCFLAGS) $(MULTIFLAGS)" \
|
||||
- "GNATLIBCFLAGS_FOR_C=$(GNATLIBCFLAGS_FOR_C) $(MULTIFLAGS)" \
|
||||
+ "GNATLIBFLAGS=$(GNATLIBFLAGS) $(MULTIFLAGS) $(DEFAULTMULTIFLAGS)" \
|
||||
+ "GNATLIBCFLAGS=$(GNATLIBCFLAGS) $(MULTIFLAGS) $(DEFAULTMULTIFLAGS)" \
|
||||
+ "GNATLIBCFLAGS_FOR_C=$(GNATLIBCFLAGS_FOR_C) $(MULTIFLAGS) $(DEFAULTMULTIFLAGS)" \
|
||||
"PICFLAG_FOR_TARGET=$(PICFLAG)" \
|
||||
"THREAD_KIND=$(THREAD_KIND)" \
|
||||
"TRACE=$(TRACE)" \
|
||||
@@ -88,7 +110,7 @@ LIBADA_FLAGS_TO_PASS = \
|
||||
"exeext=.exeext.should.not.be.used " \
|
||||
'CC=the.host.compiler.should.not.be.needed' \
|
||||
"GCC_FOR_TARGET=$(CC)" \
|
||||
- "CFLAGS=$(CFLAGS)"
|
||||
+ "CFLAGS=$(CFLAGS) $(DEFAULTMULTIFLAGS)"
|
||||
|
||||
# Rules to build gnatlib.
|
||||
.PHONY: gnatlib gnatlib-plain gnatlib-sjlj gnatlib-zcx gnatlib-shared osconstool
|
||||
--- config-ml.in.jj 2010-06-30 09:50:44.000000000 +0200
|
||||
+++ config-ml.in 2010-07-02 21:24:17.994211151 +0200
|
||||
@@ -511,6 +511,8 @@ multi-do:
|
||||
ADAFLAGS="$(ADAFLAGS) $${flags}" \
|
||||
prefix="$(prefix)" \
|
||||
exec_prefix="$(exec_prefix)" \
|
||||
+ mandir="$(mandir)" \
|
||||
+ infodir="$(infodir)" \
|
||||
GOCFLAGS="$(GOCFLAGS) $${flags}" \
|
||||
CXXFLAGS="$(CXXFLAGS) $${flags}" \
|
||||
LIBCFLAGS="$(LIBCFLAGS) $${flags}" \
|
||||
--- libcpp/macro.c.jj 2015-01-14 11:01:34.000000000 +0100
|
||||
+++ libcpp/macro.c 2015-01-14 14:22:19.286949884 +0100
|
||||
@@ -2947,8 +2947,6 @@ create_iso_definition (cpp_reader *pfile
|
||||
cpp_token *token;
|
||||
const cpp_token *ctoken;
|
||||
bool following_paste_op = false;
|
||||
- const char *paste_op_error_msg =
|
||||
- N_("'##' cannot appear at either end of a macro expansion");
|
||||
unsigned int num_extra_tokens = 0;
|
||||
|
||||
/* Get the first token of the expansion (or the '(' of a
|
||||
@@ -3059,7 +3057,8 @@ create_iso_definition (cpp_reader *pfile
|
||||
function-like macros, but not at the end. */
|
||||
if (following_paste_op)
|
||||
{
|
||||
- cpp_error (pfile, CPP_DL_ERROR, paste_op_error_msg);
|
||||
+ cpp_error (pfile, CPP_DL_ERROR,
|
||||
+ "'##' cannot appear at either end of a macro expansion");
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
@@ -3072,7 +3071,8 @@ create_iso_definition (cpp_reader *pfile
|
||||
function-like macros, but not at the beginning. */
|
||||
if (macro->count == 1)
|
||||
{
|
||||
- cpp_error (pfile, CPP_DL_ERROR, paste_op_error_msg);
|
||||
+ cpp_error (pfile, CPP_DL_ERROR,
|
||||
+ "'##' cannot appear at either end of a macro expansion");
|
||||
return false;
|
||||
}
|
||||
|
||||
--- libcpp/expr.c.jj 2015-01-14 11:01:34.000000000 +0100
|
||||
+++ libcpp/expr.c 2015-01-14 14:35:52.851002344 +0100
|
||||
@@ -672,16 +672,17 @@ cpp_classify_number (cpp_reader *pfile,
|
||||
if ((result & CPP_N_WIDTH) == CPP_N_LARGE
|
||||
&& CPP_OPTION (pfile, cpp_warn_long_long))
|
||||
{
|
||||
- const char *message = CPP_OPTION (pfile, cplusplus)
|
||||
- ? N_("use of C++11 long long integer constant")
|
||||
- : N_("use of C99 long long integer constant");
|
||||
-
|
||||
if (CPP_OPTION (pfile, c99))
|
||||
cpp_warning_with_line (pfile, CPP_W_LONG_LONG, virtual_location,
|
||||
- 0, message);
|
||||
+ 0, CPP_OPTION (pfile, cplusplus)
|
||||
+ ? N_("use of C++11 long long integer constant")
|
||||
+ : N_("use of C99 long long integer constant"));
|
||||
else
|
||||
cpp_pedwarning_with_line (pfile, CPP_W_LONG_LONG,
|
||||
- virtual_location, 0, message);
|
||||
+ virtual_location, 0,
|
||||
+ CPP_OPTION (pfile, cplusplus)
|
||||
+ ? N_("use of C++11 long long integer constant")
|
||||
+ : N_("use of C99 long long integer constant"));
|
||||
}
|
||||
|
||||
result |= CPP_N_INTEGER;
|
11
SOURCES/gcc8-i386-libgomp.patch
Normal file
11
SOURCES/gcc8-i386-libgomp.patch
Normal file
@ -0,0 +1,11 @@
|
||||
--- libgomp/configure.tgt.jj 2008-01-10 20:53:48.000000000 +0100
|
||||
+++ libgomp/configure.tgt 2008-03-27 12:44:51.000000000 +0100
|
||||
@@ -67,7 +67,7 @@ if test $enable_linux_futex = yes; then
|
||||
;;
|
||||
*)
|
||||
if test -z "$with_arch"; then
|
||||
- XCFLAGS="${XCFLAGS} -march=i486 -mtune=${target_cpu}"
|
||||
+ XCFLAGS="${XCFLAGS} -march=i486 -mtune=generic"
|
||||
fi
|
||||
esac
|
||||
;;
|
715
SOURCES/gcc8-isl-dl.patch
Normal file
715
SOURCES/gcc8-isl-dl.patch
Normal file
@ -0,0 +1,715 @@
|
||||
--- gcc/Makefile.in.jj 2015-06-06 10:00:25.000000000 +0200
|
||||
+++ gcc/Makefile.in 2015-11-04 14:56:02.643536437 +0100
|
||||
@@ -1046,7 +1046,7 @@ BUILD_LIBDEPS= $(BUILD_LIBIBERTY)
|
||||
# and the system's installed libraries.
|
||||
LIBS = @LIBS@ libcommon.a $(CPPLIB) $(LIBINTL) $(LIBICONV) $(LIBBACKTRACE) \
|
||||
$(LIBIBERTY) $(LIBDECNUMBER) $(HOST_LIBS)
|
||||
-BACKENDLIBS = $(ISLLIBS) $(GMPLIBS) $(PLUGINLIBS) $(HOST_LIBS) \
|
||||
+BACKENDLIBS = $(if $(ISLLIBS),-ldl) $(GMPLIBS) $(PLUGINLIBS) $(HOST_LIBS) \
|
||||
$(ZLIB)
|
||||
# Any system libraries needed just for GNAT.
|
||||
SYSLIBS = @GNAT_LIBEXC@
|
||||
@@ -2196,6 +2196,15 @@ $(out_object_file): $(out_file)
|
||||
$(common_out_object_file): $(common_out_file)
|
||||
$(COMPILE) $<
|
||||
$(POSTCOMPILE)
|
||||
+
|
||||
+graphite%.o : \
|
||||
+ ALL_CFLAGS := -O $(filter-out -fkeep-inline-functions, $(ALL_CFLAGS))
|
||||
+graphite.o : \
|
||||
+ ALL_CFLAGS := -O $(filter-out -fkeep-inline-functions, $(ALL_CFLAGS))
|
||||
+graphite%.o : \
|
||||
+ ALL_CXXFLAGS := -O $(filter-out -fkeep-inline-functions, $(ALL_CXXFLAGS))
|
||||
+graphite.o : \
|
||||
+ ALL_CXXFLAGS := -O $(filter-out -fkeep-inline-functions, $(ALL_CXXFLAGS))
|
||||
#
|
||||
# Generate header and source files from the machine description,
|
||||
# and compile them.
|
||||
--- gcc/graphite.h.jj 2016-01-27 12:44:06.000000000 +0100
|
||||
+++ gcc/graphite.h 2016-01-27 13:26:38.309876856 +0100
|
||||
@@ -39,6 +39,590 @@ along with GCC; see the file COPYING3.
|
||||
#include <isl/schedule_node.h>
|
||||
#include <isl/id.h>
|
||||
#include <isl/space.h>
|
||||
+#include <isl/version.h>
|
||||
+#include <dlfcn.h>
|
||||
+
|
||||
+#define DYNSYMS \
|
||||
+ DYNSYM (isl_aff_add_coefficient_si); \
|
||||
+ DYNSYM (isl_aff_free); \
|
||||
+ DYNSYM (isl_aff_get_space); \
|
||||
+ DYNSYM (isl_aff_set_coefficient_si); \
|
||||
+ DYNSYM (isl_aff_set_constant_si); \
|
||||
+ DYNSYM (isl_aff_zero_on_domain); \
|
||||
+ DYNSYM (isl_band_free); \
|
||||
+ DYNSYM (isl_band_get_children); \
|
||||
+ DYNSYM (isl_band_get_partial_schedule); \
|
||||
+ DYNSYM (isl_band_has_children); \
|
||||
+ DYNSYM (isl_band_list_free); \
|
||||
+ DYNSYM (isl_band_list_get_band); \
|
||||
+ DYNSYM (isl_band_list_get_ctx); \
|
||||
+ DYNSYM (isl_band_list_n_band); \
|
||||
+ DYNSYM (isl_band_n_member); \
|
||||
+ DYNSYM (isl_basic_map_add_constraint); \
|
||||
+ DYNSYM (isl_basic_map_project_out); \
|
||||
+ DYNSYM (isl_basic_map_universe); \
|
||||
+ DYNSYM (isl_constraint_set_coefficient_si); \
|
||||
+ DYNSYM (isl_constraint_set_constant_si); \
|
||||
+ DYNSYM (isl_ctx_alloc); \
|
||||
+ DYNSYM (isl_ctx_free); \
|
||||
+ DYNSYM (isl_equality_alloc); \
|
||||
+ DYNSYM (isl_id_alloc); \
|
||||
+ DYNSYM (isl_id_copy); \
|
||||
+ DYNSYM (isl_id_free); \
|
||||
+ DYNSYM (isl_inequality_alloc); \
|
||||
+ DYNSYM (isl_local_space_copy); \
|
||||
+ DYNSYM (isl_local_space_free); \
|
||||
+ DYNSYM (isl_local_space_from_space); \
|
||||
+ DYNSYM (isl_local_space_range); \
|
||||
+ DYNSYM (isl_map_add_constraint); \
|
||||
+ DYNSYM (isl_map_add_dims); \
|
||||
+ DYNSYM (isl_map_align_params); \
|
||||
+ DYNSYM (isl_map_apply_range); \
|
||||
+ DYNSYM (isl_map_copy); \
|
||||
+ DYNSYM (isl_map_dim); \
|
||||
+ DYNSYM (isl_map_dump); \
|
||||
+ DYNSYM (isl_map_equate); \
|
||||
+ DYNSYM (isl_map_fix_si); \
|
||||
+ DYNSYM (isl_map_flat_product); \
|
||||
+ DYNSYM (isl_map_flat_range_product); \
|
||||
+ DYNSYM (isl_map_free); \
|
||||
+ DYNSYM (isl_map_from_basic_map); \
|
||||
+ DYNSYM (isl_map_from_pw_aff); \
|
||||
+ DYNSYM (isl_map_from_union_map); \
|
||||
+ DYNSYM (isl_map_get_ctx); \
|
||||
+ DYNSYM (isl_map_get_space); \
|
||||
+ DYNSYM (isl_map_get_tuple_id); \
|
||||
+ DYNSYM (isl_map_insert_dims); \
|
||||
+ DYNSYM (isl_map_intersect); \
|
||||
+ DYNSYM (isl_map_intersect_domain); \
|
||||
+ DYNSYM (isl_map_intersect_range); \
|
||||
+ DYNSYM (isl_map_is_empty); \
|
||||
+ DYNSYM (isl_map_lex_ge); \
|
||||
+ DYNSYM (isl_map_lex_le); \
|
||||
+ DYNSYM (isl_map_n_out); \
|
||||
+ DYNSYM (isl_map_range); \
|
||||
+ DYNSYM (isl_map_set_tuple_id); \
|
||||
+ DYNSYM (isl_map_universe); \
|
||||
+ DYNSYM (isl_options_set_on_error); \
|
||||
+ DYNSYM (isl_options_set_schedule_serialize_sccs); \
|
||||
+ DYNSYM (isl_printer_set_yaml_style); \
|
||||
+ DYNSYM (isl_options_set_schedule_max_constant_term); \
|
||||
+ DYNSYM (isl_options_set_schedule_maximize_band_depth); \
|
||||
+ DYNSYM (isl_printer_free); \
|
||||
+ DYNSYM (isl_printer_print_aff); \
|
||||
+ DYNSYM (isl_printer_print_constraint); \
|
||||
+ DYNSYM (isl_printer_print_map); \
|
||||
+ DYNSYM (isl_printer_print_set); \
|
||||
+ DYNSYM (isl_printer_to_file); \
|
||||
+ DYNSYM (isl_pw_aff_add); \
|
||||
+ DYNSYM (isl_pw_aff_alloc); \
|
||||
+ DYNSYM (isl_pw_aff_copy); \
|
||||
+ DYNSYM (isl_pw_aff_eq_set); \
|
||||
+ DYNSYM (isl_pw_aff_free); \
|
||||
+ DYNSYM (isl_pw_aff_from_aff); \
|
||||
+ DYNSYM (isl_pw_aff_ge_set); \
|
||||
+ DYNSYM (isl_pw_aff_gt_set); \
|
||||
+ DYNSYM (isl_pw_aff_is_cst); \
|
||||
+ DYNSYM (isl_pw_aff_le_set); \
|
||||
+ DYNSYM (isl_pw_aff_lt_set); \
|
||||
+ DYNSYM (isl_pw_aff_mul); \
|
||||
+ DYNSYM (isl_pw_aff_ne_set); \
|
||||
+ DYNSYM (isl_pw_aff_nonneg_set); \
|
||||
+ DYNSYM (isl_pw_aff_set_tuple_id); \
|
||||
+ DYNSYM (isl_pw_aff_sub); \
|
||||
+ DYNSYM (isl_pw_aff_zero_set); \
|
||||
+ DYNSYM (isl_schedule_free); \
|
||||
+ DYNSYM (isl_schedule_get_band_forest); \
|
||||
+ DYNSYM (isl_set_add_constraint); \
|
||||
+ DYNSYM (isl_set_add_dims); \
|
||||
+ DYNSYM (isl_set_apply); \
|
||||
+ DYNSYM (isl_set_coalesce); \
|
||||
+ DYNSYM (isl_set_copy); \
|
||||
+ DYNSYM (isl_set_dim); \
|
||||
+ DYNSYM (isl_set_fix_si); \
|
||||
+ DYNSYM (isl_set_free); \
|
||||
+ DYNSYM (isl_set_get_space); \
|
||||
+ DYNSYM (isl_set_get_tuple_id); \
|
||||
+ DYNSYM (isl_set_intersect); \
|
||||
+ DYNSYM (isl_set_is_empty); \
|
||||
+ DYNSYM (isl_set_n_dim); \
|
||||
+ DYNSYM (isl_set_nat_universe); \
|
||||
+ DYNSYM (isl_set_project_out); \
|
||||
+ DYNSYM (isl_set_set_tuple_id); \
|
||||
+ DYNSYM (isl_set_universe); \
|
||||
+ DYNSYM (isl_space_add_dims); \
|
||||
+ DYNSYM (isl_space_alloc); \
|
||||
+ DYNSYM (isl_space_copy); \
|
||||
+ DYNSYM (isl_space_dim); \
|
||||
+ DYNSYM (isl_space_domain); \
|
||||
+ DYNSYM (isl_space_find_dim_by_id); \
|
||||
+ DYNSYM (isl_space_free); \
|
||||
+ DYNSYM (isl_space_from_domain); \
|
||||
+ DYNSYM (isl_space_get_tuple_id); \
|
||||
+ DYNSYM (isl_space_params_alloc); \
|
||||
+ DYNSYM (isl_space_range); \
|
||||
+ DYNSYM (isl_space_set_alloc); \
|
||||
+ DYNSYM (isl_space_set_dim_id); \
|
||||
+ DYNSYM (isl_space_set_tuple_id); \
|
||||
+ DYNSYM (isl_union_map_add_map); \
|
||||
+ DYNSYM (isl_union_map_align_params); \
|
||||
+ DYNSYM (isl_union_map_apply_domain); \
|
||||
+ DYNSYM (isl_union_map_apply_range); \
|
||||
+ DYNSYM (isl_union_map_compute_flow); \
|
||||
+ DYNSYM (isl_union_map_copy); \
|
||||
+ DYNSYM (isl_union_map_empty); \
|
||||
+ DYNSYM (isl_union_map_flat_range_product); \
|
||||
+ DYNSYM (isl_union_map_foreach_map); \
|
||||
+ DYNSYM (isl_union_map_free); \
|
||||
+ DYNSYM (isl_union_map_from_map); \
|
||||
+ DYNSYM (isl_union_map_get_ctx); \
|
||||
+ DYNSYM (isl_union_map_get_space); \
|
||||
+ DYNSYM (isl_union_map_gist_domain); \
|
||||
+ DYNSYM (isl_union_map_gist_range); \
|
||||
+ DYNSYM (isl_union_map_intersect_domain); \
|
||||
+ DYNSYM (isl_union_map_is_empty); \
|
||||
+ DYNSYM (isl_union_map_subtract); \
|
||||
+ DYNSYM (isl_union_map_union); \
|
||||
+ DYNSYM (isl_union_set_add_set); \
|
||||
+ DYNSYM (isl_union_set_compute_schedule); \
|
||||
+ DYNSYM (isl_union_set_copy); \
|
||||
+ DYNSYM (isl_union_set_empty); \
|
||||
+ DYNSYM (isl_union_set_from_set); \
|
||||
+ DYNSYM (isl_aff_add_constant_val); \
|
||||
+ DYNSYM (isl_aff_get_coefficient_val); \
|
||||
+ DYNSYM (isl_aff_get_ctx); \
|
||||
+ DYNSYM (isl_aff_mod_val); \
|
||||
+ DYNSYM (isl_ast_build_ast_from_schedule); \
|
||||
+ DYNSYM (isl_ast_build_free); \
|
||||
+ DYNSYM (isl_ast_build_from_context); \
|
||||
+ DYNSYM (isl_ast_build_get_ctx); \
|
||||
+ DYNSYM (isl_ast_build_get_schedule); \
|
||||
+ DYNSYM (isl_ast_build_get_schedule_space); \
|
||||
+ DYNSYM (isl_ast_build_set_before_each_for); \
|
||||
+ DYNSYM (isl_ast_build_set_options); \
|
||||
+ DYNSYM (isl_ast_expr_free); \
|
||||
+ DYNSYM (isl_ast_expr_from_val); \
|
||||
+ DYNSYM (isl_ast_expr_get_ctx); \
|
||||
+ DYNSYM (isl_ast_expr_get_id); \
|
||||
+ DYNSYM (isl_ast_expr_get_op_arg); \
|
||||
+ DYNSYM (isl_ast_expr_get_op_n_arg); \
|
||||
+ DYNSYM (isl_ast_expr_get_op_type); \
|
||||
+ DYNSYM (isl_ast_expr_get_type); \
|
||||
+ DYNSYM (isl_ast_expr_get_val); \
|
||||
+ DYNSYM (isl_ast_expr_sub); \
|
||||
+ DYNSYM (isl_ast_node_block_get_children); \
|
||||
+ DYNSYM (isl_ast_node_for_get_body); \
|
||||
+ DYNSYM (isl_ast_node_for_get_cond); \
|
||||
+ DYNSYM (isl_ast_node_for_get_inc); \
|
||||
+ DYNSYM (isl_ast_node_for_get_init); \
|
||||
+ DYNSYM (isl_ast_node_for_get_iterator); \
|
||||
+ DYNSYM (isl_ast_node_free); \
|
||||
+ DYNSYM (isl_ast_node_get_annotation); \
|
||||
+ DYNSYM (isl_ast_node_get_type); \
|
||||
+ DYNSYM (isl_ast_node_if_get_cond); \
|
||||
+ DYNSYM (isl_ast_node_if_get_else); \
|
||||
+ DYNSYM (isl_ast_node_if_get_then); \
|
||||
+ DYNSYM (isl_ast_node_list_free); \
|
||||
+ DYNSYM (isl_ast_node_list_get_ast_node); \
|
||||
+ DYNSYM (isl_ast_node_list_n_ast_node); \
|
||||
+ DYNSYM (isl_ast_node_user_get_expr); \
|
||||
+ DYNSYM (isl_constraint_set_coefficient_val); \
|
||||
+ DYNSYM (isl_constraint_set_constant_val); \
|
||||
+ DYNSYM (isl_id_get_user); \
|
||||
+ DYNSYM (isl_local_space_get_ctx); \
|
||||
+ DYNSYM (isl_map_fix_val); \
|
||||
+ DYNSYM (isl_options_set_ast_build_atomic_upper_bound); \
|
||||
+ DYNSYM (isl_printer_print_ast_node); \
|
||||
+ DYNSYM (isl_printer_print_str); \
|
||||
+ DYNSYM (isl_printer_set_output_format); \
|
||||
+ DYNSYM (isl_pw_aff_mod_val); \
|
||||
+ DYNSYM (isl_schedule_constraints_compute_schedule); \
|
||||
+ DYNSYM (isl_schedule_constraints_on_domain); \
|
||||
+ DYNSYM (isl_schedule_constraints_set_coincidence); \
|
||||
+ DYNSYM (isl_schedule_constraints_set_proximity); \
|
||||
+ DYNSYM (isl_schedule_constraints_set_validity); \
|
||||
+ DYNSYM (isl_set_get_dim_id); \
|
||||
+ DYNSYM (isl_set_max_val); \
|
||||
+ DYNSYM (isl_set_min_val); \
|
||||
+ DYNSYM (isl_set_params); \
|
||||
+ DYNSYM (isl_space_align_params); \
|
||||
+ DYNSYM (isl_space_map_from_domain_and_range); \
|
||||
+ DYNSYM (isl_space_set_tuple_name); \
|
||||
+ DYNSYM (isl_space_wrap); \
|
||||
+ DYNSYM (isl_union_map_from_domain_and_range); \
|
||||
+ DYNSYM (isl_union_map_range); \
|
||||
+ DYNSYM (isl_union_set_union); \
|
||||
+ DYNSYM (isl_union_set_universe); \
|
||||
+ DYNSYM (isl_val_2exp); \
|
||||
+ DYNSYM (isl_val_add_ui); \
|
||||
+ DYNSYM (isl_val_copy); \
|
||||
+ DYNSYM (isl_val_free); \
|
||||
+ DYNSYM (isl_val_int_from_si); \
|
||||
+ DYNSYM (isl_val_int_from_ui); \
|
||||
+ DYNSYM (isl_val_mul); \
|
||||
+ DYNSYM (isl_val_neg); \
|
||||
+ DYNSYM (isl_val_sub); \
|
||||
+ DYNSYM (isl_printer_print_union_map); \
|
||||
+ DYNSYM (isl_pw_aff_get_ctx); \
|
||||
+ DYNSYM (isl_val_is_int); \
|
||||
+ DYNSYM (isl_ctx_get_max_operations); \
|
||||
+ DYNSYM (isl_ctx_set_max_operations); \
|
||||
+ DYNSYM (isl_ctx_last_error); \
|
||||
+ DYNSYM (isl_ctx_reset_operations); \
|
||||
+ DYNSYM (isl_map_coalesce); \
|
||||
+ DYNSYM (isl_printer_print_schedule); \
|
||||
+ DYNSYM (isl_set_set_dim_id); \
|
||||
+ DYNSYM (isl_union_map_coalesce); \
|
||||
+ DYNSYM (isl_multi_val_set_val); \
|
||||
+ DYNSYM (isl_multi_val_zero); \
|
||||
+ DYNSYM (isl_options_set_schedule_max_coefficient); \
|
||||
+ DYNSYM (isl_options_set_tile_scale_tile_loops); \
|
||||
+ DYNSYM (isl_schedule_copy); \
|
||||
+ DYNSYM (isl_schedule_get_map); \
|
||||
+ DYNSYM (isl_schedule_map_schedule_node_bottom_up); \
|
||||
+ DYNSYM (isl_schedule_node_band_get_permutable); \
|
||||
+ DYNSYM (isl_schedule_node_band_get_space); \
|
||||
+ DYNSYM (isl_schedule_node_band_tile); \
|
||||
+ DYNSYM (isl_schedule_node_child); \
|
||||
+ DYNSYM (isl_schedule_node_free); \
|
||||
+ DYNSYM (isl_schedule_node_get_child); \
|
||||
+ DYNSYM (isl_schedule_node_get_ctx); \
|
||||
+ DYNSYM (isl_schedule_node_get_type); \
|
||||
+ DYNSYM (isl_schedule_node_n_children); \
|
||||
+ DYNSYM (isl_union_map_is_equal); \
|
||||
+ DYNSYM (isl_union_access_info_compute_flow); \
|
||||
+ DYNSYM (isl_union_access_info_from_sink); \
|
||||
+ DYNSYM (isl_union_access_info_set_may_source); \
|
||||
+ DYNSYM (isl_union_access_info_set_must_source); \
|
||||
+ DYNSYM (isl_union_access_info_set_schedule); \
|
||||
+ DYNSYM (isl_union_flow_free); \
|
||||
+ DYNSYM (isl_union_flow_get_may_dependence); \
|
||||
+ DYNSYM (isl_union_flow_get_must_dependence); \
|
||||
+ DYNSYM (isl_aff_var_on_domain); \
|
||||
+ DYNSYM (isl_multi_aff_from_aff); \
|
||||
+ DYNSYM (isl_schedule_get_ctx); \
|
||||
+ DYNSYM (isl_multi_aff_set_tuple_id); \
|
||||
+ DYNSYM (isl_multi_aff_dim); \
|
||||
+ DYNSYM (isl_schedule_get_domain); \
|
||||
+ DYNSYM (isl_union_set_is_empty); \
|
||||
+ DYNSYM (isl_union_set_get_space); \
|
||||
+ DYNSYM (isl_union_pw_multi_aff_empty); \
|
||||
+ DYNSYM (isl_union_set_foreach_set); \
|
||||
+ DYNSYM (isl_union_set_free); \
|
||||
+ DYNSYM (isl_multi_union_pw_aff_from_union_pw_multi_aff); \
|
||||
+ DYNSYM (isl_multi_union_pw_aff_apply_multi_aff); \
|
||||
+ DYNSYM (isl_schedule_insert_partial_schedule); \
|
||||
+ DYNSYM (isl_union_pw_multi_aff_free); \
|
||||
+ DYNSYM (isl_pw_multi_aff_project_out_map); \
|
||||
+ DYNSYM (isl_union_pw_multi_aff_add_pw_multi_aff); \
|
||||
+ DYNSYM (isl_schedule_from_domain); \
|
||||
+ DYNSYM (isl_schedule_sequence); \
|
||||
+ DYNSYM (isl_ast_build_node_from_schedule); \
|
||||
+ DYNSYM (isl_ast_node_mark_get_node); \
|
||||
+ DYNSYM (isl_schedule_node_band_member_get_ast_loop_type); \
|
||||
+ DYNSYM (isl_schedule_node_band_member_set_ast_loop_type); \
|
||||
+ DYNSYM (isl_val_n_abs_num_chunks); \
|
||||
+ DYNSYM (isl_val_get_abs_num_chunks); \
|
||||
+ DYNSYM (isl_val_int_from_chunks); \
|
||||
+ DYNSYM (isl_val_is_neg); \
|
||||
+ DYNSYM (isl_version); \
|
||||
+ DYNSYM (isl_options_get_on_error); \
|
||||
+ DYNSYM (isl_ctx_reset_error);
|
||||
+
|
||||
+extern struct isl_pointers_s__
|
||||
+{
|
||||
+ bool inited;
|
||||
+ void *h;
|
||||
+#define DYNSYM(x) __typeof (x) *p_##x
|
||||
+ DYNSYMS
|
||||
+#undef DYNSYM
|
||||
+} isl_pointers__;
|
||||
+
|
||||
+#define isl_aff_add_coefficient_si (*isl_pointers__.p_isl_aff_add_coefficient_si)
|
||||
+#define isl_aff_free (*isl_pointers__.p_isl_aff_free)
|
||||
+#define isl_aff_get_space (*isl_pointers__.p_isl_aff_get_space)
|
||||
+#define isl_aff_set_coefficient_si (*isl_pointers__.p_isl_aff_set_coefficient_si)
|
||||
+#define isl_aff_set_constant_si (*isl_pointers__.p_isl_aff_set_constant_si)
|
||||
+#define isl_aff_zero_on_domain (*isl_pointers__.p_isl_aff_zero_on_domain)
|
||||
+#define isl_band_free (*isl_pointers__.p_isl_band_free)
|
||||
+#define isl_band_get_children (*isl_pointers__.p_isl_band_get_children)
|
||||
+#define isl_band_get_partial_schedule (*isl_pointers__.p_isl_band_get_partial_schedule)
|
||||
+#define isl_band_has_children (*isl_pointers__.p_isl_band_has_children)
|
||||
+#define isl_band_list_free (*isl_pointers__.p_isl_band_list_free)
|
||||
+#define isl_band_list_get_band (*isl_pointers__.p_isl_band_list_get_band)
|
||||
+#define isl_band_list_get_ctx (*isl_pointers__.p_isl_band_list_get_ctx)
|
||||
+#define isl_band_list_n_band (*isl_pointers__.p_isl_band_list_n_band)
|
||||
+#define isl_band_n_member (*isl_pointers__.p_isl_band_n_member)
|
||||
+#define isl_basic_map_add_constraint (*isl_pointers__.p_isl_basic_map_add_constraint)
|
||||
+#define isl_basic_map_project_out (*isl_pointers__.p_isl_basic_map_project_out)
|
||||
+#define isl_basic_map_universe (*isl_pointers__.p_isl_basic_map_universe)
|
||||
+#define isl_constraint_set_coefficient_si (*isl_pointers__.p_isl_constraint_set_coefficient_si)
|
||||
+#define isl_constraint_set_constant_si (*isl_pointers__.p_isl_constraint_set_constant_si)
|
||||
+#define isl_ctx_alloc (*isl_pointers__.p_isl_ctx_alloc)
|
||||
+#define isl_ctx_free (*isl_pointers__.p_isl_ctx_free)
|
||||
+#define isl_equality_alloc (*isl_pointers__.p_isl_equality_alloc)
|
||||
+#define isl_id_alloc (*isl_pointers__.p_isl_id_alloc)
|
||||
+#define isl_id_copy (*isl_pointers__.p_isl_id_copy)
|
||||
+#define isl_id_free (*isl_pointers__.p_isl_id_free)
|
||||
+#define isl_inequality_alloc (*isl_pointers__.p_isl_inequality_alloc)
|
||||
+#define isl_local_space_copy (*isl_pointers__.p_isl_local_space_copy)
|
||||
+#define isl_local_space_free (*isl_pointers__.p_isl_local_space_free)
|
||||
+#define isl_local_space_from_space (*isl_pointers__.p_isl_local_space_from_space)
|
||||
+#define isl_local_space_range (*isl_pointers__.p_isl_local_space_range)
|
||||
+#define isl_map_add_constraint (*isl_pointers__.p_isl_map_add_constraint)
|
||||
+#define isl_map_add_dims (*isl_pointers__.p_isl_map_add_dims)
|
||||
+#define isl_map_align_params (*isl_pointers__.p_isl_map_align_params)
|
||||
+#define isl_map_apply_range (*isl_pointers__.p_isl_map_apply_range)
|
||||
+#define isl_map_copy (*isl_pointers__.p_isl_map_copy)
|
||||
+#define isl_map_dim (*isl_pointers__.p_isl_map_dim)
|
||||
+#define isl_map_dump (*isl_pointers__.p_isl_map_dump)
|
||||
+#define isl_map_equate (*isl_pointers__.p_isl_map_equate)
|
||||
+#define isl_map_fix_si (*isl_pointers__.p_isl_map_fix_si)
|
||||
+#define isl_map_flat_product (*isl_pointers__.p_isl_map_flat_product)
|
||||
+#define isl_map_flat_range_product (*isl_pointers__.p_isl_map_flat_range_product)
|
||||
+#define isl_map_free (*isl_pointers__.p_isl_map_free)
|
||||
+#define isl_map_from_basic_map (*isl_pointers__.p_isl_map_from_basic_map)
|
||||
+#define isl_map_from_pw_aff (*isl_pointers__.p_isl_map_from_pw_aff)
|
||||
+#define isl_map_from_union_map (*isl_pointers__.p_isl_map_from_union_map)
|
||||
+#define isl_map_get_ctx (*isl_pointers__.p_isl_map_get_ctx)
|
||||
+#define isl_map_get_space (*isl_pointers__.p_isl_map_get_space)
|
||||
+#define isl_map_get_tuple_id (*isl_pointers__.p_isl_map_get_tuple_id)
|
||||
+#define isl_map_insert_dims (*isl_pointers__.p_isl_map_insert_dims)
|
||||
+#define isl_map_intersect (*isl_pointers__.p_isl_map_intersect)
|
||||
+#define isl_map_intersect_domain (*isl_pointers__.p_isl_map_intersect_domain)
|
||||
+#define isl_map_intersect_range (*isl_pointers__.p_isl_map_intersect_range)
|
||||
+#define isl_map_is_empty (*isl_pointers__.p_isl_map_is_empty)
|
||||
+#define isl_map_lex_ge (*isl_pointers__.p_isl_map_lex_ge)
|
||||
+#define isl_map_lex_le (*isl_pointers__.p_isl_map_lex_le)
|
||||
+#define isl_map_n_out (*isl_pointers__.p_isl_map_n_out)
|
||||
+#define isl_map_range (*isl_pointers__.p_isl_map_range)
|
||||
+#define isl_map_set_tuple_id (*isl_pointers__.p_isl_map_set_tuple_id)
|
||||
+#define isl_map_universe (*isl_pointers__.p_isl_map_universe)
|
||||
+#define isl_options_set_on_error (*isl_pointers__.p_isl_options_set_on_error)
|
||||
+#define isl_options_set_schedule_serialize_sccs (*isl_pointers__.p_isl_options_set_schedule_serialize_sccs)
|
||||
+#define isl_printer_set_yaml_style (*isl_pointers__.p_isl_printer_set_yaml_style)
|
||||
+#define isl_options_set_schedule_max_constant_term (*isl_pointers__.p_isl_options_set_schedule_max_constant_term)
|
||||
+#define isl_options_set_schedule_maximize_band_depth (*isl_pointers__.p_isl_options_set_schedule_maximize_band_depth)
|
||||
+#define isl_printer_free (*isl_pointers__.p_isl_printer_free)
|
||||
+#define isl_printer_print_aff (*isl_pointers__.p_isl_printer_print_aff)
|
||||
+#define isl_printer_print_constraint (*isl_pointers__.p_isl_printer_print_constraint)
|
||||
+#define isl_printer_print_map (*isl_pointers__.p_isl_printer_print_map)
|
||||
+#define isl_printer_print_set (*isl_pointers__.p_isl_printer_print_set)
|
||||
+#define isl_printer_to_file (*isl_pointers__.p_isl_printer_to_file)
|
||||
+#define isl_pw_aff_add (*isl_pointers__.p_isl_pw_aff_add)
|
||||
+#define isl_pw_aff_alloc (*isl_pointers__.p_isl_pw_aff_alloc)
|
||||
+#define isl_pw_aff_copy (*isl_pointers__.p_isl_pw_aff_copy)
|
||||
+#define isl_pw_aff_eq_set (*isl_pointers__.p_isl_pw_aff_eq_set)
|
||||
+#define isl_pw_aff_free (*isl_pointers__.p_isl_pw_aff_free)
|
||||
+#define isl_pw_aff_from_aff (*isl_pointers__.p_isl_pw_aff_from_aff)
|
||||
+#define isl_pw_aff_ge_set (*isl_pointers__.p_isl_pw_aff_ge_set)
|
||||
+#define isl_pw_aff_gt_set (*isl_pointers__.p_isl_pw_aff_gt_set)
|
||||
+#define isl_pw_aff_is_cst (*isl_pointers__.p_isl_pw_aff_is_cst)
|
||||
+#define isl_pw_aff_le_set (*isl_pointers__.p_isl_pw_aff_le_set)
|
||||
+#define isl_pw_aff_lt_set (*isl_pointers__.p_isl_pw_aff_lt_set)
|
||||
+#define isl_pw_aff_mul (*isl_pointers__.p_isl_pw_aff_mul)
|
||||
+#define isl_pw_aff_ne_set (*isl_pointers__.p_isl_pw_aff_ne_set)
|
||||
+#define isl_pw_aff_nonneg_set (*isl_pointers__.p_isl_pw_aff_nonneg_set)
|
||||
+#define isl_pw_aff_set_tuple_id (*isl_pointers__.p_isl_pw_aff_set_tuple_id)
|
||||
+#define isl_pw_aff_sub (*isl_pointers__.p_isl_pw_aff_sub)
|
||||
+#define isl_pw_aff_zero_set (*isl_pointers__.p_isl_pw_aff_zero_set)
|
||||
+#define isl_schedule_free (*isl_pointers__.p_isl_schedule_free)
|
||||
+#define isl_schedule_get_band_forest (*isl_pointers__.p_isl_schedule_get_band_forest)
|
||||
+#define isl_set_add_constraint (*isl_pointers__.p_isl_set_add_constraint)
|
||||
+#define isl_set_add_dims (*isl_pointers__.p_isl_set_add_dims)
|
||||
+#define isl_set_apply (*isl_pointers__.p_isl_set_apply)
|
||||
+#define isl_set_coalesce (*isl_pointers__.p_isl_set_coalesce)
|
||||
+#define isl_set_copy (*isl_pointers__.p_isl_set_copy)
|
||||
+#define isl_set_dim (*isl_pointers__.p_isl_set_dim)
|
||||
+#define isl_set_fix_si (*isl_pointers__.p_isl_set_fix_si)
|
||||
+#define isl_set_free (*isl_pointers__.p_isl_set_free)
|
||||
+#define isl_set_get_space (*isl_pointers__.p_isl_set_get_space)
|
||||
+#define isl_set_get_tuple_id (*isl_pointers__.p_isl_set_get_tuple_id)
|
||||
+#define isl_set_intersect (*isl_pointers__.p_isl_set_intersect)
|
||||
+#define isl_set_is_empty (*isl_pointers__.p_isl_set_is_empty)
|
||||
+#define isl_set_n_dim (*isl_pointers__.p_isl_set_n_dim)
|
||||
+#define isl_set_nat_universe (*isl_pointers__.p_isl_set_nat_universe)
|
||||
+#define isl_set_project_out (*isl_pointers__.p_isl_set_project_out)
|
||||
+#define isl_set_set_tuple_id (*isl_pointers__.p_isl_set_set_tuple_id)
|
||||
+#define isl_set_universe (*isl_pointers__.p_isl_set_universe)
|
||||
+#define isl_space_add_dims (*isl_pointers__.p_isl_space_add_dims)
|
||||
+#define isl_space_alloc (*isl_pointers__.p_isl_space_alloc)
|
||||
+#define isl_space_copy (*isl_pointers__.p_isl_space_copy)
|
||||
+#define isl_space_dim (*isl_pointers__.p_isl_space_dim)
|
||||
+#define isl_space_domain (*isl_pointers__.p_isl_space_domain)
|
||||
+#define isl_space_find_dim_by_id (*isl_pointers__.p_isl_space_find_dim_by_id)
|
||||
+#define isl_space_free (*isl_pointers__.p_isl_space_free)
|
||||
+#define isl_space_from_domain (*isl_pointers__.p_isl_space_from_domain)
|
||||
+#define isl_space_get_tuple_id (*isl_pointers__.p_isl_space_get_tuple_id)
|
||||
+#define isl_space_params_alloc (*isl_pointers__.p_isl_space_params_alloc)
|
||||
+#define isl_space_range (*isl_pointers__.p_isl_space_range)
|
||||
+#define isl_space_set_alloc (*isl_pointers__.p_isl_space_set_alloc)
|
||||
+#define isl_space_set_dim_id (*isl_pointers__.p_isl_space_set_dim_id)
|
||||
+#define isl_space_set_tuple_id (*isl_pointers__.p_isl_space_set_tuple_id)
|
||||
+#define isl_union_map_add_map (*isl_pointers__.p_isl_union_map_add_map)
|
||||
+#define isl_union_map_align_params (*isl_pointers__.p_isl_union_map_align_params)
|
||||
+#define isl_union_map_apply_domain (*isl_pointers__.p_isl_union_map_apply_domain)
|
||||
+#define isl_union_map_apply_range (*isl_pointers__.p_isl_union_map_apply_range)
|
||||
+#define isl_union_map_compute_flow (*isl_pointers__.p_isl_union_map_compute_flow)
|
||||
+#define isl_union_map_copy (*isl_pointers__.p_isl_union_map_copy)
|
||||
+#define isl_union_map_empty (*isl_pointers__.p_isl_union_map_empty)
|
||||
+#define isl_union_map_flat_range_product (*isl_pointers__.p_isl_union_map_flat_range_product)
|
||||
+#define isl_union_map_foreach_map (*isl_pointers__.p_isl_union_map_foreach_map)
|
||||
+#define isl_union_map_free (*isl_pointers__.p_isl_union_map_free)
|
||||
+#define isl_union_map_from_map (*isl_pointers__.p_isl_union_map_from_map)
|
||||
+#define isl_union_map_get_ctx (*isl_pointers__.p_isl_union_map_get_ctx)
|
||||
+#define isl_union_map_get_space (*isl_pointers__.p_isl_union_map_get_space)
|
||||
+#define isl_union_map_gist_domain (*isl_pointers__.p_isl_union_map_gist_domain)
|
||||
+#define isl_union_map_gist_range (*isl_pointers__.p_isl_union_map_gist_range)
|
||||
+#define isl_union_map_intersect_domain (*isl_pointers__.p_isl_union_map_intersect_domain)
|
||||
+#define isl_union_map_is_empty (*isl_pointers__.p_isl_union_map_is_empty)
|
||||
+#define isl_union_map_subtract (*isl_pointers__.p_isl_union_map_subtract)
|
||||
+#define isl_union_map_union (*isl_pointers__.p_isl_union_map_union)
|
||||
+#define isl_union_set_add_set (*isl_pointers__.p_isl_union_set_add_set)
|
||||
+#define isl_union_set_compute_schedule (*isl_pointers__.p_isl_union_set_compute_schedule)
|
||||
+#define isl_union_set_copy (*isl_pointers__.p_isl_union_set_copy)
|
||||
+#define isl_union_set_empty (*isl_pointers__.p_isl_union_set_empty)
|
||||
+#define isl_union_set_from_set (*isl_pointers__.p_isl_union_set_from_set)
|
||||
+#define isl_aff_add_constant_val (*isl_pointers__.p_isl_aff_add_constant_val)
|
||||
+#define isl_aff_get_coefficient_val (*isl_pointers__.p_isl_aff_get_coefficient_val)
|
||||
+#define isl_aff_get_ctx (*isl_pointers__.p_isl_aff_get_ctx)
|
||||
+#define isl_aff_mod_val (*isl_pointers__.p_isl_aff_mod_val)
|
||||
+#define isl_ast_build_ast_from_schedule (*isl_pointers__.p_isl_ast_build_ast_from_schedule)
|
||||
+#define isl_ast_build_free (*isl_pointers__.p_isl_ast_build_free)
|
||||
+#define isl_ast_build_from_context (*isl_pointers__.p_isl_ast_build_from_context)
|
||||
+#define isl_ast_build_get_ctx (*isl_pointers__.p_isl_ast_build_get_ctx)
|
||||
+#define isl_ast_build_get_schedule (*isl_pointers__.p_isl_ast_build_get_schedule)
|
||||
+#define isl_ast_build_get_schedule_space (*isl_pointers__.p_isl_ast_build_get_schedule_space)
|
||||
+#define isl_ast_build_set_before_each_for (*isl_pointers__.p_isl_ast_build_set_before_each_for)
|
||||
+#define isl_ast_build_set_options (*isl_pointers__.p_isl_ast_build_set_options)
|
||||
+#define isl_ast_expr_free (*isl_pointers__.p_isl_ast_expr_free)
|
||||
+#define isl_ast_expr_from_val (*isl_pointers__.p_isl_ast_expr_from_val)
|
||||
+#define isl_ast_expr_get_ctx (*isl_pointers__.p_isl_ast_expr_get_ctx)
|
||||
+#define isl_ast_expr_get_id (*isl_pointers__.p_isl_ast_expr_get_id)
|
||||
+#define isl_ast_expr_get_op_arg (*isl_pointers__.p_isl_ast_expr_get_op_arg)
|
||||
+#define isl_ast_expr_get_op_n_arg (*isl_pointers__.p_isl_ast_expr_get_op_n_arg)
|
||||
+#define isl_ast_expr_get_op_type (*isl_pointers__.p_isl_ast_expr_get_op_type)
|
||||
+#define isl_ast_expr_get_type (*isl_pointers__.p_isl_ast_expr_get_type)
|
||||
+#define isl_ast_expr_get_val (*isl_pointers__.p_isl_ast_expr_get_val)
|
||||
+#define isl_ast_expr_sub (*isl_pointers__.p_isl_ast_expr_sub)
|
||||
+#define isl_ast_node_block_get_children (*isl_pointers__.p_isl_ast_node_block_get_children)
|
||||
+#define isl_ast_node_for_get_body (*isl_pointers__.p_isl_ast_node_for_get_body)
|
||||
+#define isl_ast_node_for_get_cond (*isl_pointers__.p_isl_ast_node_for_get_cond)
|
||||
+#define isl_ast_node_for_get_inc (*isl_pointers__.p_isl_ast_node_for_get_inc)
|
||||
+#define isl_ast_node_for_get_init (*isl_pointers__.p_isl_ast_node_for_get_init)
|
||||
+#define isl_ast_node_for_get_iterator (*isl_pointers__.p_isl_ast_node_for_get_iterator)
|
||||
+#define isl_ast_node_free (*isl_pointers__.p_isl_ast_node_free)
|
||||
+#define isl_ast_node_get_annotation (*isl_pointers__.p_isl_ast_node_get_annotation)
|
||||
+#define isl_ast_node_get_type (*isl_pointers__.p_isl_ast_node_get_type)
|
||||
+#define isl_ast_node_if_get_cond (*isl_pointers__.p_isl_ast_node_if_get_cond)
|
||||
+#define isl_ast_node_if_get_else (*isl_pointers__.p_isl_ast_node_if_get_else)
|
||||
+#define isl_ast_node_if_get_then (*isl_pointers__.p_isl_ast_node_if_get_then)
|
||||
+#define isl_ast_node_list_free (*isl_pointers__.p_isl_ast_node_list_free)
|
||||
+#define isl_ast_node_list_get_ast_node (*isl_pointers__.p_isl_ast_node_list_get_ast_node)
|
||||
+#define isl_ast_node_list_n_ast_node (*isl_pointers__.p_isl_ast_node_list_n_ast_node)
|
||||
+#define isl_ast_node_user_get_expr (*isl_pointers__.p_isl_ast_node_user_get_expr)
|
||||
+#define isl_constraint_set_coefficient_val (*isl_pointers__.p_isl_constraint_set_coefficient_val)
|
||||
+#define isl_constraint_set_constant_val (*isl_pointers__.p_isl_constraint_set_constant_val)
|
||||
+#define isl_id_get_user (*isl_pointers__.p_isl_id_get_user)
|
||||
+#define isl_local_space_get_ctx (*isl_pointers__.p_isl_local_space_get_ctx)
|
||||
+#define isl_map_fix_val (*isl_pointers__.p_isl_map_fix_val)
|
||||
+#define isl_options_set_ast_build_atomic_upper_bound (*isl_pointers__.p_isl_options_set_ast_build_atomic_upper_bound)
|
||||
+#define isl_printer_print_ast_node (*isl_pointers__.p_isl_printer_print_ast_node)
|
||||
+#define isl_printer_print_str (*isl_pointers__.p_isl_printer_print_str)
|
||||
+#define isl_printer_set_output_format (*isl_pointers__.p_isl_printer_set_output_format)
|
||||
+#define isl_pw_aff_mod_val (*isl_pointers__.p_isl_pw_aff_mod_val)
|
||||
+#define isl_schedule_constraints_compute_schedule (*isl_pointers__.p_isl_schedule_constraints_compute_schedule)
|
||||
+#define isl_schedule_constraints_on_domain (*isl_pointers__.p_isl_schedule_constraints_on_domain)
|
||||
+#define isl_schedule_constraints_set_coincidence (*isl_pointers__.p_isl_schedule_constraints_set_coincidence)
|
||||
+#define isl_schedule_constraints_set_proximity (*isl_pointers__.p_isl_schedule_constraints_set_proximity)
|
||||
+#define isl_schedule_constraints_set_validity (*isl_pointers__.p_isl_schedule_constraints_set_validity)
|
||||
+#define isl_set_get_dim_id (*isl_pointers__.p_isl_set_get_dim_id)
|
||||
+#define isl_set_max_val (*isl_pointers__.p_isl_set_max_val)
|
||||
+#define isl_set_min_val (*isl_pointers__.p_isl_set_min_val)
|
||||
+#define isl_set_params (*isl_pointers__.p_isl_set_params)
|
||||
+#define isl_space_align_params (*isl_pointers__.p_isl_space_align_params)
|
||||
+#define isl_space_map_from_domain_and_range (*isl_pointers__.p_isl_space_map_from_domain_and_range)
|
||||
+#define isl_space_set_tuple_name (*isl_pointers__.p_isl_space_set_tuple_name)
|
||||
+#define isl_space_wrap (*isl_pointers__.p_isl_space_wrap)
|
||||
+#define isl_union_map_from_domain_and_range (*isl_pointers__.p_isl_union_map_from_domain_and_range)
|
||||
+#define isl_union_map_range (*isl_pointers__.p_isl_union_map_range)
|
||||
+#define isl_union_set_union (*isl_pointers__.p_isl_union_set_union)
|
||||
+#define isl_union_set_universe (*isl_pointers__.p_isl_union_set_universe)
|
||||
+#define isl_val_2exp (*isl_pointers__.p_isl_val_2exp)
|
||||
+#define isl_val_add_ui (*isl_pointers__.p_isl_val_add_ui)
|
||||
+#define isl_val_copy (*isl_pointers__.p_isl_val_copy)
|
||||
+#define isl_val_free (*isl_pointers__.p_isl_val_free)
|
||||
+#define isl_val_int_from_si (*isl_pointers__.p_isl_val_int_from_si)
|
||||
+#define isl_val_int_from_ui (*isl_pointers__.p_isl_val_int_from_ui)
|
||||
+#define isl_val_mul (*isl_pointers__.p_isl_val_mul)
|
||||
+#define isl_val_neg (*isl_pointers__.p_isl_val_neg)
|
||||
+#define isl_val_sub (*isl_pointers__.p_isl_val_sub)
|
||||
+#define isl_printer_print_union_map (*isl_pointers__.p_isl_printer_print_union_map)
|
||||
+#define isl_pw_aff_get_ctx (*isl_pointers__.p_isl_pw_aff_get_ctx)
|
||||
+#define isl_val_is_int (*isl_pointers__.p_isl_val_is_int)
|
||||
+#define isl_ctx_get_max_operations (*isl_pointers__.p_isl_ctx_get_max_operations)
|
||||
+#define isl_ctx_set_max_operations (*isl_pointers__.p_isl_ctx_set_max_operations)
|
||||
+#define isl_ctx_last_error (*isl_pointers__.p_isl_ctx_last_error)
|
||||
+#define isl_ctx_reset_operations (*isl_pointers__.p_isl_ctx_reset_operations)
|
||||
+#define isl_map_coalesce (*isl_pointers__.p_isl_map_coalesce)
|
||||
+#define isl_printer_print_schedule (*isl_pointers__.p_isl_printer_print_schedule)
|
||||
+#define isl_set_set_dim_id (*isl_pointers__.p_isl_set_set_dim_id)
|
||||
+#define isl_union_map_coalesce (*isl_pointers__.p_isl_union_map_coalesce)
|
||||
+#define isl_multi_val_set_val (*isl_pointers__.p_isl_multi_val_set_val)
|
||||
+#define isl_multi_val_zero (*isl_pointers__.p_isl_multi_val_zero)
|
||||
+#define isl_options_set_schedule_max_coefficient (*isl_pointers__.p_isl_options_set_schedule_max_coefficient)
|
||||
+#define isl_options_set_tile_scale_tile_loops (*isl_pointers__.p_isl_options_set_tile_scale_tile_loops)
|
||||
+#define isl_schedule_copy (*isl_pointers__.p_isl_schedule_copy)
|
||||
+#define isl_schedule_get_map (*isl_pointers__.p_isl_schedule_get_map)
|
||||
+#define isl_schedule_map_schedule_node_bottom_up (*isl_pointers__.p_isl_schedule_map_schedule_node_bottom_up)
|
||||
+#define isl_schedule_node_band_get_permutable (*isl_pointers__.p_isl_schedule_node_band_get_permutable)
|
||||
+#define isl_schedule_node_band_get_space (*isl_pointers__.p_isl_schedule_node_band_get_space)
|
||||
+#define isl_schedule_node_band_tile (*isl_pointers__.p_isl_schedule_node_band_tile)
|
||||
+#define isl_schedule_node_child (*isl_pointers__.p_isl_schedule_node_child)
|
||||
+#define isl_schedule_node_free (*isl_pointers__.p_isl_schedule_node_free)
|
||||
+#define isl_schedule_node_get_child (*isl_pointers__.p_isl_schedule_node_get_child)
|
||||
+#define isl_schedule_node_get_ctx (*isl_pointers__.p_isl_schedule_node_get_ctx)
|
||||
+#define isl_schedule_node_get_type (*isl_pointers__.p_isl_schedule_node_get_type)
|
||||
+#define isl_schedule_node_n_children (*isl_pointers__.p_isl_schedule_node_n_children)
|
||||
+#define isl_union_map_is_equal (*isl_pointers__.p_isl_union_map_is_equal)
|
||||
+#define isl_union_access_info_compute_flow (*isl_pointers__.p_isl_union_access_info_compute_flow)
|
||||
+#define isl_union_access_info_from_sink (*isl_pointers__.p_isl_union_access_info_from_sink)
|
||||
+#define isl_union_access_info_set_may_source (*isl_pointers__.p_isl_union_access_info_set_may_source)
|
||||
+#define isl_union_access_info_set_must_source (*isl_pointers__.p_isl_union_access_info_set_must_source)
|
||||
+#define isl_union_access_info_set_schedule (*isl_pointers__.p_isl_union_access_info_set_schedule)
|
||||
+#define isl_union_flow_free (*isl_pointers__.p_isl_union_flow_free)
|
||||
+#define isl_union_flow_get_may_dependence (*isl_pointers__.p_isl_union_flow_get_may_dependence)
|
||||
+#define isl_union_flow_get_must_dependence (*isl_pointers__.p_isl_union_flow_get_must_dependence)
|
||||
+#define isl_aff_var_on_domain (*isl_pointers__.p_isl_aff_var_on_domain)
|
||||
+#define isl_multi_aff_from_aff (*isl_pointers__.p_isl_multi_aff_from_aff)
|
||||
+#define isl_schedule_get_ctx (*isl_pointers__.p_isl_schedule_get_ctx)
|
||||
+#define isl_multi_aff_set_tuple_id (*isl_pointers__.p_isl_multi_aff_set_tuple_id)
|
||||
+#define isl_multi_aff_dim (*isl_pointers__.p_isl_multi_aff_dim)
|
||||
+#define isl_schedule_get_domain (*isl_pointers__.p_isl_schedule_get_domain)
|
||||
+#define isl_union_set_is_empty (*isl_pointers__.p_isl_union_set_is_empty)
|
||||
+#define isl_union_set_get_space (*isl_pointers__.p_isl_union_set_get_space)
|
||||
+#define isl_union_pw_multi_aff_empty (*isl_pointers__.p_isl_union_pw_multi_aff_empty)
|
||||
+#define isl_union_set_foreach_set (*isl_pointers__.p_isl_union_set_foreach_set)
|
||||
+#define isl_union_set_free (*isl_pointers__.p_isl_union_set_free)
|
||||
+#define isl_multi_union_pw_aff_from_union_pw_multi_aff (*isl_pointers__.p_isl_multi_union_pw_aff_from_union_pw_multi_aff)
|
||||
+#define isl_multi_union_pw_aff_apply_multi_aff (*isl_pointers__.p_isl_multi_union_pw_aff_apply_multi_aff)
|
||||
+#define isl_schedule_insert_partial_schedule (*isl_pointers__.p_isl_schedule_insert_partial_schedule)
|
||||
+#define isl_union_pw_multi_aff_free (*isl_pointers__.p_isl_union_pw_multi_aff_free)
|
||||
+#define isl_pw_multi_aff_project_out_map (*isl_pointers__.p_isl_pw_multi_aff_project_out_map)
|
||||
+#define isl_union_pw_multi_aff_add_pw_multi_aff (*isl_pointers__.p_isl_union_pw_multi_aff_add_pw_multi_aff)
|
||||
+#define isl_schedule_from_domain (*isl_pointers__.p_isl_schedule_from_domain)
|
||||
+#define isl_schedule_sequence (*isl_pointers__.p_isl_schedule_sequence)
|
||||
+#define isl_ast_build_node_from_schedule (*isl_pointers__.p_isl_ast_build_node_from_schedule)
|
||||
+#define isl_ast_node_mark_get_node (*isl_pointers__.p_isl_ast_node_mark_get_node)
|
||||
+#define isl_schedule_node_band_member_get_ast_loop_type (*isl_pointers__.p_isl_schedule_node_band_member_get_ast_loop_type)
|
||||
+#define isl_schedule_node_band_member_set_ast_loop_type (*isl_pointers__.p_isl_schedule_node_band_member_set_ast_loop_type)
|
||||
+#define isl_val_n_abs_num_chunks (*isl_pointers__.p_isl_val_n_abs_num_chunks)
|
||||
+#define isl_val_get_abs_num_chunks (*isl_pointers__.p_isl_val_get_abs_num_chunks)
|
||||
+#define isl_val_int_from_chunks (*isl_pointers__.p_isl_val_int_from_chunks)
|
||||
+#define isl_val_is_neg (*isl_pointers__.p_isl_val_is_neg)
|
||||
+#define isl_version (*isl_pointers__.p_isl_version)
|
||||
+#define isl_options_get_on_error (*isl_pointers__.p_isl_options_get_on_error)
|
||||
+#define isl_ctx_reset_error (*isl_pointers__.p_isl_ctx_reset_error)
|
||||
|
||||
typedef struct poly_dr *poly_dr_p;
|
||||
|
||||
@@ -461,5 +1045,6 @@ extern void build_scops (vec<scop_p> *);
|
||||
extern void dot_all_sese (FILE *, vec<sese_l> &);
|
||||
extern void dot_sese (sese_l &);
|
||||
extern void dot_cfg ();
|
||||
+extern const char *get_isl_version (bool);
|
||||
|
||||
#endif
|
||||
--- gcc/graphite.c.jj 2015-11-04 14:15:32.000000000 +0100
|
||||
+++ gcc/graphite.c 2015-11-04 14:56:02.645536409 +0100
|
||||
@@ -60,6 +60,35 @@ along with GCC; see the file COPYING3.
|
||||
#include "tree-into-ssa.h"
|
||||
#include "graphite.h"
|
||||
|
||||
+__typeof (isl_pointers__) isl_pointers__;
|
||||
+
|
||||
+static bool
|
||||
+init_isl_pointers (void)
|
||||
+{
|
||||
+ void *h;
|
||||
+
|
||||
+ if (isl_pointers__.inited)
|
||||
+ return isl_pointers__.h != NULL;
|
||||
+ h = dlopen ("libisl.so.15", RTLD_LAZY);
|
||||
+ isl_pointers__.h = h;
|
||||
+ if (h == NULL)
|
||||
+ return false;
|
||||
+#define DYNSYM(x) \
|
||||
+ do \
|
||||
+ { \
|
||||
+ union { __typeof (isl_pointers__.p_##x) p; void *q; } u; \
|
||||
+ u.q = dlsym (h, #x); \
|
||||
+ if (u.q == NULL) \
|
||||
+ return false; \
|
||||
+ isl_pointers__.p_##x = u.p; \
|
||||
+ } \
|
||||
+ while (0)
|
||||
+ DYNSYMS
|
||||
+#undef DYNSYM
|
||||
+ isl_pointers__.inited = true;
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
/* Print global statistics to FILE. */
|
||||
|
||||
static void
|
||||
@@ -365,6 +394,15 @@ graphite_transform_loops (void)
|
||||
if (parallelized_function_p (cfun->decl))
|
||||
return;
|
||||
|
||||
+ if (number_of_loops (cfun) <= 1)
|
||||
+ return;
|
||||
+
|
||||
+ if (!init_isl_pointers ())
|
||||
+ {
|
||||
+ sorry ("Graphite loop optimizations cannot be used");
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
calculate_dominance_info (CDI_DOMINATORS);
|
||||
|
||||
/* We rely on post-dominators during merging of SESE regions so those
|
||||
@@ -455,6 +493,14 @@ graphite_transform_loops (void)
|
||||
}
|
||||
}
|
||||
|
||||
+const char *
|
||||
+get_isl_version (bool force)
|
||||
+{
|
||||
+ if (force)
|
||||
+ init_isl_pointers ();
|
||||
+ return (isl_pointers__.inited && isl_version) ? isl_version () : "none";
|
||||
+}
|
||||
+
|
||||
#else /* If isl is not available: #ifndef HAVE_isl. */
|
||||
|
||||
static void
|
||||
--- gcc/toplev.c.jj 2017-02-19 13:02:31.000000000 +0100
|
||||
+++ gcc/toplev.c 2017-02-19 16:50:25.536301350 +0100
|
||||
@@ -94,6 +94,7 @@ along with GCC; see the file COPYING3.
|
||||
|
||||
#ifdef HAVE_isl
|
||||
#include <isl/version.h>
|
||||
+extern const char *get_isl_version (bool);
|
||||
#endif
|
||||
|
||||
static void general_init (const char *, bool);
|
||||
@@ -683,7 +684,7 @@ print_version (FILE *file, const char *i
|
||||
#ifndef HAVE_isl
|
||||
"none"
|
||||
#else
|
||||
- isl_version ()
|
||||
+ get_isl_version (*indent == 0)
|
||||
#endif
|
||||
);
|
||||
if (strcmp (GCC_GMP_STRINGIFY_VERSION, gmp_version))
|
14
SOURCES/gcc8-libgcc-hardened.patch
Normal file
14
SOURCES/gcc8-libgcc-hardened.patch
Normal file
@ -0,0 +1,14 @@
|
||||
--- libgcc/config/t-slibgcc.mp 2018-10-03 16:07:00.336990246 -0400
|
||||
+++ libgcc/config/t-slibgcc 2018-10-03 16:06:26.719946740 -0400
|
||||
@@ -30,9 +30,10 @@ SHLIB_LC = -lc
|
||||
SHLIB_MAKE_SOLINK = $(LN_S) $(SHLIB_SONAME) $(SHLIB_DIR)/$(SHLIB_SOLINK)
|
||||
SHLIB_INSTALL_SOLINK = $(LN_S) $(SHLIB_SONAME) \
|
||||
$(DESTDIR)$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK)
|
||||
+SHLIB_EXTRA_LDFLAGS = -Wl,-z,relro -Wl,-z,now
|
||||
|
||||
SHLIB_LINK = $(CC) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \
|
||||
- $(SHLIB_LDFLAGS) \
|
||||
+ $(SHLIB_LDFLAGS) $(SHLIB_EXTRA_LDFLAGS) \
|
||||
-o $(SHLIB_DIR)/$(SHLIB_SONAME).tmp @multilib_flags@ \
|
||||
$(SHLIB_OBJS) $(SHLIB_LC) && \
|
||||
rm -f $(SHLIB_DIR)/$(SHLIB_SOLINK) && \
|
17
SOURCES/gcc8-libgomp-omp_h-multilib.patch
Normal file
17
SOURCES/gcc8-libgomp-omp_h-multilib.patch
Normal file
@ -0,0 +1,17 @@
|
||||
2008-06-09 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* omp.h.in (omp_nest_lock_t): Fix up for Linux multilibs.
|
||||
|
||||
--- libgomp/omp.h.in.jj 2008-06-09 13:34:05.000000000 +0200
|
||||
+++ libgomp/omp.h.in 2008-06-09 13:34:48.000000000 +0200
|
||||
@@ -42,8 +42,8 @@ typedef struct
|
||||
|
||||
typedef struct
|
||||
{
|
||||
- unsigned char _x[@OMP_NEST_LOCK_SIZE@]
|
||||
- __attribute__((__aligned__(@OMP_NEST_LOCK_ALIGN@)));
|
||||
+ unsigned char _x[8 + sizeof (void *)]
|
||||
+ __attribute__((__aligned__(sizeof (void *))));
|
||||
} omp_nest_lock_t;
|
||||
#endif
|
||||
|
24
SOURCES/gcc8-libstdc++-docs.patch
Normal file
24
SOURCES/gcc8-libstdc++-docs.patch
Normal file
@ -0,0 +1,24 @@
|
||||
--- libstdc++-v3/doc/html/index.html.jj 2011-01-03 12:53:21.282829010 +0100
|
||||
+++ libstdc++-v3/doc/html/index.html 2011-01-04 18:06:28.999851145 +0100
|
||||
@@ -5,6 +5,8 @@
|
||||
<a class="link" href="https://www.fsf.org" target="_top">FSF
|
||||
</a>
|
||||
</p><p>
|
||||
+ Release 8.1.1
|
||||
+ </p><p>
|
||||
Permission is granted to copy, distribute and/or modify this
|
||||
document under the terms of the GNU Free Documentation
|
||||
License, Version 1.2 or any later version published by the
|
||||
--- libstdc++-v3/doc/html/api.html.jj 2011-01-03 12:53:21.000000000 +0100
|
||||
+++ libstdc++-v3/doc/html/api.html 2011-01-04 18:12:01.672757784 +0100
|
||||
@@ -20,7 +20,9 @@
|
||||
member functions for the library classes, finding out what is in a
|
||||
particular include file, looking at inheritance diagrams, etc.
|
||||
</p><p>
|
||||
- The API documentation, rendered into HTML, can be viewed online
|
||||
+ The API documentation, rendered into HTML, can be viewed locally
|
||||
+ <a class="link" href="api/index.html" target="_top">for the 8.1.1 release</a>,
|
||||
+ online
|
||||
<a class="link" href="http://gcc.gnu.org/onlinedocs/" target="_top">for each GCC release</a>
|
||||
and
|
||||
<a class="link" href="http://gcc.gnu.org/onlinedocs/libstdc++/latest-doxygen/index.html" target="_top">
|
27
SOURCES/gcc8-libtool-no-rpath.patch
Normal file
27
SOURCES/gcc8-libtool-no-rpath.patch
Normal file
@ -0,0 +1,27 @@
|
||||
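Make libtool skip the standard system library directories (/usr/lib, /usr/lib64 and their /../ spellings) when it builds rpath lists.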
|
||||
--- ltmain.sh.jj 2007-12-07 14:53:21.000000000 +0100
|
||||
+++ ltmain.sh 2008-09-05 21:51:48.000000000 +0200
|
||||
@@ -5394,6 +5394,7 @@ EOF
|
||||
rpath="$finalize_rpath"
|
||||
test "$mode" != relink && rpath="$compile_rpath$rpath"
|
||||
for libdir in $rpath; do
|
||||
+ case "$libdir" in /usr/lib|/usr/lib64|/usr/lib/../lib|/usr/lib/../lib64) continue;; esac
|
||||
if test -n "$hardcode_libdir_flag_spec"; then
|
||||
if test -n "$hardcode_libdir_separator"; then
|
||||
if test -z "$hardcode_libdirs"; then
|
||||
@@ -6071,6 +6072,7 @@ EOF
|
||||
rpath=
|
||||
hardcode_libdirs=
|
||||
for libdir in $compile_rpath $finalize_rpath; do
|
||||
+ case "$libdir" in /usr/lib|/usr/lib64|/usr/lib/../lib|/usr/lib/../lib64) continue;; esac
|
||||
if test -n "$hardcode_libdir_flag_spec"; then
|
||||
if test -n "$hardcode_libdir_separator"; then
|
||||
if test -z "$hardcode_libdirs"; then
|
||||
@@ -6120,6 +6122,7 @@ EOF
|
||||
rpath=
|
||||
hardcode_libdirs=
|
||||
for libdir in $finalize_rpath; do
|
||||
+ case "$libdir" in /usr/lib|/usr/lib64|/usr/lib/../lib|/usr/lib/../lib64) continue;; esac
|
||||
if test -n "$hardcode_libdir_flag_spec"; then
|
||||
if test -n "$hardcode_libdir_separator"; then
|
||||
if test -z "$hardcode_libdirs"; then
|
17
SOURCES/gcc8-mcet.patch
Normal file
17
SOURCES/gcc8-mcet.patch
Normal file
@ -0,0 +1,17 @@
|
||||
2018-04-24 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* config/i386/i386.opt (mcet): Temporarily re-add as alias to -mshstk.
|
||||
|
||||
--- gcc/config/i386/i386.opt (revision 259613)
|
||||
+++ gcc/config/i386/i386.opt (revision 259612)
|
||||
@@ -1006,6 +1006,10 @@ mgeneral-regs-only
|
||||
Target Report RejectNegative Mask(GENERAL_REGS_ONLY) Var(ix86_target_flags) Save
|
||||
Generate code which uses only the general registers.
|
||||
|
||||
+mcet
|
||||
+Target Undocumented Alias(mshstk)
|
||||
+;; Deprecated
|
||||
+
|
||||
mshstk
|
||||
Target Report Mask(ISA_SHSTK) Var(ix86_isa_flags) Save
|
||||
Enable shadow stack built-in functions from Control-flow Enforcement
|
50
SOURCES/gcc8-no-add-needed.patch
Normal file
50
SOURCES/gcc8-no-add-needed.patch
Normal file
@ -0,0 +1,50 @@
|
||||
2010-02-08 Roland McGrath <roland@redhat.com>
|
||||
|
||||
* config/rs6000/sysv4.h (LINK_EH_SPEC): Pass --no-add-needed to the
|
||||
linker.
|
||||
* config/gnu-user.h (LINK_EH_SPEC): Likewise.
|
||||
* config/alpha/elf.h (LINK_EH_SPEC): Likewise.
|
||||
* config/ia64/linux.h (LINK_EH_SPEC): Likewise.
|
||||
|
||||
--- gcc/config/alpha/elf.h.jj 2011-01-03 12:52:31.118056764 +0100
|
||||
+++ gcc/config/alpha/elf.h 2011-01-04 18:14:10.931874160 +0100
|
||||
@@ -168,5 +168,5 @@ extern int alpha_this_gpdisp_sequence_nu
|
||||
I imagine that other systems will catch up. In the meantime, it
|
||||
doesn't harm to make sure that the data exists to be used later. */
|
||||
#if defined(HAVE_LD_EH_FRAME_HDR)
|
||||
-#define LINK_EH_SPEC "%{!static|static-pie:--eh-frame-hdr} "
|
||||
+#define LINK_EH_SPEC "--no-add-needed %{!static|static-pie:--eh-frame-hdr} "
|
||||
#endif
|
||||
--- gcc/config/ia64/linux.h.jj 2011-01-03 13:02:11.462994522 +0100
|
||||
+++ gcc/config/ia64/linux.h 2011-01-04 18:14:10.931874160 +0100
|
||||
@@ -76,7 +76,7 @@ do { \
|
||||
Signalize that because we have fde-glibc, we don't need all C shared libs
|
||||
linked against -lgcc_s. */
|
||||
#undef LINK_EH_SPEC
|
||||
-#define LINK_EH_SPEC ""
|
||||
+#define LINK_EH_SPEC "--no-add-needed "
|
||||
|
||||
#undef TARGET_INIT_LIBFUNCS
|
||||
#define TARGET_INIT_LIBFUNCS ia64_soft_fp_init_libfuncs
|
||||
--- gcc/config/gnu-user.h.jj 2011-01-03 12:53:03.739057299 +0100
|
||||
+++ gcc/config/gnu-user.h 2011-01-04 18:14:10.932814884 +0100
|
||||
@@ -133,7 +133,7 @@ see the files COPYING3 and COPYING.RUNTI
|
||||
#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC
|
||||
|
||||
#if defined(HAVE_LD_EH_FRAME_HDR)
|
||||
-#define LINK_EH_SPEC "%{!static|static-pie:--eh-frame-hdr} "
|
||||
+#define LINK_EH_SPEC "--no-add-needed %{!static|static-pie:--eh-frame-hdr} "
|
||||
#endif
|
||||
|
||||
#undef LINK_GCC_C_SEQUENCE_SPEC
|
||||
--- gcc/config/rs6000/sysv4.h.jj 2011-01-03 13:02:18.255994215 +0100
|
||||
+++ gcc/config/rs6000/sysv4.h 2011-01-04 18:14:10.933888871 +0100
|
||||
@@ -816,7 +816,7 @@ ENDIAN_SELECT(" -mbig", " -mlittle", DEF
|
||||
-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}}"
|
||||
|
||||
#if defined(HAVE_LD_EH_FRAME_HDR)
|
||||
-# define LINK_EH_SPEC "%{!static|static-pie:--eh-frame-hdr} "
|
||||
+# define LINK_EH_SPEC "--no-add-needed %{!static|static-pie:--eh-frame-hdr} "
|
||||
#endif
|
||||
|
||||
#define CPP_OS_LINUX_SPEC "-D__unix__ -D__gnu_linux__ -D__linux__ \
|
84
SOURCES/gcc8-pr60790.patch
Normal file
84
SOURCES/gcc8-pr60790.patch
Normal file
@ -0,0 +1,84 @@
|
||||
PR libgcc/60790
|
||||
x86: Do not assume ELF constructors run before IFUNC resolvers.
|
||||
* config/x86/host-config.h (libat_feat1_ecx, libat_feat1_edx):
|
||||
Remove declarations.
|
||||
(__libat_feat1, __libat_feat1_init): Declare.
|
||||
(FEAT1_REGISTER): Define.
|
||||
(load_feat1): New function.
|
||||
(IFUNC_COND_1): Adjust.
|
||||
* config/x86/init.c (libat_feat1_ecx, libat_feat1_edx)
|
||||
(init_cpuid): Remove definitions.
|
||||
(__libat_feat1): New variable.
|
||||
(__libat_feat1_init): New function.
|
||||
|
||||
--- libatomic/config/x86/host-config.h (revision 264990)
|
||||
+++ libatomic/config/x86/host-config.h (working copy)
|
||||
@@ -25,13 +25,39 @@
|
||||
#if HAVE_IFUNC
|
||||
#include <cpuid.h>
|
||||
|
||||
-extern unsigned int libat_feat1_ecx HIDDEN;
|
||||
-extern unsigned int libat_feat1_edx HIDDEN;
|
||||
+#ifdef __x86_64__
|
||||
+# define FEAT1_REGISTER ecx
|
||||
+#else
|
||||
+# define FEAT1_REGISTER edx
|
||||
+#endif
|
||||
|
||||
+/* Value of the CPUID feature register FEAT1_REGISTER for the cmpxchg
|
||||
+ bit for IFUNC_COND_1 below. */
|
||||
+extern unsigned int __libat_feat1 HIDDEN;
|
||||
+
|
||||
+/* Initialize __libat_feat1 and return its value. */
|
||||
+unsigned int __libat_feat1_init (void) HIDDEN;
|
||||
+
|
||||
+/* Return the value of the relevant feature register for the relevant
|
||||
+ cmpxchg bit, or 0 if there is no CPUID support. */
|
||||
+static inline unsigned int
|
||||
+__attribute__ ((const))
|
||||
+load_feat1 (void)
|
||||
+{
|
||||
+ /* See the store in __libat_feat1_init. */
|
||||
+ unsigned int feat1 = __atomic_load_n (&__libat_feat1, __ATOMIC_RELAXED);
|
||||
+ if (feat1 == 0)
|
||||
+ /* Assume that initialization has not happened yet. This may get
|
||||
+ called repeatedly if the CPU does not have any feature bits at
|
||||
+ all. */
|
||||
+ feat1 = __libat_feat1_init ();
|
||||
+ return feat1;
|
||||
+}
|
||||
+
|
||||
#ifdef __x86_64__
|
||||
-# define IFUNC_COND_1 (libat_feat1_ecx & bit_CMPXCHG16B)
|
||||
+# define IFUNC_COND_1 (load_feat1 () & bit_CMPXCHG16B)
|
||||
#else
|
||||
-# define IFUNC_COND_1 (libat_feat1_edx & bit_CMPXCHG8B)
|
||||
+# define IFUNC_COND_1 (load_feat1 () & bit_CMPXCHG8B)
|
||||
#endif
|
||||
|
||||
#ifdef __x86_64__
|
||||
--- libatomic/config/x86/init.c (revision 264990)
|
||||
+++ libatomic/config/x86/init.c (working copy)
|
||||
@@ -26,13 +26,17 @@
|
||||
|
||||
#if HAVE_IFUNC
|
||||
|
||||
-unsigned int libat_feat1_ecx, libat_feat1_edx;
|
||||
+unsigned int __libat_feat1;
|
||||
|
||||
-static void __attribute__((constructor))
|
||||
-init_cpuid (void)
|
||||
+unsigned int
|
||||
+__libat_feat1_init (void)
|
||||
{
|
||||
- unsigned int eax, ebx;
|
||||
- __get_cpuid (1, &eax, &ebx, &libat_feat1_ecx, &libat_feat1_edx);
|
||||
+ unsigned int eax, ebx, ecx, edx;
|
||||
+ FEAT1_REGISTER = 0;
|
||||
+ __get_cpuid (1, &eax, &ebx, &ecx, &edx);
|
||||
+ /* See the load in load_feat1. */
|
||||
+ __atomic_store_n (&__libat_feat1, FEAT1_REGISTER, __ATOMIC_RELAXED);
|
||||
+ return FEAT1_REGISTER;
|
||||
}
|
||||
|
||||
#endif /* HAVE_IFUNC */
|
445
SOURCES/gcc8-rh1512529-aarch64.patch
Normal file
445
SOURCES/gcc8-rh1512529-aarch64.patch
Normal file
@ -0,0 +1,445 @@
|
||||
--- gcc/config/aarch64/aarch64.c
|
||||
+++ gcc/config/aarch64/aarch64.c
|
||||
@@ -3799,7 +3799,14 @@ aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
|
||||
output_asm_insn ("sub\t%0, %0, %1", xops);
|
||||
|
||||
/* Probe at TEST_ADDR. */
|
||||
- output_asm_insn ("str\txzr, [%0]", xops);
|
||||
+ if (flag_stack_clash_protection)
|
||||
+ {
|
||||
+ gcc_assert (xops[0] == stack_pointer_rtx);
|
||||
+ xops[1] = GEN_INT (PROBE_INTERVAL - 8);
|
||||
+ output_asm_insn ("str\txzr, [%0, %1]", xops);
|
||||
+ }
|
||||
+ else
|
||||
+ output_asm_insn ("str\txzr, [%0]", xops);
|
||||
|
||||
/* Test if TEST_ADDR == LAST_ADDR. */
|
||||
xops[1] = reg2;
|
||||
@@ -4589,6 +4596,133 @@ aarch64_set_handled_components (sbitmap components)
|
||||
cfun->machine->reg_is_wrapped_separately[regno] = true;
|
||||
}
|
||||
|
||||
+/* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch
|
||||
+ registers. */
|
||||
+
|
||||
+static void
|
||||
+aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
|
||||
+ poly_int64 poly_size)
|
||||
+{
|
||||
+ HOST_WIDE_INT size;
|
||||
+ if (!poly_size.is_constant (&size))
|
||||
+ {
|
||||
+ sorry ("stack probes for SVE frames");
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ HOST_WIDE_INT probe_interval
|
||||
+ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
|
||||
+ HOST_WIDE_INT guard_size
|
||||
+ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
|
||||
+ HOST_WIDE_INT guard_used_by_caller = 1024;
|
||||
+
|
||||
+ /* SIZE should be large enough to require probing here. ie, it
|
||||
+ must be larger than GUARD_SIZE - GUARD_USED_BY_CALLER.
|
||||
+
|
||||
+ We can allocate GUARD_SIZE - GUARD_USED_BY_CALLER as a single chunk
|
||||
+ without any probing. */
|
||||
+ gcc_assert (size >= guard_size - guard_used_by_caller);
|
||||
+ aarch64_sub_sp (temp1, temp2, guard_size - guard_used_by_caller, true);
|
||||
+ HOST_WIDE_INT orig_size = size;
|
||||
+ size -= (guard_size - guard_used_by_caller);
|
||||
+
|
||||
+ HOST_WIDE_INT rounded_size = size & -probe_interval;
|
||||
+ HOST_WIDE_INT residual = size - rounded_size;
|
||||
+
|
||||
+ /* We can handle a small number of allocations/probes inline. Otherwise
|
||||
+ punt to a loop. */
|
||||
+ if (rounded_size && rounded_size <= 4 * probe_interval)
|
||||
+ {
|
||||
+ /* We don't use aarch64_sub_sp here because we don't want to
|
||||
+ repeatedly load TEMP1. */
|
||||
+ rtx step = GEN_INT (-probe_interval);
|
||||
+ if (probe_interval > ARITH_FACTOR)
|
||||
+ {
|
||||
+ emit_move_insn (temp1, step);
|
||||
+ step = temp1;
|
||||
+ }
|
||||
+
|
||||
+ for (HOST_WIDE_INT i = 0; i < rounded_size; i += probe_interval)
|
||||
+ {
|
||||
+ rtx_insn *insn = emit_insn (gen_add2_insn (stack_pointer_rtx, step));
|
||||
+ add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
|
||||
+
|
||||
+ if (probe_interval > ARITH_FACTOR)
|
||||
+ {
|
||||
+ RTX_FRAME_RELATED_P (insn) = 1;
|
||||
+ rtx adj = plus_constant (Pmode, stack_pointer_rtx, -probe_interval);
|
||||
+ add_reg_note (insn, REG_CFA_ADJUST_CFA,
|
||||
+ gen_rtx_SET (stack_pointer_rtx, adj));
|
||||
+ }
|
||||
+
|
||||
+ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
|
||||
+ (probe_interval
|
||||
+ - GET_MODE_SIZE (word_mode))));
|
||||
+ emit_insn (gen_blockage ());
|
||||
+ }
|
||||
+ dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
|
||||
+ }
|
||||
+ else if (rounded_size)
|
||||
+ {
|
||||
+ /* Compute the ending address. */
|
||||
+ unsigned int scratchreg = REGNO (temp1);
|
||||
+ emit_move_insn (temp1, GEN_INT (-rounded_size));
|
||||
+ rtx_insn *insn
|
||||
+ = emit_insn (gen_add3_insn (temp1, stack_pointer_rtx, temp1));
|
||||
+
|
||||
+ /* For the initial allocation, we don't have a frame pointer
|
||||
+ set up, so we always need CFI notes. If we're doing the
|
||||
+ final allocation, then we may have a frame pointer, in which
|
||||
+ case it is the CFA, otherwise we need CFI notes.
|
||||
+
|
||||
+ We can determine which allocation we are doing by looking at
|
||||
+ the temporary register. IP0 is the initial allocation, IP1
|
||||
+ is the final allocation. */
|
||||
+ if (scratchreg == IP0_REGNUM || !frame_pointer_needed)
|
||||
+ {
|
||||
+ /* We want the CFA independent of the stack pointer for the
|
||||
+ duration of the loop. */
|
||||
+ add_reg_note (insn, REG_CFA_DEF_CFA,
|
||||
+ plus_constant (Pmode, temp1,
|
||||
+ (rounded_size + (orig_size - size))));
|
||||
+ RTX_FRAME_RELATED_P (insn) = 1;
|
||||
+ }
|
||||
+
|
||||
+ /* This allocates and probes the stack.
|
||||
+
|
||||
+ It also probes at a 4k interval regardless of the value of
|
||||
+ PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL. */
|
||||
+ insn = emit_insn (gen_probe_stack_range (stack_pointer_rtx,
|
||||
+ stack_pointer_rtx, temp1));
|
||||
+
|
||||
+ /* Now reset the CFA register if needed. */
|
||||
+ if (scratchreg == IP0_REGNUM || !frame_pointer_needed)
|
||||
+ {
|
||||
+ add_reg_note (insn, REG_CFA_DEF_CFA,
|
||||
+ plus_constant (Pmode, stack_pointer_rtx,
|
||||
+ (rounded_size + (orig_size - size))));
|
||||
+ RTX_FRAME_RELATED_P (insn) = 1;
|
||||
+ }
|
||||
+
|
||||
+ emit_insn (gen_blockage ());
|
||||
+ dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
|
||||
+ }
|
||||
+ else
|
||||
+ dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
|
||||
+
|
||||
+ /* Handle any residuals.
|
||||
+ Note that any residual must be probed. */
|
||||
+ if (residual)
|
||||
+ {
|
||||
+ aarch64_sub_sp (temp1, temp2, residual, true);
|
||||
+ add_reg_note (get_last_insn (), REG_STACK_CHECK, const0_rtx);
|
||||
+ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
|
||||
+ (residual - GET_MODE_SIZE (word_mode))));
|
||||
+ emit_insn (gen_blockage ());
|
||||
+ }
|
||||
+ return;
|
||||
+}
|
||||
+
|
||||
/* Add a REG_CFA_EXPRESSION note to INSN to say that register REG
|
||||
is saved at BASE + OFFSET. */
|
||||
|
||||
@@ -4686,7 +4820,54 @@ aarch64_expand_prologue (void)
|
||||
rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM);
|
||||
rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM);
|
||||
|
||||
- aarch64_sub_sp (ip0_rtx, ip1_rtx, initial_adjust, true);
|
||||
+ /* We do not fully protect aarch64 against stack clash style attacks
|
||||
+ as doing so would be prohibitively expensive with less utility over
|
||||
+ time as newer compilers are deployed.
|
||||
+
|
||||
+ We assume the guard is at least 64k. Furthermore, we assume that
|
||||
+ the caller has not pushed the stack pointer more than 1k into
|
||||
+ the guard. A caller that pushes the stack pointer more than 1k into
|
||||
+ the guard is considered invalid.
|
||||
+
|
||||
+ Note that the caller's ability to push the stack pointer into the
|
||||
+ guard is a function of the number and size of outgoing arguments and/or
|
||||
+ dynamic stack allocations due to the mandatory save of the link register
|
||||
+ in the caller's frame.
|
||||
+
|
||||
+ With those assumptions the callee can allocate up to 63k of stack
|
||||
+ space without probing.
|
||||
+
|
||||
+ When probing is needed, we emit a probe at the start of the prologue
|
||||
+ and every PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes thereafter.
|
||||
+
|
||||
+ We have to track how much space has been allocated, but we do not
|
||||
+ track stores into the stack as implicit probes except for the
|
||||
+ fp/lr store. */
|
||||
+ HOST_WIDE_INT guard_size
|
||||
+ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
|
||||
+ HOST_WIDE_INT guard_used_by_caller = 1024;
|
||||
+ if (flag_stack_clash_protection)
|
||||
+ {
|
||||
+ if (known_eq (frame_size, 0))
|
||||
+ dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
|
||||
+ else if (known_lt (initial_adjust, guard_size - guard_used_by_caller)
|
||||
+ && known_lt (final_adjust, guard_size - guard_used_by_caller))
|
||||
+ dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
|
||||
+ }
|
||||
+
|
||||
+ /* In theory we should never have both an initial adjustment
|
||||
+ and a callee save adjustment. Verify that is the case since the
|
||||
+ code below does not handle it for -fstack-clash-protection. */
|
||||
+ gcc_assert (known_eq (initial_adjust, 0) || callee_adjust == 0);
|
||||
+
|
||||
+ /* Only probe if the initial adjustment is larger than the guard
|
||||
+ less the amount of the guard reserved for use by the caller's
|
||||
+ outgoing args. */
|
||||
+ if (flag_stack_clash_protection
|
||||
+ && maybe_ge (initial_adjust, guard_size - guard_used_by_caller))
|
||||
+ aarch64_allocate_and_probe_stack_space (ip0_rtx, ip1_rtx, initial_adjust);
|
||||
+ else
|
||||
+ aarch64_sub_sp (ip0_rtx, ip1_rtx, initial_adjust, true);
|
||||
|
||||
if (callee_adjust != 0)
|
||||
aarch64_push_regs (reg1, reg2, callee_adjust);
|
||||
@@ -4742,7 +4923,31 @@ aarch64_expand_prologue (void)
|
||||
callee_adjust != 0 || emit_frame_chain);
|
||||
aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
|
||||
callee_adjust != 0 || emit_frame_chain);
|
||||
- aarch64_sub_sp (ip1_rtx, ip0_rtx, final_adjust, !frame_pointer_needed);
|
||||
+
|
||||
+ /* We may need to probe the final adjustment as well. */
|
||||
+ if (flag_stack_clash_protection && maybe_ne (final_adjust, 0))
|
||||
+ {
|
||||
+ /* First probe if the final adjustment is larger than the guard size
|
||||
+ less the amount of the guard reserved for use by the caller's
|
||||
+ outgoing args. */
|
||||
+ if (maybe_ge (final_adjust, guard_size - guard_used_by_caller))
|
||||
+ aarch64_allocate_and_probe_stack_space (ip1_rtx, ip0_rtx,
|
||||
+ final_adjust);
|
||||
+ else
|
||||
+ aarch64_sub_sp (ip1_rtx, ip0_rtx, final_adjust, !frame_pointer_needed);
|
||||
+
|
||||
+ /* We must also probe if the final adjustment is larger than the guard
|
||||
+ that is assumed used by the caller. This may be sub-optimal. */
|
||||
+ if (maybe_ge (final_adjust, guard_used_by_caller))
|
||||
+ {
|
||||
+ if (dump_file)
|
||||
+ fprintf (dump_file,
|
||||
+ "Stack clash aarch64 large outgoing arg, probing\n");
|
||||
+ emit_stack_probe (stack_pointer_rtx);
|
||||
+ }
|
||||
+ }
|
||||
+ else
|
||||
+ aarch64_sub_sp (ip1_rtx, ip0_rtx, final_adjust, !frame_pointer_needed);
|
||||
}
|
||||
|
||||
/* Return TRUE if we can use a simple_return insn.
|
||||
@@ -10476,6 +10681,12 @@ aarch64_override_options_internal (struct gcc_options *opts)
|
||||
&& opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
|
||||
opts->x_flag_prefetch_loop_arrays = 1;
|
||||
|
||||
+ /* We assume the guard page is 64k. */
|
||||
+ maybe_set_param_value (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE,
|
||||
+ 16,
|
||||
+ opts->x_param_values,
|
||||
+ global_options_set.x_param_values);
|
||||
+
|
||||
aarch64_override_options_after_change_1 (opts);
|
||||
}
|
||||
|
||||
@@ -17161,6 +17372,28 @@ aarch64_sched_can_speculate_insn (rtx_insn *insn)
|
||||
}
|
||||
}
|
||||
|
||||
+/* It has been decided to allow up to 1kb of outgoing argument
|
||||
+ space to be allocated w/o probing. If more than 1kb of outgoing
|
||||
+ argument space is allocated, then it must be probed and the last
|
||||
+ probe must occur no more than 1kbyte away from the end of the
|
||||
+ allocated space.
|
||||
+
|
||||
+ This implies that the residual part of an alloca allocation may
|
||||
+ need probing in cases where the generic code might not otherwise
|
||||
+ think a probe is needed.
|
||||
+
|
||||
+ This target hook returns TRUE when allocating RESIDUAL bytes of
|
||||
+ alloca space requires an additional probe, otherwise FALSE is
|
||||
+ returned. */
|
||||
+
|
||||
+static bool
|
||||
+aarch64_stack_clash_protection_final_dynamic_probe (rtx residual)
|
||||
+{
|
||||
+ return (residual == CONST0_RTX (Pmode)
|
||||
+ || GET_CODE (residual) != CONST_INT
|
||||
+ || INTVAL (residual) >= 1024);
|
||||
+}
|
||||
+
|
||||
/* Implement TARGET_COMPUTE_PRESSURE_CLASSES. */
|
||||
|
||||
static int
|
||||
@@ -17669,6 +17902,10 @@ aarch64_libgcc_floating_mode_supported_p
|
||||
#undef TARGET_CONSTANT_ALIGNMENT
|
||||
#define TARGET_CONSTANT_ALIGNMENT aarch64_constant_alignment
|
||||
|
||||
+#undef TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE
|
||||
+#define TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE \
|
||||
+ aarch64_stack_clash_protection_final_dynamic_probe
|
||||
+
|
||||
#undef TARGET_COMPUTE_PRESSURE_CLASSES
|
||||
#define TARGET_COMPUTE_PRESSURE_CLASSES aarch64_compute_pressure_classes
|
||||
|
||||
--- gcc/config/aarch64/aarch64.md
|
||||
+++ gcc/config/aarch64/aarch64.md
|
||||
@@ -5812,7 +5812,7 @@
|
||||
)
|
||||
|
||||
(define_insn "probe_stack_range"
|
||||
- [(set (match_operand:DI 0 "register_operand" "=r")
|
||||
+ [(set (match_operand:DI 0 "register_operand" "=rk")
|
||||
(unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0")
|
||||
(match_operand:DI 2 "register_operand" "r")]
|
||||
UNSPECV_PROBE_STACK_RANGE))]
|
||||
--- gcc/testsuite/gcc.target/aarch64/stack-check-12.c
|
||||
+++ gcc/testsuite/gcc.target/aarch64/stack-check-12.c
|
||||
@@ -0,0 +1,20 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
|
||||
+/* { dg-require-effective-target supports_stack_clash_protection } */
|
||||
+
|
||||
+extern void arf (unsigned long int *, unsigned long int *);
|
||||
+void
|
||||
+frob ()
|
||||
+{
|
||||
+ unsigned long int num[1000];
|
||||
+ unsigned long int den[1000];
|
||||
+ arf (den, num);
|
||||
+}
|
||||
+
|
||||
+/* This verifies that the scheduler did not break the dependencies
|
||||
+ by adjusting the offsets within the probe and that the scheduler
|
||||
+ did not reorder around the stack probes. */
|
||||
+/* { dg-final { scan-assembler-times "sub\\tsp, sp, #4096\\n\\tstr\\txzr, .sp, 4088." 3 } } */
|
||||
+
|
||||
+
|
||||
+
|
||||
--- gcc/testsuite/gcc.target/aarch64/stack-check-13.c
|
||||
+++ gcc/testsuite/gcc.target/aarch64/stack-check-13.c
|
||||
@@ -0,0 +1,28 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
|
||||
+/* { dg-require-effective-target supports_stack_clash_protection } */
|
||||
+
|
||||
+#define ARG32(X) X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
|
||||
+#define ARG192(X) ARG32(X),ARG32(X),ARG32(X),ARG32(X),ARG32(X),ARG32(X)
|
||||
+void out1(ARG192(__int128));
|
||||
+int t1(int);
|
||||
+
|
||||
+int t3(int x)
|
||||
+{
|
||||
+ if (x < 1000)
|
||||
+ return t1 (x) + 1;
|
||||
+
|
||||
+ out1 (ARG192(1));
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+
|
||||
+
|
||||
+/* This test creates a large (> 1k) outgoing argument area that needs
|
||||
+ to be probed. We don't test the exact size of the space or the
|
||||
+ exact offset to make the test a little less sensitive to trivial
|
||||
+ output changes. */
|
||||
+/* { dg-final { scan-assembler-times "sub\\tsp, sp, #....\\n\\tstr\\txzr, \\\[sp" 1 } } */
|
||||
+
|
||||
+
|
||||
+
|
||||
--- gcc/testsuite/gcc.target/aarch64/stack-check-14.c
|
||||
+++ gcc/testsuite/gcc.target/aarch64/stack-check-14.c
|
||||
@@ -0,0 +1,25 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
|
||||
+/* { dg-require-effective-target supports_stack_clash_protection } */
|
||||
+
|
||||
+int t1(int);
|
||||
+
|
||||
+int t2(int x)
|
||||
+{
|
||||
+ char *p = __builtin_alloca (4050);
|
||||
+ x = t1 (x);
|
||||
+ return p[x];
|
||||
+}
|
||||
+
|
||||
+
|
||||
+/* This test has a constant sized alloca that is smaller than the
|
||||
+ probe interval. But it actually requires two probes instead
|
||||
+ of one because of the optimistic assumptions we made in the
|
||||
+ aarch64 prologue code WRT probing state.
|
||||
+
|
||||
+ The form can change quite a bit so we just check for two
|
||||
+ probes without looking at the actual address. */
|
||||
+/* { dg-final { scan-assembler-times "str\\txzr," 2 } } */
|
||||
+
|
||||
+
|
||||
+
|
||||
--- gcc/testsuite/gcc.target/aarch64/stack-check-15.c
|
||||
+++ gcc/testsuite/gcc.target/aarch64/stack-check-15.c
|
||||
@@ -0,0 +1,24 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
|
||||
+/* { dg-require-effective-target supports_stack_clash_protection } */
|
||||
+
|
||||
+int t1(int);
|
||||
+
|
||||
+int t2(int x)
|
||||
+{
|
||||
+ char *p = __builtin_alloca (x);
|
||||
+ x = t1 (x);
|
||||
+ return p[x];
|
||||
+}
|
||||
+
|
||||
+
|
||||
+/* This test has a variable sized alloca. It requires 3 probes.
|
||||
+ One in the loop, one for the residual and at the end of the
|
||||
+ alloca area.
|
||||
+
|
||||
+ The form can change quite a bit so we just check for three
|
||||
+ probes without looking at the actual address. */
|
||||
+/* { dg-final { scan-assembler-times "str\\txzr," 3 } } */
|
||||
+
|
||||
+
|
||||
+
|
||||
--- gcc/testsuite/lib/target-supports.exp
|
||||
+++ gcc/testsuite/lib/target-supports.exp
|
||||
@@ -9201,14 +9201,9 @@ proc check_effective_target_autoincdec { } {
|
||||
#
|
||||
proc check_effective_target_supports_stack_clash_protection { } {
|
||||
|
||||
- # Temporary until the target bits are fully ACK'd.
|
||||
-# if { [istarget aarch*-*-*] } {
|
||||
-# return 1
|
||||
-# }
|
||||
-
|
||||
if { [istarget x86_64-*-*] || [istarget i?86-*-*]
|
||||
|| [istarget powerpc*-*-*] || [istarget rs6000*-*-*]
|
||||
- || [istarget s390*-*-*] } {
|
||||
+ || [istarget aarch64*-**] || [istarget s390*-*-*] } {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
@@ -9217,9 +9212,9 @@ proc check_effective_target_supports_stack_clash_protection { } {
|
||||
# Return 1 if the target creates a frame pointer for non-leaf functions
|
||||
# Note we ignore cases where we apply tail call optimization here.
|
||||
proc check_effective_target_frame_pointer_for_non_leaf { } {
|
||||
- if { [istarget aarch*-*-*] } {
|
||||
- return 1
|
||||
- }
|
||||
+# if { [istarget aarch*-*-*] } {
|
||||
+# return 1
|
||||
+# }
|
||||
|
||||
# Solaris/x86 defaults to -fno-omit-frame-pointer.
|
||||
if { [istarget i?86-*-solaris*] || [istarget x86_64-*-solaris*] } {
|
31
SOURCES/gcc8-rh1574936.patch
Normal file
31
SOURCES/gcc8-rh1574936.patch
Normal file
@ -0,0 +1,31 @@
|
||||
crt files and statically linked libgcc objects cause false positives
|
||||
in annobin coverage, so we add the assembler flag to generate notes
|
||||
for them.
|
||||
|
||||
The patch also adds notes to libgcc_s.so, but this is harmless because
|
||||
these notes only confer that there is no other annobin markup.
|
||||
|
||||
2018-07-25 Florian Weimer <fweimer@redhat.com>
|
||||
|
||||
* Makefile.in (LIBGCC2_CFLAGS, CRTSTUFF_CFLAGS): Add
|
||||
-Wa,--generate-missing-build-notes=yes.
|
||||
|
||||
--- libgcc/Makefile.in 2018-01-13 13:05:41.000000000 +0100
|
||||
+++ libgcc/Makefile.in 2018-07-25 13:15:02.036226940 +0200
|
||||
@@ -244,6 +244,7 @@
|
||||
LIBGCC2_CFLAGS = -O2 $(LIBGCC2_INCLUDES) $(GCC_CFLAGS) $(HOST_LIBGCC2_CFLAGS) \
|
||||
$(LIBGCC2_DEBUG_CFLAGS) -DIN_LIBGCC2 \
|
||||
-fbuilding-libgcc -fno-stack-protector \
|
||||
+ -Wa,--generate-missing-build-notes=yes \
|
||||
$(INHIBIT_LIBC_CFLAGS)
|
||||
|
||||
# Additional options to use when compiling libgcc2.a.
|
||||
@@ -297,6 +298,7 @@
|
||||
$(NO_PIE_CFLAGS) -finhibit-size-directive -fno-inline -fno-exceptions \
|
||||
-fno-zero-initialized-in-bss -fno-toplevel-reorder -fno-tree-vectorize \
|
||||
-fbuilding-libgcc -fno-stack-protector $(FORCE_EXPLICIT_EH_REGISTRY) \
|
||||
+ -Wa,--generate-missing-build-notes=yes \
|
||||
$(INHIBIT_LIBC_CFLAGS)
|
||||
|
||||
# Extra flags to use when compiling crt{begin,end}.o.
|
||||
|
85
SOURCES/gcc8-rh1612514.patch
Normal file
85
SOURCES/gcc8-rh1612514.patch
Normal file
@ -0,0 +1,85 @@
|
||||
2018-08-03 David Malcolm <dmalcolm@redhat.com>
|
||||
|
||||
* doc/gcov.texi (-x): Remove duplicate "to".
|
||||
* doc/invoke.texi (-Wnoexcept-type): Remove duplicate "calls".
|
||||
(-Wif-not-aligned): Remove duplicate "is".
|
||||
(-flto): Remove duplicate "the".
|
||||
(MicroBlaze Options): In examples of "-mcpu=cpu-type", remove
|
||||
duplicate "v5.00.b".
|
||||
(MSP430 Options): Remove duplicate "and" from the description
|
||||
of "-mgprel-sec=regexp".
|
||||
(x86 Options): Remove duplicate copies of "vmldLog102" and
|
||||
vmlsLog104 from description of "-mveclibabi=type".
|
||||
|
||||
--- gcc/doc/gcov.texi
|
||||
+++ gcc/doc/gcov.texi
|
||||
@@ -340,7 +340,7 @@ Print verbose informations related to basic blocks and arcs.
|
||||
|
||||
@item -x
|
||||
@itemx --hash-filenames
|
||||
-By default, gcov uses the full pathname of the source files to to create
|
||||
+By default, gcov uses the full pathname of the source files to create
|
||||
an output filename. This can lead to long filenames that can overflow
|
||||
filesystem limits. This option creates names of the form
|
||||
@file{@var{source-file}##@var{md5}.gcov},
|
||||
--- gcc/doc/invoke.texi
|
||||
+++ gcc/doc/invoke.texi
|
||||
@@ -3056,7 +3056,7 @@ void h() @{ f(g); @}
|
||||
@end smallexample
|
||||
|
||||
@noindent
|
||||
-In C++14, @code{f} calls calls @code{f<void(*)()>}, but in
|
||||
+In C++14, @code{f} calls @code{f<void(*)()>}, but in
|
||||
C++17 it calls @code{f<void(*)()noexcept>}.
|
||||
|
||||
@item -Wclass-memaccess @r{(C++ and Objective-C++ only)}
|
||||
@@ -4587,7 +4587,7 @@ The @option{-Wimplicit-fallthrough=3} warning is enabled by @option{-Wextra}.
|
||||
@opindex Wif-not-aligned
|
||||
@opindex Wno-if-not-aligned
|
||||
Control if warning triggered by the @code{warn_if_not_aligned} attribute
|
||||
-should be issued. This is is enabled by default.
|
||||
+should be issued. This is enabled by default.
|
||||
Use @option{-Wno-if-not-aligned} to disable it.
|
||||
|
||||
@item -Wignored-qualifiers @r{(C and C++ only)}
|
||||
@@ -9613,7 +9613,7 @@ for LTO, use @command{gcc-ar} and @command{gcc-ranlib} instead of @command{ar}
|
||||
and @command{ranlib};
|
||||
to show the symbols of object files with GIMPLE bytecode, use
|
||||
@command{gcc-nm}. Those commands require that @command{ar}, @command{ranlib}
|
||||
-and @command{nm} have been compiled with plugin support. At link time, use the the
|
||||
+and @command{nm} have been compiled with plugin support. At link time, use the
|
||||
flag @option{-fuse-linker-plugin} to ensure that the library participates in
|
||||
the LTO optimization process:
|
||||
|
||||
@@ -20159,7 +20159,7 @@ Use features of, and schedule code for, the given CPU.
|
||||
Supported values are in the format @samp{v@var{X}.@var{YY}.@var{Z}},
|
||||
where @var{X} is a major version, @var{YY} is the minor version, and
|
||||
@var{Z} is compatibility code. Example values are @samp{v3.00.a},
|
||||
-@samp{v4.00.b}, @samp{v5.00.a}, @samp{v5.00.b}, @samp{v5.00.b}, @samp{v6.00.a}.
|
||||
+@samp{v4.00.b}, @samp{v5.00.a}, @samp{v5.00.b}, @samp{v6.00.a}.
|
||||
|
||||
@item -mxl-soft-mul
|
||||
@opindex mxl-soft-mul
|
||||
@@ -21839,7 +21839,7 @@ GP-relative addressing. It is most useful in conjunction with
|
||||
The @var{regexp} is a POSIX Extended Regular Expression.
|
||||
|
||||
This option does not affect the behavior of the @option{-G} option, and
|
||||
-and the specified sections are in addition to the standard @code{.sdata}
|
||||
+the specified sections are in addition to the standard @code{.sdata}
|
||||
and @code{.sbss} small-data sections that are recognized by @option{-mgpopt}.
|
||||
|
||||
@item -mr0rel-sec=@var{regexp}
|
||||
@@ -27613,11 +27613,11 @@ To use this option, both @option{-ftree-vectorize} and
|
||||
ABI-compatible library must be specified at link time.
|
||||
|
||||
GCC currently emits calls to @code{vmldExp2},
|
||||
-@code{vmldLn2}, @code{vmldLog102}, @code{vmldLog102}, @code{vmldPow2},
|
||||
+@code{vmldLn2}, @code{vmldLog102}, @code{vmldPow2},
|
||||
@code{vmldTanh2}, @code{vmldTan2}, @code{vmldAtan2}, @code{vmldAtanh2},
|
||||
@code{vmldCbrt2}, @code{vmldSinh2}, @code{vmldSin2}, @code{vmldAsinh2},
|
||||
@code{vmldAsin2}, @code{vmldCosh2}, @code{vmldCos2}, @code{vmldAcosh2},
|
||||
-@code{vmldAcos2}, @code{vmlsExp4}, @code{vmlsLn4}, @code{vmlsLog104},
|
||||
+@code{vmldAcos2}, @code{vmlsExp4}, @code{vmlsLn4},
|
||||
@code{vmlsLog104}, @code{vmlsPow4}, @code{vmlsTanh4}, @code{vmlsTan4},
|
||||
@code{vmlsAtan4}, @code{vmlsAtanh4}, @code{vmlsCbrt4}, @code{vmlsSinh4},
|
||||
@code{vmlsSin4}, @code{vmlsAsinh4}, @code{vmlsAsin4}, @code{vmlsCosh4},
|
124
SOURCES/gcc8-rh1652016.patch
Normal file
124
SOURCES/gcc8-rh1652016.patch
Normal file
@ -0,0 +1,124 @@
|
||||
commit e7c4d49ab27338e6bc8b0272c4036da58482bde0
|
||||
Author: krebbel <krebbel@138bc75d-0d04-0410-961f-82ee72b054a4>
|
||||
Date: Mon Nov 26 15:15:57 2018 +0000
|
||||
|
||||
S/390: Fix flogr RTX.
|
||||
|
||||
The flogr instruction uses a 64 bit register pair target operand. In
|
||||
the RTX we model this as a write to a TImode register. Unfortunately
|
||||
the RTX's being assigned to the two parts of the target operand were
|
||||
swapped. This is no problem if in the end the flogr instruction will
|
||||
be emitted since the instruction still does what the clzdi expander
|
||||
expects. However, a problem arises when the RTX is used to optimize
|
||||
CLZ for a constant input operand. Even then it matters only if the
|
||||
expression couldn't be folded on tree level already.
|
||||
|
||||
In the testcase this happened thanks to loop unrolling on RTL level.
|
||||
The iteration variable is used as an argument to the clz
|
||||
builtin. Due to the loop unrolling it becomes a constant and after
|
||||
folding the broken RTX leads to a wrong assumption.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
2018-11-26 Andreas Krebbel <krebbel@linux.ibm.com>
|
||||
|
||||
Backport from mainline
|
||||
2018-11-20 Andreas Krebbel <krebbel@linux.ibm.com>
|
||||
|
||||
* config/s390/s390.md ("clztidi2"): Swap the RTX's written to the
|
||||
DImode parts of the target operand.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
2018-11-26 Andreas Krebbel <krebbel@linux.ibm.com>
|
||||
|
||||
Backport from mainline
|
||||
2018-11-20 Andreas Krebbel <krebbel@linux.ibm.com>
|
||||
|
||||
* gcc.target/s390/flogr-1.c: New test.
|
||||
|
||||
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-8-branch@266465 138bc75d-0d04-0410-961f-82ee72b054a4
|
||||
|
||||
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
|
||||
index c4d391bc9b5..53bb1985285 100644
|
||||
--- a/gcc/config/s390/s390.md
|
||||
+++ b/gcc/config/s390/s390.md
|
||||
@@ -8861,17 +8861,17 @@
|
||||
DONE;
|
||||
})
|
||||
|
||||
+; CLZ result is in hard reg op0 - this is the high part of the target operand
|
||||
+; The source with the left-most one bit cleared is in hard reg op0 + 1 - the low part
|
||||
(define_insn "clztidi2"
|
||||
[(set (match_operand:TI 0 "register_operand" "=d")
|
||||
(ior:TI
|
||||
- (ashift:TI
|
||||
- (zero_extend:TI
|
||||
- (xor:DI (match_operand:DI 1 "register_operand" "d")
|
||||
- (lshiftrt (match_operand:DI 2 "const_int_operand" "")
|
||||
- (subreg:SI (clz:DI (match_dup 1)) 4))))
|
||||
-
|
||||
- (const_int 64))
|
||||
- (zero_extend:TI (clz:DI (match_dup 1)))))
|
||||
+ (ashift:TI (zero_extend:TI (clz:DI (match_operand:DI 1 "register_operand" "d")))
|
||||
+ (const_int 64))
|
||||
+ (zero_extend:TI
|
||||
+ (xor:DI (match_dup 1)
|
||||
+ (lshiftrt (match_operand:DI 2 "const_int_operand" "")
|
||||
+ (subreg:SI (clz:DI (match_dup 1)) 4))))))
|
||||
(clobber (reg:CC CC_REGNUM))]
|
||||
"UINTVAL (operands[2]) == HOST_WIDE_INT_1U << 63
|
||||
&& TARGET_EXTIMM && TARGET_ZARCH"
|
||||
diff --git a/gcc/testsuite/gcc.target/s390/flogr-1.c b/gcc/testsuite/gcc.target/s390/flogr-1.c
|
||||
new file mode 100644
|
||||
index 00000000000..a3869000d62
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/s390/flogr-1.c
|
||||
@@ -0,0 +1,47 @@
|
||||
+/* { dg-do run } */
|
||||
+/* { dg-options "-O2 -funroll-loops -march=z9-109" } */
|
||||
+/* { dg-require-effective-target stdint_types } */
|
||||
+
|
||||
+/* Folding of the FLOGR caused a wrong value to be returned by
|
||||
+ __builtin_clz because of a problem in the RTX we emit for FLOGR.
|
||||
+ The problematic folding can only be triggered with constant inputs
|
||||
+ introduced on RTL level. In this case it happens with loop
|
||||
+ unrolling. */
|
||||
+
|
||||
+#include <stdint.h>
|
||||
+#include <assert.h>
|
||||
+
|
||||
+static inline uint32_t pow2_ceil_u32(uint32_t x) {
|
||||
+ if (x <= 1) {
|
||||
+ return x;
|
||||
+ }
|
||||
+ int msb_on_index;
|
||||
+ msb_on_index = (31 ^ __builtin_clz(x - 1));
|
||||
+ assert(msb_on_index < 31);
|
||||
+ return 1U << (msb_on_index + 1);
|
||||
+}
|
||||
+
|
||||
+void __attribute__((noinline,noclone))
|
||||
+die (int a)
|
||||
+{
|
||||
+ if (a)
|
||||
+ __builtin_abort ();
|
||||
+}
|
||||
+
|
||||
+void test_pow2_ceil_u32(void) {
|
||||
+ unsigned i;
|
||||
+
|
||||
+ for (i = 0; i < 18; i++) {
|
||||
+ uint32_t a_ = (pow2_ceil_u32(((uint32_t)1) << i));
|
||||
+ if (!(a_ == (((uint32_t)1) << i))) {
|
||||
+ die(1);
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+main(void) {
|
||||
+ test_pow2_ceil_u32();
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
572
SOURCES/gcc8-rh1652929-1.patch
Normal file
572
SOURCES/gcc8-rh1652929-1.patch
Normal file
@ -0,0 +1,572 @@
|
||||
commit 87c504d3b293ebe6d36f3b50696cd307b02b0daa
|
||||
Author: acsawdey <acsawdey@138bc75d-0d04-0410-961f-82ee72b054a4>
|
||||
Date: Tue Jun 19 21:23:39 2018 +0000
|
||||
|
||||
2018-06-19 Aaron Sawdey <acsawdey@linux.ibm.com>
|
||||
|
||||
* config/rs6000/rs6000-string.c (select_block_compare_mode): Check
|
||||
TARGET_EFFICIENT_OVERLAPPING_UNALIGNED here instead of in caller.
|
||||
(do_and3, do_and3_mask, do_compb3, do_rotl3): New functions.
|
||||
(expand_block_compare): Change select_block_compare_mode call.
|
||||
(expand_strncmp_align_check): Use new functions, fix comment.
|
||||
(emit_final_str_compare_gpr): New function.
|
||||
(expand_strn_compare): Refactor and clean up code.
|
||||
* config/rs6000/vsx.md (vsx_mov<mode>_64bit): Remove *.
|
||||
|
||||
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@261769 138bc75d-0d04-0410-961f-82ee72b054a4
|
||||
|
||||
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
|
||||
index 632d3359711..f9dd54eb639 100644
|
||||
--- a/gcc/config/rs6000/rs6000-string.c
|
||||
+++ b/gcc/config/rs6000/rs6000-string.c
|
||||
@@ -266,6 +266,7 @@ select_block_compare_mode (unsigned HOST_WIDE_INT offset,
|
||||
else if (bytes == GET_MODE_SIZE (QImode))
|
||||
return QImode;
|
||||
else if (bytes < GET_MODE_SIZE (SImode)
|
||||
+ && TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
|
||||
&& offset >= GET_MODE_SIZE (SImode) - bytes)
|
||||
/* This matches the case were we have SImode and 3 bytes
|
||||
and offset >= 1 and permits us to move back one and overlap
|
||||
@@ -273,6 +274,7 @@ select_block_compare_mode (unsigned HOST_WIDE_INT offset,
|
||||
unwanted bytes off of the input. */
|
||||
return SImode;
|
||||
else if (word_mode_ok && bytes < UNITS_PER_WORD
|
||||
+ && TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
|
||||
&& offset >= UNITS_PER_WORD-bytes)
|
||||
/* Similarly, if we can use DImode it will get matched here and
|
||||
can do an overlapping read that ends at the end of the block. */
|
||||
@@ -408,6 +410,54 @@ do_add3 (rtx dest, rtx src1, rtx src2)
|
||||
emit_insn (gen_addsi3 (dest, src1, src2));
|
||||
}
|
||||
|
||||
+/* Emit an and of the proper mode for DEST.
|
||||
+
|
||||
+ DEST is the destination register for the and.
|
||||
+ SRC1 is the first and input.
|
||||
+ SRC2 is the second and input.
|
||||
+
|
||||
+ Computes DEST = SRC1&SRC2. */
|
||||
+static void
|
||||
+do_and3 (rtx dest, rtx src1, rtx src2)
|
||||
+{
|
||||
+ if (GET_MODE (dest) == DImode)
|
||||
+ emit_insn (gen_anddi3 (dest, src1, src2));
|
||||
+ else
|
||||
+ emit_insn (gen_andsi3 (dest, src1, src2));
|
||||
+}
|
||||
+
|
||||
+/* Emit a cmpb of the proper mode for DEST.
|
||||
+
|
||||
+ DEST is the destination register for the cmpb.
|
||||
+ SRC1 is the first input.
|
||||
+ SRC2 is the second input.
|
||||
+
|
||||
+ Computes cmpb of SRC1, SRC2. */
|
||||
+static void
|
||||
+do_cmpb3 (rtx dest, rtx src1, rtx src2)
|
||||
+{
|
||||
+ if (GET_MODE (dest) == DImode)
|
||||
+ emit_insn (gen_cmpbdi3 (dest, src1, src2));
|
||||
+ else
|
||||
+ emit_insn (gen_cmpbsi3 (dest, src1, src2));
|
||||
+}
|
||||
+
|
||||
+/* Emit a rotl of the proper mode for DEST.
|
||||
+
|
||||
+ DEST is the destination register for the rotate.
|
||||
+ SRC1 is the value to be rotated.
|
||||
+ SRC2 is the rotate amount.
|
||||
+
|
||||
+ Computes DEST = SRC1 rotated left by SRC2. */
|
||||
+static void
|
||||
+do_rotl3 (rtx dest, rtx src1, rtx src2)
|
||||
+{
|
||||
+ if (GET_MODE (dest) == DImode)
|
||||
+ emit_insn (gen_rotldi3 (dest, src1, src2));
|
||||
+ else
|
||||
+ emit_insn (gen_rotlsi3 (dest, src1, src2));
|
||||
+}
|
||||
+
|
||||
/* Generate rtl for a load, shift, and compare of less than a full word.
|
||||
|
||||
LOAD_MODE is the machine mode for the loads.
|
||||
@@ -1395,11 +1445,8 @@ expand_block_compare (rtx operands[])
|
||||
while (bytes > 0)
|
||||
{
|
||||
unsigned int align = compute_current_alignment (base_align, offset);
|
||||
- if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
|
||||
- load_mode = select_block_compare_mode (offset, bytes, align,
|
||||
- word_mode_ok);
|
||||
- else
|
||||
- load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok);
|
||||
+ load_mode = select_block_compare_mode (offset, bytes,
|
||||
+ align, word_mode_ok);
|
||||
load_mode_size = GET_MODE_SIZE (load_mode);
|
||||
if (bytes >= load_mode_size)
|
||||
cmp_bytes = load_mode_size;
|
||||
@@ -1627,22 +1674,19 @@ expand_block_compare (rtx operands[])
|
||||
return true;
|
||||
}
|
||||
|
||||
-/* Generate alignment check and branch code to set up for
|
||||
+/* Generate page crossing check and branch code to set up for
|
||||
strncmp when we don't have DI alignment.
|
||||
STRNCMP_LABEL is the label to branch if there is a page crossing.
|
||||
- SRC is the string pointer to be examined.
|
||||
+ SRC_ADDR is the string address to be examined.
|
||||
BYTES is the max number of bytes to compare. */
|
||||
static void
|
||||
-expand_strncmp_align_check (rtx strncmp_label, rtx src, HOST_WIDE_INT bytes)
|
||||
+expand_strncmp_align_check (rtx strncmp_label, rtx src_addr, HOST_WIDE_INT bytes)
|
||||
{
|
||||
rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, strncmp_label);
|
||||
- rtx src_check = copy_addr_to_reg (XEXP (src, 0));
|
||||
- if (GET_MODE (src_check) == SImode)
|
||||
- emit_insn (gen_andsi3 (src_check, src_check, GEN_INT (0xfff)));
|
||||
- else
|
||||
- emit_insn (gen_anddi3 (src_check, src_check, GEN_INT (0xfff)));
|
||||
+ rtx src_pgoff = gen_reg_rtx (GET_MODE (src_addr));
|
||||
+ do_and3 (src_pgoff, src_addr, GEN_INT (0xfff));
|
||||
rtx cond = gen_reg_rtx (CCmode);
|
||||
- emit_move_insn (cond, gen_rtx_COMPARE (CCmode, src_check,
|
||||
+ emit_move_insn (cond, gen_rtx_COMPARE (CCmode, src_pgoff,
|
||||
GEN_INT (4096 - bytes)));
|
||||
|
||||
rtx cmp_rtx = gen_rtx_GE (VOIDmode, cond, const0_rtx);
|
||||
@@ -1654,6 +1698,76 @@ expand_strncmp_align_check (rtx strncmp_label, rtx src, HOST_WIDE_INT bytes)
|
||||
LABEL_NUSES (strncmp_label) += 1;
|
||||
}
|
||||
|
||||
+/* Generate the final sequence that identifies the differing
|
||||
+ byte and generates the final result, taking into account
|
||||
+ zero bytes:
|
||||
+
|
||||
+ cmpb cmpb_result1, src1, src2
|
||||
+ cmpb cmpb_result2, src1, zero
|
||||
+ orc cmpb_result1, cmpb_result1, cmpb_result2
|
||||
+ cntlzd get bit of first zero/diff byte
|
||||
+ addi convert for rldcl use
|
||||
+ rldcl rldcl extract diff/zero byte
|
||||
+ subf subtract for final result
|
||||
+
|
||||
+ STR1 is the reg rtx for data from string 1.
|
||||
+ STR2 is the reg rtx for data from string 2.
|
||||
+ RESULT is the reg rtx for the comparison result. */
|
||||
+
|
||||
+static void
|
||||
+emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
|
||||
+{
|
||||
+ machine_mode m = GET_MODE (str1);
|
||||
+ rtx cmpb_diff = gen_reg_rtx (m);
|
||||
+ rtx cmpb_zero = gen_reg_rtx (m);
|
||||
+ rtx rot_amt = gen_reg_rtx (m);
|
||||
+ rtx zero_reg = gen_reg_rtx (m);
|
||||
+
|
||||
+ rtx rot1_1 = gen_reg_rtx (m);
|
||||
+ rtx rot1_2 = gen_reg_rtx (m);
|
||||
+ rtx rot2_1 = gen_reg_rtx (m);
|
||||
+ rtx rot2_2 = gen_reg_rtx (m);
|
||||
+
|
||||
+ if (m == SImode)
|
||||
+ {
|
||||
+ emit_insn (gen_cmpbsi3 (cmpb_diff, str1, str2));
|
||||
+ emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
|
||||
+ emit_insn (gen_cmpbsi3 (cmpb_zero, str1, zero_reg));
|
||||
+ emit_insn (gen_one_cmplsi2 (cmpb_diff,cmpb_diff));
|
||||
+ emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
|
||||
+ emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
|
||||
+ emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
|
||||
+ emit_insn (gen_rotlsi3 (rot1_1, str1,
|
||||
+ gen_lowpart (SImode, rot_amt)));
|
||||
+ emit_insn (gen_andsi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
|
||||
+ emit_insn (gen_rotlsi3 (rot2_1, str2,
|
||||
+ gen_lowpart (SImode, rot_amt)));
|
||||
+ emit_insn (gen_andsi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
|
||||
+ emit_insn (gen_subsi3 (result, rot1_2, rot2_2));
|
||||
+ }
|
||||
+ else if (m == DImode)
|
||||
+ {
|
||||
+ emit_insn (gen_cmpbdi3 (cmpb_diff, str1, str2));
|
||||
+ emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
|
||||
+ emit_insn (gen_cmpbdi3 (cmpb_zero, str1, zero_reg));
|
||||
+ emit_insn (gen_one_cmpldi2 (cmpb_diff,cmpb_diff));
|
||||
+ emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
|
||||
+ emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
|
||||
+ emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
|
||||
+ emit_insn (gen_rotldi3 (rot1_1, str1,
|
||||
+ gen_lowpart (SImode, rot_amt)));
|
||||
+ emit_insn (gen_anddi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
|
||||
+ emit_insn (gen_rotldi3 (rot2_1, str2,
|
||||
+ gen_lowpart (SImode, rot_amt)));
|
||||
+ emit_insn (gen_anddi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
|
||||
+ emit_insn (gen_subdi3 (result, rot1_2, rot2_2));
|
||||
+ }
|
||||
+ else
|
||||
+ gcc_unreachable ();
|
||||
+
|
||||
+ return;
|
||||
+}
|
||||
+
|
||||
/* Expand a string compare operation with length, and return
|
||||
true if successful. Return false if we should let the
|
||||
compiler generate normal code, probably a strncmp call.
|
||||
@@ -1684,8 +1798,8 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
align_rtx = operands[4];
|
||||
}
|
||||
unsigned HOST_WIDE_INT cmp_bytes = 0;
|
||||
- rtx src1 = orig_src1;
|
||||
- rtx src2 = orig_src2;
|
||||
+ rtx src1_addr = force_reg (Pmode, XEXP (orig_src1, 0));
|
||||
+ rtx src2_addr = force_reg (Pmode, XEXP (orig_src2, 0));
|
||||
|
||||
/* If we have a length, it must be constant. This simplifies things
|
||||
a bit as we don't have to generate code to check if we've exceeded
|
||||
@@ -1698,8 +1812,8 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
return false;
|
||||
|
||||
unsigned int base_align = UINTVAL (align_rtx);
|
||||
- int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT;
|
||||
- int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT;
|
||||
+ unsigned int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT;
|
||||
+ unsigned int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT;
|
||||
|
||||
/* targetm.slow_unaligned_access -- don't do unaligned stuff. */
|
||||
if (targetm.slow_unaligned_access (word_mode, align1)
|
||||
@@ -1751,8 +1865,9 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
rtx final_move_label = gen_label_rtx ();
|
||||
rtx final_label = gen_label_rtx ();
|
||||
rtx begin_compare_label = NULL;
|
||||
+ unsigned int required_align = 8;
|
||||
|
||||
- if (base_align < 8)
|
||||
+ if (base_align < required_align)
|
||||
{
|
||||
/* Generate code that checks distance to 4k boundary for this case. */
|
||||
begin_compare_label = gen_label_rtx ();
|
||||
@@ -1775,14 +1890,14 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
}
|
||||
else
|
||||
{
|
||||
- align_test = ROUND_UP (align_test, 8);
|
||||
- base_align = 8;
|
||||
+ align_test = ROUND_UP (align_test, required_align);
|
||||
+ base_align = required_align;
|
||||
}
|
||||
|
||||
- if (align1 < 8)
|
||||
- expand_strncmp_align_check (strncmp_label, src1, align_test);
|
||||
- if (align2 < 8)
|
||||
- expand_strncmp_align_check (strncmp_label, src2, align_test);
|
||||
+ if (align1 < required_align)
|
||||
+ expand_strncmp_align_check (strncmp_label, src1_addr, align_test);
|
||||
+ if (align2 < required_align)
|
||||
+ expand_strncmp_align_check (strncmp_label, src2_addr, align_test);
|
||||
|
||||
/* Now generate the following sequence:
|
||||
- branch to begin_compare
|
||||
@@ -1799,25 +1914,13 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
|
||||
emit_label (strncmp_label);
|
||||
|
||||
- if (!REG_P (XEXP (src1, 0)))
|
||||
- {
|
||||
- rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
|
||||
- src1 = replace_equiv_address (src1, src1_reg);
|
||||
- }
|
||||
-
|
||||
- if (!REG_P (XEXP (src2, 0)))
|
||||
- {
|
||||
- rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
|
||||
- src2 = replace_equiv_address (src2, src2_reg);
|
||||
- }
|
||||
-
|
||||
if (no_length)
|
||||
{
|
||||
tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
|
||||
emit_library_call_value (XEXP (DECL_RTL (fun), 0),
|
||||
target, LCT_NORMAL, GET_MODE (target),
|
||||
- force_reg (Pmode, XEXP (src1, 0)), Pmode,
|
||||
- force_reg (Pmode, XEXP (src2, 0)), Pmode);
|
||||
+ force_reg (Pmode, src1_addr), Pmode,
|
||||
+ force_reg (Pmode, src2_addr), Pmode);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1830,8 +1933,8 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
|
||||
emit_library_call_value (XEXP (DECL_RTL (fun), 0),
|
||||
target, LCT_NORMAL, GET_MODE (target),
|
||||
- force_reg (Pmode, XEXP (src1, 0)), Pmode,
|
||||
- force_reg (Pmode, XEXP (src2, 0)), Pmode,
|
||||
+ force_reg (Pmode, src1_addr), Pmode,
|
||||
+ force_reg (Pmode, src2_addr), Pmode,
|
||||
len_rtx, Pmode);
|
||||
}
|
||||
|
||||
@@ -1847,12 +1950,12 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
|
||||
rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
|
||||
|
||||
- /* Generate sequence of ld/ldbrx, cmpb to compare out
|
||||
+ /* Generate a sequence of GPR or VEC/VSX instructions to compare out
|
||||
to the length specified. */
|
||||
unsigned HOST_WIDE_INT bytes_to_compare = compare_length;
|
||||
while (bytes_to_compare > 0)
|
||||
{
|
||||
- /* Compare sequence:
|
||||
+ /* GPR compare sequence:
|
||||
check each 8B with: ld/ld cmpd bne
|
||||
If equal, use rldicr/cmpb to check for zero byte.
|
||||
cleanup code at end:
|
||||
@@ -1866,13 +1969,10 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
|
||||
The last compare can branch around the cleanup code if the
|
||||
result is zero because the strings are exactly equal. */
|
||||
+
|
||||
unsigned int align = compute_current_alignment (base_align, offset);
|
||||
- if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
|
||||
- load_mode = select_block_compare_mode (offset, bytes_to_compare, align,
|
||||
- word_mode_ok);
|
||||
- else
|
||||
- load_mode = select_block_compare_mode (0, bytes_to_compare, align,
|
||||
- word_mode_ok);
|
||||
+ load_mode = select_block_compare_mode (offset, bytes_to_compare,
|
||||
+ align, word_mode_ok);
|
||||
load_mode_size = GET_MODE_SIZE (load_mode);
|
||||
if (bytes_to_compare >= load_mode_size)
|
||||
cmp_bytes = load_mode_size;
|
||||
@@ -1895,25 +1995,10 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
rid of the extra bytes. */
|
||||
cmp_bytes = bytes_to_compare;
|
||||
|
||||
- src1 = adjust_address (orig_src1, load_mode, offset);
|
||||
- src2 = adjust_address (orig_src2, load_mode, offset);
|
||||
-
|
||||
- if (!REG_P (XEXP (src1, 0)))
|
||||
- {
|
||||
- rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
|
||||
- src1 = replace_equiv_address (src1, src1_reg);
|
||||
- }
|
||||
- set_mem_size (src1, load_mode_size);
|
||||
-
|
||||
- if (!REG_P (XEXP (src2, 0)))
|
||||
- {
|
||||
- rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
|
||||
- src2 = replace_equiv_address (src2, src2_reg);
|
||||
- }
|
||||
- set_mem_size (src2, load_mode_size);
|
||||
-
|
||||
- do_load_for_compare (tmp_reg_src1, src1, load_mode);
|
||||
- do_load_for_compare (tmp_reg_src2, src2, load_mode);
|
||||
+ rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset));
|
||||
+ do_load_for_compare_from_addr (load_mode, tmp_reg_src1, addr1, orig_src1);
|
||||
+ rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset));
|
||||
+ do_load_for_compare_from_addr (load_mode, tmp_reg_src2, addr2, orig_src2);
|
||||
|
||||
/* We must always left-align the data we read, and
|
||||
clear any bytes to the right that are beyond the string.
|
||||
@@ -1926,16 +2011,8 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
{
|
||||
/* Rotate left first. */
|
||||
rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
|
||||
- if (word_mode == DImode)
|
||||
- {
|
||||
- emit_insn (gen_rotldi3 (tmp_reg_src1, tmp_reg_src1, sh));
|
||||
- emit_insn (gen_rotldi3 (tmp_reg_src2, tmp_reg_src2, sh));
|
||||
- }
|
||||
- else
|
||||
- {
|
||||
- emit_insn (gen_rotlsi3 (tmp_reg_src1, tmp_reg_src1, sh));
|
||||
- emit_insn (gen_rotlsi3 (tmp_reg_src2, tmp_reg_src2, sh));
|
||||
- }
|
||||
+ do_rotl3 (tmp_reg_src1, tmp_reg_src1, sh);
|
||||
+ do_rotl3 (tmp_reg_src2, tmp_reg_src2, sh);
|
||||
}
|
||||
|
||||
if (cmp_bytes < word_mode_size)
|
||||
@@ -1944,16 +2021,8 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
turned into a rldicr instruction. */
|
||||
HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
||||
rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
||||
- if (word_mode == DImode)
|
||||
- {
|
||||
- emit_insn (gen_anddi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
|
||||
- emit_insn (gen_anddi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
|
||||
- }
|
||||
- else
|
||||
- {
|
||||
- emit_insn (gen_andsi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
|
||||
- emit_insn (gen_andsi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
|
||||
- }
|
||||
+ do_and3 (tmp_reg_src1, tmp_reg_src1, mask);
|
||||
+ do_and3 (tmp_reg_src2, tmp_reg_src2, mask);
|
||||
}
|
||||
|
||||
/* Cases to handle. A and B are chunks of the two strings.
|
||||
@@ -2010,31 +2079,16 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
|
||||
rtx condz = gen_reg_rtx (CCmode);
|
||||
rtx zero_reg = gen_reg_rtx (word_mode);
|
||||
- if (word_mode == SImode)
|
||||
- {
|
||||
- emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
|
||||
- emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
|
||||
- if (cmp_bytes < word_mode_size)
|
||||
- {
|
||||
- /* Don't want to look at zero bytes past end. */
|
||||
- HOST_WIDE_INT mb =
|
||||
- BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
||||
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
||||
- emit_insn (gen_andsi3_mask (cmpb_zero, cmpb_zero, mask));
|
||||
- }
|
||||
- }
|
||||
- else
|
||||
+ emit_move_insn (zero_reg, GEN_INT (0));
|
||||
+ do_cmpb3 (cmpb_zero, tmp_reg_src1, zero_reg);
|
||||
+
|
||||
+ if (cmp_bytes < word_mode_size)
|
||||
{
|
||||
- emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
|
||||
- emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
|
||||
- if (cmp_bytes < word_mode_size)
|
||||
- {
|
||||
- /* Don't want to look at zero bytes past end. */
|
||||
- HOST_WIDE_INT mb =
|
||||
- BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
||||
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
||||
- emit_insn (gen_anddi3_mask (cmpb_zero, cmpb_zero, mask));
|
||||
- }
|
||||
+ /* Don't want to look at zero bytes past end. */
|
||||
+ HOST_WIDE_INT mb =
|
||||
+ BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
||||
+ rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
||||
+ do_and3 (cmpb_zero, cmpb_zero, mask);
|
||||
}
|
||||
|
||||
emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
|
||||
@@ -2054,22 +2108,10 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
if (equality_compare_rest)
|
||||
{
|
||||
/* Update pointers past what has been compared already. */
|
||||
- src1 = adjust_address (orig_src1, load_mode, offset);
|
||||
- src2 = adjust_address (orig_src2, load_mode, offset);
|
||||
-
|
||||
- if (!REG_P (XEXP (src1, 0)))
|
||||
- {
|
||||
- rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
|
||||
- src1 = replace_equiv_address (src1, src1_reg);
|
||||
- }
|
||||
- set_mem_size (src1, load_mode_size);
|
||||
-
|
||||
- if (!REG_P (XEXP (src2, 0)))
|
||||
- {
|
||||
- rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
|
||||
- src2 = replace_equiv_address (src2, src2_reg);
|
||||
- }
|
||||
- set_mem_size (src2, load_mode_size);
|
||||
+ rtx src1 = force_reg (Pmode,
|
||||
+ gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset)));
|
||||
+ rtx src2 = force_reg (Pmode,
|
||||
+ gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset)));
|
||||
|
||||
/* Construct call to strcmp/strncmp to compare the rest of the string. */
|
||||
if (no_length)
|
||||
@@ -2077,8 +2119,7 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
|
||||
emit_library_call_value (XEXP (DECL_RTL (fun), 0),
|
||||
target, LCT_NORMAL, GET_MODE (target),
|
||||
- force_reg (Pmode, XEXP (src1, 0)), Pmode,
|
||||
- force_reg (Pmode, XEXP (src2, 0)), Pmode);
|
||||
+ src1, Pmode, src2, Pmode);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -2087,9 +2128,7 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
|
||||
emit_library_call_value (XEXP (DECL_RTL (fun), 0),
|
||||
target, LCT_NORMAL, GET_MODE (target),
|
||||
- force_reg (Pmode, XEXP (src1, 0)), Pmode,
|
||||
- force_reg (Pmode, XEXP (src2, 0)), Pmode,
|
||||
- len_rtx, Pmode);
|
||||
+ src1, Pmode, src2, Pmode, len_rtx, Pmode);
|
||||
}
|
||||
|
||||
rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
|
||||
@@ -2102,63 +2141,7 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
if (cleanup_label)
|
||||
emit_label (cleanup_label);
|
||||
|
||||
- /* Generate the final sequence that identifies the differing
|
||||
- byte and generates the final result, taking into account
|
||||
- zero bytes:
|
||||
-
|
||||
- cmpb cmpb_result1, src1, src2
|
||||
- cmpb cmpb_result2, src1, zero
|
||||
- orc cmpb_result1, cmp_result1, cmpb_result2
|
||||
- cntlzd get bit of first zero/diff byte
|
||||
- addi convert for rldcl use
|
||||
- rldcl rldcl extract diff/zero byte
|
||||
- subf subtract for final result
|
||||
- */
|
||||
-
|
||||
- rtx cmpb_diff = gen_reg_rtx (word_mode);
|
||||
- rtx cmpb_zero = gen_reg_rtx (word_mode);
|
||||
- rtx rot_amt = gen_reg_rtx (word_mode);
|
||||
- rtx zero_reg = gen_reg_rtx (word_mode);
|
||||
-
|
||||
- rtx rot1_1 = gen_reg_rtx (word_mode);
|
||||
- rtx rot1_2 = gen_reg_rtx (word_mode);
|
||||
- rtx rot2_1 = gen_reg_rtx (word_mode);
|
||||
- rtx rot2_2 = gen_reg_rtx (word_mode);
|
||||
-
|
||||
- if (word_mode == SImode)
|
||||
- {
|
||||
- emit_insn (gen_cmpbsi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
|
||||
- emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
|
||||
- emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
|
||||
- emit_insn (gen_one_cmplsi2 (cmpb_diff,cmpb_diff));
|
||||
- emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
|
||||
- emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
|
||||
- emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
|
||||
- emit_insn (gen_rotlsi3 (rot1_1, tmp_reg_src1,
|
||||
- gen_lowpart (SImode, rot_amt)));
|
||||
- emit_insn (gen_andsi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
|
||||
- emit_insn (gen_rotlsi3 (rot2_1, tmp_reg_src2,
|
||||
- gen_lowpart (SImode, rot_amt)));
|
||||
- emit_insn (gen_andsi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
|
||||
- emit_insn (gen_subsi3 (result_reg, rot1_2, rot2_2));
|
||||
- }
|
||||
- else
|
||||
- {
|
||||
- emit_insn (gen_cmpbdi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
|
||||
- emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
|
||||
- emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
|
||||
- emit_insn (gen_one_cmpldi2 (cmpb_diff,cmpb_diff));
|
||||
- emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
|
||||
- emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
|
||||
- emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
|
||||
- emit_insn (gen_rotldi3 (rot1_1, tmp_reg_src1,
|
||||
- gen_lowpart (SImode, rot_amt)));
|
||||
- emit_insn (gen_anddi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
|
||||
- emit_insn (gen_rotldi3 (rot2_1, tmp_reg_src2,
|
||||
- gen_lowpart (SImode, rot_amt)));
|
||||
- emit_insn (gen_anddi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
|
||||
- emit_insn (gen_subdi3 (result_reg, rot1_2, rot2_2));
|
||||
- }
|
||||
+ emit_final_str_compare_gpr (tmp_reg_src1, tmp_reg_src2, result_reg);
|
||||
|
||||
emit_label (final_move_label);
|
||||
emit_insn (gen_movsi (target,
|
||||
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
|
||||
index 0fc77aa18b0..e6921e96a3d 100644
|
||||
--- a/gcc/config/rs6000/vsx.md
|
||||
+++ b/gcc/config/rs6000/vsx.md
|
||||
@@ -1210,7 +1210,7 @@
|
||||
;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
|
||||
;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
|
||||
;; VSX 0/-1 GPR 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
|
||||
-(define_insn "*vsx_mov<mode>_64bit"
|
||||
+(define_insn "vsx_mov<mode>_64bit"
|
||||
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
|
||||
"=ZwO, <VSa>, <VSa>, r, we, ?wQ,
|
||||
?&r, ??r, ??Y, <??r>, wo, v,
|
472
SOURCES/gcc8-rh1652929-2.patch
Normal file
472
SOURCES/gcc8-rh1652929-2.patch
Normal file
@ -0,0 +1,472 @@
|
||||
commit c7a833caa029b84ad579c3fabe006a80f718d7e1
|
||||
Author: acsawdey <acsawdey@138bc75d-0d04-0410-961f-82ee72b054a4>
|
||||
Date: Thu Aug 2 18:11:54 2018 +0000
|
||||
|
||||
2018-07-31 Aaron Sawdey <acsawdey@linux.ibm.com>
|
||||
|
||||
* config/rs6000/rs6000-string.c (select_block_compare_mode): Move test
|
||||
for word_mode_ok here instead of passing as argument.
|
||||
(expand_block_compare): Change select_block_compare_mode() call.
|
||||
(expand_strncmp_gpr_sequence): New function.
|
||||
(expand_strn_compare): Make use of expand_strncmp_gpr_sequence.
|
||||
|
||||
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@263273 138bc75d-0d04-0410-961f-82ee72b054a4
|
||||
|
||||
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
|
||||
index f9dd54eb639..451e9ed33da 100644
|
||||
--- a/gcc/config/rs6000/rs6000-string.c
|
||||
+++ b/gcc/config/rs6000/rs6000-string.c
|
||||
@@ -238,13 +238,11 @@ do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
|
||||
|
||||
OFFSET is the current read offset from the beginning of the block.
|
||||
BYTES is the number of bytes remaining to be read.
|
||||
- ALIGN is the minimum alignment of the memory blocks being compared in bytes.
|
||||
- WORD_MODE_OK indicates using WORD_MODE is allowed, else SImode is
|
||||
- the largest allowable mode. */
|
||||
+ ALIGN is the minimum alignment of the memory blocks being compared in bytes. */
|
||||
static machine_mode
|
||||
select_block_compare_mode (unsigned HOST_WIDE_INT offset,
|
||||
unsigned HOST_WIDE_INT bytes,
|
||||
- unsigned HOST_WIDE_INT align, bool word_mode_ok)
|
||||
+ unsigned HOST_WIDE_INT align)
|
||||
{
|
||||
/* First see if we can do a whole load unit
|
||||
as that will be more efficient than a larger load + shift. */
|
||||
@@ -257,6 +255,11 @@ select_block_compare_mode (unsigned HOST_WIDE_INT offset,
|
||||
/* The most we can read without potential page crossing. */
|
||||
unsigned HOST_WIDE_INT maxread = ROUND_UP (bytes, align);
|
||||
|
||||
+ /* If we have an LE target without ldbrx and word_mode is DImode,
|
||||
+ then we must avoid using word_mode. */
|
||||
+ int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
|
||||
+ && word_mode == DImode);
|
||||
+
|
||||
if (word_mode_ok && bytes >= UNITS_PER_WORD)
|
||||
return word_mode;
|
||||
else if (bytes == GET_MODE_SIZE (SImode))
|
||||
@@ -1382,16 +1385,11 @@ expand_block_compare (rtx operands[])
|
||||
else
|
||||
cond = gen_reg_rtx (CCmode);
|
||||
|
||||
- /* If we have an LE target without ldbrx and word_mode is DImode,
|
||||
- then we must avoid using word_mode. */
|
||||
- int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
|
||||
- && word_mode == DImode);
|
||||
-
|
||||
/* Strategy phase. How many ops will this take and should we expand it? */
|
||||
|
||||
unsigned HOST_WIDE_INT offset = 0;
|
||||
machine_mode load_mode =
|
||||
- select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
|
||||
+ select_block_compare_mode (offset, bytes, base_align);
|
||||
unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
|
||||
|
||||
/* We don't want to generate too much code. The loop code can take
|
||||
@@ -1445,8 +1443,7 @@ expand_block_compare (rtx operands[])
|
||||
while (bytes > 0)
|
||||
{
|
||||
unsigned int align = compute_current_alignment (base_align, offset);
|
||||
- load_mode = select_block_compare_mode (offset, bytes,
|
||||
- align, word_mode_ok);
|
||||
+ load_mode = select_block_compare_mode (offset, bytes, align);
|
||||
load_mode_size = GET_MODE_SIZE (load_mode);
|
||||
if (bytes >= load_mode_size)
|
||||
cmp_bytes = load_mode_size;
|
||||
@@ -1698,6 +1695,189 @@ expand_strncmp_align_check (rtx strncmp_label, rtx src_addr, HOST_WIDE_INT bytes
|
||||
LABEL_NUSES (strncmp_label) += 1;
|
||||
}
|
||||
|
||||
+/* Generate the sequence of compares for strcmp/strncmp using gpr instructions.
|
||||
+ BYTES_TO_COMPARE is the number of bytes to be compared.
|
||||
+ BASE_ALIGN is the smaller of the alignment of the two strings.
|
||||
+ ORIG_SRC1 is the unmodified rtx for the first string.
|
||||
+ ORIG_SRC2 is the unmodified rtx for the second string.
|
||||
+ TMP_REG_SRC1 is the register for loading the first string.
|
||||
+ TMP_REG_SRC2 is the register for loading the second string.
|
||||
+ RESULT_REG is the rtx for the result register.
|
||||
+ EQUALITY_COMPARE_REST is a flag to indicate we need to make a cleanup call
|
||||
+ to strcmp/strncmp if we have equality at the end of the inline comparison.
|
||||
+ CLEANUP_LABEL is rtx for a label we generate if we need code to clean up
|
||||
+ and generate the final comparison result.
|
||||
+ FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
|
||||
+ set the final result. */
|
||||
+static void
|
||||
+expand_strncmp_gpr_sequence(unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
+ unsigned int base_align,
|
||||
+ rtx orig_src1, rtx orig_src2,
|
||||
+ rtx tmp_reg_src1, rtx tmp_reg_src2, rtx result_reg,
|
||||
+ bool equality_compare_rest, rtx &cleanup_label,
|
||||
+ rtx final_move_label)
|
||||
+{
|
||||
+ unsigned int word_mode_size = GET_MODE_SIZE (word_mode);
|
||||
+ machine_mode load_mode;
|
||||
+ unsigned int load_mode_size;
|
||||
+ unsigned HOST_WIDE_INT cmp_bytes = 0;
|
||||
+ unsigned HOST_WIDE_INT offset = 0;
|
||||
+ rtx src1_addr = force_reg (Pmode, XEXP (orig_src1, 0));
|
||||
+ rtx src2_addr = force_reg (Pmode, XEXP (orig_src2, 0));
|
||||
+
|
||||
+ while (bytes_to_compare > 0)
|
||||
+ {
|
||||
+ /* GPR compare sequence:
|
||||
+ check each 8B with: ld/ld cmpd bne
|
||||
+ If equal, use rldicr/cmpb to check for zero byte.
|
||||
+ cleanup code at end:
|
||||
+ cmpb get byte that differs
|
||||
+ cmpb look for zero byte
|
||||
+ orc combine
|
||||
+ cntlzd get bit of first zero/diff byte
|
||||
+ subfic convert for rldcl use
|
||||
+ rldcl rldcl extract diff/zero byte
|
||||
+ subf subtract for final result
|
||||
+
|
||||
+ The last compare can branch around the cleanup code if the
|
||||
+ result is zero because the strings are exactly equal. */
|
||||
+
|
||||
+ unsigned int align = compute_current_alignment (base_align, offset);
|
||||
+ load_mode = select_block_compare_mode (offset, bytes_to_compare, align);
|
||||
+ load_mode_size = GET_MODE_SIZE (load_mode);
|
||||
+ if (bytes_to_compare >= load_mode_size)
|
||||
+ cmp_bytes = load_mode_size;
|
||||
+ else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
|
||||
+ {
|
||||
+ /* Move this load back so it doesn't go past the end.
|
||||
+ P8/P9 can do this efficiently. */
|
||||
+ unsigned int extra_bytes = load_mode_size - bytes_to_compare;
|
||||
+ cmp_bytes = bytes_to_compare;
|
||||
+ if (extra_bytes < offset)
|
||||
+ {
|
||||
+ offset -= extra_bytes;
|
||||
+ cmp_bytes = load_mode_size;
|
||||
+ bytes_to_compare = cmp_bytes;
|
||||
+ }
|
||||
+ }
|
||||
+ else
|
||||
+ /* P7 and earlier can't do the overlapping load trick fast,
|
||||
+ so this forces a non-overlapping load and a shift to get
|
||||
+ rid of the extra bytes. */
|
||||
+ cmp_bytes = bytes_to_compare;
|
||||
+
|
||||
+ rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset));
|
||||
+ do_load_for_compare_from_addr (load_mode, tmp_reg_src1, addr1, orig_src1);
|
||||
+ rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset));
|
||||
+ do_load_for_compare_from_addr (load_mode, tmp_reg_src2, addr2, orig_src2);
|
||||
+
|
||||
+ /* We must always left-align the data we read, and
|
||||
+ clear any bytes to the right that are beyond the string.
|
||||
+ Otherwise the cmpb sequence won't produce the correct
|
||||
+ results. The beginning of the compare will be done
|
||||
+ with word_mode so will not have any extra shifts or
|
||||
+ clear rights. */
|
||||
+
|
||||
+ if (load_mode_size < word_mode_size)
|
||||
+ {
|
||||
+ /* Rotate left first. */
|
||||
+ rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
|
||||
+ do_rotl3 (tmp_reg_src1, tmp_reg_src1, sh);
|
||||
+ do_rotl3 (tmp_reg_src2, tmp_reg_src2, sh);
|
||||
+ }
|
||||
+
|
||||
+ if (cmp_bytes < word_mode_size)
|
||||
+ {
|
||||
+ /* Now clear right. This plus the rotate can be
|
||||
+ turned into a rldicr instruction. */
|
||||
+ HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
||||
+ rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
||||
+ do_and3 (tmp_reg_src1, tmp_reg_src1, mask);
|
||||
+ do_and3 (tmp_reg_src2, tmp_reg_src2, mask);
|
||||
+ }
|
||||
+
|
||||
+ /* Cases to handle. A and B are chunks of the two strings.
|
||||
+ 1: Not end of comparison:
|
||||
+ A != B: branch to cleanup code to compute result.
|
||||
+ A == B: check for 0 byte, next block if not found.
|
||||
+ 2: End of the inline comparison:
|
||||
+ A != B: branch to cleanup code to compute result.
|
||||
+ A == B: check for 0 byte, call strcmp/strncmp
|
||||
+ 3: compared requested N bytes:
|
||||
+ A == B: branch to result 0.
|
||||
+ A != B: cleanup code to compute result. */
|
||||
+
|
||||
+ unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
|
||||
+
|
||||
+ rtx dst_label;
|
||||
+ if (remain > 0 || equality_compare_rest)
|
||||
+ {
|
||||
+ /* Branch to cleanup code, otherwise fall through to do
|
||||
+ more compares. */
|
||||
+ if (!cleanup_label)
|
||||
+ cleanup_label = gen_label_rtx ();
|
||||
+ dst_label = cleanup_label;
|
||||
+ }
|
||||
+ else
|
||||
+ /* Branch to end and produce result of 0. */
|
||||
+ dst_label = final_move_label;
|
||||
+
|
||||
+ rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
|
||||
+ rtx cond = gen_reg_rtx (CCmode);
|
||||
+
|
||||
+ /* Always produce the 0 result, it is needed if
|
||||
+ cmpb finds a 0 byte in this chunk. */
|
||||
+ rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
|
||||
+ rs6000_emit_dot_insn (result_reg, tmp, 1, cond);
|
||||
+
|
||||
+ rtx cmp_rtx;
|
||||
+ if (remain == 0 && !equality_compare_rest)
|
||||
+ cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
|
||||
+ else
|
||||
+ cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
|
||||
+
|
||||
+ rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
|
||||
+ lab_ref, pc_rtx);
|
||||
+ rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
|
||||
+ JUMP_LABEL (j) = dst_label;
|
||||
+ LABEL_NUSES (dst_label) += 1;
|
||||
+
|
||||
+ if (remain > 0 || equality_compare_rest)
|
||||
+ {
|
||||
+ /* Generate a cmpb to test for a 0 byte and branch
|
||||
+ to final result if found. */
|
||||
+ rtx cmpb_zero = gen_reg_rtx (word_mode);
|
||||
+ rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
|
||||
+ rtx condz = gen_reg_rtx (CCmode);
|
||||
+ rtx zero_reg = gen_reg_rtx (word_mode);
|
||||
+ emit_move_insn (zero_reg, GEN_INT (0));
|
||||
+ do_cmpb3 (cmpb_zero, tmp_reg_src1, zero_reg);
|
||||
+
|
||||
+ if (cmp_bytes < word_mode_size)
|
||||
+ {
|
||||
+ /* Don't want to look at zero bytes past end. */
|
||||
+ HOST_WIDE_INT mb =
|
||||
+ BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
||||
+ rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
||||
+ do_and3 (cmpb_zero, cmpb_zero, mask);
|
||||
+ }
|
||||
+
|
||||
+ emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
|
||||
+ rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx);
|
||||
+ rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx,
|
||||
+ lab_ref_fin, pc_rtx);
|
||||
+ rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
|
||||
+ JUMP_LABEL (j2) = final_move_label;
|
||||
+ LABEL_NUSES (final_move_label) += 1;
|
||||
+
|
||||
+ }
|
||||
+
|
||||
+ offset += cmp_bytes;
|
||||
+ bytes_to_compare -= cmp_bytes;
|
||||
+ }
|
||||
+
|
||||
+}
|
||||
+
|
||||
/* Generate the final sequence that identifies the differing
|
||||
byte and generates the final result, taking into account
|
||||
zero bytes:
|
||||
@@ -1797,7 +1977,7 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
bytes_rtx = operands[3];
|
||||
align_rtx = operands[4];
|
||||
}
|
||||
- unsigned HOST_WIDE_INT cmp_bytes = 0;
|
||||
+
|
||||
rtx src1_addr = force_reg (Pmode, XEXP (orig_src1, 0));
|
||||
rtx src2_addr = force_reg (Pmode, XEXP (orig_src2, 0));
|
||||
|
||||
@@ -1822,11 +2002,6 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
|
||||
gcc_assert (GET_MODE (target) == SImode);
|
||||
|
||||
- /* If we have an LE target without ldbrx and word_mode is DImode,
|
||||
- then we must avoid using word_mode. */
|
||||
- int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
|
||||
- && word_mode == DImode);
|
||||
-
|
||||
unsigned int word_mode_size = GET_MODE_SIZE (word_mode);
|
||||
|
||||
unsigned HOST_WIDE_INT offset = 0;
|
||||
@@ -1839,7 +2014,7 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
bytes = UINTVAL (bytes_rtx);
|
||||
|
||||
machine_mode load_mode =
|
||||
- select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
|
||||
+ select_block_compare_mode (0, bytes, base_align);
|
||||
unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
|
||||
compare_length = rs6000_string_compare_inline_limit * load_mode_size;
|
||||
|
||||
@@ -1867,6 +2042,8 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
rtx begin_compare_label = NULL;
|
||||
unsigned int required_align = 8;
|
||||
|
||||
+ required_align = 8;
|
||||
+
|
||||
if (base_align < required_align)
|
||||
{
|
||||
/* Generate code that checks distance to 4k boundary for this case. */
|
||||
@@ -1952,159 +2129,15 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
|
||||
/* Generate a sequence of GPR or VEC/VSX instructions to compare out
|
||||
to the length specified. */
|
||||
- unsigned HOST_WIDE_INT bytes_to_compare = compare_length;
|
||||
- while (bytes_to_compare > 0)
|
||||
- {
|
||||
- /* GPR compare sequence:
|
||||
- check each 8B with: ld/ld cmpd bne
|
||||
- If equal, use rldicr/cmpb to check for zero byte.
|
||||
- cleanup code at end:
|
||||
- cmpb get byte that differs
|
||||
- cmpb look for zero byte
|
||||
- orc combine
|
||||
- cntlzd get bit of first zero/diff byte
|
||||
- subfic convert for rldcl use
|
||||
- rldcl rldcl extract diff/zero byte
|
||||
- subf subtract for final result
|
||||
-
|
||||
- The last compare can branch around the cleanup code if the
|
||||
- result is zero because the strings are exactly equal. */
|
||||
-
|
||||
- unsigned int align = compute_current_alignment (base_align, offset);
|
||||
- load_mode = select_block_compare_mode (offset, bytes_to_compare,
|
||||
- align, word_mode_ok);
|
||||
- load_mode_size = GET_MODE_SIZE (load_mode);
|
||||
- if (bytes_to_compare >= load_mode_size)
|
||||
- cmp_bytes = load_mode_size;
|
||||
- else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
|
||||
- {
|
||||
- /* Move this load back so it doesn't go past the end.
|
||||
- P8/P9 can do this efficiently. */
|
||||
- unsigned int extra_bytes = load_mode_size - bytes_to_compare;
|
||||
- cmp_bytes = bytes_to_compare;
|
||||
- if (extra_bytes < offset)
|
||||
- {
|
||||
- offset -= extra_bytes;
|
||||
- cmp_bytes = load_mode_size;
|
||||
- bytes_to_compare = cmp_bytes;
|
||||
- }
|
||||
- }
|
||||
- else
|
||||
- /* P7 and earlier can't do the overlapping load trick fast,
|
||||
- so this forces a non-overlapping load and a shift to get
|
||||
- rid of the extra bytes. */
|
||||
- cmp_bytes = bytes_to_compare;
|
||||
-
|
||||
- rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset));
|
||||
- do_load_for_compare_from_addr (load_mode, tmp_reg_src1, addr1, orig_src1);
|
||||
- rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset));
|
||||
- do_load_for_compare_from_addr (load_mode, tmp_reg_src2, addr2, orig_src2);
|
||||
-
|
||||
- /* We must always left-align the data we read, and
|
||||
- clear any bytes to the right that are beyond the string.
|
||||
- Otherwise the cmpb sequence won't produce the correct
|
||||
- results. The beginning of the compare will be done
|
||||
- with word_mode so will not have any extra shifts or
|
||||
- clear rights. */
|
||||
-
|
||||
- if (load_mode_size < word_mode_size)
|
||||
- {
|
||||
- /* Rotate left first. */
|
||||
- rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
|
||||
- do_rotl3 (tmp_reg_src1, tmp_reg_src1, sh);
|
||||
- do_rotl3 (tmp_reg_src2, tmp_reg_src2, sh);
|
||||
- }
|
||||
-
|
||||
- if (cmp_bytes < word_mode_size)
|
||||
- {
|
||||
- /* Now clear right. This plus the rotate can be
|
||||
- turned into a rldicr instruction. */
|
||||
- HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
||||
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
||||
- do_and3 (tmp_reg_src1, tmp_reg_src1, mask);
|
||||
- do_and3 (tmp_reg_src2, tmp_reg_src2, mask);
|
||||
- }
|
||||
-
|
||||
- /* Cases to handle. A and B are chunks of the two strings.
|
||||
- 1: Not end of comparison:
|
||||
- A != B: branch to cleanup code to compute result.
|
||||
- A == B: check for 0 byte, next block if not found.
|
||||
- 2: End of the inline comparison:
|
||||
- A != B: branch to cleanup code to compute result.
|
||||
- A == B: check for 0 byte, call strcmp/strncmp
|
||||
- 3: compared requested N bytes:
|
||||
- A == B: branch to result 0.
|
||||
- A != B: cleanup code to compute result. */
|
||||
-
|
||||
- unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
|
||||
-
|
||||
- rtx dst_label;
|
||||
- if (remain > 0 || equality_compare_rest)
|
||||
- {
|
||||
- /* Branch to cleanup code, otherwise fall through to do
|
||||
- more compares. */
|
||||
- if (!cleanup_label)
|
||||
- cleanup_label = gen_label_rtx ();
|
||||
- dst_label = cleanup_label;
|
||||
- }
|
||||
- else
|
||||
- /* Branch to end and produce result of 0. */
|
||||
- dst_label = final_move_label;
|
||||
-
|
||||
- rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
|
||||
- rtx cond = gen_reg_rtx (CCmode);
|
||||
-
|
||||
- /* Always produce the 0 result, it is needed if
|
||||
- cmpb finds a 0 byte in this chunk. */
|
||||
- rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
|
||||
- rs6000_emit_dot_insn (result_reg, tmp, 1, cond);
|
||||
-
|
||||
- rtx cmp_rtx;
|
||||
- if (remain == 0 && !equality_compare_rest)
|
||||
- cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
|
||||
- else
|
||||
- cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
|
||||
-
|
||||
- rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
|
||||
- lab_ref, pc_rtx);
|
||||
- rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
|
||||
- JUMP_LABEL (j) = dst_label;
|
||||
- LABEL_NUSES (dst_label) += 1;
|
||||
-
|
||||
- if (remain > 0 || equality_compare_rest)
|
||||
- {
|
||||
- /* Generate a cmpb to test for a 0 byte and branch
|
||||
- to final result if found. */
|
||||
- rtx cmpb_zero = gen_reg_rtx (word_mode);
|
||||
- rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
|
||||
- rtx condz = gen_reg_rtx (CCmode);
|
||||
- rtx zero_reg = gen_reg_rtx (word_mode);
|
||||
- emit_move_insn (zero_reg, GEN_INT (0));
|
||||
- do_cmpb3 (cmpb_zero, tmp_reg_src1, zero_reg);
|
||||
-
|
||||
- if (cmp_bytes < word_mode_size)
|
||||
- {
|
||||
- /* Don't want to look at zero bytes past end. */
|
||||
- HOST_WIDE_INT mb =
|
||||
- BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
||||
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
||||
- do_and3 (cmpb_zero, cmpb_zero, mask);
|
||||
- }
|
||||
-
|
||||
- emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
|
||||
- rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx);
|
||||
- rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx,
|
||||
- lab_ref_fin, pc_rtx);
|
||||
- rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
|
||||
- JUMP_LABEL (j2) = final_move_label;
|
||||
- LABEL_NUSES (final_move_label) += 1;
|
||||
-
|
||||
- }
|
||||
-
|
||||
- offset += cmp_bytes;
|
||||
- bytes_to_compare -= cmp_bytes;
|
||||
- }
|
||||
-
|
||||
+ expand_strncmp_gpr_sequence(compare_length, base_align,
|
||||
+ orig_src1, orig_src2,
|
||||
+ tmp_reg_src1, tmp_reg_src2,
|
||||
+ result_reg,
|
||||
+ equality_compare_rest,
|
||||
+ cleanup_label, final_move_label);
|
||||
+
|
||||
+ offset = compare_length;
|
||||
+
|
||||
if (equality_compare_rest)
|
||||
{
|
||||
/* Update pointers past what has been compared already. */
|
613
SOURCES/gcc8-rh1652929-3.patch
Normal file
613
SOURCES/gcc8-rh1652929-3.patch
Normal file
@ -0,0 +1,613 @@
|
||||
commit e4108e7e619dcf7f21224382bc37ba2ef651eb43
|
||||
Author: acsawdey <acsawdey@138bc75d-0d04-0410-961f-82ee72b054a4>
|
||||
Date: Thu Aug 30 18:17:00 2018 +0000
|
||||
|
||||
2018-08-30 Aaron Sawdey <acsawdey@linux.ibm.com>
|
||||
|
||||
* config/rs6000/altivec.md (altivec_eq<mode>): Remove star.
|
||||
(altivec_vcmpequ<VI_char>_p): Remove star.
|
||||
* config/rs6000/rs6000-string.c (do_load_for_compare): Support
|
||||
vector load modes.
|
||||
(expand_strncmp_vec_sequence): New function.
|
||||
(emit_final_str_compare_vec): New function.
|
||||
(expand_strn_compare): Add support for vector strncmp.
|
||||
* config/rs6000/rs6000.opt (-mstring-compare-inline-limit): Change
|
||||
length specification to bytes.
|
||||
* config/rs6000/vsx.md (vsx_ld_elemrev_v16qi_internal): Remove star.
|
||||
(vcmpnezb_p): New pattern.
|
||||
* doc/invoke.texi (RS/6000 and PowerPC Options): Update documentation
|
||||
for option -mstring-compare-inline-limit.
|
||||
|
||||
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@263991 138bc75d-0d04-0410-961f-82ee72b054a4
|
||||
|
||||
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
|
||||
index 13f4654db6a..db4f926bd15 100644
|
||||
--- a/gcc/config/rs6000/altivec.md
|
||||
+++ b/gcc/config/rs6000/altivec.md
|
||||
@@ -608,7 +608,7 @@
|
||||
"vcmpbfp %0,%1,%2"
|
||||
[(set_attr "type" "veccmp")])
|
||||
|
||||
-(define_insn "*altivec_eq<mode>"
|
||||
+(define_insn "altivec_eq<mode>"
|
||||
[(set (match_operand:VI2 0 "altivec_register_operand" "=v")
|
||||
(eq:VI2 (match_operand:VI2 1 "altivec_register_operand" "v")
|
||||
(match_operand:VI2 2 "altivec_register_operand" "v")))]
|
||||
@@ -2438,7 +2438,7 @@
|
||||
|
||||
;; Compare vectors producing a vector result and a predicate, setting CR6 to
|
||||
;; indicate a combined status
|
||||
-(define_insn "*altivec_vcmpequ<VI_char>_p"
|
||||
+(define_insn "altivec_vcmpequ<VI_char>_p"
|
||||
[(set (reg:CC CR6_REGNO)
|
||||
(unspec:CC [(eq:CC (match_operand:VI2 1 "register_operand" "v")
|
||||
(match_operand:VI2 2 "register_operand" "v"))]
|
||||
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
|
||||
index 451e9ed33da..ff0414586d0 100644
|
||||
--- a/gcc/config/rs6000/rs6000-string.c
|
||||
+++ b/gcc/config/rs6000/rs6000-string.c
|
||||
@@ -157,6 +157,33 @@ do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
|
||||
{
|
||||
switch (GET_MODE (reg))
|
||||
{
|
||||
+ case E_V16QImode:
|
||||
+ switch (mode)
|
||||
+ {
|
||||
+ case E_V16QImode:
|
||||
+ if (!BYTES_BIG_ENDIAN)
|
||||
+ {
|
||||
+ if (TARGET_P9_VECTOR)
|
||||
+ emit_insn (gen_vsx_ld_elemrev_v16qi_internal (reg, mem));
|
||||
+ else
|
||||
+ {
|
||||
+ rtx reg_v2di = simplify_gen_subreg (V2DImode, reg,
|
||||
+ V16QImode, 0);
|
||||
+ gcc_assert (MEM_P (mem));
|
||||
+ rtx addr = XEXP (mem, 0);
|
||||
+ rtx mem_v2di = gen_rtx_MEM (V2DImode, addr);
|
||||
+ MEM_COPY_ATTRIBUTES (mem_v2di, mem);
|
||||
+ set_mem_size (mem, GET_MODE_SIZE (V2DImode));
|
||||
+ emit_insn (gen_vsx_ld_elemrev_v2di (reg_v2di, mem_v2di));
|
||||
+ }
|
||||
+ }
|
||||
+ else
|
||||
+ emit_insn (gen_vsx_movv2di_64bit (reg, mem));
|
||||
+ break;
|
||||
+ default:
|
||||
+ gcc_unreachable ();
|
||||
+ }
|
||||
+ break;
|
||||
case E_DImode:
|
||||
switch (mode)
|
||||
{
|
||||
@@ -227,6 +254,12 @@ do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
|
||||
gcc_unreachable ();
|
||||
}
|
||||
break;
|
||||
+
|
||||
+ case E_QImode:
|
||||
+ gcc_assert (mode == E_QImode);
|
||||
+ emit_move_insn (reg, mem);
|
||||
+ break;
|
||||
+
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
break;
|
||||
@@ -1705,17 +1738,17 @@ expand_strncmp_align_check (rtx strncmp_label, rtx src_addr, HOST_WIDE_INT bytes
|
||||
RESULT_REG is the rtx for the result register.
|
||||
EQUALITY_COMPARE_REST is a flag to indicate we need to make a cleanup call
|
||||
to strcmp/strncmp if we have equality at the end of the inline comparison.
|
||||
- CLEANUP_LABEL is rtx for a label we generate if we need code to clean up
|
||||
- and generate the final comparison result.
|
||||
+ P_CLEANUP_LABEL is a pointer to rtx for a label we generate if we need code
|
||||
+ to clean up and generate the final comparison result.
|
||||
FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
|
||||
set the final result. */
|
||||
static void
|
||||
-expand_strncmp_gpr_sequence(unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
- unsigned int base_align,
|
||||
- rtx orig_src1, rtx orig_src2,
|
||||
- rtx tmp_reg_src1, rtx tmp_reg_src2, rtx result_reg,
|
||||
- bool equality_compare_rest, rtx &cleanup_label,
|
||||
- rtx final_move_label)
|
||||
+expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
+ unsigned int base_align,
|
||||
+ rtx orig_src1, rtx orig_src2,
|
||||
+ rtx tmp_reg_src1, rtx tmp_reg_src2, rtx result_reg,
|
||||
+ bool equality_compare_rest, rtx *p_cleanup_label,
|
||||
+ rtx final_move_label)
|
||||
{
|
||||
unsigned int word_mode_size = GET_MODE_SIZE (word_mode);
|
||||
machine_mode load_mode;
|
||||
@@ -1724,6 +1757,8 @@ expand_strncmp_gpr_sequence(unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
unsigned HOST_WIDE_INT offset = 0;
|
||||
rtx src1_addr = force_reg (Pmode, XEXP (orig_src1, 0));
|
||||
rtx src2_addr = force_reg (Pmode, XEXP (orig_src2, 0));
|
||||
+ gcc_assert (p_cleanup_label != NULL);
|
||||
+ rtx cleanup_label = *p_cleanup_label;
|
||||
|
||||
while (bytes_to_compare > 0)
|
||||
{
|
||||
@@ -1876,6 +1911,178 @@ expand_strncmp_gpr_sequence(unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
bytes_to_compare -= cmp_bytes;
|
||||
}
|
||||
|
||||
+ *p_cleanup_label = cleanup_label;
|
||||
+ return;
|
||||
+}
|
||||
+
|
||||
+/* Generate the sequence of compares for strcmp/strncmp using vec/vsx
|
||||
+ instructions.
|
||||
+
|
||||
+ BYTES_TO_COMPARE is the number of bytes to be compared.
|
||||
+ ORIG_SRC1 is the unmodified rtx for the first string.
|
||||
+ ORIG_SRC2 is the unmodified rtx for the second string.
|
||||
+ S1ADDR is the register to use for the base address of the first string.
|
||||
+ S2ADDR is the register to use for the base address of the second string.
|
||||
+ OFF_REG is the register to use for the string offset for loads.
|
||||
+ S1DATA is the register for loading the first string.
|
||||
+ S2DATA is the register for loading the second string.
|
||||
+ VEC_RESULT is the rtx for the vector result indicating the byte difference.
|
||||
+ EQUALITY_COMPARE_REST is a flag to indicate we need to make a cleanup call
|
||||
+ to strcmp/strncmp if we have equality at the end of the inline comparison.
|
||||
+ P_CLEANUP_LABEL is a pointer to rtx for a label we generate if we need code to clean up
|
||||
+ and generate the final comparison result.
|
||||
+ FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
|
||||
+ set the final result. */
|
||||
+static void
|
||||
+expand_strncmp_vec_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
+ rtx orig_src1, rtx orig_src2,
|
||||
+ rtx s1addr, rtx s2addr, rtx off_reg,
|
||||
+ rtx s1data, rtx s2data,
|
||||
+ rtx vec_result, bool equality_compare_rest,
|
||||
+ rtx *p_cleanup_label, rtx final_move_label)
|
||||
+{
|
||||
+ machine_mode load_mode;
|
||||
+ unsigned int load_mode_size;
|
||||
+ unsigned HOST_WIDE_INT cmp_bytes = 0;
|
||||
+ unsigned HOST_WIDE_INT offset = 0;
|
||||
+
|
||||
+ gcc_assert (p_cleanup_label != NULL);
|
||||
+ rtx cleanup_label = *p_cleanup_label;
|
||||
+
|
||||
+ emit_move_insn (s1addr, force_reg (Pmode, XEXP (orig_src1, 0)));
|
||||
+ emit_move_insn (s2addr, force_reg (Pmode, XEXP (orig_src2, 0)));
|
||||
+
|
||||
+ unsigned int i;
|
||||
+ rtx zr[16];
|
||||
+ for (i = 0; i < 16; i++)
|
||||
+ zr[i] = GEN_INT (0);
|
||||
+ rtvec zv = gen_rtvec_v (16, zr);
|
||||
+ rtx zero_reg = gen_reg_rtx (V16QImode);
|
||||
+ rs6000_expand_vector_init (zero_reg, gen_rtx_PARALLEL (V16QImode, zv));
|
||||
+
|
||||
+ while (bytes_to_compare > 0)
|
||||
+ {
|
||||
+ /* VEC/VSX compare sequence for P8:
|
||||
+ check each 16B with:
|
||||
+ lxvd2x 32,28,8
|
||||
+ lxvd2x 33,29,8
|
||||
+ vcmpequb 2,0,1 # compare strings
|
||||
+ vcmpequb 4,0,3 # compare w/ 0
|
||||
+ xxlorc 37,36,34 # first FF byte is either mismatch or end of string
|
||||
+ vcmpequb. 7,5,3 # reg 7 contains 0
|
||||
+ bnl 6,.Lmismatch
|
||||
+
|
||||
+ For the P8 LE case, we use lxvd2x and compare full 16 bytes
|
||||
+ but then use vgbbd and a shift to get two bytes with the
|
||||
+ information we need in the correct order.
|
||||
+
|
||||
+ VEC/VSX compare sequence if TARGET_P9_VECTOR:
|
||||
+ lxvb16x/lxvb16x # load 16B of each string
|
||||
+ vcmpnezb. # produces difference location or zero byte location
|
||||
+ bne 6,.Lmismatch
|
||||
+
|
||||
+ Use the overlapping compare trick for the last block if it is
|
||||
+ less than 16 bytes.
|
||||
+ */
|
||||
+
|
||||
+ load_mode = V16QImode;
|
||||
+ load_mode_size = GET_MODE_SIZE (load_mode);
|
||||
+
|
||||
+ if (bytes_to_compare >= load_mode_size)
|
||||
+ cmp_bytes = load_mode_size;
|
||||
+ else
|
||||
+ {
|
||||
+ /* Move this load back so it doesn't go past the end. P8/P9
|
||||
+ can do this efficiently. This is never called with less
|
||||
+ than 16 bytes so we should always be able to do this. */
|
||||
+ unsigned int extra_bytes = load_mode_size - bytes_to_compare;
|
||||
+ cmp_bytes = bytes_to_compare;
|
||||
+ gcc_assert (offset > extra_bytes);
|
||||
+ offset -= extra_bytes;
|
||||
+ cmp_bytes = load_mode_size;
|
||||
+ bytes_to_compare = cmp_bytes;
|
||||
+ }
|
||||
+
|
||||
+ /* The offset currently used is always kept in off_reg so that the
|
||||
+ cleanup code on P8 can use it to extract the differing byte. */
|
||||
+ emit_move_insn (off_reg, GEN_INT (offset));
|
||||
+
|
||||
+ rtx addr1 = gen_rtx_PLUS (Pmode, s1addr, off_reg);
|
||||
+ do_load_for_compare_from_addr (load_mode, s1data, addr1, orig_src1);
|
||||
+ rtx addr2 = gen_rtx_PLUS (Pmode, s2addr, off_reg);
|
||||
+ do_load_for_compare_from_addr (load_mode, s2data, addr2, orig_src2);
|
||||
+
|
||||
+ /* Cases to handle. A and B are chunks of the two strings.
|
||||
+ 1: Not end of comparison:
|
||||
+ A != B: branch to cleanup code to compute result.
|
||||
+ A == B: next block
|
||||
+ 2: End of the inline comparison:
|
||||
+ A != B: branch to cleanup code to compute result.
|
||||
+ A == B: call strcmp/strncmp
|
||||
+ 3: compared requested N bytes:
|
||||
+ A == B: branch to result 0.
|
||||
+ A != B: cleanup code to compute result. */
|
||||
+
|
||||
+ unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
|
||||
+
|
||||
+ if (TARGET_P9_VECTOR)
|
||||
+ emit_insn (gen_vcmpnezb_p (vec_result, s1data, s2data));
|
||||
+ else
|
||||
+ {
|
||||
+ /* Emit instructions to do comparison and zero check. */
|
||||
+ rtx cmp_res = gen_reg_rtx (load_mode);
|
||||
+ rtx cmp_zero = gen_reg_rtx (load_mode);
|
||||
+ rtx cmp_combined = gen_reg_rtx (load_mode);
|
||||
+ emit_insn (gen_altivec_eqv16qi (cmp_res, s1data, s2data));
|
||||
+ emit_insn (gen_altivec_eqv16qi (cmp_zero, s1data, zero_reg));
|
||||
+ emit_insn (gen_orcv16qi3 (vec_result, cmp_zero, cmp_res));
|
||||
+ emit_insn (gen_altivec_vcmpequb_p (cmp_combined, vec_result, zero_reg));
|
||||
+ }
|
||||
+
|
||||
+ bool branch_to_cleanup = (remain > 0 || equality_compare_rest);
|
||||
+ rtx cr6 = gen_rtx_REG (CCmode, CR6_REGNO);
|
||||
+ rtx dst_label;
|
||||
+ rtx cmp_rtx;
|
||||
+ if (branch_to_cleanup)
|
||||
+ {
|
||||
+ /* Branch to cleanup code, otherwise fall through to do more
|
||||
+ compares. P8 and P9 use different CR bits because on P8
|
||||
+ we are looking at the result of a comparison vs a
|
||||
+ register of zeroes so the all-true condition means no
|
||||
+ difference or zero was found. On P9, vcmpnezb sets a byte
|
||||
+ to 0xff if there is a mismatch or zero, so the all-false
|
||||
+ condition indicates we found no difference or zero. */
|
||||
+ if (!cleanup_label)
|
||||
+ cleanup_label = gen_label_rtx ();
|
||||
+ dst_label = cleanup_label;
|
||||
+ if (TARGET_P9_VECTOR)
|
||||
+ cmp_rtx = gen_rtx_NE (VOIDmode, cr6, const0_rtx);
|
||||
+ else
|
||||
+ cmp_rtx = gen_rtx_GE (VOIDmode, cr6, const0_rtx);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ /* Branch to final return or fall through to cleanup,
|
||||
+ result is already set to 0. */
|
||||
+ dst_label = final_move_label;
|
||||
+ if (TARGET_P9_VECTOR)
|
||||
+ cmp_rtx = gen_rtx_EQ (VOIDmode, cr6, const0_rtx);
|
||||
+ else
|
||||
+ cmp_rtx = gen_rtx_LT (VOIDmode, cr6, const0_rtx);
|
||||
+ }
|
||||
+
|
||||
+ rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
|
||||
+ rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
|
||||
+ lab_ref, pc_rtx);
|
||||
+ rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
|
||||
+ JUMP_LABEL (j2) = dst_label;
|
||||
+ LABEL_NUSES (dst_label) += 1;
|
||||
+
|
||||
+ offset += cmp_bytes;
|
||||
+ bytes_to_compare -= cmp_bytes;
|
||||
+ }
|
||||
+ *p_cleanup_label = cleanup_label;
|
||||
+ return;
|
||||
}
|
||||
|
||||
/* Generate the final sequence that identifies the differing
|
||||
@@ -1948,6 +2155,96 @@ emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
|
||||
return;
|
||||
}
|
||||
|
||||
+/* Generate the final sequence that identifies the differing
|
||||
+ byte and generates the final result, taking into account
|
||||
+ zero bytes:
|
||||
+
|
||||
+ P8:
|
||||
+ vgbbd 0,0
|
||||
+ vsldoi 0,0,0,9
|
||||
+ mfvsrd 9,32
|
||||
+ addi 10,9,-1 # count trailing zero bits
|
||||
+ andc 9,10,9
|
||||
+ popcntd 9,9
|
||||
+ lbzx 10,28,9 # use that offset to load differing byte
|
||||
+ lbzx 3,29,9
|
||||
+ subf 3,3,10 # subtract for final result
|
||||
+
|
||||
+ P9:
|
||||
+ vclzlsbb # counts leading bytes with lsb=0
|
||||
+ vextublx # extract differing byte
|
||||
+
|
||||
+ STR1 is the reg rtx for data from string 1.
|
||||
+ STR2 is the reg rtx for data from string 2.
|
||||
+ RESULT is the reg rtx for the comparison result.
|
||||
+ S1ADDR is the register to use for the base address of the first string.
|
||||
+ S2ADDR is the register to use for the base address of the second string.
|
||||
+ ORIG_SRC1 is the unmodified rtx for the first string.
|
||||
+ ORIG_SRC2 is the unmodified rtx for the second string.
|
||||
+ OFF_REG is the register to use for the string offset for loads.
|
||||
+ VEC_RESULT is the rtx for the vector result indicating the byte difference.
|
||||
+ */
|
||||
+
|
||||
+static void
|
||||
+emit_final_str_compare_vec (rtx str1, rtx str2, rtx result,
|
||||
+ rtx s1addr, rtx s2addr,
|
||||
+ rtx orig_src1, rtx orig_src2,
|
||||
+ rtx off_reg, rtx vec_result)
|
||||
+{
|
||||
+ if (TARGET_P9_VECTOR)
|
||||
+ {
|
||||
+ rtx diffix = gen_reg_rtx (SImode);
|
||||
+ rtx chr1 = gen_reg_rtx (SImode);
|
||||
+ rtx chr2 = gen_reg_rtx (SImode);
|
||||
+ rtx chr1_di = simplify_gen_subreg (DImode, chr1, SImode, 0);
|
||||
+ rtx chr2_di = simplify_gen_subreg (DImode, chr2, SImode, 0);
|
||||
+ emit_insn (gen_vclzlsbb_v16qi (diffix, vec_result));
|
||||
+ emit_insn (gen_vextublx (chr1, diffix, str1));
|
||||
+ emit_insn (gen_vextublx (chr2, diffix, str2));
|
||||
+ do_sub3 (result, chr1_di, chr2_di);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ rtx diffix = gen_reg_rtx (DImode);
|
||||
+ rtx result_gbbd = gen_reg_rtx (V16QImode);
|
||||
+ /* Since each byte of the input is either 00 or FF, the bytes in
|
||||
+ dw0 and dw1 after vgbbd are all identical to each other. */
|
||||
+ emit_insn (gen_p8v_vgbbd (result_gbbd, vec_result));
|
||||
+ /* For LE, we shift by 9 and get BA in the low two bytes then CTZ.
|
||||
+ For BE, we shift by 7 and get AB in the high two bytes then CLZ. */
|
||||
+ rtx result_shifted = gen_reg_rtx (V16QImode);
|
||||
+ int shift_amt = (BYTES_BIG_ENDIAN) ? 7 : 9;
|
||||
+ emit_insn (gen_altivec_vsldoi_v16qi (result_shifted,result_gbbd,result_gbbd, GEN_INT (shift_amt)));
|
||||
+
|
||||
+ rtx diffix_df = simplify_gen_subreg (DFmode, diffix, DImode, 0);
|
||||
+ emit_insn (gen_p8_mfvsrd_3_v16qi (diffix_df, result_shifted));
|
||||
+ rtx count = gen_reg_rtx (DImode);
|
||||
+
|
||||
+ if (BYTES_BIG_ENDIAN)
|
||||
+ emit_insn (gen_clzdi2 (count, diffix));
|
||||
+ else
|
||||
+ emit_insn (gen_ctzdi2 (count, diffix));
|
||||
+
|
||||
+ /* P8 doesn't have a good solution for extracting one byte from
|
||||
+ a vsx reg like vextublx on P9 so we just compute the offset
|
||||
+ of the differing byte and load it from each string. */
|
||||
+ do_add3 (off_reg, off_reg, count);
|
||||
+
|
||||
+ rtx chr1 = gen_reg_rtx (QImode);
|
||||
+ rtx chr2 = gen_reg_rtx (QImode);
|
||||
+ rtx addr1 = gen_rtx_PLUS (Pmode, s1addr, off_reg);
|
||||
+ do_load_for_compare_from_addr (QImode, chr1, addr1, orig_src1);
|
||||
+ rtx addr2 = gen_rtx_PLUS (Pmode, s2addr, off_reg);
|
||||
+ do_load_for_compare_from_addr (QImode, chr2, addr2, orig_src2);
|
||||
+ machine_mode rmode = GET_MODE (result);
|
||||
+ rtx chr1_rm = simplify_gen_subreg (rmode, chr1, QImode, 0);
|
||||
+ rtx chr2_rm = simplify_gen_subreg (rmode, chr2, QImode, 0);
|
||||
+ do_sub3 (result, chr1_rm, chr2_rm);
|
||||
+ }
|
||||
+
|
||||
+ return;
|
||||
+}
|
||||
+
|
||||
/* Expand a string compare operation with length, and return
|
||||
true if successful. Return false if we should let the
|
||||
compiler generate normal code, probably a strncmp call.
|
||||
@@ -2002,21 +2299,43 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
|
||||
gcc_assert (GET_MODE (target) == SImode);
|
||||
|
||||
- unsigned int word_mode_size = GET_MODE_SIZE (word_mode);
|
||||
+ unsigned int required_align = 8;
|
||||
|
||||
unsigned HOST_WIDE_INT offset = 0;
|
||||
unsigned HOST_WIDE_INT bytes; /* N from the strncmp args if available. */
|
||||
unsigned HOST_WIDE_INT compare_length; /* How much to compare inline. */
|
||||
+
|
||||
if (no_length)
|
||||
- /* Use this as a standin to determine the mode to use. */
|
||||
- bytes = rs6000_string_compare_inline_limit * word_mode_size;
|
||||
+ bytes = rs6000_string_compare_inline_limit;
|
||||
else
|
||||
bytes = UINTVAL (bytes_rtx);
|
||||
|
||||
- machine_mode load_mode =
|
||||
- select_block_compare_mode (0, bytes, base_align);
|
||||
- unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
|
||||
- compare_length = rs6000_string_compare_inline_limit * load_mode_size;
|
||||
+ /* Is it OK to use vec/vsx for this? TARGET_VSX means we have at
|
||||
+ least POWER7 but we use TARGET_EFFICIENT_UNALIGNED_VSX which is
|
||||
+ at least POWER8. That way we can rely on overlapping compares to
|
||||
+ do the final comparison of less than 16 bytes. Also I do not want
|
||||
+ to deal with making this work for 32 bits. */
|
||||
+ int use_vec = (bytes >= 16 && !TARGET_32BIT && TARGET_EFFICIENT_UNALIGNED_VSX);
|
||||
+
|
||||
+ if (use_vec)
|
||||
+ required_align = 16;
|
||||
+
|
||||
+ machine_mode load_mode;
|
||||
+ rtx tmp_reg_src1, tmp_reg_src2;
|
||||
+ if (use_vec)
|
||||
+ {
|
||||
+ load_mode = V16QImode;
|
||||
+ tmp_reg_src1 = gen_reg_rtx (V16QImode);
|
||||
+ tmp_reg_src2 = gen_reg_rtx (V16QImode);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ load_mode = select_block_compare_mode (0, bytes, base_align);
|
||||
+ tmp_reg_src1 = gen_reg_rtx (word_mode);
|
||||
+ tmp_reg_src2 = gen_reg_rtx (word_mode);
|
||||
+ }
|
||||
+
|
||||
+ compare_length = rs6000_string_compare_inline_limit;
|
||||
|
||||
/* If we have equality at the end of the last compare and we have not
|
||||
found the end of the string, we need to call strcmp/strncmp to
|
||||
@@ -2040,10 +2359,7 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
rtx final_move_label = gen_label_rtx ();
|
||||
rtx final_label = gen_label_rtx ();
|
||||
rtx begin_compare_label = NULL;
|
||||
- unsigned int required_align = 8;
|
||||
-
|
||||
- required_align = 8;
|
||||
-
|
||||
+
|
||||
if (base_align < required_align)
|
||||
{
|
||||
/* Generate code that checks distance to 4k boundary for this case. */
|
||||
@@ -2060,7 +2376,7 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
the subsequent code generation are in agreement so we do not
|
||||
go past the length we tested for a 4k boundary crossing. */
|
||||
unsigned HOST_WIDE_INT align_test = compare_length;
|
||||
- if (align_test < 8)
|
||||
+ if (align_test < required_align)
|
||||
{
|
||||
align_test = HOST_WIDE_INT_1U << ceil_log2 (align_test);
|
||||
base_align = align_test;
|
||||
@@ -2102,7 +2418,7 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
else
|
||||
{
|
||||
/* -m32 -mpowerpc64 results in word_mode being DImode even
|
||||
- though otherwise it is 32-bit. The length arg to strncmp
|
||||
+ though otherwise it is 32-bit. The length arg to strncmp
|
||||
is a size_t which will be the same size as pointers. */
|
||||
rtx len_rtx = gen_reg_rtx (Pmode);
|
||||
emit_move_insn (len_rtx, gen_int_mode (bytes, Pmode));
|
||||
@@ -2124,17 +2440,32 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
}
|
||||
|
||||
rtx cleanup_label = NULL;
|
||||
- rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
|
||||
- rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
|
||||
+ rtx s1addr = NULL, s2addr = NULL, off_reg = NULL, vec_result = NULL;
|
||||
|
||||
/* Generate a sequence of GPR or VEC/VSX instructions to compare out
|
||||
to the length specified. */
|
||||
- expand_strncmp_gpr_sequence(compare_length, base_align,
|
||||
- orig_src1, orig_src2,
|
||||
- tmp_reg_src1, tmp_reg_src2,
|
||||
- result_reg,
|
||||
- equality_compare_rest,
|
||||
- cleanup_label, final_move_label);
|
||||
+ if (use_vec)
|
||||
+ {
|
||||
+ s1addr = gen_reg_rtx (Pmode);
|
||||
+ s2addr = gen_reg_rtx (Pmode);
|
||||
+ off_reg = gen_reg_rtx (Pmode);
|
||||
+ vec_result = gen_reg_rtx (load_mode);
|
||||
+ emit_move_insn (result_reg, GEN_INT (0));
|
||||
+ expand_strncmp_vec_sequence (compare_length,
|
||||
+ orig_src1, orig_src2,
|
||||
+ s1addr, s2addr, off_reg,
|
||||
+ tmp_reg_src1, tmp_reg_src2,
|
||||
+ vec_result,
|
||||
+ equality_compare_rest,
|
||||
+ &cleanup_label, final_move_label);
|
||||
+ }
|
||||
+ else
|
||||
+ expand_strncmp_gpr_sequence (compare_length, base_align,
|
||||
+ orig_src1, orig_src2,
|
||||
+ tmp_reg_src1, tmp_reg_src2,
|
||||
+ result_reg,
|
||||
+ equality_compare_rest,
|
||||
+ &cleanup_label, final_move_label);
|
||||
|
||||
offset = compare_length;
|
||||
|
||||
@@ -2174,7 +2505,12 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
if (cleanup_label)
|
||||
emit_label (cleanup_label);
|
||||
|
||||
- emit_final_str_compare_gpr (tmp_reg_src1, tmp_reg_src2, result_reg);
|
||||
+ if (use_vec)
|
||||
+ emit_final_str_compare_vec (tmp_reg_src1, tmp_reg_src2, result_reg,
|
||||
+ s1addr, s2addr, orig_src1, orig_src2,
|
||||
+ off_reg, vec_result);
|
||||
+ else
|
||||
+ emit_final_str_compare_gpr (tmp_reg_src1, tmp_reg_src2, result_reg);
|
||||
|
||||
emit_label (final_move_label);
|
||||
emit_insn (gen_movsi (target,
|
||||
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
|
||||
index ace8a477550..ad1b8a29ac6 100644
|
||||
--- a/gcc/config/rs6000/rs6000.opt
|
||||
+++ b/gcc/config/rs6000/rs6000.opt
|
||||
@@ -342,8 +342,8 @@ Target Report Var(rs6000_block_compare_inline_loop_limit) Init(-1) RejectNegativ
|
||||
Max number of bytes to compare with loops.
|
||||
|
||||
mstring-compare-inline-limit=
|
||||
-Target Report Var(rs6000_string_compare_inline_limit) Init(8) RejectNegative Joined UInteger Save
|
||||
-Max number of pairs of load insns for compare.
|
||||
+Target Report Var(rs6000_string_compare_inline_limit) Init(64) RejectNegative Joined UInteger Save
|
||||
+Max number of bytes to compare.
|
||||
|
||||
misel
|
||||
Target Report Mask(ISEL) Var(rs6000_isa_flags)
|
||||
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
|
||||
index e6921e96a3d..01fb4213001 100644
|
||||
--- a/gcc/config/rs6000/vsx.md
|
||||
+++ b/gcc/config/rs6000/vsx.md
|
||||
@@ -1429,7 +1429,7 @@
|
||||
}
|
||||
})
|
||||
|
||||
-(define_insn "*vsx_ld_elemrev_v16qi_internal"
|
||||
+(define_insn "vsx_ld_elemrev_v16qi_internal"
|
||||
[(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
|
||||
(vec_select:V16QI
|
||||
(match_operand:V16QI 1 "memory_operand" "Z")
|
||||
@@ -5107,6 +5107,22 @@
|
||||
"vcmpnezb %0,%1,%2"
|
||||
[(set_attr "type" "vecsimple")])
|
||||
|
||||
+;; Vector Compare Not Equal or Zero Byte predicate or record-form
|
||||
+(define_insn "vcmpnezb_p"
|
||||
+ [(set (reg:CC CR6_REGNO)
|
||||
+ (unspec:CC
|
||||
+ [(match_operand:V16QI 1 "altivec_register_operand" "v")
|
||||
+ (match_operand:V16QI 2 "altivec_register_operand" "v")]
|
||||
+ UNSPEC_VCMPNEZB))
|
||||
+ (set (match_operand:V16QI 0 "altivec_register_operand" "=v")
|
||||
+ (unspec:V16QI
|
||||
+ [(match_dup 1)
|
||||
+ (match_dup 2)]
|
||||
+ UNSPEC_VCMPNEZB))]
|
||||
+ "TARGET_P9_VECTOR"
|
||||
+ "vcmpnezb. %0,%1,%2"
|
||||
+ [(set_attr "type" "vecsimple")])
|
||||
+
|
||||
;; Vector Compare Not Equal Half Word (specified/not+eq:)
|
||||
(define_insn "vcmpneh"
|
||||
[(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
|
||||
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||||
index f2dd12b3d73..291e414fea2 100644
|
||||
--- a/gcc/doc/invoke.texi
|
||||
+++ b/gcc/doc/invoke.texi
|
||||
@@ -24165,12 +24165,10 @@ target-specific.
|
||||
|
||||
@item -mstring-compare-inline-limit=@var{num}
|
||||
@opindex mstring-compare-inline-limit
|
||||
-Generate at most @var{num} pairs of load instructions to compare the
|
||||
-string inline. If the difference or end of string is not found at the
|
||||
+Compare at most @var{num} string bytes with inline code.
|
||||
+If the difference or end of string is not found at the
|
||||
end of the inline compare a call to @code{strcmp} or @code{strncmp} will
|
||||
-take care of the rest of the comparison. The default is 8 pairs of
|
||||
-loads, which will compare 64 bytes on a 64-bit target and 32 bytes on a
|
||||
-32-bit target.
|
||||
+take care of the rest of the comparison. The default is 64 bytes.
|
||||
|
||||
@item -G @var{num}
|
||||
@opindex G
|
40
SOURCES/gcc8-rh1652929-4.patch
Normal file
40
SOURCES/gcc8-rh1652929-4.patch
Normal file
@ -0,0 +1,40 @@
|
||||
commit 6f1a7440d9aac59fba0f2e2d8d0a9a0b82f480cb
|
||||
Author: acsawdey <acsawdey@138bc75d-0d04-0410-961f-82ee72b054a4>
|
||||
Date: Tue Oct 2 17:31:53 2018 +0000
|
||||
|
||||
2018-10-02 Aaron Sawdey <acsawdey@linux.ibm.com>
|
||||
|
||||
PR target/87474
|
||||
* config/rs6000/rs6000-string.c (expand_strn_compare): Check that both
|
||||
P8_VECTOR and VSX are enabled.
|
||||
|
||||
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@264799 138bc75d-0d04-0410-961f-82ee72b054a4
|
||||
|
||||
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
|
||||
index ff0414586d0..9c25bad97a1 100644
|
||||
--- a/gcc/config/rs6000/rs6000-string.c
|
||||
+++ b/gcc/config/rs6000/rs6000-string.c
|
||||
@@ -2205,6 +2205,7 @@ emit_final_str_compare_vec (rtx str1, rtx str2, rtx result,
|
||||
}
|
||||
else
|
||||
{
|
||||
+ gcc_assert (TARGET_P8_VECTOR);
|
||||
rtx diffix = gen_reg_rtx (DImode);
|
||||
rtx result_gbbd = gen_reg_rtx (V16QImode);
|
||||
/* Since each byte of the input is either 00 or FF, the bytes in
|
||||
@@ -2313,9 +2314,12 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
/* Is it OK to use vec/vsx for this. TARGET_VSX means we have at
|
||||
least POWER7 but we use TARGET_EFFICIENT_UNALIGNED_VSX which is
|
||||
at least POWER8. That way we can rely on overlapping compares to
|
||||
- do the final comparison of less than 16 bytes. Also I do not want
|
||||
- to deal with making this work for 32 bits. */
|
||||
- int use_vec = (bytes >= 16 && !TARGET_32BIT && TARGET_EFFICIENT_UNALIGNED_VSX);
|
||||
+ do the final comparison of less than 16 bytes. Also I do not
|
||||
+ want to deal with making this work for 32 bits. In addition, we
|
||||
+ have to make sure that we have at least P8_VECTOR (we don't allow
|
||||
+ P9_VECTOR without P8_VECTOR). */
|
||||
+ int use_vec = (bytes >= 16 && !TARGET_32BIT
|
||||
+ && TARGET_EFFICIENT_UNALIGNED_VSX && TARGET_P8_VECTOR);
|
||||
|
||||
if (use_vec)
|
||||
required_align = 16;
|
510
SOURCES/gcc8-rh1652929-5.patch
Normal file
510
SOURCES/gcc8-rh1652929-5.patch
Normal file
@ -0,0 +1,510 @@
|
||||
commit 08869d85bd2a7ec5468b3bb3f01a930eb93e6381
|
||||
Author: acsawdey <acsawdey@138bc75d-0d04-0410-961f-82ee72b054a4>
|
||||
Date: Fri Oct 26 19:33:31 2018 +0000
|
||||
|
||||
2018-10-26 Aaron Sawdey <acsawdey@linux.ibm.com>
|
||||
|
||||
* config/rs6000/rs6000-string.c (expand_strncmp_gpr_sequence): Change to
|
||||
a shorter sequence with fewer branches.
|
||||
(emit_final_str_compare_gpr): Ditto.
|
||||
|
||||
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@265546 138bc75d-0d04-0410-961f-82ee72b054a4
|
||||
|
||||
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
|
||||
index 9c25bad97a1..96729d9663c 100644
|
||||
--- a/gcc/config/rs6000/rs6000-string.c
|
||||
+++ b/gcc/config/rs6000/rs6000-string.c
|
||||
@@ -259,7 +259,7 @@ do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
|
||||
gcc_assert (mode == E_QImode);
|
||||
emit_move_insn (reg, mem);
|
||||
break;
|
||||
-
|
||||
+
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
break;
|
||||
@@ -726,7 +726,7 @@ expand_compare_loop (rtx operands[])
|
||||
{
|
||||
if (GET_MODE_SIZE (GET_MODE (bytes_rtx)) > GET_MODE_SIZE (word_mode))
|
||||
/* Do not expect length longer than word_mode. */
|
||||
- return false;
|
||||
+ return false;
|
||||
else if (GET_MODE_SIZE (GET_MODE (bytes_rtx)) < GET_MODE_SIZE (word_mode))
|
||||
{
|
||||
bytes_rtx = force_reg (GET_MODE (bytes_rtx), bytes_rtx);
|
||||
@@ -770,7 +770,7 @@ expand_compare_loop (rtx operands[])
|
||||
rtx j;
|
||||
|
||||
/* Example of generated code for 35 bytes aligned 1 byte.
|
||||
-
|
||||
+
|
||||
mtctr 8
|
||||
li 6,0
|
||||
li 5,8
|
||||
@@ -798,7 +798,7 @@ expand_compare_loop (rtx operands[])
|
||||
popcntd 9,9
|
||||
subfe 10,10,10
|
||||
or 9,9,10
|
||||
-
|
||||
+
|
||||
Compiled with -fno-reorder-blocks for clarity. */
|
||||
|
||||
/* Structure of what we're going to do:
|
||||
@@ -1041,7 +1041,7 @@ expand_compare_loop (rtx operands[])
|
||||
if (!bytes_is_const)
|
||||
{
|
||||
/* If we're dealing with runtime length, we have to check if
|
||||
- it's zero after the loop. When length is known at compile
|
||||
+ it's zero after the loop. When length is known at compile
|
||||
time the no-remainder condition is dealt with above. By
|
||||
doing this after cleanup_label, we also deal with the
|
||||
case where length is 0 at the start and we bypass the
|
||||
@@ -1411,7 +1411,7 @@ expand_block_compare (rtx operands[])
|
||||
rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
|
||||
rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
|
||||
/* P7/P8 code uses cond for subfc. but P9 uses
|
||||
- it for cmpld which needs CCUNSmode. */
|
||||
+ it for cmpld which needs CCUNSmode. */
|
||||
rtx cond;
|
||||
if (TARGET_P9_MISC)
|
||||
cond = gen_reg_rtx (CCUNSmode);
|
||||
@@ -1655,7 +1655,7 @@ expand_block_compare (rtx operands[])
|
||||
emit_label (convert_label);
|
||||
|
||||
/* We need to produce DI result from sub, then convert to target SI
|
||||
- while maintaining <0 / ==0 / >0 properties. This sequence works:
|
||||
+ while maintaining <0 / ==0 / >0 properties. This sequence works:
|
||||
subfc L,A,B
|
||||
subfe H,H,H
|
||||
popcntd L,L
|
||||
@@ -1740,7 +1740,7 @@ expand_strncmp_align_check (rtx strncmp_label, rtx src_addr, HOST_WIDE_INT bytes
|
||||
to strcmp/strncmp if we have equality at the end of the inline comparison.
|
||||
P_CLEANUP_LABEL is a pointer to rtx for a label we generate if we need code
|
||||
to clean up and generate the final comparison result.
|
||||
- FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
|
||||
+ FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
|
||||
set the final result. */
|
||||
static void
|
||||
expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
@@ -1763,12 +1763,9 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
while (bytes_to_compare > 0)
|
||||
{
|
||||
/* GPR compare sequence:
|
||||
- check each 8B with: ld/ld cmpd bne
|
||||
- If equal, use rldicr/cmpb to check for zero byte.
|
||||
+ check each 8B with: ld/ld/cmpb/cmpb/orc./bne
|
||||
+
|
||||
cleanup code at end:
|
||||
- cmpb get byte that differs
|
||||
- cmpb look for zero byte
|
||||
- orc combine
|
||||
cntlzd get bit of first zero/diff byte
|
||||
subfic convert for rldcl use
|
||||
rldcl rldcl extract diff/zero byte
|
||||
@@ -1776,7 +1773,7 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
|
||||
The last compare can branch around the cleanup code if the
|
||||
result is zero because the strings are exactly equal. */
|
||||
-
|
||||
+
|
||||
unsigned int align = compute_current_alignment (base_align, offset);
|
||||
load_mode = select_block_compare_mode (offset, bytes_to_compare, align);
|
||||
load_mode_size = GET_MODE_SIZE (load_mode);
|
||||
@@ -1801,34 +1798,49 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
rid of the extra bytes. */
|
||||
cmp_bytes = bytes_to_compare;
|
||||
|
||||
- rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset));
|
||||
+ rtx offset_reg = gen_reg_rtx (Pmode);
|
||||
+ emit_move_insn (offset_reg, GEN_INT (offset));
|
||||
+
|
||||
+ rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, offset_reg);
|
||||
do_load_for_compare_from_addr (load_mode, tmp_reg_src1, addr1, orig_src1);
|
||||
- rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset));
|
||||
+ rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, offset_reg);
|
||||
do_load_for_compare_from_addr (load_mode, tmp_reg_src2, addr2, orig_src2);
|
||||
|
||||
/* We must always left-align the data we read, and
|
||||
clear any bytes to the right that are beyond the string.
|
||||
Otherwise the cmpb sequence won't produce the correct
|
||||
- results. The beginning of the compare will be done
|
||||
- with word_mode so will not have any extra shifts or
|
||||
- clear rights. */
|
||||
+ results. However if there is only one byte left, we
|
||||
+ can just subtract to get the final result so the shifts
|
||||
+ and clears are not needed. */
|
||||
|
||||
- if (load_mode_size < word_mode_size)
|
||||
- {
|
||||
- /* Rotate left first. */
|
||||
- rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
|
||||
- do_rotl3 (tmp_reg_src1, tmp_reg_src1, sh);
|
||||
- do_rotl3 (tmp_reg_src2, tmp_reg_src2, sh);
|
||||
- }
|
||||
+ unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
|
||||
|
||||
- if (cmp_bytes < word_mode_size)
|
||||
+ /* Loading just a single byte is a special case. If we are
|
||||
+ loading more than that, we have to check whether we are
|
||||
+ looking at the entire chunk of data. If not, rotate left and
|
||||
+ clear right so that bytes we aren't supposed to look at are
|
||||
+ zeroed, and the first byte we are supposed to compare is
|
||||
+ leftmost. */
|
||||
+ if (load_mode_size != 1)
|
||||
{
|
||||
- /* Now clear right. This plus the rotate can be
|
||||
- turned into a rldicr instruction. */
|
||||
- HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
||||
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
||||
- do_and3 (tmp_reg_src1, tmp_reg_src1, mask);
|
||||
- do_and3 (tmp_reg_src2, tmp_reg_src2, mask);
|
||||
+ if (load_mode_size < word_mode_size)
|
||||
+ {
|
||||
+ /* Rotate left first. */
|
||||
+ rtx sh = GEN_INT (BITS_PER_UNIT
|
||||
+ * (word_mode_size - load_mode_size));
|
||||
+ do_rotl3 (tmp_reg_src1, tmp_reg_src1, sh);
|
||||
+ do_rotl3 (tmp_reg_src2, tmp_reg_src2, sh);
|
||||
+ }
|
||||
+
|
||||
+ if (cmp_bytes < word_mode_size)
|
||||
+ {
|
||||
+ /* Now clear right. This plus the rotate can be
|
||||
+ turned into a rldicr instruction. */
|
||||
+ HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
||||
+ rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
||||
+ do_and3 (tmp_reg_src1, tmp_reg_src1, mask);
|
||||
+ do_and3 (tmp_reg_src2, tmp_reg_src2, mask);
|
||||
+ }
|
||||
}
|
||||
|
||||
/* Cases to handle. A and B are chunks of the two strings.
|
||||
@@ -1842,8 +1854,6 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
A == B: branch to result 0.
|
||||
A != B: cleanup code to compute result. */
|
||||
|
||||
- unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
|
||||
-
|
||||
rtx dst_label;
|
||||
if (remain > 0 || equality_compare_rest)
|
||||
{
|
||||
@@ -1857,54 +1867,89 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
/* Branch to end and produce result of 0. */
|
||||
dst_label = final_move_label;
|
||||
|
||||
- rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
|
||||
- rtx cond = gen_reg_rtx (CCmode);
|
||||
+ if (load_mode_size == 1)
|
||||
+ {
|
||||
+ /* Special case for comparing just a single byte. */
|
||||
+ if (equality_compare_rest)
|
||||
+ {
|
||||
+ /* Use subf./bne to branch to final_move_label if the
|
||||
+ byte differs, otherwise fall through to the strncmp
|
||||
+ call. We must also check for a zero byte here as we
|
||||
+ must not make the library call if this is the end of
|
||||
+ the string. */
|
||||
+
|
||||
+ rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
|
||||
+ rtx cond = gen_reg_rtx (CCmode);
|
||||
+ rtx diff_rtx = gen_rtx_MINUS (word_mode,
|
||||
+ tmp_reg_src1, tmp_reg_src2);
|
||||
+ rs6000_emit_dot_insn (result_reg, diff_rtx, 2, cond);
|
||||
+ rtx cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
|
||||
+
|
||||
+ rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
|
||||
+ lab_ref, pc_rtx);
|
||||
+ rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
|
||||
+ JUMP_LABEL (j) = final_move_label;
|
||||
+ LABEL_NUSES (final_move_label) += 1;
|
||||
|
||||
- /* Always produce the 0 result, it is needed if
|
||||
- cmpb finds a 0 byte in this chunk. */
|
||||
- rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
|
||||
- rs6000_emit_dot_insn (result_reg, tmp, 1, cond);
|
||||
+ /* Check for zero byte here before fall through to
|
||||
+ library call. This catches the case where the
|
||||
+ strings are equal and end in a zero byte at this
|
||||
+ position. */
|
||||
|
||||
- rtx cmp_rtx;
|
||||
- if (remain == 0 && !equality_compare_rest)
|
||||
- cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
|
||||
- else
|
||||
- cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
|
||||
+ rtx cond0 = gen_reg_rtx (CCmode);
|
||||
+ emit_move_insn (cond0, gen_rtx_COMPARE (CCmode, tmp_reg_src1,
|
||||
+ const0_rtx));
|
||||
|
||||
- rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
|
||||
- lab_ref, pc_rtx);
|
||||
- rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
|
||||
- JUMP_LABEL (j) = dst_label;
|
||||
- LABEL_NUSES (dst_label) += 1;
|
||||
+ rtx cmp0eq_rtx = gen_rtx_EQ (VOIDmode, cond0, const0_rtx);
|
||||
|
||||
- if (remain > 0 || equality_compare_rest)
|
||||
+ rtx ifelse0 = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp0eq_rtx,
|
||||
+ lab_ref, pc_rtx);
|
||||
+ rtx j0 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse0));
|
||||
+ JUMP_LABEL (j0) = final_move_label;
|
||||
+ LABEL_NUSES (final_move_label) += 1;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ /* This is the last byte to be compared so we can use
|
||||
+ subf to compute the final result and branch
|
||||
+ unconditionally to final_move_label. */
|
||||
+
|
||||
+ do_sub3 (result_reg, tmp_reg_src1, tmp_reg_src2);
|
||||
+
|
||||
+ rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
|
||||
+ rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
|
||||
+ JUMP_LABEL (j) = final_move_label;
|
||||
+ LABEL_NUSES (final_move_label) += 1;
|
||||
+ emit_barrier ();
|
||||
+ }
|
||||
+ }
|
||||
+ else
|
||||
{
|
||||
- /* Generate a cmpb to test for a 0 byte and branch
|
||||
- to final result if found. */
|
||||
rtx cmpb_zero = gen_reg_rtx (word_mode);
|
||||
- rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
|
||||
- rtx condz = gen_reg_rtx (CCmode);
|
||||
+ rtx cmpb_diff = gen_reg_rtx (word_mode);
|
||||
rtx zero_reg = gen_reg_rtx (word_mode);
|
||||
+ rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
|
||||
+ rtx cond = gen_reg_rtx (CCmode);
|
||||
+
|
||||
emit_move_insn (zero_reg, GEN_INT (0));
|
||||
+ do_cmpb3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2);
|
||||
do_cmpb3 (cmpb_zero, tmp_reg_src1, zero_reg);
|
||||
+ rtx not_diff = gen_rtx_NOT (word_mode, cmpb_diff);
|
||||
+ rtx orc_rtx = gen_rtx_IOR (word_mode, not_diff, cmpb_zero);
|
||||
|
||||
- if (cmp_bytes < word_mode_size)
|
||||
- {
|
||||
- /* Don't want to look at zero bytes past end. */
|
||||
- HOST_WIDE_INT mb =
|
||||
- BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
||||
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
||||
- do_and3 (cmpb_zero, cmpb_zero, mask);
|
||||
- }
|
||||
+ rs6000_emit_dot_insn (result_reg, orc_rtx, 2, cond);
|
||||
|
||||
- emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
|
||||
- rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx);
|
||||
- rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx,
|
||||
- lab_ref_fin, pc_rtx);
|
||||
- rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
|
||||
- JUMP_LABEL (j2) = final_move_label;
|
||||
- LABEL_NUSES (final_move_label) += 1;
|
||||
+ rtx cmp_rtx;
|
||||
+ if (remain == 0 && !equality_compare_rest)
|
||||
+ cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
|
||||
+ else
|
||||
+ cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
|
||||
|
||||
+ rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
|
||||
+ lab_ref, pc_rtx);
|
||||
+ rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
|
||||
+ JUMP_LABEL (j) = dst_label;
|
||||
+ LABEL_NUSES (dst_label) += 1;
|
||||
}
|
||||
|
||||
offset += cmp_bytes;
|
||||
@@ -1915,7 +1960,7 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
return;
|
||||
}
|
||||
|
||||
-/* Generate the sequence of compares for strcmp/strncmp using vec/vsx
|
||||
+/* Generate the sequence of compares for strcmp/strncmp using vec/vsx
|
||||
instructions.
|
||||
|
||||
BYTES_TO_COMPARE is the number of bytes to be compared.
|
||||
@@ -1931,7 +1976,7 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
to strcmp/strncmp if we have equality at the end of the inline comparison.
|
||||
P_CLEANUP_LABEL is a pointer to rtx for a label we generate if we need code to clean up
|
||||
and generate the final comparison result.
|
||||
- FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
|
||||
+ FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
|
||||
set the final result. */
|
||||
static void
|
||||
expand_strncmp_vec_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
@@ -1982,12 +2027,12 @@ expand_strncmp_vec_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
bne 6,.Lmismatch
|
||||
|
||||
Use the overlapping compare trick for the last block if it is
|
||||
- less than 16 bytes.
|
||||
+ less than 16 bytes.
|
||||
*/
|
||||
|
||||
load_mode = V16QImode;
|
||||
load_mode_size = GET_MODE_SIZE (load_mode);
|
||||
-
|
||||
+
|
||||
if (bytes_to_compare >= load_mode_size)
|
||||
cmp_bytes = load_mode_size;
|
||||
else
|
||||
@@ -2046,10 +2091,10 @@ expand_strncmp_vec_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
if (branch_to_cleanup)
|
||||
{
|
||||
/* Branch to cleanup code, otherwise fall through to do more
|
||||
- compares. P8 and P9 use different CR bits because on P8
|
||||
+ compares. P8 and P9 use different CR bits because on P8
|
||||
we are looking at the result of a comparsion vs a
|
||||
register of zeroes so the all-true condition means no
|
||||
- difference or zero was found. On P9, vcmpnezb sets a byte
|
||||
+ difference or zero was found. On P9, vcmpnezb sets a byte
|
||||
to 0xff if there is a mismatch or zero, so the all-false
|
||||
condition indicates we found no difference or zero. */
|
||||
if (!cleanup_label)
|
||||
@@ -2062,7 +2107,7 @@ expand_strncmp_vec_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
}
|
||||
else
|
||||
{
|
||||
- /* Branch to final return or fall through to cleanup,
|
||||
+ /* Branch to final return or fall through to cleanup,
|
||||
result is already set to 0. */
|
||||
dst_label = final_move_label;
|
||||
if (TARGET_P9_VECTOR)
|
||||
@@ -2088,10 +2133,7 @@ expand_strncmp_vec_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
|
||||
/* Generate the final sequence that identifies the differing
|
||||
byte and generates the final result, taking into account
|
||||
zero bytes:
|
||||
-
|
||||
- cmpb cmpb_result1, src1, src2
|
||||
- cmpb cmpb_result2, src1, zero
|
||||
- orc cmpb_result1, cmp_result1, cmpb_result2
|
||||
+
|
||||
cntlzd get bit of first zero/diff byte
|
||||
addi convert for rldcl use
|
||||
rldcl rldcl extract diff/zero byte
|
||||
@@ -2105,10 +2147,7 @@ static void
|
||||
emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
|
||||
{
|
||||
machine_mode m = GET_MODE (str1);
|
||||
- rtx cmpb_diff = gen_reg_rtx (m);
|
||||
- rtx cmpb_zero = gen_reg_rtx (m);
|
||||
rtx rot_amt = gen_reg_rtx (m);
|
||||
- rtx zero_reg = gen_reg_rtx (m);
|
||||
|
||||
rtx rot1_1 = gen_reg_rtx (m);
|
||||
rtx rot1_2 = gen_reg_rtx (m);
|
||||
@@ -2117,12 +2156,7 @@ emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
|
||||
|
||||
if (m == SImode)
|
||||
{
|
||||
- emit_insn (gen_cmpbsi3 (cmpb_diff, str1, str2));
|
||||
- emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
|
||||
- emit_insn (gen_cmpbsi3 (cmpb_zero, str1, zero_reg));
|
||||
- emit_insn (gen_one_cmplsi2 (cmpb_diff,cmpb_diff));
|
||||
- emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
|
||||
- emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
|
||||
+ emit_insn (gen_clzsi2 (rot_amt, result));
|
||||
emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
|
||||
emit_insn (gen_rotlsi3 (rot1_1, str1,
|
||||
gen_lowpart (SImode, rot_amt)));
|
||||
@@ -2134,12 +2168,7 @@ emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
|
||||
}
|
||||
else if (m == DImode)
|
||||
{
|
||||
- emit_insn (gen_cmpbdi3 (cmpb_diff, str1, str2));
|
||||
- emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
|
||||
- emit_insn (gen_cmpbdi3 (cmpb_zero, str1, zero_reg));
|
||||
- emit_insn (gen_one_cmpldi2 (cmpb_diff,cmpb_diff));
|
||||
- emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
|
||||
- emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
|
||||
+ emit_insn (gen_clzdi2 (rot_amt, result));
|
||||
emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
|
||||
emit_insn (gen_rotldi3 (rot1_1, str1,
|
||||
gen_lowpart (SImode, rot_amt)));
|
||||
@@ -2151,7 +2180,7 @@ emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
|
||||
}
|
||||
else
|
||||
gcc_unreachable ();
|
||||
-
|
||||
+
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -2169,10 +2198,10 @@ emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
|
||||
lbzx 10,28,9 # use that offset to load differing byte
|
||||
lbzx 3,29,9
|
||||
subf 3,3,10 # subtract for final result
|
||||
-
|
||||
+
|
||||
P9:
|
||||
vclzlsbb # counts trailing bytes with lsb=0
|
||||
- vextublx # extract differing byte
|
||||
+ vextublx # extract differing byte
|
||||
|
||||
STR1 is the reg rtx for data from string 1.
|
||||
STR2 is the reg rtx for data from string 2.
|
||||
@@ -2208,7 +2237,7 @@ emit_final_str_compare_vec (rtx str1, rtx str2, rtx result,
|
||||
gcc_assert (TARGET_P8_VECTOR);
|
||||
rtx diffix = gen_reg_rtx (DImode);
|
||||
rtx result_gbbd = gen_reg_rtx (V16QImode);
|
||||
- /* Since each byte of the input is either 00 or FF, the bytes in
|
||||
+ /* Since each byte of the input is either 00 or FF, the bytes in
|
||||
dw0 and dw1 after vgbbd are all identical to each other. */
|
||||
emit_insn (gen_p8v_vgbbd (result_gbbd, vec_result));
|
||||
/* For LE, we shift by 9 and get BA in the low two bytes then CTZ.
|
||||
@@ -2226,7 +2255,7 @@ emit_final_str_compare_vec (rtx str1, rtx str2, rtx result,
|
||||
else
|
||||
emit_insn (gen_ctzdi2 (count, diffix));
|
||||
|
||||
- /* P8 doesn't have a good solution for extracting one byte from
|
||||
+ /* P8 doesn't have a good solution for extracting one byte from
|
||||
a vsx reg like vextublx on P9 so we just compute the offset
|
||||
of the differing byte and load it from each string. */
|
||||
do_add3 (off_reg, off_reg, count);
|
||||
@@ -2247,7 +2276,7 @@ emit_final_str_compare_vec (rtx str1, rtx str2, rtx result,
|
||||
}
|
||||
|
||||
/* Expand a string compare operation with length, and return
|
||||
- true if successful. Return false if we should let the
|
||||
+ true if successful. Return false if we should let the
|
||||
compiler generate normal code, probably a strncmp call.
|
||||
|
||||
OPERANDS[0] is the target (result).
|
||||
@@ -2279,9 +2308,9 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
rtx src1_addr = force_reg (Pmode, XEXP (orig_src1, 0));
|
||||
rtx src2_addr = force_reg (Pmode, XEXP (orig_src2, 0));
|
||||
|
||||
- /* If we have a length, it must be constant. This simplifies things
|
||||
+ /* If we have a length, it must be constant. This simplifies things
|
||||
a bit as we don't have to generate code to check if we've exceeded
|
||||
- the length. Later this could be expanded to handle this case. */
|
||||
+ the length. Later this could be expanded to handle this case. */
|
||||
if (!no_length && !CONST_INT_P (bytes_rtx))
|
||||
return false;
|
||||
|
||||
@@ -2311,7 +2340,7 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
else
|
||||
bytes = UINTVAL (bytes_rtx);
|
||||
|
||||
- /* Is it OK to use vec/vsx for this. TARGET_VSX means we have at
|
||||
+ /* Is it OK to use vec/vsx for this. TARGET_VSX means we have at
|
||||
least POWER7 but we use TARGET_EFFICIENT_UNALIGNED_VSX which is
|
||||
at least POWER8. That way we can rely on overlapping compares to
|
||||
do the final comparison of less than 16 bytes. Also I do not
|
||||
@@ -2363,7 +2392,7 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
rtx final_move_label = gen_label_rtx ();
|
||||
rtx final_label = gen_label_rtx ();
|
||||
rtx begin_compare_label = NULL;
|
||||
-
|
||||
+
|
||||
if (base_align < required_align)
|
||||
{
|
||||
/* Generate code that checks distance to 4k boundary for this case. */
|
||||
@@ -2472,7 +2501,7 @@ expand_strn_compare (rtx operands[], int no_length)
|
||||
&cleanup_label, final_move_label);
|
||||
|
||||
offset = compare_length;
|
||||
-
|
||||
+
|
||||
if (equality_compare_rest)
|
||||
{
|
||||
/* Update pointers past what has been compared already. */
|
40
SOURCES/gcc8-sparc-config-detection.patch
Normal file
40
SOURCES/gcc8-sparc-config-detection.patch
Normal file
@ -0,0 +1,40 @@
|
||||
--- gcc/config.gcc.jj 2008-04-24 15:42:46.000000000 -0500
|
||||
+++ gcc/config.gcc 2008-04-24 15:44:51.000000000 -0500
|
||||
@@ -2790,7 +2790,7 @@ sparc-*-rtems*)
|
||||
tm_file="${tm_file} dbxelf.h elfos.h sparc/sysv4.h sparc/sp-elf.h sparc/rtemself.h rtems.h newlib-stdint.h"
|
||||
tmake_file="${tmake_file} sparc/t-sparc sparc/t-rtems"
|
||||
;;
|
||||
-sparc-*-linux*)
|
||||
+sparc-*-linux* | sparcv9-*-linux*)
|
||||
tm_file="${tm_file} dbxelf.h elfos.h sparc/sysv4.h gnu-user.h linux.h glibc-stdint.h sparc/tso.h"
|
||||
extra_options="${extra_options} sparc/long-double-switch.opt"
|
||||
case ${target} in
|
||||
@@ -2844,7 +2844,7 @@ sparc64-*-rtems*)
|
||||
extra_options="${extra_options}"
|
||||
tmake_file="${tmake_file} sparc/t-sparc sparc/t-rtems-64"
|
||||
;;
|
||||
-sparc64-*-linux*)
|
||||
+sparc64*-*-linux*)
|
||||
tm_file="sparc/biarch64.h ${tm_file} dbxelf.h elfos.h sparc/sysv4.h gnu-user.h linux.h glibc-stdint.h sparc/default64.h sparc/linux64.h sparc/tso.h"
|
||||
extra_options="${extra_options} sparc/long-double-switch.opt"
|
||||
tmake_file="${tmake_file} sparc/t-sparc sparc/t-linux64"
|
||||
--- libgcc/config.host.jj 2008-04-24 15:46:19.000000000 -0500
|
||||
+++ libgcc/config.host 2008-04-24 15:46:49.000000000 -0500
|
||||
@@ -1002,7 +1002,7 @@ sparc-*-elf*)
|
||||
tmake_file="${tmake_file} t-fdpbit t-crtfm"
|
||||
extra_parts="$extra_parts crti.o crtn.o crtfastmath.o"
|
||||
;;
|
||||
-sparc-*-linux*) # SPARC's running GNU/Linux, libc6
|
||||
+sparc-*-linux* | sparcv9-*-linux*) # SPARC's running GNU/Linux, libc6
|
||||
tmake_file="${tmake_file} t-crtfm"
|
||||
if test "${host_address}" = 64; then
|
||||
tmake_file="$tmake_file sparc/t-linux64"
|
||||
@@ -1050,7 +1050,7 @@ sparc64-*-freebsd*|ultrasparc-*-freebsd*
|
||||
tmake_file="$tmake_file t-crtfm"
|
||||
extra_parts="$extra_parts crtfastmath.o"
|
||||
;;
|
||||
-sparc64-*-linux*) # 64-bit SPARC's running GNU/Linux
|
||||
+sparc64*-*-linux*) # 64-bit SPARC's running GNU/Linux
|
||||
extra_parts="$extra_parts crtfastmath.o"
|
||||
tmake_file="${tmake_file} t-crtfm sparc/t-linux"
|
||||
if test "${host_address}" = 64; then
|
11
SOURCES/nvptx-tools-build.patch
Normal file
11
SOURCES/nvptx-tools-build.patch
Normal file
@ -0,0 +1,11 @@
|
||||
--- nvptx-tools/nvptx-as.c.jj 2017-01-20 12:40:18.000000000 +0100
|
||||
+++ nvptx-tools/nvptx-as.c 2017-01-20 12:43:53.864271442 +0100
|
||||
@@ -939,7 +939,7 @@ fork_execute (const char *prog, char *co
|
||||
fatal_error ("%s: %m", errmsg);
|
||||
}
|
||||
else
|
||||
- fatal_error (errmsg);
|
||||
+ fatal_error ("%s", errmsg);
|
||||
}
|
||||
do_wait (prog, pex);
|
||||
}
|
32
SOURCES/nvptx-tools-glibc.patch
Normal file
32
SOURCES/nvptx-tools-glibc.patch
Normal file
@ -0,0 +1,32 @@
|
||||
--- nvptx-tools/configure.ac.jj 2017-01-13 12:48:31.000000000 +0100
|
||||
+++ nvptx-tools/configure.ac 2017-05-03 10:26:57.076092259 +0200
|
||||
@@ -66,6 +66,8 @@ CPPFLAGS=$save_CPPFLAGS
|
||||
LDFLAGS=$save_LDFLAGS
|
||||
LIBS=$save_LIBS
|
||||
|
||||
+AC_CHECK_DECLS(getopt)
|
||||
+
|
||||
AC_CONFIG_SUBDIRS([libiberty])
|
||||
AC_CONFIG_FILES([Makefile dejagnu.exp])
|
||||
AC_OUTPUT
|
||||
--- nvptx-tools/configure.jj 2017-01-13 12:48:54.000000000 +0100
|
||||
+++ nvptx-tools/configure 2017-05-03 10:27:13.503876809 +0200
|
||||
@@ -3963,6 +3963,18 @@ CPPFLAGS=$save_CPPFLAGS
|
||||
LDFLAGS=$save_LDFLAGS
|
||||
LIBS=$save_LIBS
|
||||
|
||||
+ac_fn_c_check_decl "$LINENO" "getopt" "ac_cv_have_decl_getopt" "$ac_includes_default"
|
||||
+if test "x$ac_cv_have_decl_getopt" = x""yes; then :
|
||||
+ ac_have_decl=1
|
||||
+else
|
||||
+ ac_have_decl=0
|
||||
+fi
|
||||
+
|
||||
+cat >>confdefs.h <<_ACEOF
|
||||
+#define HAVE_DECL_GETOPT $ac_have_decl
|
||||
+_ACEOF
|
||||
+
|
||||
+
|
||||
|
||||
|
||||
subdirs="$subdirs libiberty"
|
947
SOURCES/nvptx-tools-no-ptxas.patch
Normal file
947
SOURCES/nvptx-tools-no-ptxas.patch
Normal file
@ -0,0 +1,947 @@
|
||||
--- nvptx-tools/configure.ac
|
||||
+++ nvptx-tools/configure.ac
|
||||
@@ -51,6 +51,7 @@ LIBS="$LIBS -lcuda"
|
||||
AC_CHECK_FUNCS([[cuGetErrorName] [cuGetErrorString]])
|
||||
AC_CHECK_DECLS([[cuGetErrorName], [cuGetErrorString]],
|
||||
[], [], [[#include <cuda.h>]])
|
||||
+AC_CHECK_HEADERS(unistd.h sys/stat.h)
|
||||
|
||||
AC_MSG_CHECKING([for extra programs to build requiring -lcuda])
|
||||
NVPTX_RUN=
|
||||
--- nvptx-tools/include/libiberty.h
|
||||
+++ nvptx-tools/include/libiberty.h
|
||||
@@ -390,6 +390,17 @@ extern void hex_init (void);
|
||||
/* Save files used for communication between processes. */
|
||||
#define PEX_SAVE_TEMPS 0x4
|
||||
|
||||
+/* Max number of alloca bytes per call before we must switch to malloc.
|
||||
+
|
||||
+ ?? Swiped from gnulib's regex_internal.h header. Is this actually
|
||||
+ the case? This number seems arbitrary, though sane.
|
||||
+
|
||||
+ The OS usually guarantees only one guard page at the bottom of the stack,
|
||||
+ and a page size can be as small as 4096 bytes. So we cannot safely
|
||||
+ allocate anything larger than 4096 bytes. Also care for the possibility
|
||||
+ of a few compiler-allocated temporary stack slots. */
|
||||
+#define MAX_ALLOCA_SIZE 4032
|
||||
+
|
||||
/* Prepare to execute one or more programs, with standard output of
|
||||
each program fed to standard input of the next.
|
||||
FLAGS As above.
|
||||
--- nvptx-tools/nvptx-as.c
|
||||
+++ nvptx-tools/nvptx-as.c
|
||||
@@ -30,6 +30,9 @@
|
||||
#include <string.h>
|
||||
#include <wait.h>
|
||||
#include <unistd.h>
|
||||
+#ifdef HAVE_SYS_STAT_H
|
||||
+#include <sys/stat.h>
|
||||
+#endif
|
||||
#include <errno.h>
|
||||
#define obstack_chunk_alloc malloc
|
||||
#define obstack_chunk_free free
|
||||
@@ -42,6 +45,38 @@
|
||||
|
||||
#include "version.h"
|
||||
|
||||
+#ifndef R_OK
|
||||
+#define R_OK 4
|
||||
+#define W_OK 2
|
||||
+#define X_OK 1
|
||||
+#endif
|
||||
+
|
||||
+#ifndef DIR_SEPARATOR
|
||||
+# define DIR_SEPARATOR '/'
|
||||
+#endif
|
||||
+
|
||||
+#if defined (_WIN32) || defined (__MSDOS__) \
|
||||
+ || defined (__DJGPP__) || defined (__OS2__)
|
||||
+# define HAVE_DOS_BASED_FILE_SYSTEM
|
||||
+# define HAVE_HOST_EXECUTABLE_SUFFIX
|
||||
+# define HOST_EXECUTABLE_SUFFIX ".exe"
|
||||
+# ifndef DIR_SEPARATOR_2
|
||||
+# define DIR_SEPARATOR_2 '\\'
|
||||
+# endif
|
||||
+# define PATH_SEPARATOR ';'
|
||||
+#else
|
||||
+# define PATH_SEPARATOR ':'
|
||||
+#endif
|
||||
+
|
||||
+#ifndef DIR_SEPARATOR_2
|
||||
+# define IS_DIR_SEPARATOR(ch) ((ch) == DIR_SEPARATOR)
|
||||
+#else
|
||||
+# define IS_DIR_SEPARATOR(ch) \
|
||||
+ (((ch) == DIR_SEPARATOR) || ((ch) == DIR_SEPARATOR_2))
|
||||
+#endif
|
||||
+
|
||||
+#define DIR_UP ".."
|
||||
+
|
||||
static const char *outname = NULL;
|
||||
|
||||
static void __attribute__ ((format (printf, 1, 2)))
|
||||
@@ -816,7 +851,7 @@ traverse (void **slot, void *data)
|
||||
}
|
||||
|
||||
static void
|
||||
-process (FILE *in, FILE *out)
|
||||
+process (FILE *in, FILE *out, int verify, const char *outname)
|
||||
{
|
||||
symbol_table = htab_create (500, hash_string_hash, hash_string_eq,
|
||||
NULL);
|
||||
@@ -824,6 +859,18 @@ process (FILE *in, FILE *out)
|
||||
const char *input = read_file (in);
|
||||
Token *tok = tokenize (input);
|
||||
|
||||
+ /* By default, when ptxas is not in PATH, do minimalistic verification,
|
||||
+ just require that the first non-comment directive is .version. */
|
||||
+ if (verify < 0)
|
||||
+ {
|
||||
+ size_t i;
|
||||
+ for (i = 0; tok[i].kind == K_comment; i++)
|
||||
+ ;
|
||||
+ if (tok[i].kind != K_dotted || !is_keyword (&tok[i], "version"))
|
||||
+ fatal_error ("missing .version directive at start of file '%s'",
|
||||
+ outname);
|
||||
+ }
|
||||
+
|
||||
do
|
||||
tok = parse_file (tok);
|
||||
while (tok->kind);
|
||||
@@ -897,9 +944,83 @@ fork_execute (const char *prog, char *const *argv)
|
||||
do_wait (prog, pex);
|
||||
}
|
||||
|
||||
+/* Determine if progname is available in PATH. */
|
||||
+static bool
|
||||
+program_available (const char *progname)
|
||||
+{
|
||||
+ char *temp = getenv ("PATH");
|
||||
+ if (temp)
|
||||
+ {
|
||||
+ char *startp, *endp, *nstore, *alloc_ptr = NULL;
|
||||
+ size_t prefixlen = strlen (temp) + 1;
|
||||
+ size_t len;
|
||||
+ if (prefixlen < 2)
|
||||
+ prefixlen = 2;
|
||||
+
|
||||
+ len = prefixlen + strlen (progname) + 1;
|
||||
+#ifdef HAVE_HOST_EXECUTABLE_SUFFIX
|
||||
+ len += strlen (HOST_EXECUTABLE_SUFFIX);
|
||||
+#endif
|
||||
+ if (len < MAX_ALLOCA_SIZE)
|
||||
+ nstore = (char *) alloca (len);
|
||||
+ else
|
||||
+ alloc_ptr = nstore = (char *) malloc (len);
|
||||
+
|
||||
+ startp = endp = temp;
|
||||
+ while (1)
|
||||
+ {
|
||||
+ if (*endp == PATH_SEPARATOR || *endp == 0)
|
||||
+ {
|
||||
+ if (endp == startp)
|
||||
+ {
|
||||
+ nstore[0] = '.';
|
||||
+ nstore[1] = DIR_SEPARATOR;
|
||||
+ nstore[2] = '\0';
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ memcpy (nstore, startp, endp - startp);
|
||||
+ if (! IS_DIR_SEPARATOR (endp[-1]))
|
||||
+ {
|
||||
+ nstore[endp - startp] = DIR_SEPARATOR;
|
||||
+ nstore[endp - startp + 1] = 0;
|
||||
+ }
|
||||
+ else
|
||||
+ nstore[endp - startp] = 0;
|
||||
+ }
|
||||
+ strcat (nstore, progname);
|
||||
+ if (! access (nstore, X_OK)
|
||||
+#ifdef HAVE_HOST_EXECUTABLE_SUFFIX
|
||||
+ || ! access (strcat (nstore, HOST_EXECUTABLE_SUFFIX), X_OK)
|
||||
+#endif
|
||||
+ )
|
||||
+ {
|
||||
+#if defined (HAVE_SYS_STAT_H) && defined (S_ISREG)
|
||||
+ struct stat st;
|
||||
+ if (stat (nstore, &st) >= 0 && S_ISREG (st.st_mode))
|
||||
+#endif
|
||||
+ {
|
||||
+ free (alloc_ptr);
|
||||
+ return true;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (*endp == 0)
|
||||
+ break;
|
||||
+ endp = startp = endp + 1;
|
||||
+ }
|
||||
+ else
|
||||
+ endp++;
|
||||
+ }
|
||||
+ free (alloc_ptr);
|
||||
+ }
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
static struct option long_options[] = {
|
||||
{"traditional-format", no_argument, 0, 0 },
|
||||
{"save-temps", no_argument, 0, 0 },
|
||||
+ {"verify", no_argument, 0, 0 },
|
||||
{"no-verify", no_argument, 0, 0 },
|
||||
{"help", no_argument, 0, 'h' },
|
||||
{"version", no_argument, 0, 'V' },
|
||||
@@ -912,7 +1033,7 @@ main (int argc, char **argv)
|
||||
FILE *in = stdin;
|
||||
FILE *out = stdout;
|
||||
bool verbose __attribute__((unused)) = false;
|
||||
- bool verify = true;
|
||||
+ int verify = -1;
|
||||
const char *smver = "sm_30";
|
||||
|
||||
int o;
|
||||
@@ -923,7 +1044,9 @@ main (int argc, char **argv)
|
||||
{
|
||||
case 0:
|
||||
if (option_index == 2)
|
||||
- verify = false;
|
||||
+ verify = 1;
|
||||
+ else if (option_index == 3)
|
||||
+ verify = 0;
|
||||
break;
|
||||
case 'v':
|
||||
verbose = true;
|
||||
@@ -948,7 +1071,8 @@ Usage: nvptx-none-as [option...] [asmfile]\n\
|
||||
Options:\n\
|
||||
-o FILE Write output to FILE\n\
|
||||
-v Be verbose\n\
|
||||
+ --verify Do verify output is acceptable to ptxas\n\
|
||||
--no-verify Do not verify output is acceptable to ptxas\n\
|
||||
--help Print this help and exit\n\
|
||||
--version Print version number and exit\n\
|
||||
\n\
|
||||
@@ -983,11 +1108,17 @@ This program has absolutely no warranty.\n",
|
||||
if (!in)
|
||||
fatal_error ("cannot open input ptx file");
|
||||
|
||||
- process (in, out);
|
||||
- if (outname)
|
||||
+ if (outname == NULL)
|
||||
+ verify = 0;
|
||||
+ else if (verify == -1)
|
||||
+ if (program_available ("ptxas"))
|
||||
+ verify = 1;
|
||||
+
|
||||
+ process (in, out, verify, outname);
|
||||
+ if (outname)
|
||||
fclose (out);
|
||||
|
||||
- if (verify && outname)
|
||||
+ if (verify > 0)
|
||||
{
|
||||
struct obstack argv_obstack;
|
||||
obstack_init (&argv_obstack);
|
||||
--- nvptx-tools/configure
|
||||
+++ nvptx-tools/configure
|
||||
@@ -168,7 +168,8 @@ test x\$exitcode = x0 || exit 1"
|
||||
as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO
|
||||
as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO
|
||||
eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" &&
|
||||
- test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1"
|
||||
+ test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1
|
||||
+test \$(( 1 + 1 )) = 2 || exit 1"
|
||||
if (eval "$as_required") 2>/dev/null; then :
|
||||
as_have_required=yes
|
||||
else
|
||||
@@ -552,11 +553,50 @@ PACKAGE_URL=
|
||||
|
||||
ac_unique_file="nvptx-tools"
|
||||
ac_unique_file="nvptx-as.c"
|
||||
+# Factoring default headers for most tests.
|
||||
+ac_includes_default="\
|
||||
+#include <stdio.h>
|
||||
+#ifdef HAVE_SYS_TYPES_H
|
||||
+# include <sys/types.h>
|
||||
+#endif
|
||||
+#ifdef HAVE_SYS_STAT_H
|
||||
+# include <sys/stat.h>
|
||||
+#endif
|
||||
+#ifdef STDC_HEADERS
|
||||
+# include <stdlib.h>
|
||||
+# include <stddef.h>
|
||||
+#else
|
||||
+# ifdef HAVE_STDLIB_H
|
||||
+# include <stdlib.h>
|
||||
+# endif
|
||||
+#endif
|
||||
+#ifdef HAVE_STRING_H
|
||||
+# if !defined STDC_HEADERS && defined HAVE_MEMORY_H
|
||||
+# include <memory.h>
|
||||
+# endif
|
||||
+# include <string.h>
|
||||
+#endif
|
||||
+#ifdef HAVE_STRINGS_H
|
||||
+# include <strings.h>
|
||||
+#endif
|
||||
+#ifdef HAVE_INTTYPES_H
|
||||
+# include <inttypes.h>
|
||||
+#endif
|
||||
+#ifdef HAVE_STDINT_H
|
||||
+# include <stdint.h>
|
||||
+#endif
|
||||
+#ifdef HAVE_UNISTD_H
|
||||
+# include <unistd.h>
|
||||
+#endif"
|
||||
+
|
||||
enable_option_checking=no
|
||||
ac_subst_vars='LTLIBOBJS
|
||||
LIBOBJS
|
||||
subdirs
|
||||
NVPTX_RUN
|
||||
+EGREP
|
||||
+GREP
|
||||
+CPP
|
||||
CUDA_DRIVER_LDFLAGS
|
||||
CUDA_DRIVER_CPPFLAGS
|
||||
AR
|
||||
@@ -635,7 +675,8 @@ LIBS
|
||||
CPPFLAGS
|
||||
CXX
|
||||
CXXFLAGS
|
||||
-CCC'
|
||||
+CCC
|
||||
+CPP'
|
||||
ac_subdirs_all='libiberty'
|
||||
|
||||
# Initialize some variables set by options.
|
||||
@@ -1267,6 +1308,7 @@ Some influential environment variables:
|
||||
you have headers in a nonstandard directory <include dir>
|
||||
CXX C++ compiler command
|
||||
CXXFLAGS C++ compiler flags
|
||||
+ CPP C preprocessor
|
||||
|
||||
Use these variables to override the choices made by `configure' or to help
|
||||
it to find libraries and programs with nonstandard names/locations.
|
||||
@@ -1575,6 +1617,203 @@ $as_echo "$ac_res" >&6; }
|
||||
eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
|
||||
|
||||
} # ac_fn_c_check_decl
|
||||
+
|
||||
+# ac_fn_c_try_cpp LINENO
|
||||
+# ----------------------
|
||||
+# Try to preprocess conftest.$ac_ext, and return whether this succeeded.
|
||||
+ac_fn_c_try_cpp ()
|
||||
+{
|
||||
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
|
||||
+ if { { ac_try="$ac_cpp conftest.$ac_ext"
|
||||
+case "(($ac_try" in
|
||||
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
|
||||
+ *) ac_try_echo=$ac_try;;
|
||||
+esac
|
||||
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
|
||||
+$as_echo "$ac_try_echo"; } >&5
|
||||
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err
|
||||
+ ac_status=$?
|
||||
+ if test -s conftest.err; then
|
||||
+ grep -v '^ *+' conftest.err >conftest.er1
|
||||
+ cat conftest.er1 >&5
|
||||
+ mv -f conftest.er1 conftest.err
|
||||
+ fi
|
||||
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
||||
+ test $ac_status = 0; } >/dev/null && {
|
||||
+ test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
|
||||
+ test ! -s conftest.err
|
||||
+ }; then :
|
||||
+ ac_retval=0
|
||||
+else
|
||||
+ $as_echo "$as_me: failed program was:" >&5
|
||||
+sed 's/^/| /' conftest.$ac_ext >&5
|
||||
+
|
||||
+ ac_retval=1
|
||||
+fi
|
||||
+ eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
|
||||
+ return $ac_retval
|
||||
+
|
||||
+} # ac_fn_c_try_cpp
|
||||
+
|
||||
+# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
|
||||
+# -------------------------------------------------------
|
||||
+# Tests whether HEADER exists, giving a warning if it cannot be compiled using
|
||||
+# the include files in INCLUDES and setting the cache variable VAR
|
||||
+# accordingly.
|
||||
+ac_fn_c_check_header_mongrel ()
|
||||
+{
|
||||
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
|
||||
+ if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
|
||||
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
|
||||
+$as_echo_n "checking for $2... " >&6; }
|
||||
+if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
|
||||
+ $as_echo_n "(cached) " >&6
|
||||
+fi
|
||||
+eval ac_res=\$$3
|
||||
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
|
||||
+$as_echo "$ac_res" >&6; }
|
||||
+else
|
||||
+ # Is the header compilable?
|
||||
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5
|
||||
+$as_echo_n "checking $2 usability... " >&6; }
|
||||
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
+/* end confdefs.h. */
|
||||
+$4
|
||||
+#include <$2>
|
||||
+_ACEOF
|
||||
+if ac_fn_c_try_compile "$LINENO"; then :
|
||||
+ ac_header_compiler=yes
|
||||
+else
|
||||
+ ac_header_compiler=no
|
||||
+fi
|
||||
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
|
||||
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5
|
||||
+$as_echo "$ac_header_compiler" >&6; }
|
||||
+
|
||||
+# Is the header present?
|
||||
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5
|
||||
+$as_echo_n "checking $2 presence... " >&6; }
|
||||
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
+/* end confdefs.h. */
|
||||
+#include <$2>
|
||||
+_ACEOF
|
||||
+if ac_fn_c_try_cpp "$LINENO"; then :
|
||||
+ ac_header_preproc=yes
|
||||
+else
|
||||
+ ac_header_preproc=no
|
||||
+fi
|
||||
+rm -f conftest.err conftest.$ac_ext
|
||||
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
|
||||
+$as_echo "$ac_header_preproc" >&6; }
|
||||
+
|
||||
+# So? What about this header?
|
||||
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #((
|
||||
+ yes:no: )
|
||||
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5
|
||||
+$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;}
|
||||
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
|
||||
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
|
||||
+ ;;
|
||||
+ no:yes:* )
|
||||
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5
|
||||
+$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;}
|
||||
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5
|
||||
+$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;}
|
||||
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5
|
||||
+$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
|
||||
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5
|
||||
+$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;}
|
||||
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
|
||||
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
|
||||
+ ;;
|
||||
+esac
|
||||
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
|
||||
+$as_echo_n "checking for $2... " >&6; }
|
||||
+if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
|
||||
+ $as_echo_n "(cached) " >&6
|
||||
+else
|
||||
+ eval "$3=\$ac_header_compiler"
|
||||
+fi
|
||||
+eval ac_res=\$$3
|
||||
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
|
||||
+$as_echo "$ac_res" >&6; }
|
||||
+fi
|
||||
+ eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
|
||||
+
|
||||
+} # ac_fn_c_check_header_mongrel
|
||||
+
|
||||
+# ac_fn_c_try_run LINENO
|
||||
+# ----------------------
|
||||
+# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes
|
||||
+# that executables *can* be run.
|
||||
+ac_fn_c_try_run ()
|
||||
+{
|
||||
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
|
||||
+ if { { ac_try="$ac_link"
|
||||
+case "(($ac_try" in
|
||||
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
|
||||
+ *) ac_try_echo=$ac_try;;
|
||||
+esac
|
||||
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
|
||||
+$as_echo "$ac_try_echo"; } >&5
|
||||
+ (eval "$ac_link") 2>&5
|
||||
+ ac_status=$?
|
||||
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
||||
+ test $ac_status = 0; } && { ac_try='./conftest$ac_exeext'
|
||||
+ { { case "(($ac_try" in
|
||||
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
|
||||
+ *) ac_try_echo=$ac_try;;
|
||||
+esac
|
||||
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
|
||||
+$as_echo "$ac_try_echo"; } >&5
|
||||
+ (eval "$ac_try") 2>&5
|
||||
+ ac_status=$?
|
||||
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
||||
+ test $ac_status = 0; }; }; then :
|
||||
+ ac_retval=0
|
||||
+else
|
||||
+ $as_echo "$as_me: program exited with status $ac_status" >&5
|
||||
+ $as_echo "$as_me: failed program was:" >&5
|
||||
+sed 's/^/| /' conftest.$ac_ext >&5
|
||||
+
|
||||
+ ac_retval=$ac_status
|
||||
+fi
|
||||
+ rm -rf conftest.dSYM conftest_ipa8_conftest.oo
|
||||
+ eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
|
||||
+ return $ac_retval
|
||||
+
|
||||
+} # ac_fn_c_try_run
|
||||
+
|
||||
+# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES
|
||||
+# -------------------------------------------------------
|
||||
+# Tests whether HEADER exists and can be compiled using the include files in
|
||||
+# INCLUDES, setting the cache variable VAR accordingly.
|
||||
+ac_fn_c_check_header_compile ()
|
||||
+{
|
||||
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
|
||||
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
|
||||
+$as_echo_n "checking for $2... " >&6; }
|
||||
+if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
|
||||
+ $as_echo_n "(cached) " >&6
|
||||
+else
|
||||
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
+/* end confdefs.h. */
|
||||
+$4
|
||||
+#include <$2>
|
||||
+_ACEOF
|
||||
+if ac_fn_c_try_compile "$LINENO"; then :
|
||||
+ eval "$3=yes"
|
||||
+else
|
||||
+ eval "$3=no"
|
||||
+fi
|
||||
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
|
||||
+fi
|
||||
+eval ac_res=\$$3
|
||||
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
|
||||
+$as_echo "$ac_res" >&6; }
|
||||
+ eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
|
||||
+
|
||||
+} # ac_fn_c_check_header_compile
|
||||
cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
@@ -3284,6 +3523,418 @@ cat >>confdefs.h <<_ACEOF
|
||||
#define HAVE_DECL_CUGETERRORSTRING $ac_have_decl
|
||||
_ACEOF
|
||||
|
||||
+ac_ext=c
|
||||
+ac_cpp='$CPP $CPPFLAGS'
|
||||
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
|
||||
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
|
||||
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
|
||||
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5
|
||||
+$as_echo_n "checking how to run the C preprocessor... " >&6; }
|
||||
+# On Suns, sometimes $CPP names a directory.
|
||||
+if test -n "$CPP" && test -d "$CPP"; then
|
||||
+ CPP=
|
||||
+fi
|
||||
+if test -z "$CPP"; then
|
||||
+ if test "${ac_cv_prog_CPP+set}" = set; then :
|
||||
+ $as_echo_n "(cached) " >&6
|
||||
+else
|
||||
+ # Double quotes because CPP needs to be expanded
|
||||
+ for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp"
|
||||
+ do
|
||||
+ ac_preproc_ok=false
|
||||
+for ac_c_preproc_warn_flag in '' yes
|
||||
+do
|
||||
+ # Use a header file that comes with gcc, so configuring glibc
|
||||
+ # with a fresh cross-compiler works.
|
||||
+ # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
|
||||
+ # <limits.h> exists even on freestanding compilers.
|
||||
+ # On the NeXT, cc -E runs the code through the compiler's parser,
|
||||
+ # not just through cpp. "Syntax error" is here to catch this case.
|
||||
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
+/* end confdefs.h. */
|
||||
+#ifdef __STDC__
|
||||
+# include <limits.h>
|
||||
+#else
|
||||
+# include <assert.h>
|
||||
+#endif
|
||||
+ Syntax error
|
||||
+_ACEOF
|
||||
+if ac_fn_c_try_cpp "$LINENO"; then :
|
||||
+
|
||||
+else
|
||||
+ # Broken: fails on valid input.
|
||||
+continue
|
||||
+fi
|
||||
+rm -f conftest.err conftest.$ac_ext
|
||||
+
|
||||
+ # OK, works on sane cases. Now check whether nonexistent headers
|
||||
+ # can be detected and how.
|
||||
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
+/* end confdefs.h. */
|
||||
+#include <ac_nonexistent.h>
|
||||
+_ACEOF
|
||||
+if ac_fn_c_try_cpp "$LINENO"; then :
|
||||
+ # Broken: success on invalid input.
|
||||
+continue
|
||||
+else
|
||||
+ # Passes both tests.
|
||||
+ac_preproc_ok=:
|
||||
+break
|
||||
+fi
|
||||
+rm -f conftest.err conftest.$ac_ext
|
||||
+
|
||||
+done
|
||||
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
|
||||
+rm -f conftest.err conftest.$ac_ext
|
||||
+if $ac_preproc_ok; then :
|
||||
+ break
|
||||
+fi
|
||||
+
|
||||
+ done
|
||||
+ ac_cv_prog_CPP=$CPP
|
||||
+
|
||||
+fi
|
||||
+ CPP=$ac_cv_prog_CPP
|
||||
+else
|
||||
+ ac_cv_prog_CPP=$CPP
|
||||
+fi
|
||||
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5
|
||||
+$as_echo "$CPP" >&6; }
|
||||
+ac_preproc_ok=false
|
||||
+for ac_c_preproc_warn_flag in '' yes
|
||||
+do
|
||||
+ # Use a header file that comes with gcc, so configuring glibc
|
||||
+ # with a fresh cross-compiler works.
|
||||
+ # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
|
||||
+ # <limits.h> exists even on freestanding compilers.
|
||||
+ # On the NeXT, cc -E runs the code through the compiler's parser,
|
||||
+ # not just through cpp. "Syntax error" is here to catch this case.
|
||||
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
+/* end confdefs.h. */
|
||||
+#ifdef __STDC__
|
||||
+# include <limits.h>
|
||||
+#else
|
||||
+# include <assert.h>
|
||||
+#endif
|
||||
+ Syntax error
|
||||
+_ACEOF
|
||||
+if ac_fn_c_try_cpp "$LINENO"; then :
|
||||
+
|
||||
+else
|
||||
+ # Broken: fails on valid input.
|
||||
+continue
|
||||
+fi
|
||||
+rm -f conftest.err conftest.$ac_ext
|
||||
+
|
||||
+ # OK, works on sane cases. Now check whether nonexistent headers
|
||||
+ # can be detected and how.
|
||||
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
+/* end confdefs.h. */
|
||||
+#include <ac_nonexistent.h>
|
||||
+_ACEOF
|
||||
+if ac_fn_c_try_cpp "$LINENO"; then :
|
||||
+ # Broken: success on invalid input.
|
||||
+continue
|
||||
+else
|
||||
+ # Passes both tests.
|
||||
+ac_preproc_ok=:
|
||||
+break
|
||||
+fi
|
||||
+rm -f conftest.err conftest.$ac_ext
|
||||
+
|
||||
+done
|
||||
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
|
||||
+rm -f conftest.err conftest.$ac_ext
|
||||
+if $ac_preproc_ok; then :
|
||||
+
|
||||
+else
|
||||
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
|
||||
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
|
||||
+as_fn_error "C preprocessor \"$CPP\" fails sanity check
|
||||
+See \`config.log' for more details." "$LINENO" 5; }
|
||||
+fi
|
||||
+
|
||||
+ac_ext=c
|
||||
+ac_cpp='$CPP $CPPFLAGS'
|
||||
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
|
||||
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
|
||||
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
|
||||
+
|
||||
+
|
||||
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
|
||||
+$as_echo_n "checking for grep that handles long lines and -e... " >&6; }
|
||||
+if test "${ac_cv_path_GREP+set}" = set; then :
|
||||
+ $as_echo_n "(cached) " >&6
|
||||
+else
|
||||
+ if test -z "$GREP"; then
|
||||
+ ac_path_GREP_found=false
|
||||
+ # Loop through the user's path and test for each of PROGNAME-LIST
|
||||
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
|
||||
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
|
||||
+do
|
||||
+ IFS=$as_save_IFS
|
||||
+ test -z "$as_dir" && as_dir=.
|
||||
+ for ac_prog in grep ggrep; do
|
||||
+ for ac_exec_ext in '' $ac_executable_extensions; do
|
||||
+ ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
|
||||
+ { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue
|
||||
+# Check for GNU ac_path_GREP and select it if it is found.
|
||||
+ # Check for GNU $ac_path_GREP
|
||||
+case `"$ac_path_GREP" --version 2>&1` in
|
||||
+*GNU*)
|
||||
+ ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;;
|
||||
+*)
|
||||
+ ac_count=0
|
||||
+ $as_echo_n 0123456789 >"conftest.in"
|
||||
+ while :
|
||||
+ do
|
||||
+ cat "conftest.in" "conftest.in" >"conftest.tmp"
|
||||
+ mv "conftest.tmp" "conftest.in"
|
||||
+ cp "conftest.in" "conftest.nl"
|
||||
+ $as_echo 'GREP' >> "conftest.nl"
|
||||
+ "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break
|
||||
+ diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
|
||||
+ as_fn_arith $ac_count + 1 && ac_count=$as_val
|
||||
+ if test $ac_count -gt ${ac_path_GREP_max-0}; then
|
||||
+ # Best one so far, save it but keep looking for a better one
|
||||
+ ac_cv_path_GREP="$ac_path_GREP"
|
||||
+ ac_path_GREP_max=$ac_count
|
||||
+ fi
|
||||
+ # 10*(2^10) chars as input seems more than enough
|
||||
+ test $ac_count -gt 10 && break
|
||||
+ done
|
||||
+ rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
|
||||
+esac
|
||||
+
|
||||
+ $ac_path_GREP_found && break 3
|
||||
+ done
|
||||
+ done
|
||||
+ done
|
||||
+IFS=$as_save_IFS
|
||||
+ if test -z "$ac_cv_path_GREP"; then
|
||||
+ as_fn_error "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
|
||||
+ fi
|
||||
+else
|
||||
+ ac_cv_path_GREP=$GREP
|
||||
+fi
|
||||
+
|
||||
+fi
|
||||
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5
|
||||
+$as_echo "$ac_cv_path_GREP" >&6; }
|
||||
+ GREP="$ac_cv_path_GREP"
|
||||
+
|
||||
+
|
||||
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5
|
||||
+$as_echo_n "checking for egrep... " >&6; }
|
||||
+if test "${ac_cv_path_EGREP+set}" = set; then :
|
||||
+ $as_echo_n "(cached) " >&6
|
||||
+else
|
||||
+ if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
|
||||
+ then ac_cv_path_EGREP="$GREP -E"
|
||||
+ else
|
||||
+ if test -z "$EGREP"; then
|
||||
+ ac_path_EGREP_found=false
|
||||
+ # Loop through the user's path and test for each of PROGNAME-LIST
|
||||
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
|
||||
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
|
||||
+do
|
||||
+ IFS=$as_save_IFS
|
||||
+ test -z "$as_dir" && as_dir=.
|
||||
+ for ac_prog in egrep; do
|
||||
+ for ac_exec_ext in '' $ac_executable_extensions; do
|
||||
+ ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
|
||||
+ { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue
|
||||
+# Check for GNU ac_path_EGREP and select it if it is found.
|
||||
+ # Check for GNU $ac_path_EGREP
|
||||
+case `"$ac_path_EGREP" --version 2>&1` in
|
||||
+*GNU*)
|
||||
+ ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;;
|
||||
+*)
|
||||
+ ac_count=0
|
||||
+ $as_echo_n 0123456789 >"conftest.in"
|
||||
+ while :
|
||||
+ do
|
||||
+ cat "conftest.in" "conftest.in" >"conftest.tmp"
|
||||
+ mv "conftest.tmp" "conftest.in"
|
||||
+ cp "conftest.in" "conftest.nl"
|
||||
+ $as_echo 'EGREP' >> "conftest.nl"
|
||||
+ "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break
|
||||
+ diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
|
||||
+ as_fn_arith $ac_count + 1 && ac_count=$as_val
|
||||
+ if test $ac_count -gt ${ac_path_EGREP_max-0}; then
|
||||
+ # Best one so far, save it but keep looking for a better one
|
||||
+ ac_cv_path_EGREP="$ac_path_EGREP"
|
||||
+ ac_path_EGREP_max=$ac_count
|
||||
+ fi
|
||||
+ # 10*(2^10) chars as input seems more than enough
|
||||
+ test $ac_count -gt 10 && break
|
||||
+ done
|
||||
+ rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
|
||||
+esac
|
||||
+
|
||||
+ $ac_path_EGREP_found && break 3
|
||||
+ done
|
||||
+ done
|
||||
+ done
|
||||
+IFS=$as_save_IFS
|
||||
+ if test -z "$ac_cv_path_EGREP"; then
|
||||
+ as_fn_error "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
|
||||
+ fi
|
||||
+else
|
||||
+ ac_cv_path_EGREP=$EGREP
|
||||
+fi
|
||||
+
|
||||
+ fi
|
||||
+fi
|
||||
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5
|
||||
+$as_echo "$ac_cv_path_EGREP" >&6; }
|
||||
+ EGREP="$ac_cv_path_EGREP"
|
||||
+
|
||||
+
|
||||
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
|
||||
+$as_echo_n "checking for ANSI C header files... " >&6; }
|
||||
+if test "${ac_cv_header_stdc+set}" = set; then :
|
||||
+ $as_echo_n "(cached) " >&6
|
||||
+else
|
||||
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
+/* end confdefs.h. */
|
||||
+#include <stdlib.h>
|
||||
+#include <stdarg.h>
|
||||
+#include <string.h>
|
||||
+#include <float.h>
|
||||
+
|
||||
+int
|
||||
+main ()
|
||||
+{
|
||||
+
|
||||
+ ;
|
||||
+ return 0;
|
||||
+}
|
||||
+_ACEOF
|
||||
+if ac_fn_c_try_compile "$LINENO"; then :
|
||||
+ ac_cv_header_stdc=yes
|
||||
+else
|
||||
+ ac_cv_header_stdc=no
|
||||
+fi
|
||||
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
|
||||
+
|
||||
+if test $ac_cv_header_stdc = yes; then
|
||||
+ # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
|
||||
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
+/* end confdefs.h. */
|
||||
+#include <string.h>
|
||||
+
|
||||
+_ACEOF
|
||||
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
|
||||
+ $EGREP "memchr" >/dev/null 2>&1; then :
|
||||
+
|
||||
+else
|
||||
+ ac_cv_header_stdc=no
|
||||
+fi
|
||||
+rm -f conftest*
|
||||
+
|
||||
+fi
|
||||
+
|
||||
+if test $ac_cv_header_stdc = yes; then
|
||||
+ # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
|
||||
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
+/* end confdefs.h. */
|
||||
+#include <stdlib.h>
|
||||
+
|
||||
+_ACEOF
|
||||
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
|
||||
+ $EGREP "free" >/dev/null 2>&1; then :
|
||||
+
|
||||
+else
|
||||
+ ac_cv_header_stdc=no
|
||||
+fi
|
||||
+rm -f conftest*
|
||||
+
|
||||
+fi
|
||||
+
|
||||
+if test $ac_cv_header_stdc = yes; then
|
||||
+ # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
|
||||
+ if test "$cross_compiling" = yes; then :
|
||||
+ :
|
||||
+else
|
||||
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
+/* end confdefs.h. */
|
||||
+#include <ctype.h>
|
||||
+#include <stdlib.h>
|
||||
+#if ((' ' & 0x0FF) == 0x020)
|
||||
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
|
||||
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
|
||||
+#else
|
||||
+# define ISLOWER(c) \
|
||||
+ (('a' <= (c) && (c) <= 'i') \
|
||||
+ || ('j' <= (c) && (c) <= 'r') \
|
||||
+ || ('s' <= (c) && (c) <= 'z'))
|
||||
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
|
||||
+#endif
|
||||
+
|
||||
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
|
||||
+int
|
||||
+main ()
|
||||
+{
|
||||
+ int i;
|
||||
+ for (i = 0; i < 256; i++)
|
||||
+ if (XOR (islower (i), ISLOWER (i))
|
||||
+ || toupper (i) != TOUPPER (i))
|
||||
+ return 2;
|
||||
+ return 0;
|
||||
+}
|
||||
+_ACEOF
|
||||
+if ac_fn_c_try_run "$LINENO"; then :
|
||||
+
|
||||
+else
|
||||
+ ac_cv_header_stdc=no
|
||||
+fi
|
||||
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
|
||||
+ conftest.$ac_objext conftest.beam conftest.$ac_ext
|
||||
+fi
|
||||
+
|
||||
+fi
|
||||
+fi
|
||||
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5
|
||||
+$as_echo "$ac_cv_header_stdc" >&6; }
|
||||
+if test $ac_cv_header_stdc = yes; then
|
||||
+
|
||||
+$as_echo "#define STDC_HEADERS 1" >>confdefs.h
|
||||
+
|
||||
+fi
|
||||
+
|
||||
+# On IRIX 5.3, sys/types and inttypes.h are conflicting.
|
||||
+for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
|
||||
+ inttypes.h stdint.h unistd.h
|
||||
+do :
|
||||
+ as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
|
||||
+ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default
|
||||
+"
|
||||
+eval as_val=\$$as_ac_Header
|
||||
+ if test "x$as_val" = x""yes; then :
|
||||
+ cat >>confdefs.h <<_ACEOF
|
||||
+#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
|
||||
+_ACEOF
|
||||
+
|
||||
+fi
|
||||
+
|
||||
+done
|
||||
+
|
||||
+
|
||||
+for ac_header in unistd.h sys/stat.h
|
||||
+do :
|
||||
+ as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
|
||||
+ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
|
||||
+eval as_val=\$$as_ac_Header
|
||||
+ if test "x$as_val" = x""yes; then :
|
||||
+ cat >>confdefs.h <<_ACEOF
|
||||
+#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
|
||||
+_ACEOF
|
||||
+
|
||||
+fi
|
||||
+
|
||||
+done
|
||||
+
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for extra programs to build requiring -lcuda" >&5
|
||||
$as_echo_n "checking for extra programs to build requiring -lcuda... " >&6; }
|
3205
SPECS/gcc.spec
Normal file
3205
SPECS/gcc.spec
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user