diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a5474f5
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/gcc-4.8.5-20150702.tar.bz2
diff --git a/EMPTY b/EMPTY
deleted file mode 100644
index 0519ecb..0000000
--- a/EMPTY
+++ /dev/null
@@ -1 +0,0 @@
- 
\ No newline at end of file
diff --git a/compat-libgfortran-48.spec b/compat-libgfortran-48.spec
new file mode 100644
index 0000000..b745fb0
--- /dev/null
+++ b/compat-libgfortran-48.spec
@@ -0,0 +1,504 @@
+%global DATE 20150702
+%global SVNREV 225304
+# Note: gcc_release must be an integer; if you want to add suffixes to
+# %{release}, append them after %{gcc_release} on the Release: line.
+%global gcc_release 36
+%global _unpackaged_files_terminate_build 0
+%global _performance_build 1
+%undefine _annotated_build
+# Hardening slows the compiler way too much.
+%undefine _hardened_build
+Summary: Compatibility Fortran runtime library version 4.8.5
+Name: compat-libgfortran-48
+%global gcc_version 4.8.5
+Version: %{gcc_version}
+Release: %{gcc_release}.1%{?dist}
+# libgcc, libgfortran, libmudflap, libgomp, libstdc++ and crtstuff have
+# GCC Runtime Exception.
+License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
+Group: Development/Languages
+# The source for this package was pulled from upstream's vcs. Use the
+# following commands to generate the tarball:
+# svn export svn://gcc.gnu.org/svn/gcc/branches/redhat/gcc-4_8-branch@%%{SVNREV} gcc-%%{version}-%%{DATE}
+# tar cf - gcc-%%{version}-%%{DATE} | bzip2 -9 > gcc-%%{version}-%%{DATE}.tar.bz2
+Source0: gcc-%{version}-%{DATE}.tar.bz2
+URL: http://gcc.gnu.org
+# Need binutils with -pie support >= 2.14.90.0.4-4
+# Need binutils which can omit dot symbols and overlap .opd on ppc64 >= 2.15.91.0.2-4
+# Need binutils which handle -msecure-plt on ppc >= 2.16.91.0.2-2
+# Need binutils which support .weakref >= 2.16.91.0.3-1
+# Need binutils which support --hash-style=gnu >= 2.17.50.0.2-7
+# Need binutils which support mffgpr and mftgpr >= 2.17.50.0.2-8
+# Need binutils which support --build-id >= 2.17.50.0.17-3
+# Need binutils which support %gnu_unique_object >= 2.19.51.0.14
+# Need binutils which support .cfi_sections >= 2.19.51.0.14-33
+# Need binutils which support --no-add-needed >= 2.20.51.0.2-12
+BuildRequires: binutils >= 2.20.51.0.2-12
+# While gcc doesn't include statically linked binaries, during testing
+# -static is used several times.
+BuildRequires: glibc-static
+BuildRequires: zlib-devel, gettext, dejagnu, bison, flex, sharutils
+BuildRequires: gmp-devel >= 4.1.2-8, mpfr-devel >= 2.2.1, libmpc-devel >= 0.8.1
+# For VTA guality testing
+BuildRequires: gdb
+# Make sure pthread.h doesn't contain __thread tokens
+# Make sure glibc supports stack protector
+# Make sure glibc supports DT_GNU_HASH
+BuildRequires: glibc-devel >= 2.4.90-13
+BuildRequires: elfutils-devel >= 0.147
+BuildRequires: elfutils-libelf-devel >= 0.147
+%ifarch ppc ppc64 ppc64le ppc64p7 s390 s390x sparc sparcv9 alpha
+# Make sure glibc supports TFmode long double
+BuildRequires: glibc >= 2.3.90-35
+%endif
+# Need .eh_frame ld optimizations
+# Need proper visibility support
+# Need -pie support
+# Need --as-needed/--no-as-needed support
+# On ppc64, need omit dot symbols support and --non-overlapping-opd
+# Need binutils that owns /usr/bin/c++filt
+# Need binutils that support .weakref
+# Need binutils that supports --hash-style=gnu
+# Need binutils that support mffgpr/mftgpr
+# Need binutils that support --build-id
+# Need binutils that support %gnu_unique_object
+# Need binutils that support .cfi_sections
+# Need binutils that support --no-add-needed
+Requires: binutils >= 2.20.51.0.2-12
+# Make sure gdb will understand DW_FORM_strp
+Conflicts: gdb < 5.1-2
+Requires: glibc-devel >= 2.2.90-12
+%ifarch ppc ppc64 ppc64le ppc64p7 s390 s390x sparc sparcv9 alpha
+# Make sure glibc supports TFmode long double
+Requires: glibc >= 2.3.90-35
+%endif
+%ifarch %{ix86} x86_64
+%global build_libquadmath 1
+%else
+%global build_libquadmath 0
+%endif
+%if %{build_libquadmath}
+# Use the system libquadmath.
+BuildRequires: libquadmath >= 8.2.1
+%endif
+
+Patch0: gcc48-hack.patch
+Patch1: gcc48-java-nomulti.patch
+Patch2: gcc48-ppc32-retaddr.patch
+Patch3: gcc48-rh330771.patch
+Patch4: gcc48-i386-libgomp.patch
+Patch5: gcc48-sparc-config-detection.patch
+Patch6: gcc48-libgomp-omp_h-multilib.patch
+Patch7: gcc48-libtool-no-rpath.patch
+Patch10: gcc48-pr38757.patch
+Patch12: gcc48-no-add-needed.patch
+Patch13: gcc48-pr56564.patch
+Patch14: gcc48-color-auto.patch
+Patch15: gcc48-pr28865.patch
+Patch16: gcc48-libgo-p224.patch
+Patch17: gcc48-pr60010.patch
+Patch18: gcc48-aarch64-ada.patch
+Patch19: gcc48-aarch64-async-unw-tables.patch
+Patch20: gcc48-aarch64-unwind-opt.patch
+Patch21: gcc48-rh1243366.patch
+Patch22: gcc48-rh1180633.patch
+Patch23: gcc48-rh1278872.patch
+Patch24: gcc48-pr67281.patch
+Patch25: gcc48-pr68680.patch
+Patch26: gcc48-rh1312436.patch
+Patch27: gcc48-pr53477.patch
+Patch28: gcc48-rh1296211.patch
+Patch29: gcc48-rh1304449.patch
+Patch30: gcc48-s390-z13.patch
+Patch31: gcc48-rh1312850.patch
+Patch32: gcc48-pr65142.patch
+Patch33: gcc48-pr52714.patch
+Patch34: gcc48-rh1344807.patch
+Patch35: gcc48-libgomp-20160715.patch
+Patch36: gcc48-pr63293.patch
+Patch37: gcc48-pr72863.patch
+Patch38: gcc48-pr78064.patch
+Patch39: gcc48-pr62258.patch
+Patch40: gcc48-rh1369183.patch
+Patch41: gcc48-pr68184.patch
+Patch42: gcc48-pr79439.patch
+Patch43: gcc48-pr66731.patch
+Patch44: gcc48-pr69116.patch
+Patch45: gcc48-pr72747.patch
+Patch46: gcc48-pr78796.patch
+Patch47: gcc48-pr79969.patch
+Patch48: gcc48-pr78875.patch
+Patch49: gcc48-rh1402585.patch
+Patch50: gcc48-pr70549.patch
+Patch51: gcc48-rh1457969.patch
+Patch52: gcc48-pr69644.patch
+Patch53: gcc48-rh1487434.patch
+Patch54: gcc48-rh1468546.patch
+Patch55: gcc48-rh1469384.patch
+Patch56: gcc48-rh1491395.patch
+Patch57: gcc48-rh1482762.patch
+Patch58: gcc48-pr77375.patch
+Patch59: gcc48-pr77767.patch
+Patch60: gcc48-pr78378.patch
+Patch61: gcc48-pr80129.patch
+Patch62: gcc48-pr80362.patch
+Patch63: gcc48-pr80692.patch
+Patch64: gcc48-pr82274.patch
+Patch65: gcc48-pr78416.patch
+Patch66: gcc48-rh1546728.patch
+Patch67: gcc48-rh1555397.patch
+Patch68: gcc48-pr81395.patch
+Patch69: gcc48-pr72717.patch
+Patch70: gcc48-pr66840.patch
+Patch71: gcc48-rh1546372.patch
+Patch72: gcc48-libc-name.patch
+Patch73: gcc48-ucontext.patch
+
+Patch1301: gcc48-rh1469697-1.patch
+Patch1302: gcc48-rh1469697-2.patch
+Patch1303: gcc48-rh1469697-3.patch
+Patch1304: gcc48-rh1469697-4.patch
+Patch1305: gcc48-rh1469697-5.patch
+Patch1306: gcc48-rh1469697-6.patch
+Patch1307: gcc48-rh1469697-7.patch
+Patch1308: gcc48-rh1469697-8.patch
+Patch1309: gcc48-rh1469697-9.patch
+Patch1310: gcc48-rh1469697-10.patch
+Patch1311: gcc48-rh1469697-11.patch
+Patch1312: gcc48-rh1469697-12.patch
+Patch1313: gcc48-rh1469697-13.patch
+Patch1314: gcc48-rh1469697-14.patch
+Patch1315: gcc48-rh1469697-15.patch
+Patch1316: gcc48-rh1469697-16.patch
+Patch1317: gcc48-rh1469697-17.patch
+Patch1318: gcc48-rh1469697-18.patch
+Patch1319: gcc48-rh1469697-19.patch
+Patch1320: gcc48-rh1469697-20.patch
+Patch1321: gcc48-rh1469697-21.patch
+Patch1322: gcc48-rh1469697-22.patch
+Patch1323: gcc48-rh1469697-23.patch
+Patch1324: gcc48-rh1537828-1.patch
+Patch1325: gcc48-rh1537828-2.patch
+Patch1326: gcc48-rh1537828-3.patch
+Patch1327: gcc48-rh1537828-4.patch
+Patch1328: gcc48-rh1537828-5.patch
+Patch1329: gcc48-rh1537828-10.patch
+
+Patch1401: gcc48-rh1535655-1.patch
+Patch1402: gcc48-rh1535655-2.patch
+Patch1403: gcc48-rh1535655-3.patch
+Patch1404: gcc48-rh1535655-4.patch
+Patch1405: gcc48-rh1535655-5.patch
+Patch1406: gcc48-rh1535655-6.patch
+Patch1407: gcc48-rh1552021.patch
+Patch1408: gcc48-rh1537828-6.patch
+Patch1409: gcc48-rh1537828-7.patch
+Patch1410: gcc48-rh1537828-8.patch
+Patch1411: gcc48-rh1537828-9.patch
+
+# On ARM EABI systems, we do want -gnueabi to be part of the
+# target triple.
+%ifnarch %{arm}
+%global _gnu %{nil}
+%endif
+%ifarch sparcv9
+%global gcc_target_platform sparc64-%{_vendor}-%{_target_os}
+%endif
+%ifarch ppc ppc64p7
+%global gcc_target_platform ppc64-%{_vendor}-%{_target_os}
+%endif
+%ifnarch sparcv9 ppc ppc64p7
+%global gcc_target_platform %{_target_platform}
+%endif
+
+%description
+This package includes a Fortran 95 runtime library for compatibility
+with GCC 4.8.x-RH compiled Fortran applications.
+ +%prep +%setup -q -n gcc-%{version}-%{DATE} +%patch0 -p0 -b .hack~ +%patch1 -p0 -b .java-nomulti~ +%patch2 -p0 -b .ppc32-retaddr~ +%patch3 -p0 -b .rh330771~ +%patch4 -p0 -b .i386-libgomp~ +%patch5 -p0 -b .sparc-config-detection~ +%patch6 -p0 -b .libgomp-omp_h-multilib~ +%patch7 -p0 -b .libtool-no-rpath~ +%patch10 -p0 -b .pr38757~ +%patch12 -p0 -b .no-add-needed~ +%patch13 -p0 -b .pr56564~ +%if 0%{?fedora} >= 20 || 0%{?rhel} >= 7 +%patch14 -p0 -b .color-auto~ +%endif +%patch15 -p0 -b .pr28865~ +%patch16 -p0 -b .libgo-p224~ +rm -f libgo/go/crypto/elliptic/p224{,_test}.go +%patch17 -p0 -b .pr60010~ +%ifarch aarch64 +%patch18 -p0 -b .aarch64-ada~ +%endif +%patch19 -p0 -b .aarch64-async-unw-tables~ +%patch20 -p0 -b .aarch64-unwind-opt~ +%patch21 -p0 -b .rh1243366~ +%patch22 -p0 -b .rh1180633~ +%patch23 -p0 -b .rh1278872~ +%patch24 -p0 -b .pr67281~ +%patch25 -p0 -b .pr68680~ +%patch26 -p0 -b .rh1312436~ +%patch27 -p0 -b .pr53477~ +touch -r %{PATCH27} libstdc++-v3/python/libstdcxx/v6/printers.py +%patch28 -p0 -b .rh1296211~ +%patch29 -p0 -b .rh1304449~ +%patch30 -p0 -b .s390-z13~ +%patch31 -p0 -b .rh1312850~ +%patch32 -p0 -b .pr65142~ +%patch33 -p0 -b .pr52714~ +%patch34 -p0 -b .rh1344807~ +%patch35 -p0 -b .libgomp-20160715~ +%patch36 -p0 -b .pr63293~ +%patch37 -p0 -b .pr72863~ +%patch38 -p0 -b .pr78064~ +%patch39 -p0 -b .pr62258~ +%patch40 -p0 -b .rh1369183~ +%patch41 -p0 -b .pr68184~ +%patch42 -p0 -b .pr79439~ +%patch43 -p0 -b .pr66731~ +%patch44 -p0 -b .pr69116~ +%patch45 -p0 -b .pr72747~ +%patch46 -p0 -b .pr78796~ +%patch47 -p0 -b .pr79969~ +%patch48 -p0 -b .pr78875~ +%patch49 -p0 -b .rh1402585~ +%patch50 -p0 -b .pr70549~ +%patch51 -p0 -b .rh1457969~ +%patch52 -p0 -b .pr69644~ +%patch53 -p0 -b .rh1487434~ +%patch54 -p0 -b .rh1468546~ +%patch55 -p0 -b .rh1469384~ +%patch56 -p0 -b .rh1491395~ +%patch57 -p0 -b .rh1482762~ +%patch58 -p0 -b .pr77375~ +%patch59 -p0 -b .pr77767~ +%patch60 -p0 -b .pr78378~ +%patch61 -p0 -b .pr80129~ +%patch62 -p0 -b .pr80362~ +%patch63 -p0 -b .pr80692~ +%patch64 -p0 -b .pr82274~ +%patch65 -p0 -b .pr78416~ +%patch66 -p0 -b .rh1546728~ +%patch67 -p0 -b .rh1555397~ +%patch68 -p0 -b .pr81395~ +%patch69 -p0 -b .pr72717~ +%patch70 -p0 -b .pr66840~ +%patch71 -p0 -b .rh1546372~ +%patch72 -p0 -b .libc-name~ +%patch73 -p0 -b .ucontext~ + +%patch1301 -p1 -b .stack-clash-1~ +%patch1302 -p1 -b .stack-clash-2~ +%patch1303 -p1 -b .stack-clash-3~ +%patch1304 -p1 -b .stack-clash-4~ +%patch1305 -p1 -b .stack-clash-5~ +%patch1306 -p1 -b .stack-clash-6~ +%patch1307 -p1 -b .stack-clash-7~ +%patch1308 -p1 -b .stack-clash-8~ +%patch1309 -p1 -b .stack-clash-9~ +%patch1310 -p1 -b .stack-clash-10~ +%patch1311 -p1 -b .stack-clash-11~ +%patch1312 -p1 -b .stack-clash-12~ +%patch1313 -p1 -b .stack-clash-13~ +%patch1314 -p1 -b .stack-clash-14~ +%patch1315 -p1 -b .stack-clash-15~ +%patch1316 -p1 -b .stack-clash-16~ +%patch1317 -p1 -b .stack-clash-17~ +%patch1318 -p1 -b .stack-clash-18~ +%patch1319 -p1 -b .stack-clash-19~ +%patch1320 -p1 -b .stack-clash-20~ +%patch1321 -p1 -b .stack-clash-21~ +%patch1322 -p1 -b .stack-clash-22~ +%patch1323 -p1 -b .stack-clash-23~ +%patch1324 -p1 -b .stack-clash-24~ +%patch1325 -p1 -b .stack-clash-25~ +%patch1326 -p1 -b .stack-clash-26~ +%patch1327 -p1 -b .stack-clash-27~ +%patch1328 -p1 -b .stack-clash-28~ +%patch1329 -p1 -b .stack-clash-29~ + +%patch1401 -p1 -b .retpolines-1~ +%patch1402 -p1 -b .retpolines-2~ +%patch1403 -p1 -b .retpolines-3~ +%patch1404 -p1 -b .retpolines-4~ +%patch1405 -p1 -b .retpolines-5~ +%patch1406 -p1 -b .retpolines-6~ +%patch1407 -p0 -b 
.retpolines-7~ +%patch1408 -p0 -b .retpolines-8~ +%patch1409 -p1 -b .retpolines-9~ +%patch1410 -p1 -b .retpolines-10~ +%patch1411 -p1 -b .retpolines-11~ + +%build + +# Undo the broken autoconf change in recent Fedora versions +export CONFIG_SITE=NONE + +rm -fr obj-%{gcc_target_platform} +mkdir obj-%{gcc_target_platform} +cd obj-%{gcc_target_platform} + +CC=gcc +CXX=g++ +OPT_FLAGS=`echo %{optflags}|sed -e 's/\(-Wp,\)\?-D_FORTIFY_SOURCE=[12]//g'` +OPT_FLAGS=`echo $OPT_FLAGS|sed -e 's/-m64//g;s/-m32//g;s/-m31//g'` +OPT_FLAGS=`echo $OPT_FLAGS|sed -e 's/-mfpmath=sse/-mfpmath=sse -msse2/g'` +OPT_FLAGS=`echo $OPT_FLAGS|sed -e 's/ -pipe / /g'` +# GCC 4.8 doesn't know these options, but redhat-rpm-config supplies them. +OPT_FLAGS=`echo $OPT_FLAGS|sed -e 's/ -fstack-clash-protection -fcf-protection/ /g'` +OPT_FLAGS=`echo $OPT_FLAGS|sed -e 's/-Werror=format-security/-Wformat-security/g'` +%ifarch %{ix86} +OPT_FLAGS=`echo $OPT_FLAGS|sed -e 's/-march=i.86//g'` +%endif +%ifarch s390x +# Same here. +OPT_FLAGS=`echo $OPT_FLAGS|sed -e 's/-march=z13 -mtune=z14//g'` +%endif +%ifarch ppc64le +# Same here. GCC 4.8 doesn't grok power8. +OPT_FLAGS=`echo $OPT_FLAGS|sed -e 's/-mcpu=power8 -mtune=power8/-mcpu=power7 -mtune=power7/g'` +%endif +OPT_FLAGS=`echo "$OPT_FLAGS" | sed -e 's/[[:blank:]]\+/ /g'` +case "$OPT_FLAGS" in + *-fasynchronous-unwind-tables*) + sed -i -e 's/-fno-exceptions /-fno-exceptions -fno-asynchronous-unwind-tables/' \ + ../gcc/Makefile.in + ;; +esac +CC="$CC" CFLAGS="$OPT_FLAGS" \ + CXXFLAGS="`echo " $OPT_FLAGS " | sed 's/ -Wall / /g;s/ -fexceptions / /g' \ + | sed 's/ -Werror=format-security / -Wformat -Werror=format-security /'`" \ + XCFLAGS="$OPT_FLAGS" TCFLAGS="$OPT_FLAGS" \ + ../configure --prefix=%{_prefix} --mandir=%{_mandir} --infodir=%{_infodir} \ + --with-bugurl=http://bugzilla.redhat.com/bugzilla \ + --disable-bootstrap \ + --enable-shared --enable-threads=posix --enable-checking=release \ + --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions \ + --enable-gnu-unique-object --enable-linker-build-id --with-linker-hash-style=gnu \ + --enable-languages=c,c++,fortran,lto \ + --disable-plugin --enable-initfini-array \ + --without-isl --without-cloog \ + --enable-gnu-indirect-function \ + --disable-libitm --disable-libsanitizer --disable-libgomp \ + --disable-libatomic --disable-libstdcxx-pch --disable-libssp \ + --disable-libmpx --disable-libcc1 \ + --disable-multilib \ +%ifarch %{arm} + --disable-sjlj-exceptions \ +%endif +%ifarch ppc ppc64 ppc64le ppc64p7 + --enable-secureplt \ +%endif +%ifarch sparc sparcv9 sparc64 ppc ppc64 ppc64le ppc64p7 s390 s390x alpha + --with-long-double-128 \ +%endif +%ifarch ppc64le + --enable-targets=powerpcle-linux \ +%endif +%ifarch ppc64le + --with-cpu-64=power8 --with-tune-64=power8 \ +%endif +%ifarch ppc ppc64 ppc64p7 +%if 0%{?rhel} >= 7 + --with-cpu-32=power7 --with-tune-32=power7 --with-cpu-64=power7 --with-tune-64=power7 \ +%endif +%if 0%{?rhel} == 6 + --with-cpu-32=power4 --with-tune-32=power6 --with-cpu-64=power4 --with-tune-64=power6 \ +%endif +%endif +%ifarch ppc + --build=%{gcc_target_platform} --target=%{gcc_target_platform} --with-cpu=default32 +%endif +%ifarch %{ix86} x86_64 + --with-tune=generic \ +%endif +%if 0%{?rhel} >= 7 +%ifarch %{ix86} + --with-arch=x86-64 \ +%endif +%ifarch x86_64 + --with-arch_32=x86-64 \ +%endif +%else +%ifarch %{ix86} + --with-arch=i686 \ +%endif +%ifarch x86_64 + --with-arch_32=i686 \ +%endif +%endif +%ifarch s390 s390x +%if 0%{?rhel} >= 7 + --with-arch=z196 --with-tune=zEC12 
--enable-decimal-float \ +%else + --with-arch=z9-109 --with-tune=z10 --enable-decimal-float \ +%endif +%endif +%ifarch armv7hl + --with-cpu=cortex-a8 --with-tune=cortex-a8 --with-arch=armv7-a \ + --with-float=hard --with-fpu=vfpv3-d16 --with-abi=aapcs-linux \ +%endif +%ifnarch sparc sparcv9 ppc + --build=%{gcc_target_platform} +%endif + +make %{?_smp_mflags} BOOT_CFLAGS="$OPT_FLAGS" + +%install +rm -fr %{buildroot} +cd obj-%{gcc_target_platform} +mkdir -p %{buildroot}/%{_libdir} + +# Do this so that the resulting .so doesn't have a bogus RPATH. +cd %{gcc_target_platform}/libgfortran/ +mkdir temp +%if %{build_libquadmath} +# Link against the system libquadmath. +# ??? I don't know what I am doing. +mkdir libquadmath +echo '/* GNU ld script + Use the system libquadmath.so. */ +INPUT ( %{_libdir}/libquadmath.so.0.0.0 )' > libquadmath/libquadmath.so +export LIBRARY_PATH=`pwd`/libquadmath +%endif +make install DESTDIR=`pwd`/temp +cp -a temp/usr/%{_lib}/libgfortran.so.3* %{buildroot}/%{_libdir} +cd ../.. + +%check +cd obj-%{gcc_target_platform} + +# Run the Fortran tests. +make %{?_smp_mflags} -k -C gcc check-gfortran ALT_CC_UNDER_TEST=gcc ALT_CXX_UNDER_TEST=g++ || : +echo ====================TESTING========================= +( LC_ALL=C ../contrib/test_summary || : ) 2>&1 | sed -n '/^cat.*EOF/,/^EOF/{/^cat.*EOF/d;/^EOF/d;/^LAST_UPDATED:/d;p;}' +echo ====================TESTING END===================== +mkdir testlogs-%{_target_platform}-%{version}-%{release} +for i in `find . -name \*.log | grep -F testsuite/ | grep -v 'config.log\|acats.*/tests/'`; do + ln $i testlogs-%{_target_platform}-%{version}-%{release}/ || : +done +tar cf - testlogs-%{_target_platform}-%{version}-%{release} | bzip2 -9c \ + | uuencode testlogs-%{_target_platform}.tar.bz2 || : +rm -rf testlogs-%{_target_platform}-%{version}-%{release} + +%post -p /sbin/ldconfig + +%postun -p /sbin/ldconfig + +%files +%{_libdir}/libgfortran.so.3* + +%changelog +* Tue Sep 11 2018 Marek Polacek 4.8.5-36.1 +- remove a few Requires + +* Wed Aug 8 2018 Marek Polacek 4.8.5-36 +- new compat library diff --git a/gcc48-aarch64-ada.patch b/gcc48-aarch64-ada.patch new file mode 100644 index 0000000..4d2fb7d --- /dev/null +++ b/gcc48-aarch64-ada.patch @@ -0,0 +1,96 @@ +2014-04-09 Richard Henderson + + * gcc-interface/Makefile.in: Support aarch64-linux. + * init.c: Enable alternate stack support also on aarch64. + * types.h (Fat_Pointer): Remove aligned attribute. + +--- gcc/ada/gcc-interface/Makefile.in ++++ gcc/ada/gcc-interface/Makefile.in +@@ -2123,6 +2123,44 @@ ifeq ($(strip $(filter-out alpha% linux%,$(arch) $(osys))),) + LIBRARY_VERSION := $(LIB_VERSION) + endif + ++# AArch64 Linux ++ifeq ($(strip $(filter-out aarch64% linux%,$(arch) $(osys))),) ++ LIBGNAT_TARGET_PAIRS = \ ++ a-exetim.adb + + * common/config/aarch64/aarch64-common.c (TARGET_OPTION_INIT_STRUCT): + Define. + (aarch64_option_init_struct): New function. + +--- gcc/common/config/aarch64/aarch64-common.c ++++ gcc/common/config/aarch64/aarch64-common.c +@@ -39,6 +39,9 @@ + #undef TARGET_OPTION_OPTIMIZATION_TABLE + #define TARGET_OPTION_OPTIMIZATION_TABLE aarch_option_optimization_table + ++#undef TARGET_OPTION_INIT_STRUCT ++#define TARGET_OPTION_INIT_STRUCT aarch64_option_init_struct ++ + /* Set default optimization options. */ + static const struct default_options aarch_option_optimization_table[] = + { +@@ -47,6 +50,16 @@ static const struct default_options aarch_option_optimization_table[] = + { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + ++/* Implement TARGET_OPTION_INIT_STRUCT. 
*/ ++ ++static void ++aarch64_option_init_struct (struct gcc_options *opts) ++{ ++ /* By default, always emit DWARF-2 unwind info. This allows debugging ++ without maintaining a stack frame back-chain. */ ++ opts->x_flag_asynchronous_unwind_tables = 1; ++} ++ + /* Implement TARGET_HANDLE_OPTION. + This function handles the target specific options for CPU/target selection. + diff --git a/gcc48-aarch64-unwind-opt.patch b/gcc48-aarch64-unwind-opt.patch new file mode 100644 index 0000000..074cd7e --- /dev/null +++ b/gcc48-aarch64-unwind-opt.patch @@ -0,0 +1,342 @@ +2014-08-08 Richard Henderson + + * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs): Add + cfi_ops argument, for restore put REG_CFA_RESTORE notes into + *cfi_ops rather than on individual insns. Cleanup. + (aarch64_save_or_restore_callee_save_registers): Likewise. + (aarch64_expand_prologue): Adjust caller. + (aarch64_expand_epilogue): Likewise. Cleanup. Emit queued cfi_ops + on the stack restore insn. + +--- gcc/config/aarch64/aarch64.c 2014-07-15 02:27:16.000000000 -0700 ++++ gcc/config/aarch64/aarch64.c 2014-08-21 12:52:44.190455860 -0700 +@@ -1603,24 +1603,23 @@ aarch64_register_saved_on_entry (int reg + + static void + aarch64_save_or_restore_fprs (int start_offset, int increment, +- bool restore, rtx base_rtx) +- ++ bool restore, rtx base_rtx, rtx *cfi_ops) + { + unsigned regno; + unsigned regno2; + rtx insn; + rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM; + +- + for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) + { + if (aarch64_register_saved_on_entry (regno)) + { +- rtx mem; ++ rtx mem, reg1; + mem = gen_mem_ref (DFmode, + plus_constant (Pmode, + base_rtx, + start_offset)); ++ reg1 = gen_rtx_REG (DFmode, regno); + + for (regno2 = regno + 1; + regno2 <= V31_REGNUM +@@ -1632,54 +1631,51 @@ aarch64_save_or_restore_fprs (int start_ + if (regno2 <= V31_REGNUM && + aarch64_register_saved_on_entry (regno2)) + { +- rtx mem2; ++ rtx mem2, reg2; + /* Next highest register to be saved. */ + mem2 = gen_mem_ref (DFmode, + plus_constant + (Pmode, + base_rtx, + start_offset + increment)); ++ reg2 = gen_rtx_REG (DFmode, regno2); ++ + if (restore == false) + { +- insn = emit_insn +- ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno), +- mem2, gen_rtx_REG (DFmode, regno2))); +- ++ insn = emit_insn (gen_store_pairdf (mem, reg1, mem2, reg2)); ++ /* The first part of a frame-related parallel insn ++ is always assumed to be relevant to the frame ++ calculations; subsequent parts, are only ++ frame-related if explicitly marked. */ ++ RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; ++ RTX_FRAME_RELATED_P (insn) = 1; + } + else + { +- insn = emit_insn +- ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem, +- gen_rtx_REG (DFmode, regno2), mem2)); +- +- add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno)); +- add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2)); ++ emit_insn (gen_load_pairdf (reg1, mem, reg2, mem2)); ++ *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops); ++ *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops); + } + +- /* The first part of a frame-related parallel insn +- is always assumed to be relevant to the frame +- calculations; subsequent parts, are only +- frame-related if explicitly marked. 
*/ +- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, +- 1)) = 1; + regno = regno2; + start_offset += increment * 2; + } + else + { + if (restore == false) +- insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno)); ++ { ++ insn = emit_move_insn (mem, reg1); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } + else + { +- insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem); +- add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno)); ++ emit_move_insn (reg1, mem); ++ *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops); + } + start_offset += increment; + } +- RTX_FRAME_RELATED_P (insn) = 1; + } + } +- + } + + +@@ -1687,13 +1683,14 @@ aarch64_save_or_restore_fprs (int start_ + restore's have to happen. */ + static void + aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset, +- bool restore) ++ bool restore, rtx *cfi_ops) + { + rtx insn; + rtx base_rtx = stack_pointer_rtx; + HOST_WIDE_INT start_offset = offset; + HOST_WIDE_INT increment = UNITS_PER_WORD; +- rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM; ++ rtx (*gen_mem_ref)(enum machine_mode, rtx) ++ = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM; + unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM; + unsigned regno; + unsigned regno2; +@@ -1702,11 +1699,13 @@ aarch64_save_or_restore_callee_save_regi + { + if (aarch64_register_saved_on_entry (regno)) + { +- rtx mem; ++ rtx mem, reg1; ++ + mem = gen_mem_ref (Pmode, + plus_constant (Pmode, + base_rtx, + start_offset)); ++ reg1 = gen_rtx_REG (DImode, regno); + + for (regno2 = regno + 1; + regno2 <= limit +@@ -1718,56 +1717,54 @@ aarch64_save_or_restore_callee_save_regi + if (regno2 <= limit && + aarch64_register_saved_on_entry (regno2)) + { +- rtx mem2; ++ rtx mem2, reg2; + /* Next highest register to be saved. */ + mem2 = gen_mem_ref (Pmode, + plus_constant + (Pmode, + base_rtx, + start_offset + increment)); ++ reg2 = gen_rtx_REG (DImode, regno2); ++ + if (restore == false) + { +- insn = emit_insn +- ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno), +- mem2, gen_rtx_REG (DImode, regno2))); +- ++ insn = emit_insn (gen_store_pairdi (mem, reg1, mem2, reg2)); ++ /* The first part of a frame-related parallel insn ++ is always assumed to be relevant to the frame ++ calculations; subsequent parts, are only ++ frame-related if explicitly marked. */ ++ RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; ++ RTX_FRAME_RELATED_P (insn) = 1; + } + else + { +- insn = emit_insn +- ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem, +- gen_rtx_REG (DImode, regno2), mem2)); +- +- add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno)); +- add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2)); ++ emit_insn (gen_load_pairdi (reg1, mem, reg2, mem2)); ++ *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops); ++ *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops); + } + +- /* The first part of a frame-related parallel insn +- is always assumed to be relevant to the frame +- calculations; subsequent parts, are only +- frame-related if explicitly marked. 
*/ +- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, +- 1)) = 1; + regno = regno2; + start_offset += increment * 2; + } + else + { + if (restore == false) +- insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno)); ++ { ++ insn = emit_move_insn (mem, reg1); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } + else + { +- insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem); +- add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno)); ++ emit_move_insn (reg1, mem); ++ *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops); + } + start_offset += increment; + } +- RTX_FRAME_RELATED_P (insn) = 1; + } + } + +- aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx); +- ++ aarch64_save_or_restore_fprs (start_offset, increment, restore, ++ base_rtx, cfi_ops); + } + + /* AArch64 stack frames generated by this compiler look like: +@@ -1966,7 +1963,7 @@ aarch64_expand_prologue (void) + } + + aarch64_save_or_restore_callee_save_registers +- (fp_offset + cfun->machine->frame.hardfp_offset, 0); ++ (fp_offset + cfun->machine->frame.hardfp_offset, 0, NULL); + } + + /* when offset >= 512, +@@ -1991,6 +1988,7 @@ aarch64_expand_epilogue (bool for_sibcal + HOST_WIDE_INT fp_offset; + rtx insn; + rtx cfa_reg; ++ rtx cfi_ops = NULL; + + aarch64_layout_frame (); + original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size; +@@ -2035,15 +2033,17 @@ aarch64_expand_epilogue (bool for_sibcal + insn = emit_insn (gen_add3_insn (stack_pointer_rtx, + hard_frame_pointer_rtx, + GEN_INT (- fp_offset))); ++ /* CFA should be calculated from the value of SP from now on. */ ++ add_reg_note (insn, REG_CFA_ADJUST_CFA, ++ gen_rtx_SET (VOIDmode, stack_pointer_rtx, ++ plus_constant (Pmode, hard_frame_pointer_rtx, ++ -fp_offset))); + RTX_FRAME_RELATED_P (insn) = 1; +- /* As SP is set to (FP - fp_offset), according to the rules in +- dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated +- from the value of SP from now on. */ + cfa_reg = stack_pointer_rtx; + } + + aarch64_save_or_restore_callee_save_registers +- (fp_offset + cfun->machine->frame.hardfp_offset, 1); ++ (fp_offset + cfun->machine->frame.hardfp_offset, 1, &cfi_ops); + + /* Restore the frame pointer and lr if the frame pointer is needed. 
*/ + if (offset > 0) +@@ -2051,6 +2051,8 @@ aarch64_expand_epilogue (bool for_sibcal + if (frame_pointer_needed) + { + rtx mem_fp, mem_lr; ++ rtx reg_fp = hard_frame_pointer_rtx; ++ rtx reg_lr = gen_rtx_REG (DImode, LR_REGNUM); + + if (fp_offset) + { +@@ -2063,52 +2065,36 @@ aarch64_expand_epilogue (bool for_sibcal + stack_pointer_rtx, + fp_offset + + UNITS_PER_WORD)); +- insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx, +- mem_fp, +- gen_rtx_REG (DImode, +- LR_REGNUM), +- mem_lr)); ++ emit_insn (gen_load_pairdi (reg_fp, mem_fp, reg_lr, mem_lr)); ++ ++ insn = emit_insn (gen_add2_insn (stack_pointer_rtx, ++ GEN_INT (offset))); + } + else + { + insn = emit_insn (gen_loadwb_pairdi_di +- (stack_pointer_rtx, +- stack_pointer_rtx, +- hard_frame_pointer_rtx, +- gen_rtx_REG (DImode, LR_REGNUM), +- GEN_INT (offset), ++ (stack_pointer_rtx, stack_pointer_rtx, ++ reg_fp, reg_lr, GEN_INT (offset), + GEN_INT (GET_MODE_SIZE (DImode) + offset))); +- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1; +- add_reg_note (insn, REG_CFA_ADJUST_CFA, +- (gen_rtx_SET (Pmode, stack_pointer_rtx, +- plus_constant (Pmode, cfa_reg, +- offset)))); +- } +- +- /* The first part of a frame-related parallel insn +- is always assumed to be relevant to the frame +- calculations; subsequent parts, are only +- frame-related if explicitly marked. */ +- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; +- RTX_FRAME_RELATED_P (insn) = 1; +- add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx); +- add_reg_note (insn, REG_CFA_RESTORE, +- gen_rtx_REG (DImode, LR_REGNUM)); +- +- if (fp_offset) +- { +- insn = emit_insn (gen_add2_insn (stack_pointer_rtx, +- GEN_INT (offset))); +- RTX_FRAME_RELATED_P (insn) = 1; + } ++ cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg_fp, cfi_ops); ++ cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg_lr, cfi_ops); + } + else + { + insn = emit_insn (gen_add2_insn (stack_pointer_rtx, + GEN_INT (offset))); +- RTX_FRAME_RELATED_P (insn) = 1; + } ++ cfi_ops = alloc_reg_note (REG_CFA_ADJUST_CFA, ++ gen_rtx_SET (VOIDmode, stack_pointer_rtx, ++ plus_constant (Pmode, cfa_reg, ++ offset)), ++ cfi_ops); ++ REG_NOTES (insn) = cfi_ops; ++ RTX_FRAME_RELATED_P (insn) = 1; + } ++ else ++ gcc_assert (cfi_ops == NULL); + + /* Stack adjustment for exception handler. */ + if (crtl->calls_eh_return) diff --git a/gcc48-color-auto.patch b/gcc48-color-auto.patch new file mode 100644 index 0000000..a8cf938 --- /dev/null +++ b/gcc48-color-auto.patch @@ -0,0 +1,46 @@ +2013-09-20 Jakub Jelinek + + * common.opt (-fdiagnostics-color=): Default to auto. + * toplev.c (process_options): Always default to + -fdiagnostics-color=auto if no -f{,no-}diagnostics-color*. + * doc/invoke.texi (-fdiagnostics-color*): Adjust documentation + of the default. + +--- gcc/common.opt.jj 2013-09-18 12:06:53.000000000 +0200 ++++ gcc/common.opt 2013-09-20 10:00:00.935823900 +0200 +@@ -1037,7 +1037,7 @@ Common Alias(fdiagnostics-color=,always, + ; + + fdiagnostics-color= +-Common Joined RejectNegative Var(flag_diagnostics_show_color) Enum(diagnostic_color_rule) Init(DIAGNOSTICS_COLOR_NO) ++Common Joined RejectNegative Var(flag_diagnostics_show_color) Enum(diagnostic_color_rule) Init(DIAGNOSTICS_COLOR_AUTO) + -fdiagnostics-color=[never|always|auto] Colorize diagnostics + + ; Required for these enum values. 
+--- gcc/toplev.c.jj 2013-09-09 11:32:39.000000000 +0200 ++++ gcc/toplev.c 2013-09-20 10:10:08.198721005 +0200 +@@ -1229,10 +1229,8 @@ process_options (void) + + maximum_field_alignment = initial_max_fld_align * BITS_PER_UNIT; + +- /* Default to -fdiagnostics-color=auto if GCC_COLORS is in the environment, +- otherwise default to -fdiagnostics-color=never. */ +- if (!global_options_set.x_flag_diagnostics_show_color +- && getenv ("GCC_COLORS")) ++ /* Default to -fdiagnostics-color=auto. */ ++ if (!global_options_set.x_flag_diagnostics_show_color) + pp_show_color (global_dc->printer) + = colorize_init (DIAGNOSTICS_COLOR_AUTO); + +--- gcc/doc/invoke.texi.jj 2013-09-18 12:06:50.000000000 +0200 ++++ gcc/doc/invoke.texi 2013-09-20 10:09:29.079904455 +0200 +@@ -3046,8 +3046,7 @@ a message which is too long to fit on a + @cindex highlight, color, colour + @vindex GCC_COLORS @r{environment variable} + Use color in diagnostics. @var{WHEN} is @samp{never}, @samp{always}, +-or @samp{auto}. The default is @samp{never} if @env{GCC_COLORS} environment +-variable isn't present in the environment, and @samp{auto} otherwise. ++or @samp{auto}. The default is @samp{auto}. + @samp{auto} means to use color only when the standard error is a terminal. + The forms @option{-fdiagnostics-color} and @option{-fno-diagnostics-color} are + aliases for @option{-fdiagnostics-color=always} and diff --git a/gcc48-hack.patch b/gcc48-hack.patch new file mode 100644 index 0000000..1903e95 --- /dev/null +++ b/gcc48-hack.patch @@ -0,0 +1,102 @@ +--- libada/Makefile.in.jj 2009-01-14 12:07:35.000000000 +0100 ++++ libada/Makefile.in 2009-01-15 14:25:33.000000000 +0100 +@@ -66,18 +66,40 @@ libsubdir := $(libdir)/gcc/$(target_nonc + ADA_RTS_DIR=$(GCC_DIR)/ada/rts$(subst /,_,$(MULTISUBDIR)) + ADA_RTS_SUBDIR=./rts$(subst /,_,$(MULTISUBDIR)) + ++DEFAULTMULTIFLAGS := ++ifeq ($(MULTISUBDIR),) ++targ:=$(subst -, ,$(target)) ++arch:=$(word 1,$(targ)) ++ifeq ($(words $(targ)),2) ++osys:=$(word 2,$(targ)) ++else ++osys:=$(word 3,$(targ)) ++endif ++ifeq ($(strip $(filter-out i%86 x86_64 powerpc% ppc% s390% sparc% linux%, $(arch) $(osys))),) ++ifeq ($(shell $(CC) $(CFLAGS) -print-multi-os-directory),../lib64) ++DEFAULTMULTIFLAGS := -m64 ++else ++ifeq ($(strip $(filter-out s390%, $(arch))),) ++DEFAULTMULTIFLAGS := -m31 ++else ++DEFAULTMULTIFLAGS := -m32 ++endif ++endif ++endif ++endif ++ + # exeext should not be used because it's the *host* exeext. We're building + # a *target* library, aren't we?!? Likewise for CC. 
Still, provide bogus + # definitions just in case something slips through the safety net provided + # by recursive make invocations in gcc/ada/Makefile.in + LIBADA_FLAGS_TO_PASS = \ + "MAKEOVERRIDES=" \ +- "LDFLAGS=$(LDFLAGS)" \ ++ "LDFLAGS=$(LDFLAGS) $(DEFAULTMULTIFLAGS)" \ + "LN_S=$(LN_S)" \ + "SHELL=$(SHELL)" \ +- "GNATLIBFLAGS=$(GNATLIBFLAGS) $(MULTIFLAGS)" \ +- "GNATLIBCFLAGS=$(GNATLIBCFLAGS) $(MULTIFLAGS)" \ +- "GNATLIBCFLAGS_FOR_C=$(GNATLIBCFLAGS_FOR_C) $(MULTIFLAGS)" \ ++ "GNATLIBFLAGS=$(GNATLIBFLAGS) $(MULTIFLAGS) $(DEFAULTMULTIFLAGS)" \ ++ "GNATLIBCFLAGS=$(GNATLIBCFLAGS) $(MULTIFLAGS) $(DEFAULTMULTIFLAGS)" \ ++ "GNATLIBCFLAGS_FOR_C=$(GNATLIBCFLAGS_FOR_C) $(MULTIFLAGS) $(DEFAULTMULTIFLAGS)" \ + "PICFLAG_FOR_TARGET=$(PICFLAG)" \ + "THREAD_KIND=$(THREAD_KIND)" \ + "TRACE=$(TRACE)" \ +@@ -88,7 +110,7 @@ LIBADA_FLAGS_TO_PASS = \ + "exeext=.exeext.should.not.be.used " \ + 'CC=the.host.compiler.should.not.be.needed' \ + "GCC_FOR_TARGET=$(CC)" \ +- "CFLAGS=$(CFLAGS)" ++ "CFLAGS=$(CFLAGS) $(DEFAULTMULTIFLAGS)" + + # Rules to build gnatlib. + .PHONY: gnatlib gnatlib-plain gnatlib-sjlj gnatlib-zcx gnatlib-shared osconstool +--- gcc/ada/sem_util.adb (revision 161677) ++++ gcc/ada/sem_util.adb (working copy) +@@ -5487,7 +5487,7 @@ package body Sem_Util is + Exp : Node_Id; + Assn : Node_Id; + Choice : Node_Id; +- Comp_Type : Entity_Id; ++ Comp_Type : Entity_Id := Empty; + Is_Array_Aggr : Boolean; + + begin +--- config-ml.in.jj 2010-06-30 09:50:44.000000000 +0200 ++++ config-ml.in 2010-07-02 21:24:17.994211151 +0200 +@@ -516,6 +516,8 @@ multi-do: + ADAFLAGS="$(ADAFLAGS) $${flags}" \ + prefix="$(prefix)" \ + exec_prefix="$(exec_prefix)" \ ++ mandir="$(mandir)" \ ++ infodir="$(infodir)" \ + GCJFLAGS="$(GCJFLAGS) $${flags}" \ + GOCFLAGS="$(GOCFLAGS) $${flags}" \ + CXXFLAGS="$(CXXFLAGS) $${flags}" \ +--- libjava/Makefile.am.jj 2010-07-09 11:17:33.729604090 +0200 ++++ libjava/Makefile.am 2010-07-09 13:16:41.894375641 +0200 +@@ -710,7 +710,8 @@ if USE_LIBGCJ_BC + ## later. 
+ @echo Installing dummy lib libgcj_bc.so.1.0.0; \ + rm $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so; \ +- mv $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so.1.0.0 $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so; \ ++ $(INSTALL) $(INSTALL_STRIP_FLAG) $(here)/.libs/libgcj_bc.so $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so; \ ++ rm $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so.1.0.0; \ + $(libgcj_bc_dummy_LINK) -xc /dev/null -Wl,-soname,libgcj_bc.so.1 \ + -o $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so.1.0.0 -lgcj || exit; \ + rm $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so.1; \ +--- libjava/Makefile.in.jj 2010-07-09 11:17:34.000000000 +0200 ++++ libjava/Makefile.in 2010-07-09 13:18:07.542572270 +0200 +@@ -12665,7 +12665,8 @@ install-exec-hook: install-binPROGRAMS i + install-libexecsubPROGRAMS + @USE_LIBGCJ_BC_TRUE@ @echo Installing dummy lib libgcj_bc.so.1.0.0; \ + @USE_LIBGCJ_BC_TRUE@ rm $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so; \ +-@USE_LIBGCJ_BC_TRUE@ mv $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so.1.0.0 $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so; \ ++@USE_LIBGCJ_BC_TRUE@ $(INSTALL) $(INSTALL_STRIP_FLAG) $(here)/.libs/libgcj_bc.so $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so; \ ++@USE_LIBGCJ_BC_TRUE@ rm $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so.1.0.0; \ + @USE_LIBGCJ_BC_TRUE@ $(libgcj_bc_dummy_LINK) -xc /dev/null -Wl,-soname,libgcj_bc.so.1 \ + @USE_LIBGCJ_BC_TRUE@ -o $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so.1.0.0 -lgcj || exit; \ + @USE_LIBGCJ_BC_TRUE@ rm $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so.1; \ diff --git a/gcc48-i386-libgomp.patch b/gcc48-i386-libgomp.patch new file mode 100644 index 0000000..520561e --- /dev/null +++ b/gcc48-i386-libgomp.patch @@ -0,0 +1,11 @@ +--- libgomp/configure.tgt.jj 2008-01-10 20:53:48.000000000 +0100 ++++ libgomp/configure.tgt 2008-03-27 12:44:51.000000000 +0100 +@@ -67,7 +67,7 @@ if test $enable_linux_futex = yes; then + ;; + *) + if test -z "$with_arch"; then +- XCFLAGS="${XCFLAGS} -march=i486 -mtune=${target_cpu}" ++ XCFLAGS="${XCFLAGS} -march=i486 -mtune=generic" + fi + esac + ;; diff --git a/gcc48-java-nomulti.patch b/gcc48-java-nomulti.patch new file mode 100644 index 0000000..17334aa --- /dev/null +++ b/gcc48-java-nomulti.patch @@ -0,0 +1,44 @@ +--- libjava/configure.ac.jj 2007-12-07 17:55:50.000000000 +0100 ++++ libjava/configure.ac 2007-12-07 18:36:56.000000000 +0100 +@@ -82,6 +82,13 @@ AC_ARG_ENABLE(java-maintainer-mode, + [allow rebuilding of .class and .h files])) + AM_CONDITIONAL(JAVA_MAINTAINER_MODE, test "$enable_java_maintainer_mode" = yes) + ++AC_ARG_ENABLE(libjava-multilib, ++ AS_HELP_STRING([--enable-libjava-multilib], [build libjava as multilib])) ++if test "$enable_libjava_multilib" = no; then ++ multilib=no ++ ac_configure_args="$ac_configure_args --disable-multilib" ++fi ++ + # It may not be safe to run linking tests in AC_PROG_CC/AC_PROG_CXX. + GCC_NO_EXECUTABLES + +--- libjava/configure.jj 2007-12-07 17:55:50.000000000 +0100 ++++ libjava/configure 2007-12-07 18:39:58.000000000 +0100 +@@ -1021,6 +1021,8 @@ Optional Features: + default=yes + --enable-java-maintainer-mode + allow rebuilding of .class and .h files ++ --enable-libjava-multilib ++ build libjava as multilib + --disable-dependency-tracking speeds up one-time build + --enable-dependency-tracking do not reject slow dependency extractors + --enable-maintainer-mode enable make rules and dependencies not useful +@@ -1973,6 +1975,16 @@ else + fi + + ++# Check whether --enable-libjava-multilib was given. 
++if test "${enable_libjava_multilib+set}" = set; then ++ enableval=$enable_libjava_multilib; ++fi ++ ++if test "$enable_libjava_multilib" = no; then ++ multilib=no ++ ac_configure_args="$ac_configure_args --disable-multilib" ++fi ++ + # It may not be safe to run linking tests in AC_PROG_CC/AC_PROG_CXX. + + diff --git a/gcc48-libc-name.patch b/gcc48-libc-name.patch new file mode 100644 index 0000000..1e5cac4 --- /dev/null +++ b/gcc48-libc-name.patch @@ -0,0 +1,126 @@ +2016-02-19 Jakub Jelinek + Bernd Edlinger + + * Make-lang.in: Invoke gperf with -L C++. + * cfns.gperf: Remove prototypes for hash and libc_name_p + inlines. + * cfns.h: Regenerated. + * except.c (nothrow_libfn_p): Adjust. + +--- gcc/cp/Make-lang.in ++++ gcc/cp/Make-lang.in +@@ -112,7 +112,7 @@ else + # deleting the $(srcdir)/cp/cfns.h file. + $(srcdir)/cp/cfns.h: + endif +- gperf -o -C -E -k '1-6,$$' -j1 -D -N 'libc_name_p' -L ANSI-C \ ++ gperf -o -C -E -k '1-6,$$' -j1 -D -N 'libc_name_p' -L C++ \ + $(srcdir)/cp/cfns.gperf --output-file $(srcdir)/cp/cfns.h + + # +--- gcc/cp/cfns.gperf ++++ gcc/cp/cfns.gperf +@@ -1,3 +1,5 @@ ++%language=C++ ++%define class-name libc_name + %{ + /* Copyright (C) 2000-2013 Free Software Foundation, Inc. + +@@ -16,14 +18,6 @@ for more details. + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ +-#ifdef __GNUC__ +-__inline +-#endif +-static unsigned int hash (const char *, unsigned int); +-#ifdef __GNUC__ +-__inline +-#endif +-const char * libc_name_p (const char *, unsigned int); + %} + %% + # The standard C library functions, for feeding to gperf; the result is used +--- gcc/cp/cfns.h ++++ gcc/cp/cfns.h +@@ -1,5 +1,5 @@ +-/* ANSI-C code produced by gperf version 3.0.3 */ +-/* Command-line: gperf -o -C -E -k '1-6,$' -j1 -D -N libc_name_p -L ANSI-C cfns.gperf */ ++/* C++ code produced by gperf version 3.0.4 */ ++/* Command-line: gperf -o -C -E -k '1-6,$' -j1 -D -N libc_name_p -L C++ --output-file cfns.h cfns.gperf */ + + #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ + && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ +@@ -28,7 +28,7 @@ + #error "gperf generated tables don't work with this execution character set. Please report a bug to ." + #endif + +-#line 1 "cfns.gperf" ++#line 3 "cfns.gperf" + + /* Copyright (C) 2000-2013 Free Software Foundation, Inc. + +@@ -47,25 +47,18 @@ for more details. + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . 
*/ +-#ifdef __GNUC__ +-__inline +-#endif +-static unsigned int hash (const char *, unsigned int); +-#ifdef __GNUC__ +-__inline +-#endif +-const char * libc_name_p (const char *, unsigned int); + /* maximum key range = 391, duplicates = 0 */ + +-#ifdef __GNUC__ +-__inline +-#else +-#ifdef __cplusplus +-inline +-#endif +-#endif +-static unsigned int +-hash (register const char *str, register unsigned int len) ++class libc_name ++{ ++private: ++ static inline unsigned int hash (const char *str, unsigned int len); ++public: ++ static const char *libc_name_p (const char *str, unsigned int len); ++}; ++ ++inline unsigned int ++libc_name::hash (register const char *str, register unsigned int len) + { + static const unsigned short asso_values[] = + { +@@ -122,14 +115,8 @@ hash (register const char *str, register + return hval + asso_values[(unsigned char)str[len - 1]]; + } + +-#ifdef __GNUC__ +-__inline +-#ifdef __GNUC_STDC_INLINE__ +-__attribute__ ((__gnu_inline__)) +-#endif +-#endif + const char * +-libc_name_p (register const char *str, register unsigned int len) ++libc_name::libc_name_p (register const char *str, register unsigned int len) + { + enum + { +--- gcc/cp/except.c ++++ gcc/cp/except.c +@@ -1040,7 +1040,8 @@ nothrow_libfn_p (const_tree fn) + unless the system headers are playing rename tricks, and if + they are, we don't want to be confused by them. */ + id = DECL_NAME (fn); +- return !!libc_name_p (IDENTIFIER_POINTER (id), IDENTIFIER_LENGTH (id)); ++ return !!libc_name::libc_name_p (IDENTIFIER_POINTER (id), ++ IDENTIFIER_LENGTH (id)); + } + + /* Returns nonzero if an exception of type FROM will be caught by a diff --git a/gcc48-libgo-p224.patch b/gcc48-libgo-p224.patch new file mode 100644 index 0000000..50461bc --- /dev/null +++ b/gcc48-libgo-p224.patch @@ -0,0 +1,1302 @@ +--- libgo/Makefile.am.jj 2013-12-12 19:01:49.000000000 +0100 ++++ libgo/Makefile.am 2014-02-18 17:31:54.798484657 +0100 +@@ -1109,8 +1109,7 @@ go_crypto_dsa_files = \ + go_crypto_ecdsa_files = \ + go/crypto/ecdsa/ecdsa.go + go_crypto_elliptic_files = \ +- go/crypto/elliptic/elliptic.go \ +- go/crypto/elliptic/p224.go ++ go/crypto/elliptic/elliptic.go + go_crypto_hmac_files = \ + go/crypto/hmac/hmac.go + go_crypto_md5_files = \ +--- libgo/Makefile.in.jj 2013-12-12 19:01:49.000000000 +0100 ++++ libgo/Makefile.in 2014-02-18 17:32:11.350389191 +0100 +@@ -1274,8 +1274,7 @@ go_crypto_ecdsa_files = \ + go/crypto/ecdsa/ecdsa.go + + go_crypto_elliptic_files = \ +- go/crypto/elliptic/elliptic.go \ +- go/crypto/elliptic/p224.go ++ go/crypto/elliptic/elliptic.go + + go_crypto_hmac_files = \ + go/crypto/hmac/hmac.go +--- libgo/go/crypto/elliptic/elliptic.go.jj 2012-12-13 11:32:02.640039537 +0100 ++++ libgo/go/crypto/elliptic/elliptic.go 2014-02-18 17:28:22.909692022 +0100 +@@ -327,7 +327,6 @@ var p384 *CurveParams + var p521 *CurveParams + + func initAll() { +- initP224() + initP256() + initP384() + initP521() +--- libgo/go/crypto/elliptic/elliptic_test.go.jj 2012-12-13 11:32:02.640039537 +0100 ++++ libgo/go/crypto/elliptic/elliptic_test.go 2014-02-18 17:31:04.052774265 +0100 +@@ -5,329 +5,14 @@ + package elliptic + + import ( +- "crypto/rand" +- "encoding/hex" +- "fmt" +- "math/big" + "testing" + ) + +-func TestOnCurve(t *testing.T) { +- p224 := P224() +- if !p224.IsOnCurve(p224.Params().Gx, p224.Params().Gy) { +- t.Errorf("FAIL") +- } +-} +- +-type baseMultTest struct { +- k string +- x, y string +-} +- +-var p224BaseMultTests = []baseMultTest{ +- { +- "1", +- "b70e0cbd6bb4bf7f321390b94a03c1d356c21122343280d6115c1d21", +- 
"bd376388b5f723fb4c22dfe6cd4375a05a07476444d5819985007e34", +- }, +- { +- "2", +- "706a46dc76dcb76798e60e6d89474788d16dc18032d268fd1a704fa6", +- "1c2b76a7bc25e7702a704fa986892849fca629487acf3709d2e4e8bb", +- }, +- { +- "3", +- "df1b1d66a551d0d31eff822558b9d2cc75c2180279fe0d08fd896d04", +- "a3f7f03cadd0be444c0aa56830130ddf77d317344e1af3591981a925", +- }, +- { +- "4", +- "ae99feebb5d26945b54892092a8aee02912930fa41cd114e40447301", +- "482580a0ec5bc47e88bc8c378632cd196cb3fa058a7114eb03054c9", +- }, +- { +- "5", +- "31c49ae75bce7807cdff22055d94ee9021fedbb5ab51c57526f011aa", +- "27e8bff1745635ec5ba0c9f1c2ede15414c6507d29ffe37e790a079b", +- }, +- { +- "6", +- "1f2483f82572251fca975fea40db821df8ad82a3c002ee6c57112408", +- "89faf0ccb750d99b553c574fad7ecfb0438586eb3952af5b4b153c7e", +- }, +- { +- "7", +- "db2f6be630e246a5cf7d99b85194b123d487e2d466b94b24a03c3e28", +- "f3a30085497f2f611ee2517b163ef8c53b715d18bb4e4808d02b963", +- }, +- { +- "8", +- "858e6f9cc6c12c31f5df124aa77767b05c8bc021bd683d2b55571550", +- "46dcd3ea5c43898c5c5fc4fdac7db39c2f02ebee4e3541d1e78047a", +- }, +- { +- "9", +- "2fdcccfee720a77ef6cb3bfbb447f9383117e3daa4a07e36ed15f78d", +- "371732e4f41bf4f7883035e6a79fcedc0e196eb07b48171697517463", +- }, +- { +- "10", +- "aea9e17a306517eb89152aa7096d2c381ec813c51aa880e7bee2c0fd", +- "39bb30eab337e0a521b6cba1abe4b2b3a3e524c14a3fe3eb116b655f", +- }, +- { +- "11", +- "ef53b6294aca431f0f3c22dc82eb9050324f1d88d377e716448e507c", +- "20b510004092e96636cfb7e32efded8265c266dfb754fa6d6491a6da", +- }, +- { +- "12", +- "6e31ee1dc137f81b056752e4deab1443a481033e9b4c93a3044f4f7a", +- "207dddf0385bfdeab6e9acda8da06b3bbef224a93ab1e9e036109d13", +- }, +- { +- "13", +- "34e8e17a430e43289793c383fac9774247b40e9ebd3366981fcfaeca", +- "252819f71c7fb7fbcb159be337d37d3336d7feb963724fdfb0ecb767", +- }, +- { +- "14", +- "a53640c83dc208603ded83e4ecf758f24c357d7cf48088b2ce01e9fa", +- "d5814cd724199c4a5b974a43685fbf5b8bac69459c9469bc8f23ccaf", +- }, +- { +- "15", +- "baa4d8635511a7d288aebeedd12ce529ff102c91f97f867e21916bf9", +- "979a5f4759f80f4fb4ec2e34f5566d595680a11735e7b61046127989", +- }, +- { +- "16", +- "b6ec4fe1777382404ef679997ba8d1cc5cd8e85349259f590c4c66d", +- "3399d464345906b11b00e363ef429221f2ec720d2f665d7dead5b482", +- }, +- { +- "17", +- "b8357c3a6ceef288310e17b8bfeff9200846ca8c1942497c484403bc", +- "ff149efa6606a6bd20ef7d1b06bd92f6904639dce5174db6cc554a26", +- }, +- { +- "18", +- "c9ff61b040874c0568479216824a15eab1a838a797d189746226e4cc", +- "ea98d60e5ffc9b8fcf999fab1df7e7ef7084f20ddb61bb045a6ce002", +- }, +- { +- "19", +- "a1e81c04f30ce201c7c9ace785ed44cc33b455a022f2acdbc6cae83c", +- "dcf1f6c3db09c70acc25391d492fe25b4a180babd6cea356c04719cd", +- }, +- { +- "20", +- "fcc7f2b45df1cd5a3c0c0731ca47a8af75cfb0347e8354eefe782455", +- "d5d7110274cba7cdee90e1a8b0d394c376a5573db6be0bf2747f530", +- }, +- { +- "112233445566778899", +- "61f077c6f62ed802dad7c2f38f5c67f2cc453601e61bd076bb46179e", +- "2272f9e9f5933e70388ee652513443b5e289dd135dcc0d0299b225e4", +- }, +- { +- "112233445566778899112233445566778899", +- "29895f0af496bfc62b6ef8d8a65c88c613949b03668aab4f0429e35", +- "3ea6e53f9a841f2019ec24bde1a75677aa9b5902e61081c01064de93", +- }, +- { +- "6950511619965839450988900688150712778015737983940691968051900319680", +- "ab689930bcae4a4aa5f5cb085e823e8ae30fd365eb1da4aba9cf0379", +- "3345a121bbd233548af0d210654eb40bab788a03666419be6fbd34e7", +- }, +- { +- "13479972933410060327035789020509431695094902435494295338570602119423", +- "bdb6a8817c1f89da1c2f3dd8e97feb4494f2ed302a4ce2bc7f5f4025", +- 
"4c7020d57c00411889462d77a5438bb4e97d177700bf7243a07f1680", +- }, +- { +- "13479971751745682581351455311314208093898607229429740618390390702079", +- "d58b61aa41c32dd5eba462647dba75c5d67c83606c0af2bd928446a9", +- "d24ba6a837be0460dd107ae77725696d211446c5609b4595976b16bd", +- }, +- { +- "13479972931865328106486971546324465392952975980343228160962702868479", +- "dc9fa77978a005510980e929a1485f63716df695d7a0c18bb518df03", +- "ede2b016f2ddffc2a8c015b134928275ce09e5661b7ab14ce0d1d403", +- }, +- { +- "11795773708834916026404142434151065506931607341523388140225443265536", +- "499d8b2829cfb879c901f7d85d357045edab55028824d0f05ba279ba", +- "bf929537b06e4015919639d94f57838fa33fc3d952598dcdbb44d638", +- }, +- { +- "784254593043826236572847595991346435467177662189391577090", +- "8246c999137186632c5f9eddf3b1b0e1764c5e8bd0e0d8a554b9cb77", +- "e80ed8660bc1cb17ac7d845be40a7a022d3306f116ae9f81fea65947", +- }, +- { +- "13479767645505654746623887797783387853576174193480695826442858012671", +- "6670c20afcceaea672c97f75e2e9dd5c8460e54bb38538ebb4bd30eb", +- "f280d8008d07a4caf54271f993527d46ff3ff46fd1190a3f1faa4f74", +- }, +- { +- "205688069665150753842126177372015544874550518966168735589597183", +- "eca934247425cfd949b795cb5ce1eff401550386e28d1a4c5a8eb", +- "d4c01040dba19628931bc8855370317c722cbd9ca6156985f1c2e9ce", +- }, +- { +- "13479966930919337728895168462090683249159702977113823384618282123295", +- "ef353bf5c73cd551b96d596fbc9a67f16d61dd9fe56af19de1fba9cd", +- "21771b9cdce3e8430c09b3838be70b48c21e15bc09ee1f2d7945b91f", +- }, +- { +- "50210731791415612487756441341851895584393717453129007497216", +- "4036052a3091eb481046ad3289c95d3ac905ca0023de2c03ecd451cf", +- "d768165a38a2b96f812586a9d59d4136035d9c853a5bf2e1c86a4993", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368041", +- "fcc7f2b45df1cd5a3c0c0731ca47a8af75cfb0347e8354eefe782455", +- "f2a28eefd8b345832116f1e574f2c6b2c895aa8c24941f40d8b80ad1", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368042", +- "a1e81c04f30ce201c7c9ace785ed44cc33b455a022f2acdbc6cae83c", +- "230e093c24f638f533dac6e2b6d01da3b5e7f45429315ca93fb8e634", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368043", +- "c9ff61b040874c0568479216824a15eab1a838a797d189746226e4cc", +- "156729f1a003647030666054e208180f8f7b0df2249e44fba5931fff", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368044", +- "b8357c3a6ceef288310e17b8bfeff9200846ca8c1942497c484403bc", +- "eb610599f95942df1082e4f9426d086fb9c6231ae8b24933aab5db", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368045", +- "b6ec4fe1777382404ef679997ba8d1cc5cd8e85349259f590c4c66d", +- "cc662b9bcba6f94ee4ff1c9c10bd6ddd0d138df2d099a282152a4b7f", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368046", +- "baa4d8635511a7d288aebeedd12ce529ff102c91f97f867e21916bf9", +- "6865a0b8a607f0b04b13d1cb0aa992a5a97f5ee8ca1849efb9ed8678", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368047", +- "a53640c83dc208603ded83e4ecf758f24c357d7cf48088b2ce01e9fa", +- "2a7eb328dbe663b5a468b5bc97a040a3745396ba636b964370dc3352", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368048", +- "34e8e17a430e43289793c383fac9774247b40e9ebd3366981fcfaeca", +- "dad7e608e380480434ea641cc82c82cbc92801469c8db0204f13489a", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368049", +- 
"6e31ee1dc137f81b056752e4deab1443a481033e9b4c93a3044f4f7a", +- "df82220fc7a4021549165325725f94c3410ddb56c54e161fc9ef62ee", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368050", +- "ef53b6294aca431f0f3c22dc82eb9050324f1d88d377e716448e507c", +- "df4aefffbf6d1699c930481cd102127c9a3d992048ab05929b6e5927", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368051", +- "aea9e17a306517eb89152aa7096d2c381ec813c51aa880e7bee2c0fd", +- "c644cf154cc81f5ade49345e541b4d4b5c1adb3eb5c01c14ee949aa2", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368052", +- "2fdcccfee720a77ef6cb3bfbb447f9383117e3daa4a07e36ed15f78d", +- "c8e8cd1b0be40b0877cfca1958603122f1e6914f84b7e8e968ae8b9e", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368053", +- "858e6f9cc6c12c31f5df124aa77767b05c8bc021bd683d2b55571550", +- "fb9232c15a3bc7673a3a03b0253824c53d0fd1411b1cabe2e187fb87", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368054", +- "db2f6be630e246a5cf7d99b85194b123d487e2d466b94b24a03c3e28", +- "f0c5cff7ab680d09ee11dae84e9c1072ac48ea2e744b1b7f72fd469e", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368055", +- "1f2483f82572251fca975fea40db821df8ad82a3c002ee6c57112408", +- "76050f3348af2664aac3a8b05281304ebc7a7914c6ad50a4b4eac383", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368056", +- "31c49ae75bce7807cdff22055d94ee9021fedbb5ab51c57526f011aa", +- "d817400e8ba9ca13a45f360e3d121eaaeb39af82d6001c8186f5f866", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368057", +- "ae99feebb5d26945b54892092a8aee02912930fa41cd114e40447301", +- "fb7da7f5f13a43b81774373c879cd32d6934c05fa758eeb14fcfab38", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368058", +- "df1b1d66a551d0d31eff822558b9d2cc75c2180279fe0d08fd896d04", +- "5c080fc3522f41bbb3f55a97cfecf21f882ce8cbb1e50ca6e67e56dc", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368059", +- "706a46dc76dcb76798e60e6d89474788d16dc18032d268fd1a704fa6", +- "e3d4895843da188fd58fb0567976d7b50359d6b78530c8f62d1b1746", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368060", +- "b70e0cbd6bb4bf7f321390b94a03c1d356c21122343280d6115c1d21", +- "42c89c774a08dc04b3dd201932bc8a5ea5f8b89bbb2a7e667aff81cd", +- }, +-} +- +-func TestBaseMult(t *testing.T) { +- p224 := P224() +- for i, e := range p224BaseMultTests { +- k, ok := new(big.Int).SetString(e.k, 10) +- if !ok { +- t.Errorf("%d: bad value for k: %s", i, e.k) +- } +- x, y := p224.ScalarBaseMult(k.Bytes()) +- if fmt.Sprintf("%x", x) != e.x || fmt.Sprintf("%x", y) != e.y { +- t.Errorf("%d: bad output for k=%s: got (%x, %x), want (%s, %s)", i, e.k, x, y, e.x, e.y) +- } +- if testing.Short() && i > 5 { +- break +- } +- } +-} +- +-func TestGenericBaseMult(t *testing.T) { +- // We use the P224 CurveParams directly in order to test the generic implementation. 
+- p224 := P224().Params() +- for i, e := range p224BaseMultTests { +- k, ok := new(big.Int).SetString(e.k, 10) +- if !ok { +- t.Errorf("%d: bad value for k: %s", i, e.k) +- } +- x, y := p224.ScalarBaseMult(k.Bytes()) +- if fmt.Sprintf("%x", x) != e.x || fmt.Sprintf("%x", y) != e.y { +- t.Errorf("%d: bad output for k=%s: got (%x, %x), want (%s, %s)", i, e.k, x, y, e.x, e.y) +- } +- if testing.Short() && i > 5 { +- break +- } +- } +-} +- + func TestInfinity(t *testing.T) { + tests := []struct { + name string + curve Curve + }{ +- {"p224", P224()}, + {"p256", P256()}, + } + +@@ -359,43 +44,3 @@ func TestInfinity(t *testing.T) { + } + } + } +- +-func BenchmarkBaseMult(b *testing.B) { +- b.ResetTimer() +- p224 := P224() +- e := p224BaseMultTests[25] +- k, _ := new(big.Int).SetString(e.k, 10) +- b.StartTimer() +- for i := 0; i < b.N; i++ { +- p224.ScalarBaseMult(k.Bytes()) +- } +-} +- +-func TestMarshal(t *testing.T) { +- p224 := P224() +- _, x, y, err := GenerateKey(p224, rand.Reader) +- if err != nil { +- t.Error(err) +- return +- } +- serialized := Marshal(p224, x, y) +- xx, yy := Unmarshal(p224, serialized) +- if xx == nil { +- t.Error("failed to unmarshal") +- return +- } +- if xx.Cmp(x) != 0 || yy.Cmp(y) != 0 { +- t.Error("unmarshal returned different values") +- return +- } +-} +- +-func TestP224Overflow(t *testing.T) { +- // This tests for a specific bug in the P224 implementation. +- p224 := P224() +- pointData, _ := hex.DecodeString("049B535B45FB0A2072398A6831834624C7E32CCFD5A4B933BCEAF77F1DD945E08BBE5178F5EDF5E733388F196D2A631D2E075BB16CBFEEA15B") +- x, y := Unmarshal(p224, pointData) +- if !p224.IsOnCurve(x, y) { +- t.Error("P224 failed to validate a correct point") +- } +-} +--- libgo/go/crypto/ecdsa/ecdsa_test.go.jj 2012-12-13 11:32:02.589039782 +0100 ++++ libgo/go/crypto/ecdsa/ecdsa_test.go 2014-02-18 17:28:22.909692022 +0100 +@@ -33,7 +33,6 @@ func testKeyGeneration(t *testing.T, c e + } + + func TestKeyGeneration(t *testing.T) { +- testKeyGeneration(t, elliptic.P224(), "p224") + if testing.Short() { + return + } +@@ -63,7 +62,6 @@ func testSignAndVerify(t *testing.T, c e + } + + func TestSignAndVerify(t *testing.T) { +- testSignAndVerify(t, elliptic.P224(), "p224") + if testing.Short() { + return + } +@@ -129,8 +127,6 @@ func TestVectors(t *testing.T) { + parts := strings.SplitN(line, ",", 2) + + switch parts[0] { +- case "P-224": +- pub.Curve = elliptic.P224() + case "P-256": + pub.Curve = elliptic.P256() + case "P-384": +--- libgo/go/crypto/x509/x509.go.jj 2013-08-14 13:55:08.939843607 +0200 ++++ libgo/go/crypto/x509/x509.go 2014-02-18 17:28:22.943691764 +0100 +@@ -283,9 +283,6 @@ func getPublicKeyAlgorithmFromOID(oid as + + // RFC 5480, 2.1.1.1. 
Named Curve + // +-// secp224r1 OBJECT IDENTIFIER ::= { +-// iso(1) identified-organization(3) certicom(132) curve(0) 33 } +-// + // secp256r1 OBJECT IDENTIFIER ::= { + // iso(1) member-body(2) us(840) ansi-X9-62(10045) curves(3) + // prime(1) 7 } +@@ -298,7 +295,6 @@ func getPublicKeyAlgorithmFromOID(oid as + // + // NB: secp256r1 is equivalent to prime256v1 + var ( +- oidNamedCurveP224 = asn1.ObjectIdentifier{1, 3, 132, 0, 33} + oidNamedCurveP256 = asn1.ObjectIdentifier{1, 2, 840, 10045, 3, 1, 7} + oidNamedCurveP384 = asn1.ObjectIdentifier{1, 3, 132, 0, 34} + oidNamedCurveP521 = asn1.ObjectIdentifier{1, 3, 132, 0, 35} +@@ -306,8 +302,6 @@ var ( + + func namedCurveFromOID(oid asn1.ObjectIdentifier) elliptic.Curve { + switch { +- case oid.Equal(oidNamedCurveP224): +- return elliptic.P224() + case oid.Equal(oidNamedCurveP256): + return elliptic.P256() + case oid.Equal(oidNamedCurveP384): +@@ -320,8 +314,6 @@ func namedCurveFromOID(oid asn1.ObjectId + + func oidFromNamedCurve(curve elliptic.Curve) (asn1.ObjectIdentifier, bool) { + switch curve { +- case elliptic.P224(): +- return oidNamedCurveP224, true + case elliptic.P256(): + return oidNamedCurveP256, true + case elliptic.P384(): +@@ -1212,7 +1204,7 @@ func CreateCertificate(rand io.Reader, t + hashFunc = crypto.SHA1 + case *ecdsa.PrivateKey: + switch priv.Curve { +- case elliptic.P224(), elliptic.P256(): ++ case elliptic.P256(): + hashFunc = crypto.SHA256 + signatureAlgorithm.Algorithm = oidSignatureECDSAWithSHA256 + case elliptic.P384(): +--- libgo/go/crypto/elliptic/p224.go.jj 2012-12-13 11:32:02.641039533 +0100 ++++ libgo/go/crypto/elliptic/p224.go 2014-02-15 11:40:56.191557928 +0100 +@@ -1,765 +0,0 @@ +-// Copyright 2012 The Go Authors. All rights reserved. +-// Use of this source code is governed by a BSD-style +-// license that can be found in the LICENSE file. +- +-package elliptic +- +-// This is a constant-time, 32-bit implementation of P224. See FIPS 186-3, +-// section D.2.2. +-// +-// See http://www.imperialviolet.org/2010/12/04/ecc.html ([1]) for background. 
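The header above describes the deleted file as a constant-time implementation. The core idiom it relies on, visible later in this file in p224CopyConditional, is to expand a secret bit into an all-zeros or all-ones mask and blend with XOR, so no branch ever depends on secret data. A minimal standalone sketch, with illustrative names, not part of the patch:

```go
package main

import "fmt"

// maskFromBit expands the low bit of control into an all-zeros or all-ones
// word, the way p224CopyConditional does: shift the bit to the sign
// position, then smear it down with an arithmetic right shift.
func maskFromBit(control uint32) uint32 {
	control <<= 31
	return uint32(int32(control) >> 31)
}

// ctSelect returns a when bit == 1 and b when bit == 0, without branching
// on the (potentially secret) bit.
func ctSelect(a, b, bit uint32) uint32 {
	m := maskFromBit(bit)
	return b ^ ((a ^ b) & m)
}

func main() {
	fmt.Printf("%08x %08x\n", maskFromBit(0), maskFromBit(1)) // 00000000 ffffffff
	fmt.Println(ctSelect(7, 9, 1), ctSelect(7, 9, 0))         // 7 9
}
```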
+- +-import ( +- "math/big" +-) +- +-var p224 p224Curve +- +-type p224Curve struct { +- *CurveParams +- gx, gy, b p224FieldElement +-} +- +-func initP224() { +- // See FIPS 186-3, section D.2.2 +- p224.CurveParams = new(CurveParams) +- p224.P, _ = new(big.Int).SetString("26959946667150639794667015087019630673557916260026308143510066298881", 10) +- p224.N, _ = new(big.Int).SetString("26959946667150639794667015087019625940457807714424391721682722368061", 10) +- p224.B, _ = new(big.Int).SetString("b4050a850c04b3abf54132565044b0b7d7bfd8ba270b39432355ffb4", 16) +- p224.Gx, _ = new(big.Int).SetString("b70e0cbd6bb4bf7f321390b94a03c1d356c21122343280d6115c1d21", 16) +- p224.Gy, _ = new(big.Int).SetString("bd376388b5f723fb4c22dfe6cd4375a05a07476444d5819985007e34", 16) +- p224.BitSize = 224 +- +- p224FromBig(&p224.gx, p224.Gx) +- p224FromBig(&p224.gy, p224.Gy) +- p224FromBig(&p224.b, p224.B) +-} +- +-// P224 returns a Curve which implements P-224 (see FIPS 186-3, section D.2.2) +-func P224() Curve { +- initonce.Do(initAll) +- return p224 +-} +- +-func (curve p224Curve) Params() *CurveParams { +- return curve.CurveParams +-} +- +-func (curve p224Curve) IsOnCurve(bigX, bigY *big.Int) bool { +- var x, y p224FieldElement +- p224FromBig(&x, bigX) +- p224FromBig(&y, bigY) +- +- // y² = x³ - 3x + b +- var tmp p224LargeFieldElement +- var x3 p224FieldElement +- p224Square(&x3, &x, &tmp) +- p224Mul(&x3, &x3, &x, &tmp) +- +- for i := 0; i < 8; i++ { +- x[i] *= 3 +- } +- p224Sub(&x3, &x3, &x) +- p224Reduce(&x3) +- p224Add(&x3, &x3, &curve.b) +- p224Contract(&x3, &x3) +- +- p224Square(&y, &y, &tmp) +- p224Contract(&y, &y) +- +- for i := 0; i < 8; i++ { +- if y[i] != x3[i] { +- return false +- } +- } +- return true +-} +- +-func (p224Curve) Add(bigX1, bigY1, bigX2, bigY2 *big.Int) (x, y *big.Int) { +- var x1, y1, z1, x2, y2, z2, x3, y3, z3 p224FieldElement +- +- p224FromBig(&x1, bigX1) +- p224FromBig(&y1, bigY1) +- if bigX1.Sign() != 0 || bigY1.Sign() != 0 { +- z1[0] = 1 +- } +- p224FromBig(&x2, bigX2) +- p224FromBig(&y2, bigY2) +- if bigX2.Sign() != 0 || bigY2.Sign() != 0 { +- z2[0] = 1 +- } +- +- p224AddJacobian(&x3, &y3, &z3, &x1, &y1, &z1, &x2, &y2, &z2) +- return p224ToAffine(&x3, &y3, &z3) +-} +- +-func (p224Curve) Double(bigX1, bigY1 *big.Int) (x, y *big.Int) { +- var x1, y1, z1, x2, y2, z2 p224FieldElement +- +- p224FromBig(&x1, bigX1) +- p224FromBig(&y1, bigY1) +- z1[0] = 1 +- +- p224DoubleJacobian(&x2, &y2, &z2, &x1, &y1, &z1) +- return p224ToAffine(&x2, &y2, &z2) +-} +- +-func (p224Curve) ScalarMult(bigX1, bigY1 *big.Int, scalar []byte) (x, y *big.Int) { +- var x1, y1, z1, x2, y2, z2 p224FieldElement +- +- p224FromBig(&x1, bigX1) +- p224FromBig(&y1, bigY1) +- z1[0] = 1 +- +- p224ScalarMult(&x2, &y2, &z2, &x1, &y1, &z1, scalar) +- return p224ToAffine(&x2, &y2, &z2) +-} +- +-func (curve p224Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) { +- var z1, x2, y2, z2 p224FieldElement +- +- z1[0] = 1 +- p224ScalarMult(&x2, &y2, &z2, &curve.gx, &curve.gy, &z1, scalar) +- return p224ToAffine(&x2, &y2, &z2) +-} +- +-// Field element functions. +-// +-// The field that we're dealing with is ℤ/pℤ where p = 2**224 - 2**96 + 1. +-// +-// Field elements are represented by a FieldElement, which is a typedef to an +-// array of 8 uint32's. The value of a FieldElement, a, is: +-// a[0] + 2**28·a[1] + 2**56·a[1] + ... + 2**196·a[7] +-// +-// Using 28-bit limbs means that there's only 4 bits of headroom, which is less +-// than we would really like. 
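The comment above defines a field element's value as a[0] + 2**28·a[1] + ... + 2**196·a[7]; note that the second term printed as "2**56·a[1]" is evidently an upstream typo for 2**56·a[2], since the exponent pattern is 2**(28·i). A minimal sketch of that 28-bit limb packing and its inverse, using math/big instead of the patch's hand-rolled byte routines (names are illustrative, not from the patch):

```go
package main

import (
	"fmt"
	"math/big"
)

// fromBig splits n (0 <= n < 2^224) into eight 28-bit limbs, little-endian:
// n = limbs[0] + 2^28*limbs[1] + 2^56*limbs[2] + ... + 2^196*limbs[7].
func fromBig(n *big.Int) (limbs [8]uint32) {
	t := new(big.Int).Set(n)
	low28 := big.NewInt(0xfffffff) // 2^28 - 1, the patch's bottom28Bits
	m := new(big.Int)
	for i := 0; i < 8; i++ {
		limbs[i] = uint32(m.And(t, low28).Uint64())
		t.Rsh(t, 28)
	}
	return
}

// toBig is the inverse of fromBig.
func toBig(limbs [8]uint32) *big.Int {
	n := new(big.Int)
	for i := 7; i >= 0; i-- {
		n.Lsh(n, 28)
		n.Or(n, big.NewInt(int64(limbs[i])))
	}
	return n
}

func main() {
	// Round-trip the P-224 base point x coordinate from initP224 above.
	n, _ := new(big.Int).SetString("b70e0cbd6bb4bf7f321390b94a03c1d356c21122343280d6115c1d21", 16)
	fmt.Println(toBig(fromBig(n)).Cmp(n) == 0) // expect: true
}
```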
But it has the useful feature that we hit 2**224 +-// exactly, making the reflections during a reduce much nicer. +-type p224FieldElement [8]uint32 +- +-// p224P is the order of the field, represented as a p224FieldElement. +-var p224P = [8]uint32{1, 0, 0, 0xffff000, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff} +- +-// p224IsZero returns 1 if a == 0 mod p and 0 otherwise. +-// +-// a[i] < 2**29 +-func p224IsZero(a *p224FieldElement) uint32 { +- // Since a p224FieldElement contains 224 bits there are two possible +- // representations of 0: 0 and p. +- var minimal p224FieldElement +- p224Contract(&minimal, a) +- +- var isZero, isP uint32 +- for i, v := range minimal { +- isZero |= v +- isP |= v - p224P[i] +- } +- +- // If either isZero or isP is 0, then we should return 1. +- isZero |= isZero >> 16 +- isZero |= isZero >> 8 +- isZero |= isZero >> 4 +- isZero |= isZero >> 2 +- isZero |= isZero >> 1 +- +- isP |= isP >> 16 +- isP |= isP >> 8 +- isP |= isP >> 4 +- isP |= isP >> 2 +- isP |= isP >> 1 +- +- // For isZero and isP, the LSB is 0 iff all the bits are zero. +- result := isZero & isP +- result = (^result) & 1 +- +- return result +-} +- +-// p224Add computes *out = a+b +-// +-// a[i] + b[i] < 2**32 +-func p224Add(out, a, b *p224FieldElement) { +- for i := 0; i < 8; i++ { +- out[i] = a[i] + b[i] +- } +-} +- +-const two31p3 = 1<<31 + 1<<3 +-const two31m3 = 1<<31 - 1<<3 +-const two31m15m3 = 1<<31 - 1<<15 - 1<<3 +- +-// p224ZeroModP31 is 0 mod p where bit 31 is set in all limbs so that we can +-// subtract smaller amounts without underflow. See the section "Subtraction" in +-// [1] for reasoning. +-var p224ZeroModP31 = []uint32{two31p3, two31m3, two31m3, two31m15m3, two31m3, two31m3, two31m3, two31m3} +- +-// p224Sub computes *out = a-b +-// +-// a[i], b[i] < 2**30 +-// out[i] < 2**32 +-func p224Sub(out, a, b *p224FieldElement) { +- for i := 0; i < 8; i++ { +- out[i] = a[i] + p224ZeroModP31[i] - b[i] +- } +-} +- +-// LargeFieldElement also represents an element of the field. The limbs are +-// still spaced 28-bits apart and in little-endian order. So the limbs are at +-// 0, 28, 56, ..., 392 bits, each 64-bits wide. +-type p224LargeFieldElement [15]uint64 +- +-const two63p35 = 1<<63 + 1<<35 +-const two63m35 = 1<<63 - 1<<35 +-const two63m35m19 = 1<<63 - 1<<35 - 1<<19 +- +-// p224ZeroModP63 is 0 mod p where bit 63 is set in all limbs. See the section +-// "Subtraction" in [1] for why. +-var p224ZeroModP63 = [8]uint64{two63p35, two63m35, two63m35, two63m35, two63m35m19, two63m35, two63m35, two63m35} +- +-const bottom12Bits = 0xfff +-const bottom28Bits = 0xfffffff +- +-// p224Mul computes *out = a*b +-// +-// a[i] < 2**29, b[i] < 2**30 (or vice versa) +-// out[i] < 2**29 +-func p224Mul(out, a, b *p224FieldElement, tmp *p224LargeFieldElement) { +- for i := 0; i < 15; i++ { +- tmp[i] = 0 +- } +- +- for i := 0; i < 8; i++ { +- for j := 0; j < 8; j++ { +- tmp[i+j] += uint64(a[i]) * uint64(b[j]) +- } +- } +- +- p224ReduceLarge(out, tmp) +-} +- +-// Square computes *out = a*a +-// +-// a[i] < 2**29 +-// out[i] < 2**29 +-func p224Square(out, a *p224FieldElement, tmp *p224LargeFieldElement) { +- for i := 0; i < 15; i++ { +- tmp[i] = 0 +- } +- +- for i := 0; i < 8; i++ { +- for j := 0; j <= i; j++ { +- r := uint64(a[i]) * uint64(a[j]) +- if i == j { +- tmp[i+j] += r +- } else { +- tmp[i+j] += r << 1 +- } +- } +- } +- +- p224ReduceLarge(out, tmp) +-} +- +-// ReduceLarge converts a p224LargeFieldElement to a p224FieldElement. 
+-// +-// in[i] < 2**62 +-func p224ReduceLarge(out *p224FieldElement, in *p224LargeFieldElement) { +- for i := 0; i < 8; i++ { +- in[i] += p224ZeroModP63[i] +- } +- +- // Eliminate the coefficients at 2**224 and greater. +- for i := 14; i >= 8; i-- { +- in[i-8] -= in[i] +- in[i-5] += (in[i] & 0xffff) << 12 +- in[i-4] += in[i] >> 16 +- } +- in[8] = 0 +- // in[0..8] < 2**64 +- +- // As the values become small enough, we start to store them in |out| +- // and use 32-bit operations. +- for i := 1; i < 8; i++ { +- in[i+1] += in[i] >> 28 +- out[i] = uint32(in[i] & bottom28Bits) +- } +- in[0] -= in[8] +- out[3] += uint32(in[8]&0xffff) << 12 +- out[4] += uint32(in[8] >> 16) +- // in[0] < 2**64 +- // out[3] < 2**29 +- // out[4] < 2**29 +- // out[1,2,5..7] < 2**28 +- +- out[0] = uint32(in[0] & bottom28Bits) +- out[1] += uint32((in[0] >> 28) & bottom28Bits) +- out[2] += uint32(in[0] >> 56) +- // out[0] < 2**28 +- // out[1..4] < 2**29 +- // out[5..7] < 2**28 +-} +- +-// Reduce reduces the coefficients of a to smaller bounds. +-// +-// On entry: a[i] < 2**31 + 2**30 +-// On exit: a[i] < 2**29 +-func p224Reduce(a *p224FieldElement) { +- for i := 0; i < 7; i++ { +- a[i+1] += a[i] >> 28 +- a[i] &= bottom28Bits +- } +- top := a[7] >> 28 +- a[7] &= bottom28Bits +- +- // top < 2**4 +- mask := top +- mask |= mask >> 2 +- mask |= mask >> 1 +- mask <<= 31 +- mask = uint32(int32(mask) >> 31) +- // Mask is all ones if top != 0, all zero otherwise +- +- a[0] -= top +- a[3] += top << 12 +- +- // We may have just made a[0] negative but, if we did, then we must +- // have added something to a[3], this it's > 2**12. Therefore we can +- // carry down to a[0]. +- a[3] -= 1 & mask +- a[2] += mask & (1<<28 - 1) +- a[1] += mask & (1<<28 - 1) +- a[0] += mask & (1 << 28) +-} +- +-// p224Invert calculates *out = in**-1 by computing in**(2**224 - 2**96 - 1), +-// i.e. Fermat's little theorem. +-func p224Invert(out, in *p224FieldElement) { +- var f1, f2, f3, f4 p224FieldElement +- var c p224LargeFieldElement +- +- p224Square(&f1, in, &c) // 2 +- p224Mul(&f1, &f1, in, &c) // 2**2 - 1 +- p224Square(&f1, &f1, &c) // 2**3 - 2 +- p224Mul(&f1, &f1, in, &c) // 2**3 - 1 +- p224Square(&f2, &f1, &c) // 2**4 - 2 +- p224Square(&f2, &f2, &c) // 2**5 - 4 +- p224Square(&f2, &f2, &c) // 2**6 - 8 +- p224Mul(&f1, &f1, &f2, &c) // 2**6 - 1 +- p224Square(&f2, &f1, &c) // 2**7 - 2 +- for i := 0; i < 5; i++ { // 2**12 - 2**6 +- p224Square(&f2, &f2, &c) +- } +- p224Mul(&f2, &f2, &f1, &c) // 2**12 - 1 +- p224Square(&f3, &f2, &c) // 2**13 - 2 +- for i := 0; i < 11; i++ { // 2**24 - 2**12 +- p224Square(&f3, &f3, &c) +- } +- p224Mul(&f2, &f3, &f2, &c) // 2**24 - 1 +- p224Square(&f3, &f2, &c) // 2**25 - 2 +- for i := 0; i < 23; i++ { // 2**48 - 2**24 +- p224Square(&f3, &f3, &c) +- } +- p224Mul(&f3, &f3, &f2, &c) // 2**48 - 1 +- p224Square(&f4, &f3, &c) // 2**49 - 2 +- for i := 0; i < 47; i++ { // 2**96 - 2**48 +- p224Square(&f4, &f4, &c) +- } +- p224Mul(&f3, &f3, &f4, &c) // 2**96 - 1 +- p224Square(&f4, &f3, &c) // 2**97 - 2 +- for i := 0; i < 23; i++ { // 2**120 - 2**24 +- p224Square(&f4, &f4, &c) +- } +- p224Mul(&f2, &f4, &f2, &c) // 2**120 - 1 +- for i := 0; i < 6; i++ { // 2**126 - 2**6 +- p224Square(&f2, &f2, &c) +- } +- p224Mul(&f1, &f1, &f2, &c) // 2**126 - 1 +- p224Square(&f1, &f1, &c) // 2**127 - 2 +- p224Mul(&f1, &f1, in, &c) // 2**127 - 1 +- for i := 0; i < 97; i++ { // 2**224 - 2**97 +- p224Square(&f1, &f1, &c) +- } +- p224Mul(out, &f1, &f3, &c) // 2**224 - 2**96 - 1 +-} +- +-// p224Contract converts a FieldElement to its unique, minimal form. 
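p224Invert above raises its input to 2**224 - 2**96 - 1, which is exactly p - 2 for the field prime p = 2**224 - 2**96 + 1, i.e. a Fermat inverse computed by a fixed addition chain. The identity behind the chain is easy to cross-check with math/big; this is a standalone verification sketch, not part of the patch:

```go
package main

import (
	"fmt"
	"math/big"
)

func main() {
	// p = 2^224 - 2^96 + 1, the P-224 field prime from initP224 above.
	p := new(big.Int).Lsh(big.NewInt(1), 224)
	p.Sub(p, new(big.Int).Lsh(big.NewInt(1), 96))
	p.Add(p, big.NewInt(1))

	a := big.NewInt(123456789)
	// Fermat: a^(p-2) == a^-1 (mod p) for a != 0; p-2 = 2^224 - 2^96 - 1
	// is the exponent p224Invert's square/multiply chain computes.
	exp := new(big.Int).Sub(p, big.NewInt(2))
	inv1 := new(big.Int).Exp(a, exp, p)
	inv2 := new(big.Int).ModInverse(a, p)
	fmt.Println(inv1.Cmp(inv2) == 0) // expect: true
}
```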
+-// +-// On entry, in[i] < 2**29 +-// On exit, in[i] < 2**28 +-func p224Contract(out, in *p224FieldElement) { +- copy(out[:], in[:]) +- +- for i := 0; i < 7; i++ { +- out[i+1] += out[i] >> 28 +- out[i] &= bottom28Bits +- } +- top := out[7] >> 28 +- out[7] &= bottom28Bits +- +- out[0] -= top +- out[3] += top << 12 +- +- // We may just have made out[i] negative. So we carry down. If we made +- // out[0] negative then we know that out[3] is sufficiently positive +- // because we just added to it. +- for i := 0; i < 3; i++ { +- mask := uint32(int32(out[i]) >> 31) +- out[i] += (1 << 28) & mask +- out[i+1] -= 1 & mask +- } +- +- // We might have pushed out[3] over 2**28 so we perform another, partial, +- // carry chain. +- for i := 3; i < 7; i++ { +- out[i+1] += out[i] >> 28 +- out[i] &= bottom28Bits +- } +- top = out[7] >> 28 +- out[7] &= bottom28Bits +- +- // Eliminate top while maintaining the same value mod p. +- out[0] -= top +- out[3] += top << 12 +- +- // There are two cases to consider for out[3]: +- // 1) The first time that we eliminated top, we didn't push out[3] over +- // 2**28. In this case, the partial carry chain didn't change any values +- // and top is zero. +- // 2) We did push out[3] over 2**28 the first time that we eliminated top. +- // The first value of top was in [0..16), therefore, prior to eliminating +- // the first top, 0xfff1000 <= out[3] <= 0xfffffff. Therefore, after +- // overflowing and being reduced by the second carry chain, out[3] <= +- // 0xf000. Thus it cannot have overflowed when we eliminated top for the +- // second time. +- +- // Again, we may just have made out[0] negative, so do the same carry down. +- // As before, if we made out[0] negative then we know that out[3] is +- // sufficiently positive. +- for i := 0; i < 3; i++ { +- mask := uint32(int32(out[i]) >> 31) +- out[i] += (1 << 28) & mask +- out[i+1] -= 1 & mask +- } +- +- // Now we see if the value is >= p and, if so, subtract p. +- +- // First we build a mask from the top four limbs, which must all be +- // equal to bottom28Bits if the whole value is >= p. If top4AllOnes +- // ends up with any zero bits in the bottom 28 bits, then this wasn't +- // true. +- top4AllOnes := uint32(0xffffffff) +- for i := 4; i < 8; i++ { +- top4AllOnes &= out[i] +- } +- top4AllOnes |= 0xf0000000 +- // Now we replicate any zero bits to all the bits in top4AllOnes. +- top4AllOnes &= top4AllOnes >> 16 +- top4AllOnes &= top4AllOnes >> 8 +- top4AllOnes &= top4AllOnes >> 4 +- top4AllOnes &= top4AllOnes >> 2 +- top4AllOnes &= top4AllOnes >> 1 +- top4AllOnes = uint32(int32(top4AllOnes<<31) >> 31) +- +- // Now we test whether the bottom three limbs are non-zero. +- bottom3NonZero := out[0] | out[1] | out[2] +- bottom3NonZero |= bottom3NonZero >> 16 +- bottom3NonZero |= bottom3NonZero >> 8 +- bottom3NonZero |= bottom3NonZero >> 4 +- bottom3NonZero |= bottom3NonZero >> 2 +- bottom3NonZero |= bottom3NonZero >> 1 +- bottom3NonZero = uint32(int32(bottom3NonZero<<31) >> 31) +- +- // Everything depends on the value of out[3]. 
+- // If it's > 0xffff000 and top4AllOnes != 0 then the whole value is >= p +- // If it's = 0xffff000 and top4AllOnes != 0 and bottom3NonZero != 0, +- // then the whole value is >= p +- // If it's < 0xffff000, then the whole value is < p +- n := out[3] - 0xffff000 +- out3Equal := n +- out3Equal |= out3Equal >> 16 +- out3Equal |= out3Equal >> 8 +- out3Equal |= out3Equal >> 4 +- out3Equal |= out3Equal >> 2 +- out3Equal |= out3Equal >> 1 +- out3Equal = ^uint32(int32(out3Equal<<31) >> 31) +- +- // If out[3] > 0xffff000 then n's MSB will be zero. +- out3GT := ^uint32(int32(n) >> 31) +- +- mask := top4AllOnes & ((out3Equal & bottom3NonZero) | out3GT) +- out[0] -= 1 & mask +- out[3] -= 0xffff000 & mask +- out[4] -= 0xfffffff & mask +- out[5] -= 0xfffffff & mask +- out[6] -= 0xfffffff & mask +- out[7] -= 0xfffffff & mask +-} +- +-// Group element functions. +-// +-// These functions deal with group elements. The group is an elliptic curve +-// group with a = -3 defined in FIPS 186-3, section D.2.2. +- +-// p224AddJacobian computes *out = a+b where a != b. +-func p224AddJacobian(x3, y3, z3, x1, y1, z1, x2, y2, z2 *p224FieldElement) { +- // See http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-p224Add-2007-bl +- var z1z1, z2z2, u1, u2, s1, s2, h, i, j, r, v p224FieldElement +- var c p224LargeFieldElement +- +- z1IsZero := p224IsZero(z1) +- z2IsZero := p224IsZero(z2) +- +- // Z1Z1 = Z1² +- p224Square(&z1z1, z1, &c) +- // Z2Z2 = Z2² +- p224Square(&z2z2, z2, &c) +- // U1 = X1*Z2Z2 +- p224Mul(&u1, x1, &z2z2, &c) +- // U2 = X2*Z1Z1 +- p224Mul(&u2, x2, &z1z1, &c) +- // S1 = Y1*Z2*Z2Z2 +- p224Mul(&s1, z2, &z2z2, &c) +- p224Mul(&s1, y1, &s1, &c) +- // S2 = Y2*Z1*Z1Z1 +- p224Mul(&s2, z1, &z1z1, &c) +- p224Mul(&s2, y2, &s2, &c) +- // H = U2-U1 +- p224Sub(&h, &u2, &u1) +- p224Reduce(&h) +- xEqual := p224IsZero(&h) +- // I = (2*H)² +- for j := 0; j < 8; j++ { +- i[j] = h[j] << 1 +- } +- p224Reduce(&i) +- p224Square(&i, &i, &c) +- // J = H*I +- p224Mul(&j, &h, &i, &c) +- // r = 2*(S2-S1) +- p224Sub(&r, &s2, &s1) +- p224Reduce(&r) +- yEqual := p224IsZero(&r) +- if xEqual == 1 && yEqual == 1 && z1IsZero == 0 && z2IsZero == 0 { +- p224DoubleJacobian(x3, y3, z3, x1, y1, z1) +- return +- } +- for i := 0; i < 8; i++ { +- r[i] <<= 1 +- } +- p224Reduce(&r) +- // V = U1*I +- p224Mul(&v, &u1, &i, &c) +- // Z3 = ((Z1+Z2)²-Z1Z1-Z2Z2)*H +- p224Add(&z1z1, &z1z1, &z2z2) +- p224Add(&z2z2, z1, z2) +- p224Reduce(&z2z2) +- p224Square(&z2z2, &z2z2, &c) +- p224Sub(z3, &z2z2, &z1z1) +- p224Reduce(z3) +- p224Mul(z3, z3, &h, &c) +- // X3 = r²-J-2*V +- for i := 0; i < 8; i++ { +- z1z1[i] = v[i] << 1 +- } +- p224Add(&z1z1, &j, &z1z1) +- p224Reduce(&z1z1) +- p224Square(x3, &r, &c) +- p224Sub(x3, x3, &z1z1) +- p224Reduce(x3) +- // Y3 = r*(V-X3)-2*S1*J +- for i := 0; i < 8; i++ { +- s1[i] <<= 1 +- } +- p224Mul(&s1, &s1, &j, &c) +- p224Sub(&z1z1, &v, x3) +- p224Reduce(&z1z1) +- p224Mul(&z1z1, &z1z1, &r, &c) +- p224Sub(y3, &z1z1, &s1) +- p224Reduce(y3) +- +- p224CopyConditional(x3, x2, z1IsZero) +- p224CopyConditional(x3, x1, z2IsZero) +- p224CopyConditional(y3, y2, z1IsZero) +- p224CopyConditional(y3, y1, z2IsZero) +- p224CopyConditional(z3, z2, z1IsZero) +- p224CopyConditional(z3, z1, z2IsZero) +-} +- +-// p224DoubleJacobian computes *out = a+a. 
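p224Contract above builds branch-free comparisons by folding a word onto itself (top4AllOnes, bottom3NonZero, out3Equal) and sign-extending the resulting low bit into a full mask. The two folds in isolation, with illustrative names, not part of the patch:

```go
package main

import "fmt"

// nonZeroMask OR-folds x so bit 0 becomes the OR of all 32 bits, then
// sign-extends it: 0xffffffff iff x != 0 (the bottom3NonZero idiom).
func nonZeroMask(x uint32) uint32 {
	x |= x >> 16
	x |= x >> 8
	x |= x >> 4
	x |= x >> 2
	x |= x >> 1
	return uint32(int32(x<<31) >> 31)
}

// allOnesMask AND-folds x so bit 0 becomes the AND of all 32 bits:
// 0xffffffff iff every bit of x is set (the top4AllOnes idiom).
func allOnesMask(x uint32) uint32 {
	x &= x >> 16
	x &= x >> 8
	x &= x >> 4
	x &= x >> 2
	x &= x >> 1
	return uint32(int32(x<<31) >> 31)
}

func main() {
	fmt.Printf("%08x %08x\n", nonZeroMask(0), nonZeroMask(4096))        // 00000000 ffffffff
	fmt.Printf("%08x %08x\n", allOnesMask(^uint32(0)), allOnesMask(42)) // ffffffff 00000000
}
```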
+-func p224DoubleJacobian(x3, y3, z3, x1, y1, z1 *p224FieldElement) { +- var delta, gamma, beta, alpha, t p224FieldElement +- var c p224LargeFieldElement +- +- p224Square(&delta, z1, &c) +- p224Square(&gamma, y1, &c) +- p224Mul(&beta, x1, &gamma, &c) +- +- // alpha = 3*(X1-delta)*(X1+delta) +- p224Add(&t, x1, &delta) +- for i := 0; i < 8; i++ { +- t[i] += t[i] << 1 +- } +- p224Reduce(&t) +- p224Sub(&alpha, x1, &delta) +- p224Reduce(&alpha) +- p224Mul(&alpha, &alpha, &t, &c) +- +- // Z3 = (Y1+Z1)²-gamma-delta +- p224Add(z3, y1, z1) +- p224Reduce(z3) +- p224Square(z3, z3, &c) +- p224Sub(z3, z3, &gamma) +- p224Reduce(z3) +- p224Sub(z3, z3, &delta) +- p224Reduce(z3) +- +- // X3 = alpha²-8*beta +- for i := 0; i < 8; i++ { +- delta[i] = beta[i] << 3 +- } +- p224Reduce(&delta) +- p224Square(x3, &alpha, &c) +- p224Sub(x3, x3, &delta) +- p224Reduce(x3) +- +- // Y3 = alpha*(4*beta-X3)-8*gamma² +- for i := 0; i < 8; i++ { +- beta[i] <<= 2 +- } +- p224Sub(&beta, &beta, x3) +- p224Reduce(&beta) +- p224Square(&gamma, &gamma, &c) +- for i := 0; i < 8; i++ { +- gamma[i] <<= 3 +- } +- p224Reduce(&gamma) +- p224Mul(y3, &alpha, &beta, &c) +- p224Sub(y3, y3, &gamma) +- p224Reduce(y3) +-} +- +-// p224CopyConditional sets *out = *in iff the least-significant-bit of control +-// is true, and it runs in constant time. +-func p224CopyConditional(out, in *p224FieldElement, control uint32) { +- control <<= 31 +- control = uint32(int32(control) >> 31) +- +- for i := 0; i < 8; i++ { +- out[i] ^= (out[i] ^ in[i]) & control +- } +-} +- +-func p224ScalarMult(outX, outY, outZ, inX, inY, inZ *p224FieldElement, scalar []byte) { +- var xx, yy, zz p224FieldElement +- for i := 0; i < 8; i++ { +- outX[i] = 0 +- outY[i] = 0 +- outZ[i] = 0 +- } +- +- for _, byte := range scalar { +- for bitNum := uint(0); bitNum < 8; bitNum++ { +- p224DoubleJacobian(outX, outY, outZ, outX, outY, outZ) +- bit := uint32((byte >> (7 - bitNum)) & 1) +- p224AddJacobian(&xx, &yy, &zz, inX, inY, inZ, outX, outY, outZ) +- p224CopyConditional(outX, &xx, bit) +- p224CopyConditional(outY, &yy, bit) +- p224CopyConditional(outZ, &zz, bit) +- } +- } +-} +- +-// p224ToAffine converts from Jacobian to affine form. +-func p224ToAffine(x, y, z *p224FieldElement) (*big.Int, *big.Int) { +- var zinv, zinvsq, outx, outy p224FieldElement +- var tmp p224LargeFieldElement +- +- if isPointAtInfinity := p224IsZero(z); isPointAtInfinity == 1 { +- return new(big.Int), new(big.Int) +- } +- +- p224Invert(&zinv, z) +- p224Square(&zinvsq, &zinv, &tmp) +- p224Mul(x, x, &zinvsq, &tmp) +- p224Mul(&zinvsq, &zinvsq, &zinv, &tmp) +- p224Mul(y, y, &zinvsq, &tmp) +- +- p224Contract(&outx, x) +- p224Contract(&outy, y) +- return p224ToBig(&outx), p224ToBig(&outy) +-} +- +-// get28BitsFromEnd returns the least-significant 28 bits from buf>>shift, +-// where buf is interpreted as a big-endian number. +-func get28BitsFromEnd(buf []byte, shift uint) (uint32, []byte) { +- var ret uint32 +- +- for i := uint(0); i < 4; i++ { +- var b byte +- if l := len(buf); l > 0 { +- b = buf[l-1] +- // We don't remove the byte if we're about to return and we're not +- // reading all of it. +- if i != 3 || shift == 4 { +- buf = buf[:l-1] +- } +- } +- ret |= uint32(b) << (8 * i) >> shift +- } +- ret &= bottom28Bits +- return ret, buf +-} +- +-// p224FromBig sets *out = *in. 
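p224ScalarMult above is a textbook MSB-first double-and-add loop in which the add is applied through p224CopyConditional, so the scalar bits never select a branch. The loop shape transplanted onto a toy additive group, where doubling is addition and the result is simply k times the base; illustrative only, not from the patch:

```go
package main

import "fmt"

// scalarMultShape mirrors the loop structure of p224ScalarMult: the
// accumulator is doubled once per scalar bit and the base is conditionally
// added, most significant bit first, so the result is k * base.  (The real
// code replaces the branch with a constant-time conditional copy.)
func scalarMultShape(base uint64, scalar []byte) uint64 {
	var acc uint64
	for _, b := range scalar {
		for bit := uint(0); bit < 8; bit++ {
			acc += acc // "double"
			if (b>>(7-bit))&1 == 1 {
				acc += base // "add"
			}
		}
	}
	return acc
}

func main() {
	fmt.Println(scalarMultShape(1, []byte{0x01, 0x02})) // 258 == 0x0102
}
```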
+-func p224FromBig(out *p224FieldElement, in *big.Int) { +- bytes := in.Bytes() +- out[0], bytes = get28BitsFromEnd(bytes, 0) +- out[1], bytes = get28BitsFromEnd(bytes, 4) +- out[2], bytes = get28BitsFromEnd(bytes, 0) +- out[3], bytes = get28BitsFromEnd(bytes, 4) +- out[4], bytes = get28BitsFromEnd(bytes, 0) +- out[5], bytes = get28BitsFromEnd(bytes, 4) +- out[6], bytes = get28BitsFromEnd(bytes, 0) +- out[7], bytes = get28BitsFromEnd(bytes, 4) +-} +- +-// p224ToBig returns in as a big.Int. +-func p224ToBig(in *p224FieldElement) *big.Int { +- var buf [28]byte +- buf[27] = byte(in[0]) +- buf[26] = byte(in[0] >> 8) +- buf[25] = byte(in[0] >> 16) +- buf[24] = byte(((in[0] >> 24) & 0x0f) | (in[1]<<4)&0xf0) +- +- buf[23] = byte(in[1] >> 4) +- buf[22] = byte(in[1] >> 12) +- buf[21] = byte(in[1] >> 20) +- +- buf[20] = byte(in[2]) +- buf[19] = byte(in[2] >> 8) +- buf[18] = byte(in[2] >> 16) +- buf[17] = byte(((in[2] >> 24) & 0x0f) | (in[3]<<4)&0xf0) +- +- buf[16] = byte(in[3] >> 4) +- buf[15] = byte(in[3] >> 12) +- buf[14] = byte(in[3] >> 20) +- +- buf[13] = byte(in[4]) +- buf[12] = byte(in[4] >> 8) +- buf[11] = byte(in[4] >> 16) +- buf[10] = byte(((in[4] >> 24) & 0x0f) | (in[5]<<4)&0xf0) +- +- buf[9] = byte(in[5] >> 4) +- buf[8] = byte(in[5] >> 12) +- buf[7] = byte(in[5] >> 20) +- +- buf[6] = byte(in[6]) +- buf[5] = byte(in[6] >> 8) +- buf[4] = byte(in[6] >> 16) +- buf[3] = byte(((in[6] >> 24) & 0x0f) | (in[7]<<4)&0xf0) +- +- buf[2] = byte(in[7] >> 4) +- buf[1] = byte(in[7] >> 12) +- buf[0] = byte(in[7] >> 20) +- +- return new(big.Int).SetBytes(buf[:]) +-} +--- libgo/go/crypto/elliptic/p224_test.go.jj 2014-02-18 18:03:31.615598561 +0100 ++++ libgo/go/crypto/elliptic/p224_test.go 2014-02-15 11:40:56.191557928 +0100 +@@ -1,47 +0,0 @@ +-// Copyright 2012 The Go Authors. All rights reserved. +-// Use of this source code is governed by a BSD-style +-// license that can be found in the LICENSE file. 
+- +-package elliptic +- +-import ( +- "math/big" +- "testing" +-) +- +-var toFromBigTests = []string{ +- "0", +- "1", +- "23", +- "b70e0cb46bb4bf7f321390b94a03c1d356c01122343280d6105c1d21", +- "706a46d476dcb76798e6046d89474788d164c18032d268fd10704fa6", +-} +- +-func p224AlternativeToBig(in *p224FieldElement) *big.Int { +- ret := new(big.Int) +- tmp := new(big.Int) +- +- for i := uint(0); i < 8; i++ { +- tmp.SetInt64(int64(in[i])) +- tmp.Lsh(tmp, 28*i) +- ret.Add(ret, tmp) +- } +- ret.Mod(ret, p224.P) +- return ret +-} +- +-func TestToFromBig(t *testing.T) { +- for i, test := range toFromBigTests { +- n, _ := new(big.Int).SetString(test, 16) +- var x p224FieldElement +- p224FromBig(&x, n) +- m := p224ToBig(&x) +- if n.Cmp(m) != 0 { +- t.Errorf("#%d: %x != %x", i, n, m) +- } +- q := p224AlternativeToBig(&x) +- if n.Cmp(q) != 0 { +- t.Errorf("#%d: %x != %x (alternative)", i, n, m) +- } +- } +-} diff --git a/gcc48-libgomp-20160715.patch b/gcc48-libgomp-20160715.patch new file mode 100644 index 0000000..9b6a61e --- /dev/null +++ b/gcc48-libgomp-20160715.patch @@ -0,0 +1,10653 @@ +--- libgomp/config/linux/wait.h.jj 2013-01-31 20:29:10.091548989 +0100 ++++ libgomp/config/linux/wait.h 2016-07-13 16:57:18.902355979 +0200 +@@ -34,13 +34,13 @@ + + #define FUTEX_WAIT 0 + #define FUTEX_WAKE 1 +-#define FUTEX_PRIVATE_FLAG 128L ++#define FUTEX_PRIVATE_FLAG 128 + + #ifdef HAVE_ATTRIBUTE_VISIBILITY + # pragma GCC visibility push(hidden) + #endif + +-extern long int gomp_futex_wait, gomp_futex_wake; ++extern int gomp_futex_wait, gomp_futex_wake; + + #include + +@@ -48,7 +48,9 @@ static inline int do_spin (int *addr, in + { + unsigned long long i, count = gomp_spin_count_var; + +- if (__builtin_expect (gomp_managed_threads > gomp_available_cpus, 0)) ++ if (__builtin_expect (__atomic_load_n (&gomp_managed_threads, ++ MEMMODEL_RELAXED) ++ > gomp_available_cpus, 0)) + count = gomp_throttled_spin_count_var; + for (i = 0; i < count; i++) + if (__builtin_expect (__atomic_load_n (addr, MEMMODEL_RELAXED) != val, 0)) +--- libgomp/config/linux/affinity.c.jj 2014-05-15 10:56:37.499502573 +0200 ++++ libgomp/config/linux/affinity.c 2016-07-13 16:57:18.902355979 +0200 +@@ -352,6 +352,45 @@ gomp_affinity_print_place (void *p) + fprintf (stderr, ":%lu", len); + } + ++int ++omp_get_place_num_procs (int place_num) ++{ ++ if (place_num < 0 || place_num >= gomp_places_list_len) ++ return 0; ++ ++ cpu_set_t *cpusetp = (cpu_set_t *) gomp_places_list[place_num]; ++ return gomp_cpuset_popcount (gomp_cpuset_size, cpusetp); ++} ++ ++void ++omp_get_place_proc_ids (int place_num, int *ids) ++{ ++ if (place_num < 0 || place_num >= gomp_places_list_len) ++ return; ++ ++ cpu_set_t *cpusetp = (cpu_set_t *) gomp_places_list[place_num]; ++ unsigned long i, max = 8 * gomp_cpuset_size; ++ for (i = 0; i < max; i++) ++ if (CPU_ISSET_S (i, gomp_cpuset_size, cpusetp)) ++ *ids++ = i; ++} ++ ++void ++gomp_get_place_proc_ids_8 (int place_num, int64_t *ids) ++{ ++ if (place_num < 0 || place_num >= gomp_places_list_len) ++ return; ++ ++ cpu_set_t *cpusetp = (cpu_set_t *) gomp_places_list[place_num]; ++ unsigned long i, max = 8 * gomp_cpuset_size; ++ for (i = 0; i < max; i++) ++ if (CPU_ISSET_S (i, gomp_cpuset_size, cpusetp)) ++ *ids++ = i; ++} ++ ++ialias(omp_get_place_num_procs) ++ialias(omp_get_place_proc_ids) ++ + #else + + #include "../posix/affinity.c" +--- libgomp/config/linux/mutex.c.jj 2013-01-21 16:00:38.220917670 +0100 ++++ libgomp/config/linux/mutex.c 2016-07-13 16:57:18.870356375 +0200 +@@ -28,8 +28,8 @@ + + #include "wait.h" + +-long int 
gomp_futex_wake = FUTEX_WAKE | FUTEX_PRIVATE_FLAG; +-long int gomp_futex_wait = FUTEX_WAIT | FUTEX_PRIVATE_FLAG; ++int gomp_futex_wake = FUTEX_WAKE | FUTEX_PRIVATE_FLAG; ++int gomp_futex_wait = FUTEX_WAIT | FUTEX_PRIVATE_FLAG; + + void + gomp_mutex_lock_slow (gomp_mutex_t *mutex, int oldval) +--- libgomp/config/posix/affinity.c.jj 2014-05-15 10:56:37.987498844 +0200 ++++ libgomp/config/posix/affinity.c 2016-07-15 12:08:28.410015743 +0200 +@@ -113,3 +113,27 @@ gomp_affinity_print_place (void *p) + { + (void) p; + } ++ ++int ++omp_get_place_num_procs (int place_num) ++{ ++ (void) place_num; ++ return 0; ++} ++ ++void ++omp_get_place_proc_ids (int place_num, int *ids) ++{ ++ (void) place_num; ++ (void) ids; ++} ++ ++void ++gomp_get_place_proc_ids_8 (int place_num, int64_t *ids) ++{ ++ (void) place_num; ++ (void) ids; ++} ++ ++ialias(omp_get_place_num_procs) ++ialias(omp_get_place_proc_ids) +--- libgomp/loop_ull.c.jj 2013-01-21 16:00:46.477871806 +0100 ++++ libgomp/loop_ull.c 2016-07-13 16:57:18.918355780 +0200 +@@ -174,15 +174,15 @@ GOMP_loop_ull_runtime_start (bool up, go + { + case GFS_STATIC: + return gomp_loop_ull_static_start (up, start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_DYNAMIC: + return gomp_loop_ull_dynamic_start (up, start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_GUIDED: + return gomp_loop_ull_guided_start (up, start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_AUTO: + /* For now map to schedule(static), later on we could play with feedback +@@ -278,15 +278,15 @@ GOMP_loop_ull_ordered_runtime_start (boo + { + case GFS_STATIC: + return gomp_loop_ull_ordered_static_start (up, start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_DYNAMIC: + return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_GUIDED: + return gomp_loop_ull_ordered_guided_start (up, start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_AUTO: + /* For now map to schedule(static), later on we could play with feedback +@@ -298,6 +298,114 @@ GOMP_loop_ull_ordered_runtime_start (boo + } + } + ++/* The *_doacross_*_start routines are similar. The only difference is that ++ this work-share construct is initialized to expect an ORDERED(N) - DOACROSS ++ section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1 ++ and other COUNTS array elements tell the library number of iterations ++ in the ordered inner loops. 
*/ ++ ++static bool ++gomp_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts, ++ gomp_ull chunk_size, gomp_ull *istart, ++ gomp_ull *iend) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ ++ thr->ts.static_trip = 0; ++ if (gomp_work_share_start (false)) ++ { ++ gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, ++ GFS_STATIC, chunk_size); ++ gomp_doacross_ull_init (ncounts, counts, chunk_size); ++ gomp_work_share_init_done (); ++ } ++ ++ return !gomp_iter_ull_static_next (istart, iend); ++} ++ ++static bool ++gomp_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts, ++ gomp_ull chunk_size, gomp_ull *istart, ++ gomp_ull *iend) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ bool ret; ++ ++ if (gomp_work_share_start (false)) ++ { ++ gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, ++ GFS_DYNAMIC, chunk_size); ++ gomp_doacross_ull_init (ncounts, counts, chunk_size); ++ gomp_work_share_init_done (); ++ } ++ ++#if defined HAVE_SYNC_BUILTINS && defined __LP64__ ++ ret = gomp_iter_ull_dynamic_next (istart, iend); ++#else ++ gomp_mutex_lock (&thr->ts.work_share->lock); ++ ret = gomp_iter_ull_dynamic_next_locked (istart, iend); ++ gomp_mutex_unlock (&thr->ts.work_share->lock); ++#endif ++ ++ return ret; ++} ++ ++static bool ++gomp_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts, ++ gomp_ull chunk_size, gomp_ull *istart, ++ gomp_ull *iend) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ bool ret; ++ ++ if (gomp_work_share_start (false)) ++ { ++ gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, ++ GFS_GUIDED, chunk_size); ++ gomp_doacross_ull_init (ncounts, counts, chunk_size); ++ gomp_work_share_init_done (); ++ } ++ ++#if defined HAVE_SYNC_BUILTINS && defined __LP64__ ++ ret = gomp_iter_ull_guided_next (istart, iend); ++#else ++ gomp_mutex_lock (&thr->ts.work_share->lock); ++ ret = gomp_iter_ull_guided_next_locked (istart, iend); ++ gomp_mutex_unlock (&thr->ts.work_share->lock); ++#endif ++ ++ return ret; ++} ++ ++bool ++GOMP_loop_ull_doacross_runtime_start (unsigned ncounts, gomp_ull *counts, ++ gomp_ull *istart, gomp_ull *iend) ++{ ++ struct gomp_task_icv *icv = gomp_icv (false); ++ switch (icv->run_sched_var) ++ { ++ case GFS_STATIC: ++ return gomp_loop_ull_doacross_static_start (ncounts, counts, ++ icv->run_sched_chunk_size, ++ istart, iend); ++ case GFS_DYNAMIC: ++ return gomp_loop_ull_doacross_dynamic_start (ncounts, counts, ++ icv->run_sched_chunk_size, ++ istart, iend); ++ case GFS_GUIDED: ++ return gomp_loop_ull_doacross_guided_start (ncounts, counts, ++ icv->run_sched_chunk_size, ++ istart, iend); ++ case GFS_AUTO: ++ /* For now map to schedule(static), later on we could play with feedback ++ driven choice. */ ++ return gomp_loop_ull_doacross_static_start (ncounts, counts, ++ 0, istart, iend); ++ default: ++ abort (); ++ } ++} ++ + /* The *_next routines are called when the thread completes processing of + the iteration block currently assigned to it. 
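The doacross entry points above implement OpenMP 4.5 ordered(n) loops with depend(sink)/depend(source) clauses: COUNTS[0] drives the worksharing loop and the remaining COUNTS elements give the trip counts of the inner ordered loops. A minimal source-level example of the construct they support, assuming a compiler with OpenMP 4.5 doacross support; it is not part of the patch:

```c
/* Build with: gcc -fopenmp doacross.c */
#include <stdio.h>

#define N 64

int main (void)
{
  static int a[N];
  a[0] = 1;
  int i;
#pragma omp parallel for ordered(1)
  for (i = 1; i < N; i++)
    {
#pragma omp ordered depend(sink: i - 1)   /* wait for iteration i-1 */
      a[i] = a[i - 1] + 1;
#pragma omp ordered depend(source)        /* post completion of iteration i */
    }
  printf ("%d\n", a[N - 1]);              /* prints: 64 */
  return 0;
}
```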
If the work-share + construct is bound directly to a parallel construct, then the iteration +@@ -457,6 +565,10 @@ extern __typeof(gomp_loop_ull_dynamic_st + __attribute__((alias ("gomp_loop_ull_dynamic_start"))); + extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start + __attribute__((alias ("gomp_loop_ull_guided_start"))); ++extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_nonmonotonic_dynamic_start ++ __attribute__((alias ("gomp_loop_ull_dynamic_start"))); ++extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_nonmonotonic_guided_start ++ __attribute__((alias ("gomp_loop_ull_guided_start"))); + + extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start + __attribute__((alias ("gomp_loop_ull_ordered_static_start"))); +@@ -465,12 +577,23 @@ extern __typeof(gomp_loop_ull_ordered_dy + extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start + __attribute__((alias ("gomp_loop_ull_ordered_guided_start"))); + ++extern __typeof(gomp_loop_ull_doacross_static_start) GOMP_loop_ull_doacross_static_start ++ __attribute__((alias ("gomp_loop_ull_doacross_static_start"))); ++extern __typeof(gomp_loop_ull_doacross_dynamic_start) GOMP_loop_ull_doacross_dynamic_start ++ __attribute__((alias ("gomp_loop_ull_doacross_dynamic_start"))); ++extern __typeof(gomp_loop_ull_doacross_guided_start) GOMP_loop_ull_doacross_guided_start ++ __attribute__((alias ("gomp_loop_ull_doacross_guided_start"))); ++ + extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next + __attribute__((alias ("gomp_loop_ull_static_next"))); + extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next + __attribute__((alias ("gomp_loop_ull_dynamic_next"))); + extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next + __attribute__((alias ("gomp_loop_ull_guided_next"))); ++extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_nonmonotonic_dynamic_next ++ __attribute__((alias ("gomp_loop_ull_dynamic_next"))); ++extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_nonmonotonic_guided_next ++ __attribute__((alias ("gomp_loop_ull_guided_next"))); + + extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next + __attribute__((alias ("gomp_loop_ull_ordered_static_next"))); +@@ -507,6 +630,25 @@ GOMP_loop_ull_guided_start (bool up, gom + } + + bool ++GOMP_loop_ull_nonmonotonic_dynamic_start (bool up, gomp_ull start, ++ gomp_ull end, gomp_ull incr, ++ gomp_ull chunk_size, ++ gomp_ull *istart, gomp_ull *iend) ++{ ++ return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart, ++ iend); ++} ++ ++bool ++GOMP_loop_ull_nonmonotonic_guided_start (bool up, gomp_ull start, gomp_ull end, ++ gomp_ull incr, gomp_ull chunk_size, ++ gomp_ull *istart, gomp_ull *iend) ++{ ++ return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart, ++ iend); ++} ++ ++bool + GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end, + gomp_ull incr, gomp_ull chunk_size, + gomp_ull *istart, gomp_ull *iend) +@@ -534,6 +676,33 @@ GOMP_loop_ull_ordered_guided_start (bool + } + + bool ++GOMP_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts, ++ gomp_ull chunk_size, gomp_ull *istart, ++ gomp_ull *iend) ++{ ++ return gomp_loop_ull_doacross_static_start (ncounts, counts, chunk_size, ++ istart, iend); ++} ++ ++bool ++GOMP_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts, ++ gomp_ull chunk_size, gomp_ull *istart, ++ gomp_ull *iend) ++{ ++ return 
gomp_loop_ull_doacross_dynamic_start (ncounts, counts, chunk_size, ++ istart, iend); ++} ++ ++bool ++GOMP_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts, ++ gomp_ull chunk_size, gomp_ull *istart, ++ gomp_ull *iend) ++{ ++ return gomp_loop_ull_doacross_guided_start (ncounts, counts, chunk_size, ++ istart, iend); ++} ++ ++bool + GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend) + { + return gomp_loop_ull_static_next (istart, iend); +@@ -550,6 +719,18 @@ GOMP_loop_ull_guided_next (gomp_ull *ist + { + return gomp_loop_ull_guided_next (istart, iend); + } ++ ++bool ++GOMP_loop_ull_nonmonotonic_dynamic_next (gomp_ull *istart, gomp_ull *iend) ++{ ++ return gomp_loop_ull_dynamic_next (istart, iend); ++} ++ ++bool ++GOMP_loop_ull_nonmonotonic_guided_next (gomp_ull *istart, gomp_ull *iend) ++{ ++ return gomp_loop_ull_guided_next (istart, iend); ++} + + bool + GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend) +--- libgomp/team.c.jj 2014-05-15 10:56:32.092524669 +0200 ++++ libgomp/team.c 2016-07-13 17:58:01.907291111 +0200 +@@ -133,6 +133,25 @@ gomp_thread_start (void *xdata) + return NULL; + } + ++static inline struct gomp_team * ++get_last_team (unsigned nthreads) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->ts.team == NULL) ++ { ++ struct gomp_thread_pool *pool = thr->thread_pool; ++ if (pool != NULL) ++ { ++ struct gomp_team *last_team = pool->last_team; ++ if (last_team != NULL && last_team->nthreads == nthreads) ++ { ++ pool->last_team = NULL; ++ return last_team; ++ } ++ } ++ } ++ return NULL; ++} + + /* Create a new team data structure. */ + +@@ -140,18 +159,27 @@ struct gomp_team * + gomp_new_team (unsigned nthreads) + { + struct gomp_team *team; +- size_t size; + int i; + +- size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0]) +- + sizeof (team->implicit_task[0])); +- team = gomp_malloc (size); ++ team = get_last_team (nthreads); ++ if (team == NULL) ++ { ++ size_t extra = sizeof (team->ordered_release[0]) ++ + sizeof (team->implicit_task[0]); ++ team = gomp_malloc (sizeof (*team) + nthreads * extra); ++ ++#ifndef HAVE_SYNC_BUILTINS ++ gomp_mutex_init (&team->work_share_list_free_lock); ++#endif ++ gomp_barrier_init (&team->barrier, nthreads); ++ gomp_mutex_init (&team->task_lock); ++ ++ team->nthreads = nthreads; ++ } + + team->work_share_chunk = 8; + #ifdef HAVE_SYNC_BUILTINS + team->single_count = 0; +-#else +- gomp_mutex_init (&team->work_share_list_free_lock); + #endif + team->work_shares_to_free = &team->work_shares[0]; + gomp_init_work_share (&team->work_shares[0], false, nthreads); +@@ -162,15 +190,11 @@ gomp_new_team (unsigned nthreads) + team->work_shares[i].next_free = &team->work_shares[i + 1]; + team->work_shares[i].next_free = NULL; + +- team->nthreads = nthreads; +- gomp_barrier_init (&team->barrier, nthreads); +- + gomp_sem_init (&team->master_release, 0); + team->ordered_release = (void *) &team->implicit_task[nthreads]; + team->ordered_release[0] = &team->master_release; + +- gomp_mutex_init (&team->task_lock); +- team->task_queue = NULL; ++ priority_queue_init (&team->task_queue); + team->task_count = 0; + team->task_queued_count = 0; + team->task_running_count = 0; +@@ -186,8 +210,12 @@ gomp_new_team (unsigned nthreads) + static void + free_team (struct gomp_team *team) + { ++#ifndef HAVE_SYNC_BUILTINS ++ gomp_mutex_destroy (&team->work_share_list_free_lock); ++#endif + gomp_barrier_destroy (&team->barrier); + gomp_mutex_destroy (&team->task_lock); ++ priority_queue_free (&team->task_queue); 
+ free (team); + } + +@@ -258,6 +286,8 @@ gomp_free_thread (void *arg __attribute_ + free (pool); + thr->thread_pool = NULL; + } ++ if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0)) ++ gomp_team_end (); + if (thr->task != NULL) + { + struct gomp_task *task = thr->task; +@@ -287,7 +317,7 @@ gomp_team_start (void (*fn) (void *), vo + struct gomp_thread **affinity_thr = NULL; + + thr = gomp_thread (); +- nested = thr->ts.team != NULL; ++ nested = thr->ts.level; + if (__builtin_expect (thr->thread_pool == NULL, 0)) + { + thr->thread_pool = gomp_new_thread_pool (); +@@ -894,9 +924,6 @@ gomp_team_end (void) + while (ws != NULL); + } + gomp_sem_destroy (&team->master_release); +-#ifndef HAVE_SYNC_BUILTINS +- gomp_mutex_destroy (&team->work_share_list_free_lock); +-#endif + + if (__builtin_expect (thr->ts.team != NULL, 0) + || __builtin_expect (team->nthreads == 1, 0)) +--- libgomp/target.c.jj 2014-05-15 10:56:38.313498020 +0200 ++++ libgomp/target.c 2016-07-15 16:58:29.249328861 +0200 +@@ -22,14 +22,22 @@ + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +-/* This file handles the maintainence of threads in response to team +- creation and termination. */ ++/* This file contains the support of offloading. */ + ++#include "config.h" + #include "libgomp.h" ++#include "oacc-plugin.h" ++#include "oacc-int.h" ++#include "gomp-constants.h" + #include + #include + #include ++#ifdef HAVE_INTTYPES_H ++# include /* For PRIu64. */ ++#endif + #include ++#include ++#include + + attribute_hidden int + gomp_get_num_devices (void) +@@ -37,22 +45,87 @@ gomp_get_num_devices (void) + return 0; + } + +-/* Called when encountering a target directive. If DEVICE +- is -1, it means use device-var ICV. If it is -2 (or any other value +- larger than last available hw device, use host fallback. +- FN is address of host code, OPENMP_TARGET contains value of the +- __OPENMP_TARGET__ symbol in the shared library or binary that invokes +- GOMP_target. HOSTADDRS, SIZES and KINDS are arrays +- with MAPNUM entries, with addresses of the host objects, +- sizes of the host objects (resp. for pointer kind pointer bias +- and assumed sizeof (void *) size) and kinds. */ ++/* This function should be called from every offload image while loading. ++ It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of ++ the target, and TARGET_DATA needed by target plugin. */ + + void +-GOMP_target (int device, void (*fn) (void *), const void *openmp_target, +- size_t mapnum, void **hostaddrs, size_t *sizes, +- unsigned char *kinds) ++GOMP_offload_register_ver (unsigned version, const void *host_table, ++ int target_type, const void *target_data) ++{ ++ (void) version; ++ (void) host_table; ++ (void) target_type; ++ (void) target_data; ++} ++ ++void ++GOMP_offload_register (const void *host_table, int target_type, ++ const void *target_data) ++{ ++ (void) host_table; ++ (void) target_type; ++ (void) target_data; ++} ++ ++/* This function should be called from every offload image while unloading. ++ It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of ++ the target, and TARGET_DATA needed by target plugin. 
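In this stub target.c, gomp_get_num_devices returns 0 and every target region ends up in gomp_target_fallback, so offloaded code always runs on the host. A small probe program that would make that visible, assuming only the standard OpenMP 4.0 API; illustrative, not part of the patch:

```c
/* Build with: gcc -fopenmp probe.c */
#include <stdio.h>
#include <omp.h>

int main (void)
{
  printf ("devices: %d\n", omp_get_num_devices ()); /* 0 with this stub */
  int on_host = 0;
#pragma omp target map(tofrom: on_host)
  on_host = omp_is_initial_device ();
  printf ("ran on host: %d\n", on_host);            /* 1: host fallback */
  return 0;
}
```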
*/ ++ ++void ++GOMP_offload_unregister_ver (unsigned version, const void *host_table, ++ int target_type, const void *target_data) ++{ ++ (void) version; ++ (void) host_table; ++ (void) target_type; ++ (void) target_data; ++} ++ ++void ++GOMP_offload_unregister (const void *host_table, int target_type, ++ const void *target_data) ++{ ++ (void) host_table; ++ (void) target_type; ++ (void) target_data; ++} ++ ++/* This function initializes the target device, specified by DEVICEP. DEVICEP ++ must be locked on entry, and remains locked on return. */ ++ ++attribute_hidden void ++gomp_init_device (struct gomp_device_descr *devicep) ++{ ++ devicep->state = GOMP_DEVICE_INITIALIZED; ++} ++ ++attribute_hidden void ++gomp_unload_device (struct gomp_device_descr *devicep) ++{ ++} ++ ++/* Free address mapping tables. MM must be locked on entry, and remains locked ++ on return. */ ++ ++attribute_hidden void ++gomp_free_memmap (struct splay_tree_s *mem_map) ++{ ++ while (mem_map->root) ++ { ++ struct target_mem_desc *tgt = mem_map->root->key.tgt; ++ ++ splay_tree_remove (mem_map, &mem_map->root->key); ++ free (tgt->array); ++ free (tgt); ++ } ++} ++ ++/* Host fallback for GOMP_target{,_ext} routines. */ ++ ++static void ++gomp_target_fallback (void (*fn) (void *), void **hostaddrs) + { +- /* Host fallback. */ + struct gomp_thread old_thr, *thr = gomp_thread (); + old_thr = *thr; + memset (thr, '\0', sizeof (*thr)); +@@ -66,10 +139,167 @@ GOMP_target (int device, void (*fn) (voi + *thr = old_thr; + } + ++/* Calculate alignment and size requirements of a private copy of data shared ++ as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */ ++ ++static inline void ++calculate_firstprivate_requirements (size_t mapnum, size_t *sizes, ++ unsigned short *kinds, size_t *tgt_align, ++ size_t *tgt_size) ++{ ++ size_t i; ++ for (i = 0; i < mapnum; i++) ++ if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) ++ { ++ size_t align = (size_t) 1 << (kinds[i] >> 8); ++ if (*tgt_align < align) ++ *tgt_align = align; ++ *tgt_size = (*tgt_size + align - 1) & ~(align - 1); ++ *tgt_size += sizes[i]; ++ } ++} ++ ++/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */ ++ ++static inline void ++copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs, ++ size_t *sizes, unsigned short *kinds, size_t tgt_align, ++ size_t tgt_size) ++{ ++ uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); ++ if (al) ++ tgt += tgt_align - al; ++ tgt_size = 0; ++ size_t i; ++ for (i = 0; i < mapnum; i++) ++ if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) ++ { ++ size_t align = (size_t) 1 << (kinds[i] >> 8); ++ tgt_size = (tgt_size + align - 1) & ~(align - 1); ++ memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]); ++ hostaddrs[i] = tgt + tgt_size; ++ tgt_size = tgt_size + sizes[i]; ++ } ++} ++ ++/* Called when encountering a target directive. If DEVICE ++ is GOMP_DEVICE_ICV, it means use device-var ICV. If it is ++ GOMP_DEVICE_HOST_FALLBACK (or any value ++ larger than last available hw device), use host fallback. ++ FN is address of host code, UNUSED is part of the current ABI, but ++ we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays ++ with MAPNUM entries, with addresses of the host objects, ++ sizes of the host objects (resp. for pointer kind pointer bias ++ and assumed sizeof (void *) size) and kinds. 
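calculate_firstprivate_requirements and copy_firstprivate_data above stage all GOMP_MAP_FIRSTPRIVATE copies in one block, rounding each offset up with (size + align - 1) & ~(align - 1), the standard align-up for a power-of-two alignment. The arithmetic in isolation (align_up is an illustrative name, not from the patch):

```c
#include <stdio.h>
#include <stddef.h>

/* Round SIZE up to the next multiple of ALIGN; ALIGN must be a power of
   two, as it is in the patch (1 << (kinds[i] >> 8)).  */
static size_t
align_up (size_t size, size_t align)
{
  return (size + align - 1) & ~(align - 1);
}

int main (void)
{
  printf ("%zu %zu %zu\n",
	  align_up (13, 8), align_up (16, 8), align_up (1, 16));
  /* prints: 16 16 16 */
  return 0;
}
```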
*/ ++ ++void ++GOMP_target (int device, void (*fn) (void *), const void *unused, ++ size_t mapnum, void **hostaddrs, size_t *sizes, ++ unsigned char *kinds) ++{ ++ return gomp_target_fallback (fn, hostaddrs); ++} ++ ++/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present, ++ and several arguments have been added: ++ FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h. ++ DEPEND is array of dependencies, see GOMP_task for details. ++ ++ ARGS is a pointer to an array consisting of a variable number of both ++ device-independent and device-specific arguments, which can take one two ++ elements where the first specifies for which device it is intended, the type ++ and optionally also the value. If the value is not present in the first ++ one, the whole second element the actual value. The last element of the ++ array is a single NULL. Among the device independent can be for example ++ NUM_TEAMS and THREAD_LIMIT. ++ ++ NUM_TEAMS is positive if GOMP_teams will be called in the body with ++ that value, or 1 if teams construct is not present, or 0, if ++ teams construct does not have num_teams clause and so the choice is ++ implementation defined, and -1 if it can't be determined on the host ++ what value will GOMP_teams have on the device. ++ THREAD_LIMIT similarly is positive if GOMP_teams will be called in the ++ body with that value, or 0, if teams construct does not have thread_limit ++ clause or the teams construct is not present, or -1 if it can't be ++ determined on the host what value will GOMP_teams have on the device. */ ++ ++void ++GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, ++ void **hostaddrs, size_t *sizes, unsigned short *kinds, ++ unsigned int flags, void **depend, void **args) ++{ ++ size_t tgt_align = 0, tgt_size = 0; ++ bool fpc_done = false; ++ ++ if (flags & GOMP_TARGET_FLAG_NOWAIT) ++ { ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->ts.team ++ && !thr->task->final_task) ++ { ++ gomp_create_target_task (NULL, fn, mapnum, hostaddrs, ++ sizes, kinds, flags, depend, args, ++ GOMP_TARGET_TASK_BEFORE_MAP); ++ return; ++ } ++ } ++ ++ /* If there are depend clauses, but nowait is not present ++ (or we are in a final task), block the parent task until the ++ dependencies are resolved and then just continue with the rest ++ of the function as if it is a merged task. */ ++ if (depend != NULL) ++ { ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->task && thr->task->depend_hash) ++ { ++ /* If we might need to wait, copy firstprivate now. */ ++ calculate_firstprivate_requirements (mapnum, sizes, kinds, ++ &tgt_align, &tgt_size); ++ if (tgt_align) ++ { ++ char *tgt = gomp_alloca (tgt_size + tgt_align - 1); ++ copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, ++ tgt_align, tgt_size); ++ } ++ fpc_done = true; ++ gomp_task_maybe_wait_for_dependencies (depend); ++ } ++ } ++ ++ if (!fpc_done) ++ { ++ calculate_firstprivate_requirements (mapnum, sizes, kinds, ++ &tgt_align, &tgt_size); ++ if (tgt_align) ++ { ++ char *tgt = gomp_alloca (tgt_size + tgt_align - 1); ++ copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, ++ tgt_align, tgt_size); ++ } ++ } ++ gomp_target_fallback (fn, hostaddrs); ++} ++ ++/* Host fallback for GOMP_target_data{,_ext} routines. 
*/ ++ ++static void ++gomp_target_data_fallback (void) ++{ ++} ++ + void +-GOMP_target_data (int device, const void *openmp_target, size_t mapnum, ++GOMP_target_data (int device, const void *unused, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned char *kinds) + { ++ return gomp_target_data_fallback (); ++} ++ ++void ++GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs, ++ size_t *sizes, unsigned short *kinds) ++{ ++ return gomp_target_data_fallback (); + } + + void +@@ -78,12 +308,112 @@ GOMP_target_end_data (void) + } + + void +-GOMP_target_update (int device, const void *openmp_target, size_t mapnum, ++GOMP_target_update (int device, const void *unused, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned char *kinds) + { + } + + void ++GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs, ++ size_t *sizes, unsigned short *kinds, ++ unsigned int flags, void **depend) ++{ ++ /* If there are depend clauses, but nowait is not present, ++ block the parent task until the dependencies are resolved ++ and then just continue with the rest of the function as if it ++ is a merged task. Until we are able to schedule task during ++ variable mapping or unmapping, ignore nowait if depend clauses ++ are not present. */ ++ if (depend != NULL) ++ { ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->task && thr->task->depend_hash) ++ { ++ if ((flags & GOMP_TARGET_FLAG_NOWAIT) ++ && thr->ts.team ++ && !thr->task->final_task) ++ { ++ if (gomp_create_target_task (NULL, (void (*) (void *)) NULL, ++ mapnum, hostaddrs, sizes, kinds, ++ flags | GOMP_TARGET_FLAG_UPDATE, ++ depend, NULL, GOMP_TARGET_TASK_DATA)) ++ return; ++ } ++ else ++ { ++ struct gomp_team *team = thr->ts.team; ++ /* If parallel or taskgroup has been cancelled, don't start new ++ tasks. */ ++ if (team ++ && (gomp_team_barrier_cancelled (&team->barrier) ++ || (thr->task->taskgroup ++ && thr->task->taskgroup->cancelled))) ++ return; ++ ++ gomp_task_maybe_wait_for_dependencies (depend); ++ } ++ } ++ } ++} ++ ++void ++GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, ++ size_t *sizes, unsigned short *kinds, ++ unsigned int flags, void **depend) ++{ ++ /* If there are depend clauses, but nowait is not present, ++ block the parent task until the dependencies are resolved ++ and then just continue with the rest of the function as if it ++ is a merged task. Until we are able to schedule task during ++ variable mapping or unmapping, ignore nowait if depend clauses ++ are not present. */ ++ if (depend != NULL) ++ { ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->task && thr->task->depend_hash) ++ { ++ if ((flags & GOMP_TARGET_FLAG_NOWAIT) ++ && thr->ts.team ++ && !thr->task->final_task) ++ { ++ if (gomp_create_target_task (NULL, (void (*) (void *)) NULL, ++ mapnum, hostaddrs, sizes, kinds, ++ flags, depend, NULL, ++ GOMP_TARGET_TASK_DATA)) ++ return; ++ } ++ else ++ { ++ struct gomp_team *team = thr->ts.team; ++ /* If parallel or taskgroup has been cancelled, don't start new ++ tasks. 
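GOMP_target_ext and GOMP_target_enter_exit_data above turn a target construct carrying both nowait and depend clauses into a deferred target task, and otherwise block on the dependencies before running inline. A minimal OpenMP 4.5 source form of that behaviour, assuming a compiler with target-nowait support; not part of the patch:

```c
/* Build with: gcc -fopenmp target_nowait.c */
#include <stdio.h>

int main (void)
{
  int x = 0, y = 0;

#pragma omp parallel
#pragma omp single
  {
#pragma omp target map(tofrom: x) nowait depend(out: x)
    x = 42;                     /* deferred: becomes a target task */

#pragma omp task depend(in: x) shared(x, y)
    y = x + 1;                  /* ordered after the target task */

#pragma omp taskwait
    printf ("%d %d\n", x, y);   /* prints: 42 43 */
  }
  return 0;
}
```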
*/ ++ if (team ++ && (gomp_team_barrier_cancelled (&team->barrier) ++ || (thr->task->taskgroup ++ && thr->task->taskgroup->cancelled))) ++ return; ++ ++ gomp_task_maybe_wait_for_dependencies (depend); ++ } ++ } ++ } ++} ++ ++bool ++gomp_target_task_fn (void *data) ++{ ++ struct gomp_target_task *ttask = (struct gomp_target_task *) data; ++ ++ if (ttask->fn != NULL) ++ { ++ ttask->state = GOMP_TARGET_TASK_FALLBACK; ++ gomp_target_fallback (ttask->fn, ttask->hostaddrs); ++ return false; ++ } ++ return false; ++} ++ ++void + GOMP_teams (unsigned int num_teams, unsigned int thread_limit) + { + if (thread_limit) +@@ -94,3 +424,153 @@ GOMP_teams (unsigned int num_teams, unsi + } + (void) num_teams; + } ++ ++void * ++omp_target_alloc (size_t size, int device_num) ++{ ++ if (device_num == GOMP_DEVICE_HOST_FALLBACK) ++ return malloc (size); ++ ++ return NULL; ++} ++ ++void ++omp_target_free (void *device_ptr, int device_num) ++{ ++ if (device_ptr == NULL) ++ return; ++ ++ if (device_num == GOMP_DEVICE_HOST_FALLBACK) ++ { ++ free (device_ptr); ++ return; ++ } ++} ++ ++int ++omp_target_is_present (void *ptr, int device_num) ++{ ++ if (ptr == NULL) ++ return 1; ++ ++ if (device_num == GOMP_DEVICE_HOST_FALLBACK) ++ return 1; ++ ++ return 0; ++} ++ ++int ++omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset, ++ size_t src_offset, int dst_device_num, int src_device_num) ++{ ++ if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK) ++ return EINVAL; ++ if (src_device_num != GOMP_DEVICE_HOST_FALLBACK) ++ return EINVAL; ++ memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length); ++ return 0; ++} ++ ++#define HALF_SIZE_T (((size_t) 1) << (8 * sizeof (size_t) / 2)) ++ ++#define __builtin_mul_overflow(x, y, z) \ ++ ({ bool retval = false; \ ++ size_t xval = (x); \ ++ size_t yval = (y); \ ++ size_t zval = xval * yval; \ ++ if (__builtin_expect ((xval | yval) >= HALF_SIZE_T, 0)) \ ++ { \ ++ if (xval && zval / xval != yval) \ ++ retval = true; \ ++ } \ ++ *(z) = zval; \ ++ retval; }) ++ ++static int ++omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size, ++ int num_dims, const size_t *volume, ++ const size_t *dst_offsets, ++ const size_t *src_offsets, ++ const size_t *dst_dimensions, ++ const size_t *src_dimensions) ++{ ++ size_t dst_slice = element_size; ++ size_t src_slice = element_size; ++ size_t j, dst_off, src_off, length; ++ int i, ret; ++ ++ ++ if (num_dims == 1) ++ { ++ if (__builtin_mul_overflow (element_size, volume[0], &length) ++ || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off) ++ || __builtin_mul_overflow (element_size, src_offsets[0], &src_off)) ++ return EINVAL; ++ memcpy ((char *) dst + dst_off, (char *) src + src_off, length); ++ ret = 1; ++ return ret ? 0 : EINVAL; ++ } ++ ++ /* FIXME: it would be nice to have some plugin function to handle ++ num_dims == 2 and num_dims == 3 more efficiently. Larger ones can ++ be handled in the generic recursion below, and for host-host it ++ should be used even for any num_dims >= 2. 
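The __builtin_mul_overflow macro above is a fallback for compilers that lack the builtin: multiply modulo 2**N, then detect overflow via x != 0 && z / x != y; the HALF_SIZE_T test merely skips the division when both operands are provably small. The check in isolation (mul_overflows is an illustrative name, not from the patch):

```c
#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

/* Set *z = x * y (wrapping) and report whether the true product
   overflowed size_t: if it did, the truncated z divided by x can no
   longer recover y.  */
static bool
mul_overflows (size_t x, size_t y, size_t *z)
{
  *z = x * y;
  return x != 0 && *z / x != y;
}

int main (void)
{
  size_t z;
  printf ("%d\n", mul_overflows ((size_t) -1, 2, &z)); /* 1: overflow */
  printf ("%d\n", mul_overflows (12345, 6789, &z));    /* 0: fits */
  return 0;
}
```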
*/ ++ ++ for (i = 1; i < num_dims; i++) ++ if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice) ++ || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice)) ++ return EINVAL; ++ if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off) ++ || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off)) ++ return EINVAL; ++ for (j = 0; j < volume[0]; j++) ++ { ++ ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off, ++ (char *) src + src_off, ++ element_size, num_dims - 1, ++ volume + 1, dst_offsets + 1, ++ src_offsets + 1, dst_dimensions + 1, ++ src_dimensions + 1); ++ if (ret) ++ return ret; ++ dst_off += dst_slice; ++ src_off += src_slice; ++ } ++ return 0; ++} ++ ++int ++omp_target_memcpy_rect (void *dst, void *src, size_t element_size, ++ int num_dims, const size_t *volume, ++ const size_t *dst_offsets, ++ const size_t *src_offsets, ++ const size_t *dst_dimensions, ++ const size_t *src_dimensions, ++ int dst_device_num, int src_device_num) ++{ ++ if (!dst && !src) ++ return INT_MAX; ++ ++ if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK) ++ return EINVAL; ++ if (src_device_num != GOMP_DEVICE_HOST_FALLBACK) ++ return EINVAL; ++ ++ int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims, ++ volume, dst_offsets, src_offsets, ++ dst_dimensions, src_dimensions); ++ return ret; ++} ++ ++int ++omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size, ++ size_t device_offset, int device_num) ++{ ++ return EINVAL; ++} ++ ++int ++omp_target_disassociate_ptr (void *ptr, int device_num) ++{ ++ return EINVAL; ++} +--- libgomp/fortran.c.jj 2014-05-15 10:56:31.593531223 +0200 ++++ libgomp/fortran.c 2016-07-13 16:57:04.432535397 +0200 +@@ -67,12 +67,20 @@ ialias_redirect (omp_get_active_level) + ialias_redirect (omp_in_final) + ialias_redirect (omp_get_cancellation) + ialias_redirect (omp_get_proc_bind) ++ialias_redirect (omp_get_num_places) ++ialias_redirect (omp_get_place_num_procs) ++ialias_redirect (omp_get_place_proc_ids) ++ialias_redirect (omp_get_place_num) ++ialias_redirect (omp_get_partition_num_places) ++ialias_redirect (omp_get_partition_place_nums) + ialias_redirect (omp_set_default_device) + ialias_redirect (omp_get_default_device) + ialias_redirect (omp_get_num_devices) + ialias_redirect (omp_get_num_teams) + ialias_redirect (omp_get_team_num) + ialias_redirect (omp_is_initial_device) ++ialias_redirect (omp_get_initial_device) ++ialias_redirect (omp_get_max_task_priority) + #endif + + #ifndef LIBGOMP_GNU_SYMBOL_VERSIONING +@@ -342,35 +350,35 @@ omp_get_wtime_ (void) + } + + void +-omp_set_schedule_ (const int32_t *kind, const int32_t *modifier) ++omp_set_schedule_ (const int32_t *kind, const int32_t *chunk_size) + { +- omp_set_schedule (*kind, *modifier); ++ omp_set_schedule (*kind, *chunk_size); + } + + void +-omp_set_schedule_8_ (const int32_t *kind, const int64_t *modifier) ++omp_set_schedule_8_ (const int32_t *kind, const int64_t *chunk_size) + { +- omp_set_schedule (*kind, TO_INT (*modifier)); ++ omp_set_schedule (*kind, TO_INT (*chunk_size)); + } + + void +-omp_get_schedule_ (int32_t *kind, int32_t *modifier) ++omp_get_schedule_ (int32_t *kind, int32_t *chunk_size) + { + omp_sched_t k; +- int m; +- omp_get_schedule (&k, &m); ++ int cs; ++ omp_get_schedule (&k, &cs); + *kind = k; +- *modifier = m; ++ *chunk_size = cs; + } + + void +-omp_get_schedule_8_ (int32_t *kind, int64_t *modifier) ++omp_get_schedule_8_ (int32_t *kind, int64_t *chunk_size) + { + omp_sched_t k; +- int m; +- omp_get_schedule (&k, &m); ++ 
int cs; ++ omp_get_schedule (&k, &cs); + *kind = k; +- *modifier = m; ++ *chunk_size = cs; + } + + int32_t +@@ -451,6 +459,69 @@ omp_get_proc_bind_ (void) + return omp_get_proc_bind (); + } + ++int32_t ++omp_get_num_places_ (void) ++{ ++ return omp_get_num_places (); ++} ++ ++int32_t ++omp_get_place_num_procs_ (const int32_t *place_num) ++{ ++ return omp_get_place_num_procs (*place_num); ++} ++ ++int32_t ++omp_get_place_num_procs_8_ (const int64_t *place_num) ++{ ++ return omp_get_place_num_procs (TO_INT (*place_num)); ++} ++ ++void ++omp_get_place_proc_ids_ (const int32_t *place_num, int32_t *ids) ++{ ++ omp_get_place_proc_ids (*place_num, (int *) ids); ++} ++ ++void ++omp_get_place_proc_ids_8_ (const int64_t *place_num, int64_t *ids) ++{ ++ gomp_get_place_proc_ids_8 (TO_INT (*place_num), ids); ++} ++ ++int32_t ++omp_get_place_num_ (void) ++{ ++ return omp_get_place_num (); ++} ++ ++int32_t ++omp_get_partition_num_places_ (void) ++{ ++ return omp_get_partition_num_places (); ++} ++ ++void ++omp_get_partition_place_nums_ (int32_t *place_nums) ++{ ++ omp_get_partition_place_nums ((int *) place_nums); ++} ++ ++void ++omp_get_partition_place_nums_8_ (int64_t *place_nums) ++{ ++ if (gomp_places_list == NULL) ++ return; ++ ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->place == 0) ++ gomp_init_affinity (); ++ ++ unsigned int i; ++ for (i = 0; i < thr->ts.place_partition_len; i++) ++ *place_nums++ = (int64_t) thr->ts.place_partition_off + i; ++} ++ + void + omp_set_default_device_ (const int32_t *device_num) + { +@@ -492,3 +563,15 @@ omp_is_initial_device_ (void) + { + return omp_is_initial_device (); + } ++ ++int32_t ++omp_get_initial_device_ (void) ++{ ++ return omp_get_initial_device (); ++} ++ ++int32_t ++omp_get_max_task_priority_ (void) ++{ ++ return omp_get_max_task_priority (); ++} +--- libgomp/libgomp.map.jj 2014-05-15 10:56:31.927533549 +0200 ++++ libgomp/libgomp.map 2016-07-13 16:57:04.434535373 +0200 +@@ -134,6 +134,36 @@ OMP_4.0 { + omp_is_initial_device_; + } OMP_3.1; + ++OMP_4.5 { ++ global: ++ omp_get_max_task_priority; ++ omp_get_max_task_priority_; ++ omp_get_num_places; ++ omp_get_num_places_; ++ omp_get_place_num_procs; ++ omp_get_place_num_procs_; ++ omp_get_place_num_procs_8_; ++ omp_get_place_proc_ids; ++ omp_get_place_proc_ids_; ++ omp_get_place_proc_ids_8_; ++ omp_get_place_num; ++ omp_get_place_num_; ++ omp_get_partition_num_places; ++ omp_get_partition_num_places_; ++ omp_get_partition_place_nums; ++ omp_get_partition_place_nums_; ++ omp_get_partition_place_nums_8_; ++ omp_get_initial_device; ++ omp_get_initial_device_; ++ omp_target_alloc; ++ omp_target_free; ++ omp_target_is_present; ++ omp_target_memcpy; ++ omp_target_memcpy_rect; ++ omp_target_associate_ptr; ++ omp_target_disassociate_ptr; ++} OMP_4.0; ++ + GOMP_1.0 { + global: + GOMP_atomic_end; +@@ -227,3 +257,158 @@ GOMP_4.0 { + GOMP_target_update; + GOMP_teams; + } GOMP_3.0; ++ ++GOMP_4.0.1 { ++ global: ++ GOMP_offload_register; ++ GOMP_offload_unregister; ++} GOMP_4.0; ++ ++GOMP_4.5 { ++ global: ++ GOMP_target_ext; ++ GOMP_target_data_ext; ++ GOMP_target_update_ext; ++ GOMP_target_enter_exit_data; ++ GOMP_taskloop; ++ GOMP_taskloop_ull; ++ GOMP_offload_register_ver; ++ GOMP_offload_unregister_ver; ++ GOMP_loop_doacross_dynamic_start; ++ GOMP_loop_doacross_guided_start; ++ GOMP_loop_doacross_runtime_start; ++ GOMP_loop_doacross_static_start; ++ GOMP_doacross_post; ++ GOMP_doacross_wait; ++ GOMP_loop_ull_doacross_dynamic_start; ++ GOMP_loop_ull_doacross_guided_start; ++ 
GOMP_loop_ull_doacross_runtime_start; ++ GOMP_loop_ull_doacross_static_start; ++ GOMP_doacross_ull_post; ++ GOMP_doacross_ull_wait; ++ GOMP_loop_nonmonotonic_dynamic_next; ++ GOMP_loop_nonmonotonic_dynamic_start; ++ GOMP_loop_nonmonotonic_guided_next; ++ GOMP_loop_nonmonotonic_guided_start; ++ GOMP_loop_ull_nonmonotonic_dynamic_next; ++ GOMP_loop_ull_nonmonotonic_dynamic_start; ++ GOMP_loop_ull_nonmonotonic_guided_next; ++ GOMP_loop_ull_nonmonotonic_guided_start; ++ GOMP_parallel_loop_nonmonotonic_dynamic; ++ GOMP_parallel_loop_nonmonotonic_guided; ++} GOMP_4.0.1; ++ ++OACC_2.0 { ++ global: ++ acc_get_num_devices; ++ acc_get_num_devices_h_; ++ acc_set_device_type; ++ acc_set_device_type_h_; ++ acc_get_device_type; ++ acc_get_device_type_h_; ++ acc_set_device_num; ++ acc_set_device_num_h_; ++ acc_get_device_num; ++ acc_get_device_num_h_; ++ acc_async_test; ++ acc_async_test_h_; ++ acc_async_test_all; ++ acc_async_test_all_h_; ++ acc_wait; ++ acc_wait_h_; ++ acc_wait_async; ++ acc_wait_async_h_; ++ acc_wait_all; ++ acc_wait_all_h_; ++ acc_wait_all_async; ++ acc_wait_all_async_h_; ++ acc_init; ++ acc_init_h_; ++ acc_shutdown; ++ acc_shutdown_h_; ++ acc_on_device; ++ acc_on_device_h_; ++ acc_malloc; ++ acc_free; ++ acc_copyin; ++ acc_copyin_32_h_; ++ acc_copyin_64_h_; ++ acc_copyin_array_h_; ++ acc_present_or_copyin; ++ acc_present_or_copyin_32_h_; ++ acc_present_or_copyin_64_h_; ++ acc_present_or_copyin_array_h_; ++ acc_create; ++ acc_create_32_h_; ++ acc_create_64_h_; ++ acc_create_array_h_; ++ acc_present_or_create; ++ acc_present_or_create_32_h_; ++ acc_present_or_create_64_h_; ++ acc_present_or_create_array_h_; ++ acc_copyout; ++ acc_copyout_32_h_; ++ acc_copyout_64_h_; ++ acc_copyout_array_h_; ++ acc_delete; ++ acc_delete_32_h_; ++ acc_delete_64_h_; ++ acc_delete_array_h_; ++ acc_update_device; ++ acc_update_device_32_h_; ++ acc_update_device_64_h_; ++ acc_update_device_array_h_; ++ acc_update_self; ++ acc_update_self_32_h_; ++ acc_update_self_64_h_; ++ acc_update_self_array_h_; ++ acc_map_data; ++ acc_unmap_data; ++ acc_deviceptr; ++ acc_hostptr; ++ acc_is_present; ++ acc_is_present_32_h_; ++ acc_is_present_64_h_; ++ acc_is_present_array_h_; ++ acc_memcpy_to_device; ++ acc_memcpy_from_device; ++ acc_get_current_cuda_device; ++ acc_get_current_cuda_context; ++ acc_get_cuda_stream; ++ acc_set_cuda_stream; ++}; ++ ++GOACC_2.0 { ++ global: ++ GOACC_data_end; ++ GOACC_data_start; ++ GOACC_enter_exit_data; ++ GOACC_parallel; ++ GOACC_update; ++ GOACC_wait; ++ GOACC_get_thread_num; ++ GOACC_get_num_threads; ++}; ++ ++GOACC_2.0.1 { ++ global: ++ GOACC_declare; ++ GOACC_parallel_keyed; ++} GOACC_2.0; ++ ++GOMP_PLUGIN_1.0 { ++ global: ++ GOMP_PLUGIN_malloc; ++ GOMP_PLUGIN_malloc_cleared; ++ GOMP_PLUGIN_realloc; ++ GOMP_PLUGIN_debug; ++ GOMP_PLUGIN_error; ++ GOMP_PLUGIN_fatal; ++ GOMP_PLUGIN_async_unmap_vars; ++ GOMP_PLUGIN_acc_thread; ++}; ++ ++GOMP_PLUGIN_1.1 { ++ global: ++ GOMP_PLUGIN_target_task_completion; ++} GOMP_PLUGIN_1.0; +--- libgomp/ordered.c.jj 2013-01-21 16:00:46.137873657 +0100 ++++ libgomp/ordered.c 2016-07-13 16:57:18.918355780 +0200 +@@ -25,6 +25,9 @@ + /* This file handles the ORDERED construct. */ + + #include "libgomp.h" ++#include ++#include ++#include "doacross.h" + + + /* This function is called when first allocating an iteration block. That +@@ -249,3 +252,533 @@ void + GOMP_ordered_end (void) + { + } ++ ++/* DOACROSS initialization. 
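++
++   gomp_doacross_init sizes a per-work-share array with one entry per
++   thread (static schedule), per iteration (guided) or per chunk
++   (dynamic), each entry padded out to a 64-byte cache line to avoid
++   false sharing.  When the bit widths of all collapsed iteration
++   counts fit into a single unsigned long, the iteration vector is
++   "flattened" into one word, so POST and WAIT need only a single
++   atomic operation each.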
*/ ++ ++#define MAX_COLLAPSED_BITS (__SIZEOF_LONG__ * __CHAR_BIT__) ++ ++void ++gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ struct gomp_team *team = thr->ts.team; ++ struct gomp_work_share *ws = thr->ts.work_share; ++ unsigned int i, bits[MAX_COLLAPSED_BITS], num_bits = 0; ++ unsigned long ent, num_ents, elt_sz, shift_sz; ++ struct gomp_doacross_work_share *doacross; ++ ++ if (team == NULL || team->nthreads == 1) ++ return; ++ ++ for (i = 0; i < ncounts; i++) ++ { ++ /* If any count is 0, GOMP_doacross_{post,wait} can't be called. */ ++ if (counts[i] == 0) ++ return; ++ ++ if (num_bits <= MAX_COLLAPSED_BITS) ++ { ++ unsigned int this_bits; ++ if (counts[i] == 1) ++ this_bits = 1; ++ else ++ this_bits = __SIZEOF_LONG__ * __CHAR_BIT__ ++ - __builtin_clzl (counts[i] - 1); ++ if (num_bits + this_bits <= MAX_COLLAPSED_BITS) ++ { ++ bits[i] = this_bits; ++ num_bits += this_bits; ++ } ++ else ++ num_bits = MAX_COLLAPSED_BITS + 1; ++ } ++ } ++ ++ if (ws->sched == GFS_STATIC) ++ num_ents = team->nthreads; ++ else if (ws->sched == GFS_GUIDED) ++ num_ents = counts[0]; ++ else ++ num_ents = (counts[0] - 1) / chunk_size + 1; ++ if (num_bits <= MAX_COLLAPSED_BITS) ++ { ++ elt_sz = sizeof (unsigned long); ++ shift_sz = ncounts * sizeof (unsigned int); ++ } ++ else ++ { ++ elt_sz = sizeof (unsigned long) * ncounts; ++ shift_sz = 0; ++ } ++ elt_sz = (elt_sz + 63) & ~63UL; ++ ++ doacross = gomp_malloc (sizeof (*doacross) + 63 + num_ents * elt_sz ++ + shift_sz); ++ doacross->chunk_size = chunk_size; ++ doacross->elt_sz = elt_sz; ++ doacross->ncounts = ncounts; ++ doacross->flattened = false; ++ doacross->array = (unsigned char *) ++ ((((uintptr_t) (doacross + 1)) + 63 + shift_sz) ++ & ~(uintptr_t) 63); ++ if (num_bits <= MAX_COLLAPSED_BITS) ++ { ++ unsigned int shift_count = 0; ++ doacross->flattened = true; ++ for (i = ncounts; i > 0; i--) ++ { ++ doacross->shift_counts[i - 1] = shift_count; ++ shift_count += bits[i - 1]; ++ } ++ for (ent = 0; ent < num_ents; ent++) ++ *(unsigned long *) (doacross->array + ent * elt_sz) = 0; ++ } ++ else ++ for (ent = 0; ent < num_ents; ent++) ++ memset (doacross->array + ent * elt_sz, '\0', ++ sizeof (unsigned long) * ncounts); ++ if (ws->sched == GFS_STATIC && chunk_size == 0) ++ { ++ unsigned long q = counts[0] / num_ents; ++ unsigned long t = counts[0] % num_ents; ++ doacross->boundary = t * (q + 1); ++ doacross->q = q; ++ doacross->t = t; ++ } ++ ws->doacross = doacross; ++} ++ ++/* DOACROSS POST operation. 
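++
++   A thread publishes completion of the iteration described by COUNTS
++   by storing the iteration vector plus one (so that the zero-filled
++   initial array means "nothing posted yet") into its slot with release
++   semantics; GOMP_doacross_wait pairs with this using acquire loads.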
*/ ++ ++void ++GOMP_doacross_post (long *counts) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ struct gomp_work_share *ws = thr->ts.work_share; ++ struct gomp_doacross_work_share *doacross = ws->doacross; ++ unsigned long ent; ++ unsigned int i; ++ ++ if (__builtin_expect (doacross == NULL, 0)) ++ { ++ __sync_synchronize (); ++ return; ++ } ++ ++ if (__builtin_expect (ws->sched == GFS_STATIC, 1)) ++ ent = thr->ts.team_id; ++ else if (ws->sched == GFS_GUIDED) ++ ent = counts[0]; ++ else ++ ent = counts[0] / doacross->chunk_size; ++ unsigned long *array = (unsigned long *) (doacross->array ++ + ent * doacross->elt_sz); ++ ++ if (__builtin_expect (doacross->flattened, 1)) ++ { ++ unsigned long flattened ++ = (unsigned long) counts[0] << doacross->shift_counts[0]; ++ ++ for (i = 1; i < doacross->ncounts; i++) ++ flattened |= (unsigned long) counts[i] ++ << doacross->shift_counts[i]; ++ flattened++; ++ if (flattened == __atomic_load_n (array, MEMMODEL_ACQUIRE)) ++ __atomic_thread_fence (MEMMODEL_RELEASE); ++ else ++ __atomic_store_n (array, flattened, MEMMODEL_RELEASE); ++ return; ++ } ++ ++ __atomic_thread_fence (MEMMODEL_ACQUIRE); ++ for (i = doacross->ncounts; i-- > 0; ) ++ { ++ if (counts[i] + 1UL != __atomic_load_n (&array[i], MEMMODEL_RELAXED)) ++ __atomic_store_n (&array[i], counts[i] + 1UL, MEMMODEL_RELEASE); ++ } ++} ++ ++/* DOACROSS WAIT operation. */ ++ ++void ++GOMP_doacross_wait (long first, ...) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ struct gomp_work_share *ws = thr->ts.work_share; ++ struct gomp_doacross_work_share *doacross = ws->doacross; ++ va_list ap; ++ unsigned long ent; ++ unsigned int i; ++ ++ if (__builtin_expect (doacross == NULL, 0)) ++ { ++ __sync_synchronize (); ++ return; ++ } ++ ++ if (__builtin_expect (ws->sched == GFS_STATIC, 1)) ++ { ++ if (ws->chunk_size == 0) ++ { ++ if (first < doacross->boundary) ++ ent = first / (doacross->q + 1); ++ else ++ ent = (first - doacross->boundary) / doacross->q ++ + doacross->t; ++ } ++ else ++ ent = first / ws->chunk_size % thr->ts.team->nthreads; ++ } ++ else if (ws->sched == GFS_GUIDED) ++ ent = first; ++ else ++ ent = first / doacross->chunk_size; ++ unsigned long *array = (unsigned long *) (doacross->array ++ + ent * doacross->elt_sz); ++ ++ if (__builtin_expect (doacross->flattened, 1)) ++ { ++ unsigned long flattened ++ = (unsigned long) first << doacross->shift_counts[0]; ++ unsigned long cur; ++ ++ va_start (ap, first); ++ for (i = 1; i < doacross->ncounts; i++) ++ flattened |= (unsigned long) va_arg (ap, long) ++ << doacross->shift_counts[i]; ++ cur = __atomic_load_n (array, MEMMODEL_ACQUIRE); ++ if (flattened < cur) ++ { ++ __atomic_thread_fence (MEMMODEL_RELEASE); ++ va_end (ap); ++ return; ++ } ++ doacross_spin (array, flattened, cur); ++ __atomic_thread_fence (MEMMODEL_RELEASE); ++ va_end (ap); ++ return; ++ } ++ ++ do ++ { ++ va_start (ap, first); ++ for (i = 0; i < doacross->ncounts; i++) ++ { ++ unsigned long thisv ++ = (unsigned long) (i ? 
va_arg (ap, long) : first) + 1; ++ unsigned long cur = __atomic_load_n (&array[i], MEMMODEL_RELAXED); ++ if (thisv < cur) ++ { ++ i = doacross->ncounts; ++ break; ++ } ++ if (thisv > cur) ++ break; ++ } ++ va_end (ap); ++ if (i == doacross->ncounts) ++ break; ++ cpu_relax (); ++ } ++ while (1); ++ __sync_synchronize (); ++} ++ ++typedef unsigned long long gomp_ull; ++ ++void ++gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts, gomp_ull chunk_size) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ struct gomp_team *team = thr->ts.team; ++ struct gomp_work_share *ws = thr->ts.work_share; ++ unsigned int i, bits[MAX_COLLAPSED_BITS], num_bits = 0; ++ unsigned long ent, num_ents, elt_sz, shift_sz; ++ struct gomp_doacross_work_share *doacross; ++ ++ if (team == NULL || team->nthreads == 1) ++ return; ++ ++ for (i = 0; i < ncounts; i++) ++ { ++ /* If any count is 0, GOMP_doacross_{post,wait} can't be called. */ ++ if (counts[i] == 0) ++ return; ++ ++ if (num_bits <= MAX_COLLAPSED_BITS) ++ { ++ unsigned int this_bits; ++ if (counts[i] == 1) ++ this_bits = 1; ++ else ++ this_bits = __SIZEOF_LONG_LONG__ * __CHAR_BIT__ ++ - __builtin_clzll (counts[i] - 1); ++ if (num_bits + this_bits <= MAX_COLLAPSED_BITS) ++ { ++ bits[i] = this_bits; ++ num_bits += this_bits; ++ } ++ else ++ num_bits = MAX_COLLAPSED_BITS + 1; ++ } ++ } ++ ++ if (ws->sched == GFS_STATIC) ++ num_ents = team->nthreads; ++ else if (ws->sched == GFS_GUIDED) ++ num_ents = counts[0]; ++ else ++ num_ents = (counts[0] - 1) / chunk_size + 1; ++ if (num_bits <= MAX_COLLAPSED_BITS) ++ { ++ elt_sz = sizeof (unsigned long); ++ shift_sz = ncounts * sizeof (unsigned int); ++ } ++ else ++ { ++ if (sizeof (gomp_ull) == sizeof (unsigned long)) ++ elt_sz = sizeof (gomp_ull) * ncounts; ++ else if (sizeof (gomp_ull) == 2 * sizeof (unsigned long)) ++ elt_sz = sizeof (unsigned long) * 2 * ncounts; ++ else ++ abort (); ++ shift_sz = 0; ++ } ++ elt_sz = (elt_sz + 63) & ~63UL; ++ ++ doacross = gomp_malloc (sizeof (*doacross) + 63 + num_ents * elt_sz ++ + shift_sz); ++ doacross->chunk_size_ull = chunk_size; ++ doacross->elt_sz = elt_sz; ++ doacross->ncounts = ncounts; ++ doacross->flattened = false; ++ doacross->boundary = 0; ++ doacross->array = (unsigned char *) ++ ((((uintptr_t) (doacross + 1)) + 63 + shift_sz) ++ & ~(uintptr_t) 63); ++ if (num_bits <= MAX_COLLAPSED_BITS) ++ { ++ unsigned int shift_count = 0; ++ doacross->flattened = true; ++ for (i = ncounts; i > 0; i--) ++ { ++ doacross->shift_counts[i - 1] = shift_count; ++ shift_count += bits[i - 1]; ++ } ++ for (ent = 0; ent < num_ents; ent++) ++ *(unsigned long *) (doacross->array + ent * elt_sz) = 0; ++ } ++ else ++ for (ent = 0; ent < num_ents; ent++) ++ memset (doacross->array + ent * elt_sz, '\0', ++ sizeof (unsigned long) * ncounts); ++ if (ws->sched == GFS_STATIC && chunk_size == 0) ++ { ++ gomp_ull q = counts[0] / num_ents; ++ gomp_ull t = counts[0] % num_ents; ++ doacross->boundary_ull = t * (q + 1); ++ doacross->q_ull = q; ++ doacross->t = t; ++ } ++ ws->doacross = doacross; ++} ++ ++/* DOACROSS POST operation. 
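++
++   Same protocol as GOMP_doacross_post above, but for unsigned long
++   long iteration counts.  Where gomp_ull is wider than unsigned long,
++   each count is published as two unsigned long halves so that only
++   word-sized atomics are needed.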
*/ ++ ++void ++GOMP_doacross_ull_post (gomp_ull *counts) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ struct gomp_work_share *ws = thr->ts.work_share; ++ struct gomp_doacross_work_share *doacross = ws->doacross; ++ unsigned long ent; ++ unsigned int i; ++ ++ if (__builtin_expect (doacross == NULL, 0)) ++ { ++ __sync_synchronize (); ++ return; ++ } ++ ++ if (__builtin_expect (ws->sched == GFS_STATIC, 1)) ++ ent = thr->ts.team_id; ++ else if (ws->sched == GFS_GUIDED) ++ ent = counts[0]; ++ else ++ ent = counts[0] / doacross->chunk_size_ull; ++ ++ if (__builtin_expect (doacross->flattened, 1)) ++ { ++ unsigned long *array = (unsigned long *) (doacross->array ++ + ent * doacross->elt_sz); ++ gomp_ull flattened ++ = counts[0] << doacross->shift_counts[0]; ++ ++ for (i = 1; i < doacross->ncounts; i++) ++ flattened |= counts[i] << doacross->shift_counts[i]; ++ flattened++; ++ if (flattened == __atomic_load_n (array, MEMMODEL_ACQUIRE)) ++ __atomic_thread_fence (MEMMODEL_RELEASE); ++ else ++ __atomic_store_n (array, flattened, MEMMODEL_RELEASE); ++ return; ++ } ++ ++ __atomic_thread_fence (MEMMODEL_ACQUIRE); ++ if (sizeof (gomp_ull) == sizeof (unsigned long)) ++ { ++ gomp_ull *array = (gomp_ull *) (doacross->array ++ + ent * doacross->elt_sz); ++ ++ for (i = doacross->ncounts; i-- > 0; ) ++ { ++ if (counts[i] + 1UL != __atomic_load_n (&array[i], MEMMODEL_RELAXED)) ++ __atomic_store_n (&array[i], counts[i] + 1UL, MEMMODEL_RELEASE); ++ } ++ } ++ else ++ { ++ unsigned long *array = (unsigned long *) (doacross->array ++ + ent * doacross->elt_sz); ++ ++ for (i = doacross->ncounts; i-- > 0; ) ++ { ++ gomp_ull cull = counts[i] + 1UL; ++ unsigned long c = (unsigned long) cull; ++ if (c != __atomic_load_n (&array[2 * i + 1], MEMMODEL_RELAXED)) ++ __atomic_store_n (&array[2 * i + 1], c, MEMMODEL_RELEASE); ++ c = cull >> (__SIZEOF_LONG_LONG__ * __CHAR_BIT__ / 2); ++ if (c != __atomic_load_n (&array[2 * i], MEMMODEL_RELAXED)) ++ __atomic_store_n (&array[2 * i], c, MEMMODEL_RELEASE); ++ } ++ } ++} ++ ++/* DOACROSS WAIT operation. */ ++ ++void ++GOMP_doacross_ull_wait (gomp_ull first, ...) 
++{ ++ struct gomp_thread *thr = gomp_thread (); ++ struct gomp_work_share *ws = thr->ts.work_share; ++ struct gomp_doacross_work_share *doacross = ws->doacross; ++ va_list ap; ++ unsigned long ent; ++ unsigned int i; ++ ++ if (__builtin_expect (doacross == NULL, 0)) ++ { ++ __sync_synchronize (); ++ return; ++ } ++ ++ if (__builtin_expect (ws->sched == GFS_STATIC, 1)) ++ { ++ if (ws->chunk_size_ull == 0) ++ { ++ if (first < doacross->boundary_ull) ++ ent = first / (doacross->q_ull + 1); ++ else ++ ent = (first - doacross->boundary_ull) / doacross->q_ull ++ + doacross->t; ++ } ++ else ++ ent = first / ws->chunk_size_ull % thr->ts.team->nthreads; ++ } ++ else if (ws->sched == GFS_GUIDED) ++ ent = first; ++ else ++ ent = first / doacross->chunk_size_ull; ++ ++ if (__builtin_expect (doacross->flattened, 1)) ++ { ++ unsigned long *array = (unsigned long *) (doacross->array ++ + ent * doacross->elt_sz); ++ gomp_ull flattened = first << doacross->shift_counts[0]; ++ unsigned long cur; ++ ++ va_start (ap, first); ++ for (i = 1; i < doacross->ncounts; i++) ++ flattened |= va_arg (ap, gomp_ull) ++ << doacross->shift_counts[i]; ++ cur = __atomic_load_n (array, MEMMODEL_ACQUIRE); ++ if (flattened < cur) ++ { ++ __atomic_thread_fence (MEMMODEL_RELEASE); ++ va_end (ap); ++ return; ++ } ++ doacross_spin (array, flattened, cur); ++ __atomic_thread_fence (MEMMODEL_RELEASE); ++ va_end (ap); ++ return; ++ } ++ ++ if (sizeof (gomp_ull) == sizeof (unsigned long)) ++ { ++ gomp_ull *array = (gomp_ull *) (doacross->array ++ + ent * doacross->elt_sz); ++ do ++ { ++ va_start (ap, first); ++ for (i = 0; i < doacross->ncounts; i++) ++ { ++ gomp_ull thisv ++ = (i ? va_arg (ap, gomp_ull) : first) + 1; ++ gomp_ull cur = __atomic_load_n (&array[i], MEMMODEL_RELAXED); ++ if (thisv < cur) ++ { ++ i = doacross->ncounts; ++ break; ++ } ++ if (thisv > cur) ++ break; ++ } ++ va_end (ap); ++ if (i == doacross->ncounts) ++ break; ++ cpu_relax (); ++ } ++ while (1); ++ } ++ else ++ { ++ unsigned long *array = (unsigned long *) (doacross->array ++ + ent * doacross->elt_sz); ++ do ++ { ++ va_start (ap, first); ++ for (i = 0; i < doacross->ncounts; i++) ++ { ++ gomp_ull thisv ++ = (i ? va_arg (ap, gomp_ull) : first) + 1; ++ unsigned long t ++ = thisv >> (__SIZEOF_LONG_LONG__ * __CHAR_BIT__ / 2); ++ unsigned long cur ++ = __atomic_load_n (&array[2 * i], MEMMODEL_RELAXED); ++ if (t < cur) ++ { ++ i = doacross->ncounts; ++ break; ++ } ++ if (t > cur) ++ break; ++ t = thisv; ++ cur = __atomic_load_n (&array[2 * i + 1], MEMMODEL_RELAXED); ++ if (t < cur) ++ { ++ i = doacross->ncounts; ++ break; ++ } ++ if (t > cur) ++ break; ++ } ++ va_end (ap); ++ if (i == doacross->ncounts) ++ break; ++ cpu_relax (); ++ } ++ while (1); ++ } ++ __sync_synchronize (); ++} +--- libgomp/loop.c.jj 2014-05-15 10:56:36.487505570 +0200 ++++ libgomp/loop.c 2016-07-13 16:57:13.488423109 +0200 +@@ -110,6 +110,11 @@ gomp_loop_static_start (long start, long + return !gomp_iter_static_next (istart, iend); + } + ++/* The current dynamic implementation is always monotonic. The ++ entrypoints without nonmonotonic in them have to be always monotonic, ++ but the nonmonotonic ones could be changed to use work-stealing for ++ improved scalability. 
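++
++   Since a monotonic schedule is a conforming implementation of a
++   nonmonotonic one (but not vice versa), the GOMP_loop_nonmonotonic_*
++   entrypoints below can simply alias the existing dynamic/guided
++   routines for now.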
*/ ++ + static bool + gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size, + long *istart, long *iend) +@@ -135,6 +140,9 @@ gomp_loop_dynamic_start (long start, lon + return ret; + } + ++/* Similarly as for dynamic, though the question is how can the chunk sizes ++ be decreased without a central locking or atomics. */ ++ + static bool + gomp_loop_guided_start (long start, long end, long incr, long chunk_size, + long *istart, long *iend) +@@ -168,13 +176,16 @@ GOMP_loop_runtime_start (long start, lon + switch (icv->run_sched_var) + { + case GFS_STATIC: +- return gomp_loop_static_start (start, end, incr, icv->run_sched_modifier, ++ return gomp_loop_static_start (start, end, incr, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_DYNAMIC: +- return gomp_loop_dynamic_start (start, end, incr, icv->run_sched_modifier, ++ return gomp_loop_dynamic_start (start, end, incr, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_GUIDED: +- return gomp_loop_guided_start (start, end, incr, icv->run_sched_modifier, ++ return gomp_loop_guided_start (start, end, incr, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_AUTO: + /* For now map to schedule(static), later on we could play with feedback +@@ -265,15 +276,15 @@ GOMP_loop_ordered_runtime_start (long st + { + case GFS_STATIC: + return gomp_loop_ordered_static_start (start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_DYNAMIC: + return gomp_loop_ordered_dynamic_start (start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_GUIDED: + return gomp_loop_ordered_guided_start (start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_AUTO: + /* For now map to schedule(static), later on we could play with feedback +@@ -285,6 +296,111 @@ GOMP_loop_ordered_runtime_start (long st + } + } + ++/* The *_doacross_*_start routines are similar. The only difference is that ++ this work-share construct is initialized to expect an ORDERED(N) - DOACROSS ++ section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1 ++ and other COUNTS array elements tell the library number of iterations ++ in the ordered inner loops. 
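++
++   For example, for a doacross loop nest such as
++
++     #pragma omp for ordered(2)
++     for (i = 0; i < N; i++)
++       for (j = 0; j < M; j++)
++
++   the compiler passes NCOUNTS == 2 and COUNTS == { N, M }.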
*/ ++ ++static bool ++gomp_loop_doacross_static_start (unsigned ncounts, long *counts, ++ long chunk_size, long *istart, long *iend) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ ++ thr->ts.static_trip = 0; ++ if (gomp_work_share_start (false)) ++ { ++ gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, ++ GFS_STATIC, chunk_size); ++ gomp_doacross_init (ncounts, counts, chunk_size); ++ gomp_work_share_init_done (); ++ } ++ ++ return !gomp_iter_static_next (istart, iend); ++} ++ ++static bool ++gomp_loop_doacross_dynamic_start (unsigned ncounts, long *counts, ++ long chunk_size, long *istart, long *iend) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ bool ret; ++ ++ if (gomp_work_share_start (false)) ++ { ++ gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, ++ GFS_DYNAMIC, chunk_size); ++ gomp_doacross_init (ncounts, counts, chunk_size); ++ gomp_work_share_init_done (); ++ } ++ ++#ifdef HAVE_SYNC_BUILTINS ++ ret = gomp_iter_dynamic_next (istart, iend); ++#else ++ gomp_mutex_lock (&thr->ts.work_share->lock); ++ ret = gomp_iter_dynamic_next_locked (istart, iend); ++ gomp_mutex_unlock (&thr->ts.work_share->lock); ++#endif ++ ++ return ret; ++} ++ ++static bool ++gomp_loop_doacross_guided_start (unsigned ncounts, long *counts, ++ long chunk_size, long *istart, long *iend) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ bool ret; ++ ++ if (gomp_work_share_start (false)) ++ { ++ gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, ++ GFS_GUIDED, chunk_size); ++ gomp_doacross_init (ncounts, counts, chunk_size); ++ gomp_work_share_init_done (); ++ } ++ ++#ifdef HAVE_SYNC_BUILTINS ++ ret = gomp_iter_guided_next (istart, iend); ++#else ++ gomp_mutex_lock (&thr->ts.work_share->lock); ++ ret = gomp_iter_guided_next_locked (istart, iend); ++ gomp_mutex_unlock (&thr->ts.work_share->lock); ++#endif ++ ++ return ret; ++} ++ ++bool ++GOMP_loop_doacross_runtime_start (unsigned ncounts, long *counts, ++ long *istart, long *iend) ++{ ++ struct gomp_task_icv *icv = gomp_icv (false); ++ switch (icv->run_sched_var) ++ { ++ case GFS_STATIC: ++ return gomp_loop_doacross_static_start (ncounts, counts, ++ icv->run_sched_chunk_size, ++ istart, iend); ++ case GFS_DYNAMIC: ++ return gomp_loop_doacross_dynamic_start (ncounts, counts, ++ icv->run_sched_chunk_size, ++ istart, iend); ++ case GFS_GUIDED: ++ return gomp_loop_doacross_guided_start (ncounts, counts, ++ icv->run_sched_chunk_size, ++ istart, iend); ++ case GFS_AUTO: ++ /* For now map to schedule(static), later on we could play with feedback ++ driven choice. */ ++ return gomp_loop_doacross_static_start (ncounts, counts, ++ 0, istart, iend); ++ default: ++ abort (); ++ } ++} ++ + /* The *_next routines are called when the thread completes processing of + the iteration block currently assigned to it. 
If the work-share + construct is bound directly to a parallel construct, then the iteration +@@ -483,7 +599,7 @@ GOMP_parallel_loop_runtime_start (void ( + { + struct gomp_task_icv *icv = gomp_icv (false); + gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, +- icv->run_sched_var, icv->run_sched_modifier, 0); ++ icv->run_sched_var, icv->run_sched_chunk_size, 0); + } + + ialias_redirect (GOMP_parallel_end) +@@ -521,6 +637,37 @@ GOMP_parallel_loop_guided (void (*fn) (v + GOMP_parallel_end (); + } + ++#ifdef HAVE_ATTRIBUTE_ALIAS ++extern __typeof(GOMP_parallel_loop_dynamic) GOMP_parallel_loop_nonmonotonic_dynamic ++ __attribute__((alias ("GOMP_parallel_loop_dynamic"))); ++extern __typeof(GOMP_parallel_loop_guided) GOMP_parallel_loop_nonmonotonic_guided ++ __attribute__((alias ("GOMP_parallel_loop_guided"))); ++#else ++void ++GOMP_parallel_loop_nonmonotonic_dynamic (void (*fn) (void *), void *data, ++ unsigned num_threads, long start, ++ long end, long incr, long chunk_size, ++ unsigned flags) ++{ ++ gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, ++ GFS_DYNAMIC, chunk_size, flags); ++ fn (data); ++ GOMP_parallel_end (); ++} ++ ++void ++GOMP_parallel_loop_nonmonotonic_guided (void (*fn) (void *), void *data, ++ unsigned num_threads, long start, ++ long end, long incr, long chunk_size, ++ unsigned flags) ++{ ++ gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, ++ GFS_GUIDED, chunk_size, flags); ++ fn (data); ++ GOMP_parallel_end (); ++} ++#endif ++ + void + GOMP_parallel_loop_runtime (void (*fn) (void *), void *data, + unsigned num_threads, long start, long end, +@@ -528,7 +675,7 @@ GOMP_parallel_loop_runtime (void (*fn) ( + { + struct gomp_task_icv *icv = gomp_icv (false); + gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, +- icv->run_sched_var, icv->run_sched_modifier, ++ icv->run_sched_var, icv->run_sched_chunk_size, + flags); + fn (data); + GOMP_parallel_end (); +@@ -569,6 +716,10 @@ extern __typeof(gomp_loop_dynamic_start) + __attribute__((alias ("gomp_loop_dynamic_start"))); + extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start + __attribute__((alias ("gomp_loop_guided_start"))); ++extern __typeof(gomp_loop_dynamic_start) GOMP_loop_nonmonotonic_dynamic_start ++ __attribute__((alias ("gomp_loop_dynamic_start"))); ++extern __typeof(gomp_loop_guided_start) GOMP_loop_nonmonotonic_guided_start ++ __attribute__((alias ("gomp_loop_guided_start"))); + + extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start + __attribute__((alias ("gomp_loop_ordered_static_start"))); +@@ -577,12 +728,23 @@ extern __typeof(gomp_loop_ordered_dynami + extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start + __attribute__((alias ("gomp_loop_ordered_guided_start"))); + ++extern __typeof(gomp_loop_doacross_static_start) GOMP_loop_doacross_static_start ++ __attribute__((alias ("gomp_loop_doacross_static_start"))); ++extern __typeof(gomp_loop_doacross_dynamic_start) GOMP_loop_doacross_dynamic_start ++ __attribute__((alias ("gomp_loop_doacross_dynamic_start"))); ++extern __typeof(gomp_loop_doacross_guided_start) GOMP_loop_doacross_guided_start ++ __attribute__((alias ("gomp_loop_doacross_guided_start"))); ++ + extern __typeof(gomp_loop_static_next) GOMP_loop_static_next + __attribute__((alias ("gomp_loop_static_next"))); + extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next + __attribute__((alias ("gomp_loop_dynamic_next"))); + extern __typeof(gomp_loop_guided_next) 
GOMP_loop_guided_next + __attribute__((alias ("gomp_loop_guided_next"))); ++extern __typeof(gomp_loop_dynamic_next) GOMP_loop_nonmonotonic_dynamic_next ++ __attribute__((alias ("gomp_loop_dynamic_next"))); ++extern __typeof(gomp_loop_guided_next) GOMP_loop_nonmonotonic_guided_next ++ __attribute__((alias ("gomp_loop_guided_next"))); + + extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next + __attribute__((alias ("gomp_loop_ordered_static_next"))); +@@ -613,6 +775,21 @@ GOMP_loop_guided_start (long start, long + } + + bool ++GOMP_loop_nonmonotonic_dynamic_start (long start, long end, long incr, ++ long chunk_size, long *istart, ++ long *iend) ++{ ++ return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend); ++} ++ ++bool ++GOMP_loop_nonmonotonic_guided_start (long start, long end, long incr, ++ long chunk_size, long *istart, long *iend) ++{ ++ return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend); ++} ++ ++bool + GOMP_loop_ordered_static_start (long start, long end, long incr, + long chunk_size, long *istart, long *iend) + { +@@ -637,6 +814,30 @@ GOMP_loop_ordered_guided_start (long sta + } + + bool ++GOMP_loop_doacross_static_start (unsigned ncounts, long *counts, ++ long chunk_size, long *istart, long *iend) ++{ ++ return gomp_loop_doacross_static_start (ncounts, counts, chunk_size, ++ istart, iend); ++} ++ ++bool ++GOMP_loop_doacross_dynamic_start (unsigned ncounts, long *counts, ++ long chunk_size, long *istart, long *iend) ++{ ++ return gomp_loop_doacross_dynamic_start (ncounts, counts, chunk_size, ++ istart, iend); ++} ++ ++bool ++GOMP_loop_doacross_guided_start (unsigned ncounts, long *counts, ++ long chunk_size, long *istart, long *iend) ++{ ++ return gomp_loop_doacross_guided_start (ncounts, counts, chunk_size, ++ istart, iend); ++} ++ ++bool + GOMP_loop_static_next (long *istart, long *iend) + { + return gomp_loop_static_next (istart, iend); +@@ -653,6 +854,18 @@ GOMP_loop_guided_next (long *istart, lon + { + return gomp_loop_guided_next (istart, iend); + } ++ ++bool ++GOMP_loop_nonmonotonic_dynamic_next (long *istart, long *iend) ++{ ++ return gomp_loop_dynamic_next (istart, iend); ++} ++ ++bool ++GOMP_loop_nonmonotonic_guided_next (long *istart, long *iend) ++{ ++ return gomp_loop_guided_next (istart, iend); ++} + + bool + GOMP_loop_ordered_static_next (long *istart, long *iend) +--- libgomp/error.c.jj 2013-01-21 16:00:31.834953566 +0100 ++++ libgomp/error.c 2016-07-13 16:57:04.437535335 +0200 +@@ -35,7 +35,26 @@ + #include + + +-static void ++#undef gomp_vdebug ++void ++gomp_vdebug (int kind __attribute__ ((unused)), const char *msg, va_list list) ++{ ++ if (gomp_debug_var) ++ vfprintf (stderr, msg, list); ++} ++ ++#undef gomp_debug ++void ++gomp_debug (int kind, const char *msg, ...) ++{ ++ va_list list; ++ ++ va_start (list, msg); ++ gomp_vdebug (kind, msg, list); ++ va_end (list); ++} ++ ++void + gomp_verror (const char *fmt, va_list list) + { + fputs ("\nlibgomp: ", stderr); +@@ -54,13 +73,18 @@ gomp_error (const char *fmt, ...) + } + + void ++gomp_vfatal (const char *fmt, va_list list) ++{ ++ gomp_verror (fmt, list); ++ exit (EXIT_FAILURE); ++} ++ ++void + gomp_fatal (const char *fmt, ...) 
+ { + va_list list; + + va_start (list, fmt); +- gomp_verror (fmt, list); ++ gomp_vfatal (fmt, list); + va_end (list); +- +- exit (EXIT_FAILURE); + } +--- libgomp/Makefile.am.jj 2014-05-15 11:12:10.000000000 +0200 ++++ libgomp/Makefile.am 2016-07-14 16:10:51.968202878 +0200 +@@ -60,7 +60,13 @@ libgomp_la_LINK = $(LINK) $(libgomp_la_L + libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \ + iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \ + task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \ +- time.c fortran.c affinity.c target.c ++ time.c fortran.c affinity.c target.c splay-tree.c libgomp-plugin.c \ ++ oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c oacc-async.c \ ++ oacc-plugin.c oacc-cuda.c priority_queue.c ++ ++if USE_FORTRAN ++libgomp_la_SOURCES += openacc.f90 ++endif + + nodist_noinst_HEADERS = libgomp_f.h + nodist_libsubinclude_HEADERS = omp.h +--- libgomp/Makefile.in.jj 2014-05-15 11:12:10.000000000 +0200 ++++ libgomp/Makefile.in 2016-07-14 16:11:10.981954087 +0200 +@@ -36,6 +36,7 @@ POST_UNINSTALL = : + build_triplet = @build@ + host_triplet = @host@ + target_triplet = @target@ ++@USE_FORTRAN_TRUE@am__append_1 = openacc.f90 + subdir = . + DIST_COMMON = ChangeLog $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/configure $(am__configure_deps) \ +@@ -92,11 +93,15 @@ am__installdirs = "$(DESTDIR)$(toolexecl + "$(DESTDIR)$(toolexeclibdir)" + LTLIBRARIES = $(toolexeclib_LTLIBRARIES) + libgomp_la_LIBADD = ++@USE_FORTRAN_TRUE@am__objects_1 = openacc.lo + am_libgomp_la_OBJECTS = alloc.lo barrier.lo critical.lo env.lo \ + error.lo iter.lo iter_ull.lo loop.lo loop_ull.lo ordered.lo \ + parallel.lo sections.lo single.lo task.lo team.lo work.lo \ + lock.lo mutex.lo proc.lo sem.lo bar.lo ptrlock.lo time.lo \ +- fortran.lo affinity.lo target.lo ++ fortran.lo affinity.lo target.lo splay-tree.lo \ ++ libgomp-plugin.lo oacc-parallel.lo oacc-host.lo oacc-init.lo \ ++ oacc-mem.lo oacc-async.lo oacc-plugin.lo oacc-cuda.lo \ ++ priority_queue.lo $(am__objects_1) + libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS) + DEFAULT_INCLUDES = -I.@am__isrc@ + depcomp = $(SHELL) $(top_srcdir)/../depcomp +@@ -108,6 +113,13 @@ LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIB + --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) + CCLD = $(CC) ++FCCOMPILE = $(FC) $(AM_FCFLAGS) $(FCFLAGS) ++LTFCCOMPILE = $(LIBTOOL) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ ++ --mode=compile $(FC) $(AM_FCFLAGS) $(FCFLAGS) ++FCLD = $(FC) ++FCLINK = $(LIBTOOL) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ ++ --mode=link $(FCLD) $(AM_FCFLAGS) $(FCFLAGS) $(AM_LDFLAGS) \ ++ $(LDFLAGS) -o $@ + SOURCES = $(libgomp_la_SOURCES) + MULTISRCTOP = + MULTIBUILDTOP = +@@ -315,10 +327,12 @@ libgomp_la_LDFLAGS = $(libgomp_version_i + libgomp_la_DEPENDENCIES = $(libgomp_version_dep) + libgomp_la_LINK = $(LINK) $(libgomp_la_LDFLAGS) + libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \ +- iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \ +- task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \ +- time.c fortran.c affinity.c target.c +- ++ iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c \ ++ single.c task.c team.c work.c lock.c mutex.c proc.c sem.c \ ++ bar.c ptrlock.c time.c fortran.c affinity.c target.c \ ++ splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \ ++ oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \ ++ priority_queue.c 
$(am__append_1) + nodist_noinst_HEADERS = libgomp_f.h + nodist_libsubinclude_HEADERS = omp.h + @USE_FORTRAN_TRUE@nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod +@@ -351,7 +365,7 @@ all: config.h + $(MAKE) $(AM_MAKEFLAGS) all-recursive + + .SUFFIXES: +-.SUFFIXES: .c .dvi .lo .o .obj .ps ++.SUFFIXES: .c .dvi .f90 .lo .o .obj .ps + am--refresh: + @: + $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) +@@ -463,17 +477,27 @@ distclean-compile: + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fortran.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter_ull.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp-plugin.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lock.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loop.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loop_ull.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mutex.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-async.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-cuda.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-host.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-init.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-mem.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-parallel.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/priority_queue.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ptrlock.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/team.Plo@am__quote@ +@@ -501,6 +525,15 @@ distclean-compile: + @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + @am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< + ++.f90.o: ++ $(FCCOMPILE) -c -o $@ $< ++ ++.f90.obj: ++ $(FCCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` ++ ++.f90.lo: ++ $(LTFCCOMPILE) -c -o $@ $< ++ + mostlyclean-libtool: + -rm -f *.lo + +--- libgomp/task.c.jj 2014-08-06 16:25:16.575091658 +0200 ++++ libgomp/task.c 2016-07-13 17:47:58.722758497 +0200 +@@ -28,6 +28,7 @@ + #include "libgomp.h" + #include + #include ++#include "gomp-constants.h" + + typedef struct gomp_task_depend_entry *hash_entry_type; + +@@ -63,6 +64,14 @@ void + gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task, + struct gomp_task_icv *prev_icv) + { ++ /* It would seem that using memset here would be a win, but it turns ++ out that partially filling gomp_task allows us to keep the ++ overhead of task creation low. 
In the nqueens-1.c test, for a ++ sufficiently large N, we drop the overhead from 5-6% to 1%. ++ ++ Note, the nqueens-1.c test in serial mode is a good test to ++ benchmark the overhead of creating tasks as there are millions of ++ tiny tasks created that all run undeferred. */ + task->parent = parent_task; + task->icv = *prev_icv; + task->kind = GOMP_TASK_IMPLICIT; +@@ -71,7 +80,7 @@ gomp_init_task (struct gomp_task *task, + task->final_task = false; + task->copy_ctors_done = false; + task->parent_depends_on = false; +- task->children = NULL; ++ priority_queue_init (&task->children_queue); + task->taskgroup = NULL; + task->dependers = NULL; + task->depend_hash = NULL; +@@ -90,30 +99,194 @@ gomp_end_task (void) + thr->task = task->parent; + } + ++/* Clear the parent field of every task in LIST. */ ++ + static inline void +-gomp_clear_parent (struct gomp_task *children) ++gomp_clear_parent_in_list (struct priority_list *list) + { +- struct gomp_task *task = children; +- +- if (task) ++ struct priority_node *p = list->tasks; ++ if (p) + do + { +- task->parent = NULL; +- task = task->next_child; ++ priority_node_to_task (PQ_CHILDREN, p)->parent = NULL; ++ p = p->next; + } +- while (task != children); ++ while (p != list->tasks); ++} ++ ++/* Splay tree version of gomp_clear_parent_in_list. ++ ++ Clear the parent field of every task in NODE within SP, and free ++ the node when done. */ ++ ++static void ++gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node) ++{ ++ if (!node) ++ return; ++ prio_splay_tree_node left = node->left, right = node->right; ++ gomp_clear_parent_in_list (&node->key.l); ++#if _LIBGOMP_CHECKING_ ++ memset (node, 0xaf, sizeof (*node)); ++#endif ++ /* No need to remove the node from the tree. We're nuking ++ everything, so just free the nodes and our caller can clear the ++ entire splay tree. */ ++ free (node); ++ gomp_clear_parent_in_tree (sp, left); ++ gomp_clear_parent_in_tree (sp, right); ++} ++ ++/* Clear the parent field of every task in Q and remove every task ++ from Q. */ ++ ++static inline void ++gomp_clear_parent (struct priority_queue *q) ++{ ++ if (priority_queue_multi_p (q)) ++ { ++ gomp_clear_parent_in_tree (&q->t, q->t.root); ++ /* All the nodes have been cleared in gomp_clear_parent_in_tree. ++ No need to remove anything. We can just nuke everything. */ ++ q->t.root = NULL; ++ } ++ else ++ gomp_clear_parent_in_list (&q->l); + } + +-static void gomp_task_maybe_wait_for_dependencies (void **depend); ++/* Helper function for GOMP_task and gomp_create_target_task. ++ ++ For a TASK with in/out dependencies, fill in the various dependency ++ queues. PARENT is the parent of said task. DEPEND is as in ++ GOMP_task. */ ++ ++static void ++gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent, ++ void **depend) ++{ ++ size_t ndepend = (uintptr_t) depend[0]; ++ size_t nout = (uintptr_t) depend[1]; ++ size_t i; ++ hash_entry_type ent; ++ ++ task->depend_count = ndepend; ++ task->num_dependees = 0; ++ if (parent->depend_hash == NULL) ++ parent->depend_hash = htab_create (2 * ndepend > 12 ? 
2 * ndepend : 12); ++ for (i = 0; i < ndepend; i++) ++ { ++ task->depend[i].addr = depend[2 + i]; ++ task->depend[i].next = NULL; ++ task->depend[i].prev = NULL; ++ task->depend[i].task = task; ++ task->depend[i].is_in = i >= nout; ++ task->depend[i].redundant = false; ++ task->depend[i].redundant_out = false; ++ ++ hash_entry_type *slot = htab_find_slot (&parent->depend_hash, ++ &task->depend[i], INSERT); ++ hash_entry_type out = NULL, last = NULL; ++ if (*slot) ++ { ++ /* If multiple depends on the same task are the same, all but the ++ first one are redundant. As inout/out come first, if any of them ++ is inout/out, it will win, which is the right semantics. */ ++ if ((*slot)->task == task) ++ { ++ task->depend[i].redundant = true; ++ continue; ++ } ++ for (ent = *slot; ent; ent = ent->next) ++ { ++ if (ent->redundant_out) ++ break; ++ ++ last = ent; ++ ++ /* depend(in:...) doesn't depend on earlier depend(in:...). */ ++ if (i >= nout && ent->is_in) ++ continue; ++ ++ if (!ent->is_in) ++ out = ent; ++ ++ struct gomp_task *tsk = ent->task; ++ if (tsk->dependers == NULL) ++ { ++ tsk->dependers ++ = gomp_malloc (sizeof (struct gomp_dependers_vec) ++ + 6 * sizeof (struct gomp_task *)); ++ tsk->dependers->n_elem = 1; ++ tsk->dependers->allocated = 6; ++ tsk->dependers->elem[0] = task; ++ task->num_dependees++; ++ continue; ++ } ++ /* We already have some other dependency on tsk from earlier ++ depend clause. */ ++ else if (tsk->dependers->n_elem ++ && (tsk->dependers->elem[tsk->dependers->n_elem - 1] ++ == task)) ++ continue; ++ else if (tsk->dependers->n_elem == tsk->dependers->allocated) ++ { ++ tsk->dependers->allocated ++ = tsk->dependers->allocated * 2 + 2; ++ tsk->dependers ++ = gomp_realloc (tsk->dependers, ++ sizeof (struct gomp_dependers_vec) ++ + (tsk->dependers->allocated ++ * sizeof (struct gomp_task *))); ++ } ++ tsk->dependers->elem[tsk->dependers->n_elem++] = task; ++ task->num_dependees++; ++ } ++ task->depend[i].next = *slot; ++ (*slot)->prev = &task->depend[i]; ++ } ++ *slot = &task->depend[i]; ++ ++ /* There is no need to store more than one depend({,in}out:) task per ++ address in the hash table chain for the purpose of creation of ++ deferred tasks, because each out depends on all earlier outs, thus it ++ is enough to record just the last depend({,in}out:). For depend(in:), ++ we need to keep all of the previous ones not terminated yet, because ++ a later depend({,in}out:) might need to depend on all of them. So, if ++ the new task's clause is depend({,in}out:), we know there is at most ++ one other depend({,in}out:) clause in the list (out). For ++ non-deferred tasks we want to see all outs, so they are moved to the ++ end of the chain, after first redundant_out entry all following ++ entries should be redundant_out. */ ++ if (!task->depend[i].is_in && out) ++ { ++ if (out != last) ++ { ++ out->next->prev = out->prev; ++ out->prev->next = out->next; ++ out->next = last->next; ++ out->prev = last; ++ last->next = out; ++ if (out->next) ++ out->next->prev = out; ++ } ++ out->redundant_out = true; ++ } ++ } ++} + + /* Called when encountering an explicit task directive. If IF_CLAUSE is + false, then we must not delay in executing the task. If UNTIED is true, +- then the task may be executed by any member of the team. */ ++ then the task may be executed by any member of the team. ++ ++ DEPEND is an array containing: ++ depend[0]: number of depend elements. ++ depend[1]: number of depend elements of type "out". ++ depend[2..N+1]: address of [1..N]th depend element. 
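++
++   Addresses of the "out"/"inout" elements come first, so e.g.
++
++     #pragma omp task depend(out: x) depend(in: y, z)
++
++   is lowered to depend = { (void *) 3, (void *) 1, &x, &y, &z }.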
*/ + + void + GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), + long arg_size, long arg_align, bool if_clause, unsigned flags, +- void **depend) ++ void **depend, int priority) + { + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; +@@ -125,8 +298,7 @@ GOMP_task (void (*fn) (void *), void *da + might be running on different thread than FN. */ + if (cpyfn) + if_clause = false; +- if (flags & 1) +- flags &= ~1; ++ flags &= ~GOMP_TASK_FLAG_UNTIED; + #endif + + /* If parallel or taskgroup has been cancelled, don't start new tasks. */ +@@ -135,6 +307,11 @@ GOMP_task (void (*fn) (void *), void *da + || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) + return; + ++ if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0) ++ priority = 0; ++ else if (priority > gomp_max_task_priority_var) ++ priority = gomp_max_task_priority_var; ++ + if (!if_clause || team == NULL + || (thr->task && thr->task->final_task) + || team->task_count > 64 * team->nthreads) +@@ -147,12 +324,15 @@ GOMP_task (void (*fn) (void *), void *da + depend clauses for non-deferred tasks other than this, because + the parent task is suspended until the child task finishes and thus + it can't start further child tasks. */ +- if ((flags & 8) && thr->task && thr->task->depend_hash) ++ if ((flags & GOMP_TASK_FLAG_DEPEND) ++ && thr->task && thr->task->depend_hash) + gomp_task_maybe_wait_for_dependencies (depend); + + gomp_init_task (&task, thr->task, gomp_icv (false)); +- task.kind = GOMP_TASK_IFFALSE; +- task.final_task = (thr->task && thr->task->final_task) || (flags & 2); ++ task.kind = GOMP_TASK_UNDEFERRED; ++ task.final_task = (thr->task && thr->task->final_task) ++ || (flags & GOMP_TASK_FLAG_FINAL); ++ task.priority = priority; + if (thr->task) + { + task.in_tied_task = thr->task->in_tied_task; +@@ -178,10 +358,10 @@ GOMP_task (void (*fn) (void *), void *da + child thread, but seeing a stale non-NULL value is not a + problem. Once past the task_lock acquisition, this thread + will see the real value of task.children. */ +- if (task.children != NULL) ++ if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED)) + { + gomp_mutex_lock (&team->task_lock); +- gomp_clear_parent (task.children); ++ gomp_clear_parent (&task.children_queue); + gomp_mutex_unlock (&team->task_lock); + } + gomp_end_task (); +@@ -195,7 +375,7 @@ GOMP_task (void (*fn) (void *), void *da + bool do_wake; + size_t depend_size = 0; + +- if (flags & 8) ++ if (flags & GOMP_TASK_FLAG_DEPEND) + depend_size = ((uintptr_t) depend[0] + * sizeof (struct gomp_task_depend_entry)); + task = gomp_malloc (sizeof (*task) + depend_size +@@ -203,7 +383,8 @@ GOMP_task (void (*fn) (void *), void *da + arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1) + & ~(uintptr_t) (arg_align - 1)); + gomp_init_task (task, parent, gomp_icv (false)); +- task->kind = GOMP_TASK_IFFALSE; ++ task->priority = priority; ++ task->kind = GOMP_TASK_UNDEFERRED; + task->in_tied_task = parent->in_tied_task; + task->taskgroup = taskgroup; + thr->task = task; +@@ -218,7 +399,7 @@ GOMP_task (void (*fn) (void *), void *da + task->kind = GOMP_TASK_WAITING; + task->fn = fn; + task->fn_data = arg; +- task->final_task = (flags & 2) >> 1; ++ task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1; + gomp_mutex_lock (&team->task_lock); + /* If parallel or taskgroup has been cancelled, don't start new + tasks. 
*/ +@@ -235,171 +416,39 @@ GOMP_task (void (*fn) (void *), void *da + taskgroup->num_children++; + if (depend_size) + { +- size_t ndepend = (uintptr_t) depend[0]; +- size_t nout = (uintptr_t) depend[1]; +- size_t i; +- hash_entry_type ent; +- +- task->depend_count = ndepend; +- task->num_dependees = 0; +- if (parent->depend_hash == NULL) +- parent->depend_hash +- = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12); +- for (i = 0; i < ndepend; i++) +- { +- task->depend[i].addr = depend[2 + i]; +- task->depend[i].next = NULL; +- task->depend[i].prev = NULL; +- task->depend[i].task = task; +- task->depend[i].is_in = i >= nout; +- task->depend[i].redundant = false; +- task->depend[i].redundant_out = false; +- +- hash_entry_type *slot +- = htab_find_slot (&parent->depend_hash, &task->depend[i], +- INSERT); +- hash_entry_type out = NULL, last = NULL; +- if (*slot) +- { +- /* If multiple depends on the same task are the +- same, all but the first one are redundant. +- As inout/out come first, if any of them is +- inout/out, it will win, which is the right +- semantics. */ +- if ((*slot)->task == task) +- { +- task->depend[i].redundant = true; +- continue; +- } +- for (ent = *slot; ent; ent = ent->next) +- { +- if (ent->redundant_out) +- break; +- +- last = ent; +- +- /* depend(in:...) doesn't depend on earlier +- depend(in:...). */ +- if (i >= nout && ent->is_in) +- continue; +- +- if (!ent->is_in) +- out = ent; +- +- struct gomp_task *tsk = ent->task; +- if (tsk->dependers == NULL) +- { +- tsk->dependers +- = gomp_malloc (sizeof (struct gomp_dependers_vec) +- + 6 * sizeof (struct gomp_task *)); +- tsk->dependers->n_elem = 1; +- tsk->dependers->allocated = 6; +- tsk->dependers->elem[0] = task; +- task->num_dependees++; +- continue; +- } +- /* We already have some other dependency on tsk +- from earlier depend clause. */ +- else if (tsk->dependers->n_elem +- && (tsk->dependers->elem[tsk->dependers->n_elem +- - 1] +- == task)) +- continue; +- else if (tsk->dependers->n_elem +- == tsk->dependers->allocated) +- { +- tsk->dependers->allocated +- = tsk->dependers->allocated * 2 + 2; +- tsk->dependers +- = gomp_realloc (tsk->dependers, +- sizeof (struct gomp_dependers_vec) +- + (tsk->dependers->allocated +- * sizeof (struct gomp_task *))); +- } +- tsk->dependers->elem[tsk->dependers->n_elem++] = task; +- task->num_dependees++; +- } +- task->depend[i].next = *slot; +- (*slot)->prev = &task->depend[i]; +- } +- *slot = &task->depend[i]; +- +- /* There is no need to store more than one depend({,in}out:) +- task per address in the hash table chain for the purpose +- of creation of deferred tasks, because each out +- depends on all earlier outs, thus it is enough to record +- just the last depend({,in}out:). For depend(in:), we need +- to keep all of the previous ones not terminated yet, because +- a later depend({,in}out:) might need to depend on all of +- them. So, if the new task's clause is depend({,in}out:), +- we know there is at most one other depend({,in}out:) clause +- in the list (out). For non-deferred tasks we want to see +- all outs, so they are moved to the end of the chain, +- after first redundant_out entry all following entries +- should be redundant_out. 
*/ +- if (!task->depend[i].is_in && out) +- { +- if (out != last) +- { +- out->next->prev = out->prev; +- out->prev->next = out->next; +- out->next = last->next; +- out->prev = last; +- last->next = out; +- if (out->next) +- out->next->prev = out; +- } +- out->redundant_out = true; +- } +- } ++ gomp_task_handle_depend (task, parent, depend); + if (task->num_dependees) + { ++ /* Tasks that depend on other tasks are not put into the ++ various waiting queues, so we are done for now. Said ++ tasks are instead put into the queues via ++ gomp_task_run_post_handle_dependers() after their ++ dependencies have been satisfied. After which, they ++ can be picked up by the various scheduling ++ points. */ + gomp_mutex_unlock (&team->task_lock); + return; + } + } +- if (parent->children) +- { +- task->next_child = parent->children; +- task->prev_child = parent->children->prev_child; +- task->next_child->prev_child = task; +- task->prev_child->next_child = task; +- } +- else +- { +- task->next_child = task; +- task->prev_child = task; +- } +- parent->children = task; ++ ++ priority_queue_insert (PQ_CHILDREN, &parent->children_queue, ++ task, priority, ++ PRIORITY_INSERT_BEGIN, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); + if (taskgroup) +- { +- if (taskgroup->children) +- { +- task->next_taskgroup = taskgroup->children; +- task->prev_taskgroup = taskgroup->children->prev_taskgroup; +- task->next_taskgroup->prev_taskgroup = task; +- task->prev_taskgroup->next_taskgroup = task; +- } +- else +- { +- task->next_taskgroup = task; +- task->prev_taskgroup = task; +- } +- taskgroup->children = task; +- } +- if (team->task_queue) +- { +- task->next_queue = team->task_queue; +- task->prev_queue = team->task_queue->prev_queue; +- task->next_queue->prev_queue = task; +- task->prev_queue->next_queue = task; +- } +- else +- { +- task->next_queue = task; +- task->prev_queue = task; +- team->task_queue = task; +- } ++ priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, ++ task, priority, ++ PRIORITY_INSERT_BEGIN, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); ++ ++ priority_queue_insert (PQ_TEAM, &team->task_queue, ++ task, priority, ++ PRIORITY_INSERT_END, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); ++ + ++team->task_count; + ++team->task_queued_count; + gomp_team_barrier_set_task_pending (&team->barrier); +@@ -411,36 +460,529 @@ GOMP_task (void (*fn) (void *), void *da + } + } + +-static inline bool +-gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent, +- struct gomp_taskgroup *taskgroup, struct gomp_team *team) ++ialias (GOMP_taskgroup_start) ++ialias (GOMP_taskgroup_end) ++ ++#define TYPE long ++#define UTYPE unsigned long ++#define TYPE_is_long 1 ++#include "taskloop.c" ++#undef TYPE ++#undef UTYPE ++#undef TYPE_is_long ++ ++#define TYPE unsigned long long ++#define UTYPE TYPE ++#define GOMP_taskloop GOMP_taskloop_ull ++#include "taskloop.c" ++#undef TYPE ++#undef UTYPE ++#undef GOMP_taskloop ++ ++static void inline ++priority_queue_move_task_first (enum priority_queue_type type, ++ struct priority_queue *head, ++ struct gomp_task *task) + { ++#if _LIBGOMP_CHECKING_ ++ if (!priority_queue_task_in_queue_p (type, head, task)) ++ gomp_fatal ("Attempt to move first missing task %p", task); ++#endif ++ struct priority_list *list; ++ if (priority_queue_multi_p (head)) ++ { ++ list = priority_queue_lookup_priority (head, task->priority); ++#if _LIBGOMP_CHECKING_ ++ if (!list) ++ gomp_fatal ("Unable to find priority 
%d", task->priority); ++#endif ++ } ++ else ++ list = &head->l; ++ priority_list_remove (list, task_to_priority_node (type, task), 0); ++ priority_list_insert (type, list, task, task->priority, ++ PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN, ++ task->parent_depends_on); ++} ++ ++/* Actual body of GOMP_PLUGIN_target_task_completion that is executed ++ with team->task_lock held, or is executed in the thread that called ++ gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been ++ run before it acquires team->task_lock. */ ++ ++static void ++gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task) ++{ ++ struct gomp_task *parent = task->parent; + if (parent) ++ priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue, ++ task); ++ ++ struct gomp_taskgroup *taskgroup = task->taskgroup; ++ if (taskgroup) ++ priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue, ++ task); ++ ++ priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority, ++ PRIORITY_INSERT_BEGIN, false, ++ task->parent_depends_on); ++ task->kind = GOMP_TASK_WAITING; ++ if (parent && parent->taskwait) + { +- if (parent->children == child_task) +- parent->children = child_task->next_child; +- if (__builtin_expect (child_task->parent_depends_on, 0) +- && parent->taskwait->last_parent_depends_on == child_task) +- { +- if (child_task->prev_child->kind == GOMP_TASK_WAITING +- && child_task->prev_child->parent_depends_on) +- parent->taskwait->last_parent_depends_on = child_task->prev_child; +- else +- parent->taskwait->last_parent_depends_on = NULL; ++ if (parent->taskwait->in_taskwait) ++ { ++ /* One more task has had its dependencies met. ++ Inform any waiters. */ ++ parent->taskwait->in_taskwait = false; ++ gomp_sem_post (&parent->taskwait->taskwait_sem); + } ++ else if (parent->taskwait->in_depend_wait) ++ { ++ /* One more task has had its dependencies met. ++ Inform any waiters. */ ++ parent->taskwait->in_depend_wait = false; ++ gomp_sem_post (&parent->taskwait->taskwait_sem); ++ } ++ } ++ if (taskgroup && taskgroup->in_taskgroup_wait) ++ { ++ /* One more task has had its dependencies met. ++ Inform any waiters. */ ++ taskgroup->in_taskgroup_wait = false; ++ gomp_sem_post (&taskgroup->taskgroup_sem); + } +- if (taskgroup && taskgroup->children == child_task) +- taskgroup->children = child_task->next_taskgroup; +- child_task->prev_queue->next_queue = child_task->next_queue; +- child_task->next_queue->prev_queue = child_task->prev_queue; +- if (team->task_queue == child_task) ++ ++ ++team->task_queued_count; ++ gomp_team_barrier_set_task_pending (&team->barrier); ++ /* I'm afraid this can't be done after releasing team->task_lock, ++ as gomp_target_task_completion is run from unrelated thread and ++ therefore in between gomp_mutex_unlock and gomp_team_barrier_wake ++ the team could be gone already. */ ++ if (team->nthreads > team->task_running_count) ++ gomp_team_barrier_wake (&team->barrier, 1); ++} ++ ++/* Signal that a target task TTASK has completed the asynchronously ++ running phase and should be requeued as a task to handle the ++ variable unmapping. 
*/ ++ ++void ++GOMP_PLUGIN_target_task_completion (void *data) ++{ ++ struct gomp_target_task *ttask = (struct gomp_target_task *) data; ++ struct gomp_task *task = ttask->task; ++ struct gomp_team *team = ttask->team; ++ ++ gomp_mutex_lock (&team->task_lock); ++ if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN) + { +- if (child_task->next_queue != child_task) +- team->task_queue = child_task->next_queue; ++ ttask->state = GOMP_TARGET_TASK_FINISHED; ++ gomp_mutex_unlock (&team->task_lock); ++ return; ++ } ++ ttask->state = GOMP_TARGET_TASK_FINISHED; ++ gomp_target_task_completion (team, task); ++ gomp_mutex_unlock (&team->task_lock); ++} ++ ++static void gomp_task_run_post_handle_depend_hash (struct gomp_task *); ++ ++/* Called for nowait target tasks. */ ++ ++bool ++gomp_create_target_task (struct gomp_device_descr *devicep, ++ void (*fn) (void *), size_t mapnum, void **hostaddrs, ++ size_t *sizes, unsigned short *kinds, ++ unsigned int flags, void **depend, void **args, ++ enum gomp_target_task_state state) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ struct gomp_team *team = thr->ts.team; ++ ++ /* If parallel or taskgroup has been cancelled, don't start new tasks. */ ++ if (team ++ && (gomp_team_barrier_cancelled (&team->barrier) ++ || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) ++ return true; ++ ++ struct gomp_target_task *ttask; ++ struct gomp_task *task; ++ struct gomp_task *parent = thr->task; ++ struct gomp_taskgroup *taskgroup = parent->taskgroup; ++ bool do_wake; ++ size_t depend_size = 0; ++ uintptr_t depend_cnt = 0; ++ size_t tgt_align = 0, tgt_size = 0; ++ ++ if (depend != NULL) ++ { ++ depend_cnt = (uintptr_t) depend[0]; ++ depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry); ++ } ++ if (fn) ++ { ++ /* GOMP_MAP_FIRSTPRIVATE need to be copied first, as they are ++ firstprivate on the target task. 
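
/* Not part of the patch -- a sketch of the handshake just above, which
   resolves the race between GOMP_PLUGIN_target_task_completion and the
   thread that launched the async target region.  Whichever side takes
   team->task_lock second performs the requeue.  Simplified names.  */
#include <pthread.h>

enum tt_state { READY_TO_RUN, RUNNING, FINISHED };

struct tt
{
  pthread_mutex_t *lock;              /* team->task_lock */
  enum tt_state state;
};

extern void requeue (struct tt *);    /* gomp_target_task_completion role */

/* Async completion callback; may fire at any point.  */
void
plugin_completion_cb (struct tt *t)
{
  pthread_mutex_lock (t->lock);
  if (t->state == READY_TO_RUN)
    t->state = FINISHED;              /* launcher not done yet; it requeues */
  else
    {
      t->state = FINISHED;
      requeue (t);                    /* launcher moved on; we requeue */
    }
  pthread_mutex_unlock (t->lock);
}

/* Launching thread, right after starting the async work.  */
void
launcher_after_start (struct tt *t)
{
  pthread_mutex_lock (t->lock);
  if (t->state == FINISHED)
    requeue (t);                      /* callback already fired */
  else
    t->state = RUNNING;               /* callback will requeue later */
  pthread_mutex_unlock (t->lock);
}
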
*/ ++ size_t i; ++ for (i = 0; i < mapnum; i++) ++ if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) ++ { ++ size_t align = (size_t) 1 << (kinds[i] >> 8); ++ if (tgt_align < align) ++ tgt_align = align; ++ tgt_size = (tgt_size + align - 1) & ~(align - 1); ++ tgt_size += sizes[i]; ++ } ++ if (tgt_align) ++ tgt_size += tgt_align - 1; + else +- team->task_queue = NULL; ++ tgt_size = 0; + } ++ ++ task = gomp_malloc (sizeof (*task) + depend_size ++ + sizeof (*ttask) ++ + mapnum * (sizeof (void *) + sizeof (size_t) ++ + sizeof (unsigned short)) ++ + tgt_size); ++ gomp_init_task (task, parent, gomp_icv (false)); ++ task->priority = 0; ++ task->kind = GOMP_TASK_WAITING; ++ task->in_tied_task = parent->in_tied_task; ++ task->taskgroup = taskgroup; ++ ttask = (struct gomp_target_task *) &task->depend[depend_cnt]; ++ ttask->devicep = devicep; ++ ttask->fn = fn; ++ ttask->mapnum = mapnum; ++ ttask->args = args; ++ memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *)); ++ ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum]; ++ memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t)); ++ ttask->kinds = (unsigned short *) &ttask->sizes[mapnum]; ++ memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short)); ++ if (tgt_align) ++ { ++ char *tgt = (char *) &ttask->kinds[mapnum]; ++ size_t i; ++ uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); ++ if (al) ++ tgt += tgt_align - al; ++ tgt_size = 0; ++ for (i = 0; i < mapnum; i++) ++ if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) ++ { ++ size_t align = (size_t) 1 << (kinds[i] >> 8); ++ tgt_size = (tgt_size + align - 1) & ~(align - 1); ++ memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]); ++ ttask->hostaddrs[i] = tgt + tgt_size; ++ tgt_size = tgt_size + sizes[i]; ++ } ++ } ++ ttask->flags = flags; ++ ttask->state = state; ++ ttask->task = task; ++ ttask->team = team; ++ task->fn = NULL; ++ task->fn_data = ttask; ++ task->final_task = 0; ++ gomp_mutex_lock (&team->task_lock); ++ /* If parallel or taskgroup has been cancelled, don't start new tasks. */ ++ if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier) ++ || (taskgroup && taskgroup->cancelled), 0)) ++ { ++ gomp_mutex_unlock (&team->task_lock); ++ gomp_finish_task (task); ++ free (task); ++ return true; ++ } ++ if (depend_size) ++ { ++ gomp_task_handle_depend (task, parent, depend); ++ if (task->num_dependees) ++ { ++ if (taskgroup) ++ taskgroup->num_children++; ++ gomp_mutex_unlock (&team->task_lock); ++ return true; ++ } ++ } ++ if (state == GOMP_TARGET_TASK_DATA) ++ { ++ gomp_task_run_post_handle_depend_hash (task); ++ gomp_mutex_unlock (&team->task_lock); ++ gomp_finish_task (task); ++ free (task); ++ return false; ++ } ++ if (taskgroup) ++ taskgroup->num_children++; ++ /* For async offloading, if we don't need to wait for dependencies, ++ run the gomp_target_task_fn right away, essentially schedule the ++ mapping part of the task in the current thread. 
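
/* Not part of the patch -- a sketch of the two-pass firstprivate
   packing used above: pass 1 sums worst-case padded sizes and reserves
   align-1 slack for the blob itself; pass 2 (below) aligns the base
   once, then packs each item at its own alignment.  round_up mirrors
   (tgt_size + align - 1) & ~(align - 1).  Hypothetical helper names.  */
#include <stdint.h>
#include <string.h>

static size_t
round_up (size_t v, size_t a)         /* A must be a power of two */
{
  return (v + a - 1) & ~(a - 1);
}

static void
pack_firstprivate (char *base, size_t max_align, void **addrs,
                   const size_t *sizes, const size_t *aligns, size_t n)
{
  uintptr_t mis = (uintptr_t) base & (max_align - 1);
  if (mis)
    base += max_align - mis;          /* align the blob start once */
  size_t off = 0;
  for (size_t i = 0; i < n; i++)
    {
      off = round_up (off, aligns[i]);
      memcpy (base + off, addrs[i], sizes[i]);
      addrs[i] = base + off;          /* repoint at the packed copy */
      off += sizes[i];
    }
}
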
*/ ++ if (devicep != NULL ++ && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) ++ { ++ priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0, ++ PRIORITY_INSERT_END, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); ++ if (taskgroup) ++ priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, ++ task, 0, PRIORITY_INSERT_END, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); ++ task->pnode[PQ_TEAM].next = NULL; ++ task->pnode[PQ_TEAM].prev = NULL; ++ task->kind = GOMP_TASK_TIED; ++ ++team->task_count; ++ gomp_mutex_unlock (&team->task_lock); ++ ++ thr->task = task; ++ gomp_target_task_fn (task->fn_data); ++ thr->task = parent; ++ ++ gomp_mutex_lock (&team->task_lock); ++ task->kind = GOMP_TASK_ASYNC_RUNNING; ++ /* If GOMP_PLUGIN_target_task_completion has run already ++ in between gomp_target_task_fn and the mutex lock, ++ perform the requeuing here. */ ++ if (ttask->state == GOMP_TARGET_TASK_FINISHED) ++ gomp_target_task_completion (team, task); ++ else ++ ttask->state = GOMP_TARGET_TASK_RUNNING; ++ gomp_mutex_unlock (&team->task_lock); ++ return true; ++ } ++ priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0, ++ PRIORITY_INSERT_BEGIN, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); ++ if (taskgroup) ++ priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0, ++ PRIORITY_INSERT_BEGIN, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); ++ priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0, ++ PRIORITY_INSERT_END, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); ++ ++team->task_count; ++ ++team->task_queued_count; ++ gomp_team_barrier_set_task_pending (&team->barrier); ++ do_wake = team->task_running_count + !parent->in_tied_task ++ < team->nthreads; ++ gomp_mutex_unlock (&team->task_lock); ++ if (do_wake) ++ gomp_team_barrier_wake (&team->barrier, 1); ++ return true; ++} ++ ++/* Given a parent_depends_on task in LIST, move it to the front of its ++ priority so it is run as soon as possible. ++ ++ Care is taken to update the list's LAST_PARENT_DEPENDS_ON field. ++ ++ We rearrange the queue such that all parent_depends_on tasks are ++ first, and last_parent_depends_on points to the last such task we ++ rearranged. For example, given the following tasks in a queue ++ where PD[123] are the parent_depends_on tasks: ++ ++ task->children ++ | ++ V ++ C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4 ++ ++ We rearrange such that: ++ ++ task->children ++ | +--- last_parent_depends_on ++ | | ++ V V ++ PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4. */ ++ ++static void inline ++priority_list_upgrade_task (struct priority_list *list, ++ struct priority_node *node) ++{ ++ struct priority_node *last_parent_depends_on ++ = list->last_parent_depends_on; ++ if (last_parent_depends_on) ++ { ++ node->prev->next = node->next; ++ node->next->prev = node->prev; ++ node->prev = last_parent_depends_on; ++ node->next = last_parent_depends_on->next; ++ node->prev->next = node; ++ node->next->prev = node; ++ } ++ else if (node != list->tasks) ++ { ++ node->prev->next = node->next; ++ node->next->prev = node->prev; ++ node->prev = list->tasks->prev; ++ node->next = list->tasks; ++ list->tasks = node; ++ node->prev->next = node; ++ node->next->prev = node; ++ } ++ list->last_parent_depends_on = node; ++} ++ ++/* Given a parent_depends_on TASK in its parent's children_queue, move ++ it to the front of its priority so it is run as soon as possible. 
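
/* Not part of the patch -- a sketch of the splice performed by
   priority_list_upgrade_task above on the circular doubly-linked
   list, producing the PD1 -> PD2 -> PD3 -> C1 ... layout from the
   diagram.  Types and names are stand-ins.  */
struct dnode { struct dnode *prev, *next; };

struct dlist
{
  struct dnode *head;                 /* list->tasks */
  struct dnode *last_upgraded;        /* list->last_parent_depends_on */
};

static void
unlink_node (struct dnode *n)
{
  n->prev->next = n->next;
  n->next->prev = n->prev;
}

static void
link_after (struct dnode *n, struct dnode *pos)
{
  n->prev = pos;
  n->next = pos->next;
  n->prev->next = n;
  n->next->prev = n;
}

static void
upgrade (struct dlist *l, struct dnode *n)
{
  if (l->last_upgraded)
    {
      unlink_node (n);
      link_after (n, l->last_upgraded);   /* append to the upgraded run */
    }
  else if (n != l->head)
    {
      unlink_node (n);
      link_after (n, l->head->prev);      /* re-link before the old head */
      l->head = n;                        /* N becomes the new front */
    }
  l->last_upgraded = n;
}
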
++ ++ PARENT is passed as an optimization. ++ ++ (This function could be defined in priority_queue.c, but we want it ++ inlined, and putting it in priority_queue.h is not an option, given ++ that gomp_task has not been properly defined at that point). */ ++ ++static void inline ++priority_queue_upgrade_task (struct gomp_task *task, ++ struct gomp_task *parent) ++{ ++ struct priority_queue *head = &parent->children_queue; ++ struct priority_node *node = &task->pnode[PQ_CHILDREN]; ++#if _LIBGOMP_CHECKING_ ++ if (!task->parent_depends_on) ++ gomp_fatal ("priority_queue_upgrade_task: task must be a " ++ "parent_depends_on task"); ++ if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task)) ++ gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task); ++#endif ++ if (priority_queue_multi_p (head)) ++ { ++ struct priority_list *list ++ = priority_queue_lookup_priority (head, task->priority); ++ priority_list_upgrade_task (list, node); ++ } ++ else ++ priority_list_upgrade_task (&head->l, node); ++} ++ ++/* Given a CHILD_TASK in LIST that is about to be executed, move it out of ++ the way in LIST so that other tasks can be considered for ++ execution. LIST contains tasks of type TYPE. ++ ++ Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field ++ if applicable. */ ++ ++static void inline ++priority_list_downgrade_task (enum priority_queue_type type, ++ struct priority_list *list, ++ struct gomp_task *child_task) ++{ ++ struct priority_node *node = task_to_priority_node (type, child_task); ++ if (list->tasks == node) ++ list->tasks = node->next; ++ else if (node->next != list->tasks) ++ { ++ /* The task in NODE is about to become TIED and TIED tasks ++ cannot come before WAITING tasks. If we're about to ++ leave the queue in such an indeterminate state, rewire ++ things appropriately. However, a TIED task at the end is ++ perfectly fine. */ ++ struct gomp_task *next_task = priority_node_to_task (type, node->next); ++ if (next_task->kind == GOMP_TASK_WAITING) ++ { ++ /* Remove from list. */ ++ node->prev->next = node->next; ++ node->next->prev = node->prev; ++ /* Rewire at the end. */ ++ node->next = list->tasks; ++ node->prev = list->tasks->prev; ++ list->tasks->prev->next = node; ++ list->tasks->prev = node; ++ } ++ } ++ ++ /* If the current task is the last_parent_depends_on for its ++ priority, adjust last_parent_depends_on appropriately. */ ++ if (__builtin_expect (child_task->parent_depends_on, 0) ++ && list->last_parent_depends_on == node) ++ { ++ struct gomp_task *prev_child = priority_node_to_task (type, node->prev); ++ if (node->prev != node ++ && prev_child->kind == GOMP_TASK_WAITING ++ && prev_child->parent_depends_on) ++ list->last_parent_depends_on = node->prev; ++ else ++ { ++ /* There are no more parent_depends_on entries waiting ++ to run, clear the list. */ ++ list->last_parent_depends_on = NULL; ++ } ++ } ++} ++ ++/* Given a TASK in HEAD that is about to be executed, move it out of ++ the way so that other tasks can be considered for execution. HEAD ++ contains tasks of type TYPE. ++ ++ Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field ++ if applicable. ++ ++ (This function could be defined in priority_queue.c, but we want it ++ inlined, and putting it in priority_queue.h is not an option, given ++ that gomp_task has not been properly defined at that point). 
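
/* Not part of the patch -- the matching "downgrade" move sketched on
   its own: in a circular list the element just before the head is the
   tail, so either stepping the head past N or re-linking N before the
   head parks the about-to-run task at the back.  Stand-in names.  */
#include <stdbool.h>

struct pnode2 { struct pnode2 *prev, *next; };
struct plist2 { struct pnode2 *head; };     /* head == list->tasks */

static void
park_at_back (struct plist2 *l, struct pnode2 *n, bool next_is_waiting)
{
  if (l->head == n)
    l->head = n->next;          /* head advances; N is now the tail */
  else if (n->next != l->head && next_is_waiting)
    {
      n->prev->next = n->next;  /* unlink N ... */
      n->next->prev = n->prev;
      n->prev = l->head->prev;  /* ... and re-link it before the head */
      n->next = l->head;
      n->prev->next = n;
      n->next->prev = n;
    }
}
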
*/
++
++static void inline
++priority_queue_downgrade_task (enum priority_queue_type type,
++                               struct priority_queue *head,
++                               struct gomp_task *task)
++{
++#if _LIBGOMP_CHECKING_
++  if (!priority_queue_task_in_queue_p (type, head, task))
++    gomp_fatal ("Attempt to downgrade missing task %p", task);
++#endif
++  if (priority_queue_multi_p (head))
++    {
++      struct priority_list *list
++        = priority_queue_lookup_priority (head, task->priority);
++      priority_list_downgrade_task (type, list, task);
++    }
++  else
++    priority_list_downgrade_task (type, &head->l, task);
++}
++
++/* Setup CHILD_TASK to execute.  This is done by setting the task to
++   TIED, and updating all relevant queues so that CHILD_TASK is no
++   longer chosen for scheduling.  Also, remove CHILD_TASK from the
++   overall team task queue entirely.
++
++   Return TRUE if task or its containing taskgroup has been
++   cancelled.  */
++
++static inline bool
++gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
++                   struct gomp_team *team)
++{
++#if _LIBGOMP_CHECKING_
++  if (child_task->parent)
++    priority_queue_verify (PQ_CHILDREN,
++                           &child_task->parent->children_queue, true);
++  if (child_task->taskgroup)
++    priority_queue_verify (PQ_TASKGROUP,
++                           &child_task->taskgroup->taskgroup_queue, false);
++  priority_queue_verify (PQ_TEAM, &team->task_queue, false);
++#endif
++
++  /* Task is about to go tied, move it out of the way.  */
++  if (parent)
++    priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
++                                   child_task);
++
++  /* Task is about to go tied, move it out of the way.  */
++  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
++  if (taskgroup)
++    priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
++                                   child_task);
++
++  priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
++                         MEMMODEL_RELAXED);
++  child_task->pnode[PQ_TEAM].next = NULL;
++  child_task->pnode[PQ_TEAM].prev = NULL;
+   child_task->kind = GOMP_TASK_TIED;
++
+   if (--team->task_queued_count == 0)
+     gomp_team_barrier_clear_task_pending (&team->barrier);
+   if ((gomp_team_barrier_cancelled (&team->barrier)
+@@ -478,6 +1020,14 @@ gomp_task_run_post_handle_depend_hash (s
+     }
+ }
+
++/* After a CHILD_TASK has been run, adjust the dependency queue for
++   each task that depends on CHILD_TASK, to record the fact that there
++   is one less dependency to worry about.  If a task that depended on
++   CHILD_TASK now has no dependencies, place it in the various queues
++   so it gets scheduled to run.
++
++   TEAM is the team to which CHILD_TASK belongs.  */
++
+ static size_t
+ gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
+                                      struct gomp_team *team)
+@@ -487,91 +1037,60 @@ gomp_task_run_post_handle_dependers (str
+   for (i = 0; i < count; i++)
+     {
+       struct gomp_task *task = child_task->dependers->elem[i];
++
++      /* CHILD_TASK satisfies a dependency for TASK.  Keep track of
++         TASK's remaining dependencies.  Once TASK has no other
++         dependencies, put it into the various queues so it will get
++         scheduled for execution.  */
+       if (--task->num_dependees != 0)
+         continue;
+
+       struct gomp_taskgroup *taskgroup = task->taskgroup;
+       if (parent)
+         {
+-          if (parent->children)
+-            {
+-              /* If parent is in gomp_task_maybe_wait_for_dependencies
+-                 and it doesn't need to wait for this task, put it after
+-                 all ready to run tasks it needs to wait for.
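
/* Not part of the patch -- the dependence-count bookkeeping driving
   gomp_task_run_post_handle_dependers above, reduced to its core: each
   task counts the tasks it still waits on; a finishing task walks its
   dependers and enqueues every task whose count drops to zero,
   returning how many became runnable so the caller can wake that many
   threads.  enqueue_ready stands in for the priority_queue_insert
   calls; all names are stand-ins.  */
#include <stddef.h>

struct dep_task
{
  size_t num_dependees;         /* tasks we still wait on */
  struct dep_task **dependers;  /* tasks waiting on us */
  size_t n_dependers;
};

extern void enqueue_ready (struct dep_task *);

static size_t
finish_task (struct dep_task *t)
{
  size_t newly_ready = 0;
  for (size_t i = 0; i < t->n_dependers; i++)
    {
      struct dep_task *d = t->dependers[i];
      if (--d->num_dependees != 0)
        continue;               /* D still waits on someone else */
      enqueue_ready (d);
      newly_ready++;
    }
  return newly_ready;
}
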
*/ +- if (parent->taskwait && parent->taskwait->last_parent_depends_on +- && !task->parent_depends_on) +- { +- struct gomp_task *last_parent_depends_on +- = parent->taskwait->last_parent_depends_on; +- task->next_child = last_parent_depends_on->next_child; +- task->prev_child = last_parent_depends_on; +- } +- else +- { +- task->next_child = parent->children; +- task->prev_child = parent->children->prev_child; +- parent->children = task; +- } +- task->next_child->prev_child = task; +- task->prev_child->next_child = task; +- } +- else +- { +- task->next_child = task; +- task->prev_child = task; +- parent->children = task; +- } ++ priority_queue_insert (PQ_CHILDREN, &parent->children_queue, ++ task, task->priority, ++ PRIORITY_INSERT_BEGIN, ++ /*adjust_parent_depends_on=*/true, ++ task->parent_depends_on); + if (parent->taskwait) + { + if (parent->taskwait->in_taskwait) + { ++ /* One more task has had its dependencies met. ++ Inform any waiters. */ + parent->taskwait->in_taskwait = false; + gomp_sem_post (&parent->taskwait->taskwait_sem); + } + else if (parent->taskwait->in_depend_wait) + { ++ /* One more task has had its dependencies met. ++ Inform any waiters. */ + parent->taskwait->in_depend_wait = false; + gomp_sem_post (&parent->taskwait->taskwait_sem); + } +- if (parent->taskwait->last_parent_depends_on == NULL +- && task->parent_depends_on) +- parent->taskwait->last_parent_depends_on = task; + } + } + if (taskgroup) + { +- if (taskgroup->children) +- { +- task->next_taskgroup = taskgroup->children; +- task->prev_taskgroup = taskgroup->children->prev_taskgroup; +- task->next_taskgroup->prev_taskgroup = task; +- task->prev_taskgroup->next_taskgroup = task; +- } +- else +- { +- task->next_taskgroup = task; +- task->prev_taskgroup = task; +- } +- taskgroup->children = task; ++ priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, ++ task, task->priority, ++ PRIORITY_INSERT_BEGIN, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); + if (taskgroup->in_taskgroup_wait) + { ++ /* One more task has had its dependencies met. ++ Inform any waiters. */ + taskgroup->in_taskgroup_wait = false; + gomp_sem_post (&taskgroup->taskgroup_sem); + } + } +- if (team->task_queue) +- { +- task->next_queue = team->task_queue; +- task->prev_queue = team->task_queue->prev_queue; +- task->next_queue->prev_queue = task; +- task->prev_queue->next_queue = task; +- } +- else +- { +- task->next_queue = task; +- task->prev_queue = task; +- team->task_queue = task; +- } ++ priority_queue_insert (PQ_TEAM, &team->task_queue, ++ task, task->priority, ++ PRIORITY_INSERT_END, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); + ++team->task_count; + ++team->task_queued_count; + ++ret; +@@ -601,12 +1120,18 @@ gomp_task_run_post_handle_depend (struct + return gomp_task_run_post_handle_dependers (child_task, team); + } + ++/* Remove CHILD_TASK from its parent. */ ++ + static inline void + gomp_task_run_post_remove_parent (struct gomp_task *child_task) + { + struct gomp_task *parent = child_task->parent; + if (parent == NULL) + return; ++ ++ /* If this was the last task the parent was depending on, ++ synchronize with gomp_task_maybe_wait_for_dependencies so it can ++ clean up and return. 
*/ + if (__builtin_expect (child_task->parent_depends_on, 0) + && --parent->taskwait->n_depend == 0 + && parent->taskwait->in_depend_wait) +@@ -614,36 +1139,31 @@ gomp_task_run_post_remove_parent (struct + parent->taskwait->in_depend_wait = false; + gomp_sem_post (&parent->taskwait->taskwait_sem); + } +- child_task->prev_child->next_child = child_task->next_child; +- child_task->next_child->prev_child = child_task->prev_child; +- if (parent->children != child_task) +- return; +- if (child_task->next_child != child_task) +- parent->children = child_task->next_child; +- else ++ ++ if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue, ++ child_task, MEMMODEL_RELEASE) ++ && parent->taskwait && parent->taskwait->in_taskwait) + { +- /* We access task->children in GOMP_taskwait +- outside of the task lock mutex region, so +- need a release barrier here to ensure memory +- written by child_task->fn above is flushed +- before the NULL is written. */ +- __atomic_store_n (&parent->children, NULL, MEMMODEL_RELEASE); +- if (parent->taskwait && parent->taskwait->in_taskwait) +- { +- parent->taskwait->in_taskwait = false; +- gomp_sem_post (&parent->taskwait->taskwait_sem); +- } ++ parent->taskwait->in_taskwait = false; ++ gomp_sem_post (&parent->taskwait->taskwait_sem); + } ++ child_task->pnode[PQ_CHILDREN].next = NULL; ++ child_task->pnode[PQ_CHILDREN].prev = NULL; + } + ++/* Remove CHILD_TASK from its taskgroup. */ ++ + static inline void + gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task) + { + struct gomp_taskgroup *taskgroup = child_task->taskgroup; + if (taskgroup == NULL) + return; +- child_task->prev_taskgroup->next_taskgroup = child_task->next_taskgroup; +- child_task->next_taskgroup->prev_taskgroup = child_task->prev_taskgroup; ++ bool empty = priority_queue_remove (PQ_TASKGROUP, ++ &taskgroup->taskgroup_queue, ++ child_task, MEMMODEL_RELAXED); ++ child_task->pnode[PQ_TASKGROUP].next = NULL; ++ child_task->pnode[PQ_TASKGROUP].prev = NULL; + if (taskgroup->num_children > 1) + --taskgroup->num_children; + else +@@ -655,18 +1175,10 @@ gomp_task_run_post_remove_taskgroup (str + before the NULL is written. 
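
/* Not part of the patch -- the release/acquire pairing being described
   here, written with C11 atomics rather than the GCC __atomic builtins
   the code uses: the last child publishes "none left" with a release
   store only after the task body's writes, so a thread observing 0
   with an acquire load may read everything the children wrote without
   taking team->task_lock.  Stand-in names.  */
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

static _Atomic size_t num_children;

static void
last_child_done (void)          /* finishing side, lock held */
{
  /* ... writes done by the task body happen-before this store ... */
  atomic_store_explicit (&num_children, 0, memory_order_release);
}

static bool
all_children_done (void)        /* lock-free fast path of the waiter */
{
  return atomic_load_explicit (&num_children, memory_order_acquire) == 0;
}
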
*/ + __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE); + } +- if (taskgroup->children != child_task) +- return; +- if (child_task->next_taskgroup != child_task) +- taskgroup->children = child_task->next_taskgroup; +- else ++ if (empty && taskgroup->in_taskgroup_wait) + { +- taskgroup->children = NULL; +- if (taskgroup->in_taskgroup_wait) +- { +- taskgroup->in_taskgroup_wait = false; +- gomp_sem_post (&taskgroup->taskgroup_sem); +- } ++ taskgroup->in_taskgroup_wait = false; ++ gomp_sem_post (&taskgroup->taskgroup_sem); + } + } + +@@ -696,11 +1208,15 @@ gomp_barrier_handle_tasks (gomp_barrier_ + while (1) + { + bool cancelled = false; +- if (team->task_queue != NULL) ++ if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED)) + { +- child_task = team->task_queue; ++ bool ignored; ++ child_task ++ = priority_queue_next_task (PQ_TEAM, &team->task_queue, ++ PQ_IGNORED, NULL, ++ &ignored); + cancelled = gomp_task_run_pre (child_task, child_task->parent, +- child_task->taskgroup, team); ++ team); + if (__builtin_expect (cancelled, 0)) + { + if (to_free) +@@ -729,7 +1245,29 @@ gomp_barrier_handle_tasks (gomp_barrier_ + if (child_task) + { + thr->task = child_task; +- child_task->fn (child_task->fn_data); ++ if (__builtin_expect (child_task->fn == NULL, 0)) ++ { ++ if (gomp_target_task_fn (child_task->fn_data)) ++ { ++ thr->task = task; ++ gomp_mutex_lock (&team->task_lock); ++ child_task->kind = GOMP_TASK_ASYNC_RUNNING; ++ team->task_running_count--; ++ struct gomp_target_task *ttask ++ = (struct gomp_target_task *) child_task->fn_data; ++ /* If GOMP_PLUGIN_target_task_completion has run already ++ in between gomp_target_task_fn and the mutex lock, ++ perform the requeuing here. */ ++ if (ttask->state == GOMP_TARGET_TASK_FINISHED) ++ gomp_target_task_completion (team, child_task); ++ else ++ ttask->state = GOMP_TARGET_TASK_RUNNING; ++ child_task = NULL; ++ continue; ++ } ++ } ++ else ++ child_task->fn (child_task->fn_data); + thr->task = task; + } + else +@@ -741,7 +1279,7 @@ gomp_barrier_handle_tasks (gomp_barrier_ + size_t new_tasks + = gomp_task_run_post_handle_depend (child_task, team); + gomp_task_run_post_remove_parent (child_task); +- gomp_clear_parent (child_task->children); ++ gomp_clear_parent (&child_task->children_queue); + gomp_task_run_post_remove_taskgroup (child_task); + to_free = child_task; + child_task = NULL; +@@ -765,7 +1303,9 @@ gomp_barrier_handle_tasks (gomp_barrier_ + } + } + +-/* Called when encountering a taskwait directive. */ ++/* Called when encountering a taskwait directive. ++ ++ Wait for all children of the current task. */ + + void + GOMP_taskwait (void) +@@ -785,15 +1325,16 @@ GOMP_taskwait (void) + child thread task work function are seen before we exit from + GOMP_taskwait. 
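
/* Not part of the patch -- the scheduling-loop convention visible
   above: a NULL fn marks a target task whose gomp_target_task lives in
   fn_data, and a "true" return from the async launch parks the task as
   ASYNC_RUNNING instead of finishing it; the plugin callback requeues
   it later for the unmapping.  Hypothetical simplified types.  */
#include <stdbool.h>
#include <stddef.h>

enum tkind { T_WAITING, T_TIED, T_ASYNC_RUNNING };

struct sched_task
{
  void (*fn) (void *);          /* NULL => target task */
  void *fn_data;
  enum tkind kind;
};

extern bool target_task_fn (void *);        /* true: async in flight */
extern void post_process (struct sched_task *);

static void
run_one (struct sched_task *t)
{
  if (t->fn == NULL)
    {
      if (target_task_fn (t->fn_data))
        {
          t->kind = T_ASYNC_RUNNING;        /* park it; don't free */
          return;
        }
    }
  else
    t->fn (t->fn_data);
  post_process (t);             /* handle dependers, free, etc. */
}
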
*/ + if (task == NULL +- || __atomic_load_n (&task->children, MEMMODEL_ACQUIRE) == NULL) ++ || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE)) + return; + + memset (&taskwait, 0, sizeof (taskwait)); ++ bool child_q = false; + gomp_mutex_lock (&team->task_lock); + while (1) + { + bool cancelled = false; +- if (task->children == NULL) ++ if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED)) + { + bool destroy_taskwait = task->taskwait != NULL; + task->taskwait = NULL; +@@ -807,12 +1348,14 @@ GOMP_taskwait (void) + gomp_sem_destroy (&taskwait.taskwait_sem); + return; + } +- if (task->children->kind == GOMP_TASK_WAITING) ++ struct gomp_task *next_task ++ = priority_queue_next_task (PQ_CHILDREN, &task->children_queue, ++ PQ_TEAM, &team->task_queue, &child_q); ++ if (next_task->kind == GOMP_TASK_WAITING) + { +- child_task = task->children; ++ child_task = next_task; + cancelled +- = gomp_task_run_pre (child_task, task, child_task->taskgroup, +- team); ++ = gomp_task_run_pre (child_task, task, team); + if (__builtin_expect (cancelled, 0)) + { + if (to_free) +@@ -826,8 +1369,10 @@ GOMP_taskwait (void) + } + else + { +- /* All tasks we are waiting for are already running +- in other threads. Wait for them. */ ++ /* All tasks we are waiting for are either running in other ++ threads, or they are tasks that have not had their ++ dependencies met (so they're not even in the queue). Wait ++ for them. */ + if (task->taskwait == NULL) + { + taskwait.in_depend_wait = false; +@@ -851,7 +1396,28 @@ GOMP_taskwait (void) + if (child_task) + { + thr->task = child_task; +- child_task->fn (child_task->fn_data); ++ if (__builtin_expect (child_task->fn == NULL, 0)) ++ { ++ if (gomp_target_task_fn (child_task->fn_data)) ++ { ++ thr->task = task; ++ gomp_mutex_lock (&team->task_lock); ++ child_task->kind = GOMP_TASK_ASYNC_RUNNING; ++ struct gomp_target_task *ttask ++ = (struct gomp_target_task *) child_task->fn_data; ++ /* If GOMP_PLUGIN_target_task_completion has run already ++ in between gomp_target_task_fn and the mutex lock, ++ perform the requeuing here. */ ++ if (ttask->state == GOMP_TARGET_TASK_FINISHED) ++ gomp_target_task_completion (team, child_task); ++ else ++ ttask->state = GOMP_TARGET_TASK_RUNNING; ++ child_task = NULL; ++ continue; ++ } ++ } ++ else ++ child_task->fn (child_task->fn_data); + thr->task = task; + } + else +@@ -862,17 +1428,19 @@ GOMP_taskwait (void) + finish_cancelled:; + size_t new_tasks + = gomp_task_run_post_handle_depend (child_task, team); +- child_task->prev_child->next_child = child_task->next_child; +- child_task->next_child->prev_child = child_task->prev_child; +- if (task->children == child_task) +- { +- if (child_task->next_child != child_task) +- task->children = child_task->next_child; +- else +- task->children = NULL; ++ ++ if (child_q) ++ { ++ priority_queue_remove (PQ_CHILDREN, &task->children_queue, ++ child_task, MEMMODEL_RELAXED); ++ child_task->pnode[PQ_CHILDREN].next = NULL; ++ child_task->pnode[PQ_CHILDREN].prev = NULL; + } +- gomp_clear_parent (child_task->children); ++ ++ gomp_clear_parent (&child_task->children_queue); ++ + gomp_task_run_post_remove_taskgroup (child_task); ++ + to_free = child_task; + child_task = NULL; + team->task_count--; +@@ -887,10 +1455,20 @@ GOMP_taskwait (void) + } + } + +-/* This is like GOMP_taskwait, but we only wait for tasks that the +- upcoming task depends on. */ ++/* An undeferred task is about to run. Wait for all tasks that this ++ undeferred task depends on. 
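
/* Not part of the patch -- a simplified model of the queue choice
   priority_queue_next_task makes for GOMP_taskwait above: pick the
   more urgent of the two queue heads and report which queue won (the
   CHILD_Q flag), so the caller later removes the task from the right
   queue.  The real lookup also walks per-priority lists; this
   collapses that detail.  */
#include <stdbool.h>
#include <stddef.h>

struct qtask { int priority; };
struct queue { struct qtask *head; size_t len; };

static struct qtask *
next_task (const struct queue *children, const struct queue *team,
           bool *from_children)
{
  struct qtask *c = children->len ? children->head : NULL;
  struct qtask *t = (team && team->len) ? team->head : NULL;
  if (c && (!t || c->priority >= t->priority))
    {
      *from_children = true;
      return c;
    }
  *from_children = false;
  return t;
}
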
+
+-static void
++   This is done by first putting all known ready dependencies
++   (dependencies that have their own dependencies met) at the top of
++   the scheduling queues.  Then we iterate through these imminently
++   ready tasks (and possibly other high priority tasks), and run them.
++   If we run out of ready dependencies to execute, we either wait for
++   the remaining dependencies to finish, or wait for them to get
++   scheduled so we can run them.
++
++   DEPEND is as in GOMP_task.  */
++
++void
+ gomp_task_maybe_wait_for_dependencies (void **depend)
+ {
+   struct gomp_thread *thr = gomp_thread ();
+@@ -898,7 +1476,6 @@ gomp_task_maybe_wait_for_dependencies (v
+   struct gomp_team *team = thr->ts.team;
+   struct gomp_task_depend_entry elem, *ent = NULL;
+   struct gomp_taskwait taskwait;
+-  struct gomp_task *last_parent_depends_on = NULL;
+   size_t ndepend = (uintptr_t) depend[0];
+   size_t nout = (uintptr_t) depend[1];
+   size_t i;
+@@ -922,32 +1499,11 @@ gomp_task_maybe_wait_for_dependencies (v
+           {
+             tsk->parent_depends_on = true;
+             ++num_awaited;
++            /* If dependency TSK itself has no dependencies and is
++               ready to run, move it up front so that we run it as
++               soon as possible.  */
+             if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
+-              {
+-                /* If a task we need to wait for is not already
+-                   running and is ready to be scheduled, move it
+-                   to front, so that we run it as soon as possible.  */
+-                if (last_parent_depends_on)
+-                  {
+-                    tsk->prev_child->next_child = tsk->next_child;
+-                    tsk->next_child->prev_child = tsk->prev_child;
+-                    tsk->prev_child = last_parent_depends_on;
+-                    tsk->next_child = last_parent_depends_on->next_child;
+-                    tsk->prev_child->next_child = tsk;
+-                    tsk->next_child->prev_child = tsk;
+-                  }
+-                else if (tsk != task->children)
+-                  {
+-                    tsk->prev_child->next_child = tsk->next_child;
+-                    tsk->next_child->prev_child = tsk->prev_child;
+-                    tsk->prev_child = task->children;
+-                    tsk->next_child = task->children->next_child;
+-                    task->children = tsk;
+-                    tsk->prev_child->next_child = tsk;
+-                    tsk->next_child->prev_child = tsk;
+-                  }
+-                last_parent_depends_on = tsk;
+-              }
++              priority_queue_upgrade_task (tsk, task);
+           }
+       }
+   }
+@@ -959,7 +1515,6 @@ gomp_task_maybe_wait_for_dependencies (v
+
+   memset (&taskwait, 0, sizeof (taskwait));
+   taskwait.n_depend = num_awaited;
+-  taskwait.last_parent_depends_on = last_parent_depends_on;
+   gomp_sem_init (&taskwait.taskwait_sem, 0);
+   task->taskwait = &taskwait;
+
+@@ -978,12 +1533,30 @@ gomp_task_maybe_wait_for_dependencies (v
+       gomp_sem_destroy (&taskwait.taskwait_sem);
+       return;
+     }
+-  if (task->children->kind == GOMP_TASK_WAITING)
++
++  /* Theoretically when we have multiple priorities, we should
++     choose between the highest priority item in
++     task->children_queue and team->task_queue here, so we should
++     use priority_queue_next_task().  However, since we are
++     running an undeferred task, perhaps that makes all tasks it
++     depends on undeferred, thus a priority of INF?  This would
++     make it unnecessary to take anything into account here,
++     but the dependencies.
++
++     On the other hand, if we want to use priority_queue_next_task(),
++     care should be taken to only use priority_queue_remove()
++     below if the task was actually removed from the children
++     queue.
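
/* Not part of the patch -- the layout of the DEPEND vector decoded
   above: depend[0] is the number of depend clauses, depend[1] how many
   of them are out/inout, and the addresses follow with the out/inout
   entries first.  The printf walk is purely illustrative.  */
#include <stdint.h>
#include <stdio.h>

static void
walk_depend (void **depend)
{
  size_t ndepend = (uintptr_t) depend[0];
  size_t nout = (uintptr_t) depend[1];
  for (size_t i = 0; i < ndepend; i++)
    printf ("dep %zu: addr %p (%s)\n", i, depend[i + 2],
            i < nout ? "out/inout" : "in");
}
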
*/ ++ bool ignored; ++ struct gomp_task *next_task ++ = priority_queue_next_task (PQ_CHILDREN, &task->children_queue, ++ PQ_IGNORED, NULL, &ignored); ++ ++ if (next_task->kind == GOMP_TASK_WAITING) + { +- child_task = task->children; ++ child_task = next_task; + cancelled +- = gomp_task_run_pre (child_task, task, child_task->taskgroup, +- team); ++ = gomp_task_run_pre (child_task, task, team); + if (__builtin_expect (cancelled, 0)) + { + if (to_free) +@@ -996,8 +1569,10 @@ gomp_task_maybe_wait_for_dependencies (v + } + } + else +- /* All tasks we are waiting for are already running +- in other threads. Wait for them. */ ++ /* All tasks we are waiting for are either running in other ++ threads, or they are tasks that have not had their ++ dependencies met (so they're not even in the queue). Wait ++ for them. */ + taskwait.in_depend_wait = true; + gomp_mutex_unlock (&team->task_lock); + if (do_wake) +@@ -1014,7 +1589,28 @@ gomp_task_maybe_wait_for_dependencies (v + if (child_task) + { + thr->task = child_task; +- child_task->fn (child_task->fn_data); ++ if (__builtin_expect (child_task->fn == NULL, 0)) ++ { ++ if (gomp_target_task_fn (child_task->fn_data)) ++ { ++ thr->task = task; ++ gomp_mutex_lock (&team->task_lock); ++ child_task->kind = GOMP_TASK_ASYNC_RUNNING; ++ struct gomp_target_task *ttask ++ = (struct gomp_target_task *) child_task->fn_data; ++ /* If GOMP_PLUGIN_target_task_completion has run already ++ in between gomp_target_task_fn and the mutex lock, ++ perform the requeuing here. */ ++ if (ttask->state == GOMP_TARGET_TASK_FINISHED) ++ gomp_target_task_completion (team, child_task); ++ else ++ ttask->state = GOMP_TARGET_TASK_RUNNING; ++ child_task = NULL; ++ continue; ++ } ++ } ++ else ++ child_task->fn (child_task->fn_data); + thr->task = task; + } + else +@@ -1027,16 +1623,13 @@ gomp_task_maybe_wait_for_dependencies (v + = gomp_task_run_post_handle_depend (child_task, team); + if (child_task->parent_depends_on) + --taskwait.n_depend; +- child_task->prev_child->next_child = child_task->next_child; +- child_task->next_child->prev_child = child_task->prev_child; +- if (task->children == child_task) +- { +- if (child_task->next_child != child_task) +- task->children = child_task->next_child; +- else +- task->children = NULL; +- } +- gomp_clear_parent (child_task->children); ++ ++ priority_queue_remove (PQ_CHILDREN, &task->children_queue, ++ child_task, MEMMODEL_RELAXED); ++ child_task->pnode[PQ_CHILDREN].next = NULL; ++ child_task->pnode[PQ_CHILDREN].prev = NULL; ++ ++ gomp_clear_parent (&child_task->children_queue); + gomp_task_run_post_remove_taskgroup (child_task); + to_free = child_task; + child_task = NULL; +@@ -1069,14 +1662,14 @@ GOMP_taskgroup_start (void) + struct gomp_taskgroup *taskgroup; + + /* If team is NULL, all tasks are executed as +- GOMP_TASK_IFFALSE tasks and thus all children tasks of ++ GOMP_TASK_UNDEFERRED tasks and thus all children tasks of + taskgroup and their descendant tasks will be finished + by the time GOMP_taskgroup_end is called. 
*/ + if (team == NULL) + return; + taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup)); + taskgroup->prev = task->taskgroup; +- taskgroup->children = NULL; ++ priority_queue_init (&taskgroup->taskgroup_queue); + taskgroup->in_taskgroup_wait = false; + taskgroup->cancelled = false; + taskgroup->num_children = 0; +@@ -1098,6 +1691,17 @@ GOMP_taskgroup_end (void) + if (team == NULL) + return; + taskgroup = task->taskgroup; ++ if (__builtin_expect (taskgroup == NULL, 0) ++ && thr->ts.level == 0) ++ { ++ /* This can happen if GOMP_taskgroup_start is called when ++ thr->ts.team == NULL, but inside of the taskgroup there ++ is #pragma omp target nowait that creates an implicit ++ team with a single thread. In this case, we want to wait ++ for all outstanding tasks in this team. */ ++ gomp_team_barrier_wait (&team->barrier); ++ return; ++ } + + /* The acquire barrier on load of taskgroup->num_children here + synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup. +@@ -1108,19 +1712,25 @@ GOMP_taskgroup_end (void) + if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0) + goto finish; + ++ bool unused; + gomp_mutex_lock (&team->task_lock); + while (1) + { + bool cancelled = false; +- if (taskgroup->children == NULL) ++ if (priority_queue_empty_p (&taskgroup->taskgroup_queue, ++ MEMMODEL_RELAXED)) + { + if (taskgroup->num_children) + { +- if (task->children == NULL) ++ if (priority_queue_empty_p (&task->children_queue, ++ MEMMODEL_RELAXED)) + goto do_wait; +- child_task = task->children; +- } +- else ++ child_task ++ = priority_queue_next_task (PQ_CHILDREN, &task->children_queue, ++ PQ_TEAM, &team->task_queue, ++ &unused); ++ } ++ else + { + gomp_mutex_unlock (&team->task_lock); + if (to_free) +@@ -1132,12 +1742,13 @@ GOMP_taskgroup_end (void) + } + } + else +- child_task = taskgroup->children; ++ child_task ++ = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue, ++ PQ_TEAM, &team->task_queue, &unused); + if (child_task->kind == GOMP_TASK_WAITING) + { + cancelled +- = gomp_task_run_pre (child_task, child_task->parent, taskgroup, +- team); ++ = gomp_task_run_pre (child_task, child_task->parent, team); + if (__builtin_expect (cancelled, 0)) + { + if (to_free) +@@ -1153,8 +1764,10 @@ GOMP_taskgroup_end (void) + { + child_task = NULL; + do_wait: +- /* All tasks we are waiting for are already running +- in other threads. Wait for them. */ ++ /* All tasks we are waiting for are either running in other ++ threads, or they are tasks that have not had their ++ dependencies met (so they're not even in the queue). Wait ++ for them. */ + taskgroup->in_taskgroup_wait = true; + } + gomp_mutex_unlock (&team->task_lock); +@@ -1172,7 +1785,28 @@ GOMP_taskgroup_end (void) + if (child_task) + { + thr->task = child_task; +- child_task->fn (child_task->fn_data); ++ if (__builtin_expect (child_task->fn == NULL, 0)) ++ { ++ if (gomp_target_task_fn (child_task->fn_data)) ++ { ++ thr->task = task; ++ gomp_mutex_lock (&team->task_lock); ++ child_task->kind = GOMP_TASK_ASYNC_RUNNING; ++ struct gomp_target_task *ttask ++ = (struct gomp_target_task *) child_task->fn_data; ++ /* If GOMP_PLUGIN_target_task_completion has run already ++ in between gomp_target_task_fn and the mutex lock, ++ perform the requeuing here. 
*/ ++ if (ttask->state == GOMP_TARGET_TASK_FINISHED) ++ gomp_target_task_completion (team, child_task); ++ else ++ ttask->state = GOMP_TARGET_TASK_RUNNING; ++ child_task = NULL; ++ continue; ++ } ++ } ++ else ++ child_task->fn (child_task->fn_data); + thr->task = task; + } + else +@@ -1184,7 +1818,7 @@ GOMP_taskgroup_end (void) + size_t new_tasks + = gomp_task_run_post_handle_depend (child_task, team); + gomp_task_run_post_remove_parent (child_task); +- gomp_clear_parent (child_task->children); ++ gomp_clear_parent (&child_task->children_queue); + gomp_task_run_post_remove_taskgroup (child_task); + to_free = child_task; + child_task = NULL; +--- libgomp/libgomp_g.h.jj 2014-05-15 10:56:31.429532978 +0200 ++++ libgomp/libgomp_g.h 2016-07-13 16:57:04.422535521 +0200 +@@ -29,6 +29,7 @@ + #define LIBGOMP_G_H 1 + + #include ++#include + + /* barrier.c */ + +@@ -50,6 +51,10 @@ extern bool GOMP_loop_static_start (long + extern bool GOMP_loop_dynamic_start (long, long, long, long, long *, long *); + extern bool GOMP_loop_guided_start (long, long, long, long, long *, long *); + extern bool GOMP_loop_runtime_start (long, long, long, long *, long *); ++extern bool GOMP_loop_nonmonotonic_dynamic_start (long, long, long, long, ++ long *, long *); ++extern bool GOMP_loop_nonmonotonic_guided_start (long, long, long, long, ++ long *, long *); + + extern bool GOMP_loop_ordered_static_start (long, long, long, long, + long *, long *); +@@ -63,12 +68,23 @@ extern bool GOMP_loop_static_next (long + extern bool GOMP_loop_dynamic_next (long *, long *); + extern bool GOMP_loop_guided_next (long *, long *); + extern bool GOMP_loop_runtime_next (long *, long *); ++extern bool GOMP_loop_nonmonotonic_dynamic_next (long *, long *); ++extern bool GOMP_loop_nonmonotonic_guided_next (long *, long *); + + extern bool GOMP_loop_ordered_static_next (long *, long *); + extern bool GOMP_loop_ordered_dynamic_next (long *, long *); + extern bool GOMP_loop_ordered_guided_next (long *, long *); + extern bool GOMP_loop_ordered_runtime_next (long *, long *); + ++extern bool GOMP_loop_doacross_static_start (unsigned, long *, long, long *, ++ long *); ++extern bool GOMP_loop_doacross_dynamic_start (unsigned, long *, long, long *, ++ long *); ++extern bool GOMP_loop_doacross_guided_start (unsigned, long *, long, long *, ++ long *); ++extern bool GOMP_loop_doacross_runtime_start (unsigned, long *, long *, ++ long *); ++ + extern void GOMP_parallel_loop_static_start (void (*)(void *), void *, + unsigned, long, long, long, long); + extern void GOMP_parallel_loop_dynamic_start (void (*)(void *), void *, +@@ -89,6 +105,12 @@ extern void GOMP_parallel_loop_guided (v + extern void GOMP_parallel_loop_runtime (void (*)(void *), void *, + unsigned, long, long, long, + unsigned); ++extern void GOMP_parallel_loop_nonmonotonic_dynamic (void (*)(void *), void *, ++ unsigned, long, long, ++ long, long, unsigned); ++extern void GOMP_parallel_loop_nonmonotonic_guided (void (*)(void *), void *, ++ unsigned, long, long, ++ long, long, unsigned); + + extern void GOMP_loop_end (void); + extern void GOMP_loop_end_nowait (void); +@@ -119,6 +141,18 @@ extern bool GOMP_loop_ull_runtime_start + unsigned long long, + unsigned long long *, + unsigned long long *); ++extern bool GOMP_loop_ull_nonmonotonic_dynamic_start (bool, unsigned long long, ++ unsigned long long, ++ unsigned long long, ++ unsigned long long, ++ unsigned long long *, ++ unsigned long long *); ++extern bool GOMP_loop_ull_nonmonotonic_guided_start (bool, unsigned long long, ++ unsigned long 
long, ++ unsigned long long, ++ unsigned long long, ++ unsigned long long *, ++ unsigned long long *); + + extern bool GOMP_loop_ull_ordered_static_start (bool, unsigned long long, + unsigned long long, +@@ -152,6 +186,10 @@ extern bool GOMP_loop_ull_guided_next (u + unsigned long long *); + extern bool GOMP_loop_ull_runtime_next (unsigned long long *, + unsigned long long *); ++extern bool GOMP_loop_ull_nonmonotonic_dynamic_next (unsigned long long *, ++ unsigned long long *); ++extern bool GOMP_loop_ull_nonmonotonic_guided_next (unsigned long long *, ++ unsigned long long *); + + extern bool GOMP_loop_ull_ordered_static_next (unsigned long long *, + unsigned long long *); +@@ -162,10 +200,34 @@ extern bool GOMP_loop_ull_ordered_guided + extern bool GOMP_loop_ull_ordered_runtime_next (unsigned long long *, + unsigned long long *); + ++extern bool GOMP_loop_ull_doacross_static_start (unsigned, ++ unsigned long long *, ++ unsigned long long, ++ unsigned long long *, ++ unsigned long long *); ++extern bool GOMP_loop_ull_doacross_dynamic_start (unsigned, ++ unsigned long long *, ++ unsigned long long, ++ unsigned long long *, ++ unsigned long long *); ++extern bool GOMP_loop_ull_doacross_guided_start (unsigned, ++ unsigned long long *, ++ unsigned long long, ++ unsigned long long *, ++ unsigned long long *); ++extern bool GOMP_loop_ull_doacross_runtime_start (unsigned, ++ unsigned long long *, ++ unsigned long long *, ++ unsigned long long *); ++ + /* ordered.c */ + + extern void GOMP_ordered_start (void); + extern void GOMP_ordered_end (void); ++extern void GOMP_doacross_post (long *); ++extern void GOMP_doacross_wait (long, ...); ++extern void GOMP_doacross_ull_post (unsigned long long *); ++extern void GOMP_doacross_ull_wait (unsigned long long, ...); + + /* parallel.c */ + +@@ -178,7 +240,15 @@ extern bool GOMP_cancellation_point (int + /* task.c */ + + extern void GOMP_task (void (*) (void *), void *, void (*) (void *, void *), +- long, long, bool, unsigned, void **); ++ long, long, bool, unsigned, void **, int); ++extern void GOMP_taskloop (void (*) (void *), void *, ++ void (*) (void *, void *), long, long, unsigned, ++ unsigned long, int, long, long, long); ++extern void GOMP_taskloop_ull (void (*) (void *), void *, ++ void (*) (void *, void *), long, long, ++ unsigned, unsigned long, int, ++ unsigned long long, unsigned long long, ++ unsigned long long); + extern void GOMP_taskwait (void); + extern void GOMP_taskyield (void); + extern void GOMP_taskgroup_start (void); +@@ -206,11 +276,38 @@ extern void GOMP_single_copy_end (void * + + extern void GOMP_target (int, void (*) (void *), const void *, + size_t, void **, size_t *, unsigned char *); ++extern void GOMP_target_ext (int, void (*) (void *), size_t, void **, size_t *, ++ unsigned short *, unsigned int, void **, void **); + extern void GOMP_target_data (int, const void *, + size_t, void **, size_t *, unsigned char *); ++extern void GOMP_target_data_ext (int, size_t, void **, size_t *, ++ unsigned short *); + extern void GOMP_target_end_data (void); + extern void GOMP_target_update (int, const void *, + size_t, void **, size_t *, unsigned char *); ++extern void GOMP_target_update_ext (int, size_t, void **, size_t *, ++ unsigned short *, unsigned int, void **); ++extern void GOMP_target_enter_exit_data (int, size_t, void **, size_t *, ++ unsigned short *, unsigned int, ++ void **); + extern void GOMP_teams (unsigned int, unsigned int); + ++/* oacc-parallel.c */ ++ ++extern void GOACC_parallel_keyed (int, void (*) (void *), size_t, 
++ void **, size_t *, unsigned short *, ...); ++extern void GOACC_parallel (int, void (*) (void *), size_t, void **, size_t *, ++ unsigned short *, int, int, int, int, int, ...); ++extern void GOACC_data_start (int, size_t, void **, size_t *, ++ unsigned short *); ++extern void GOACC_data_end (void); ++extern void GOACC_enter_exit_data (int, size_t, void **, ++ size_t *, unsigned short *, int, int, ...); ++extern void GOACC_update (int, size_t, void **, size_t *, ++ unsigned short *, int, int, ...); ++extern void GOACC_wait (int, int, ...); ++extern int GOACC_get_num_threads (void); ++extern int GOACC_get_thread_num (void); ++extern void GOACC_declare (int, size_t, void **, size_t *, unsigned short *); ++ + #endif /* LIBGOMP_G_H */ +--- libgomp/libgomp.h.jj 2014-08-01 15:59:49.145188127 +0200 ++++ libgomp/libgomp.h 2016-07-14 17:40:24.038243456 +0200 +@@ -34,12 +34,35 @@ + #ifndef LIBGOMP_H + #define LIBGOMP_H 1 + ++#ifndef _LIBGOMP_CHECKING_ ++/* Define to 1 to perform internal sanity checks. */ ++#define _LIBGOMP_CHECKING_ 0 ++#endif ++ + #include "config.h" + #include "gstdint.h" ++#include "libgomp-plugin.h" + + #include + #include + #include ++#include ++ ++/* Needed for memset in priority_queue.c. */ ++#if _LIBGOMP_CHECKING_ ++# ifdef STRING_WITH_STRINGS ++# include ++# include ++# else ++# ifdef HAVE_STRING_H ++# include ++# else ++# ifdef HAVE_STRINGS_H ++# include ++# endif ++# endif ++# endif ++#endif + + #ifdef HAVE_ATTRIBUTE_VISIBILITY + # pragma GCC visibility push(hidden) +@@ -56,6 +79,44 @@ enum memmodel + MEMMODEL_SEQ_CST = 5 + }; + ++/* alloc.c */ ++ ++extern void *gomp_malloc (size_t) __attribute__((malloc)); ++extern void *gomp_malloc_cleared (size_t) __attribute__((malloc)); ++extern void *gomp_realloc (void *, size_t); ++ ++/* Avoid conflicting prototypes of alloca() in system headers by using ++ GCC's builtin alloca(). */ ++#define gomp_alloca(x) __builtin_alloca(x) ++ ++/* error.c */ ++ ++extern void gomp_vdebug (int, const char *, va_list); ++extern void gomp_debug (int, const char *, ...) ++ __attribute__ ((format (printf, 2, 3))); ++#define gomp_vdebug(KIND, FMT, VALIST) \ ++ do { \ ++ if (__builtin_expect (gomp_debug_var, 0)) \ ++ (gomp_vdebug) ((KIND), (FMT), (VALIST)); \ ++ } while (0) ++#define gomp_debug(KIND, ...) \ ++ do { \ ++ if (__builtin_expect (gomp_debug_var, 0)) \ ++ (gomp_debug) ((KIND), __VA_ARGS__); \ ++ } while (0) ++extern void gomp_verror (const char *, va_list); ++extern void gomp_error (const char *, ...) ++ __attribute__ ((format (printf, 1, 2))); ++extern void gomp_vfatal (const char *, va_list) ++ __attribute__ ((noreturn)); ++extern void gomp_fatal (const char *, ...) ++ __attribute__ ((noreturn, format (printf, 1, 2))); ++ ++struct gomp_task; ++struct gomp_taskgroup; ++struct htab; ++ ++#include "priority_queue.h" + #include "sem.h" + #include "mutex.h" + #include "bar.h" +@@ -74,6 +135,44 @@ enum gomp_schedule_type + GFS_AUTO + }; + ++struct gomp_doacross_work_share ++{ ++ union { ++ /* chunk_size copy, as ws->chunk_size is multiplied by incr for ++ GFS_DYNAMIC. */ ++ long chunk_size; ++ /* Likewise, but for ull implementation. */ ++ unsigned long long chunk_size_ull; ++ /* For schedule(static,0) this is the number ++ of iterations assigned to the last thread, i.e. number of ++ iterations / number of threads. */ ++ long q; ++ /* Likewise, but for ull implementation. */ ++ unsigned long long q_ull; ++ }; ++ /* Size of each array entry (padded to cache line size). */ ++ unsigned long elt_sz; ++ /* Number of dimensions in sink vectors. 
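
/* Not part of the patch -- the macro-over-function idiom used for
   gomp_debug/gomp_vdebug above, shown standalone: the macro makes
   every call site test the (unlikely) debug flag before evaluating
   the varargs or making the call, while the parenthesized (debug_log)
   inside suppresses macro expansion so the real function can still be
   defined and invoked.  Stand-in names.  */
static int debug_enabled;

void debug_log (int level, const char *fmt, ...);   /* real function */

#define debug_log(LEVEL, ...)                   \
  do {                                          \
    if (__builtin_expect (debug_enabled, 0))    \
      (debug_log) ((LEVEL), __VA_ARGS__);       \
  } while (0)
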
*/ ++ unsigned int ncounts; ++ /* True if the iterations can be flattened. */ ++ bool flattened; ++ /* Actual array (of elt_sz sized units), aligned to cache line size. ++ This is indexed by team_id for GFS_STATIC and outermost iteration ++ / chunk_size for other schedules. */ ++ unsigned char *array; ++ /* These two are only used for schedule(static,0). */ ++ /* This one is number of iterations % number of threads. */ ++ long t; ++ union { ++ /* And this one is cached t * (q + 1). */ ++ long boundary; ++ /* Likewise, but for the ull implementation. */ ++ unsigned long long boundary_ull; ++ }; ++ /* Array of shift counts for each dimension if they can be flattened. */ ++ unsigned int shift_counts[]; ++}; ++ + struct gomp_work_share + { + /* This member records the SCHEDULE clause to be used for this construct. +@@ -105,13 +204,18 @@ struct gomp_work_share + }; + }; + +- /* This is a circular queue that details which threads will be allowed +- into the ordered region and in which order. When a thread allocates +- iterations on which it is going to work, it also registers itself at +- the end of the array. When a thread reaches the ordered region, it +- checks to see if it is the one at the head of the queue. If not, it +- blocks on its RELEASE semaphore. */ +- unsigned *ordered_team_ids; ++ union { ++ /* This is a circular queue that details which threads will be allowed ++ into the ordered region and in which order. When a thread allocates ++ iterations on which it is going to work, it also registers itself at ++ the end of the array. When a thread reaches the ordered region, it ++ checks to see if it is the one at the head of the queue. If not, it ++ blocks on its RELEASE semaphore. */ ++ unsigned *ordered_team_ids; ++ ++ /* This is a pointer to DOACROSS work share data. */ ++ struct gomp_doacross_work_share *doacross; ++ }; + + /* This is the number of threads that have registered themselves in + the circular queue ordered_team_ids. */ +@@ -230,7 +334,7 @@ struct gomp_task_icv + { + unsigned long nthreads_var; + enum gomp_schedule_type run_sched_var; +- int run_sched_modifier; ++ int run_sched_chunk_size; + int default_device_var; + unsigned int thread_limit_var; + bool dyn_var; +@@ -246,6 +350,7 @@ extern gomp_mutex_t gomp_managed_threads + #endif + extern unsigned long gomp_max_active_levels_var; + extern bool gomp_cancel_var; ++extern int gomp_max_task_priority_var; + extern unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var; + extern unsigned long gomp_available_cpus, gomp_managed_threads; + extern unsigned long *gomp_nthreads_var_list, gomp_nthreads_var_list_len; +@@ -253,25 +358,36 @@ extern char *gomp_bind_var_list; + extern unsigned long gomp_bind_var_list_len; + extern void **gomp_places_list; + extern unsigned long gomp_places_list_len; ++extern int gomp_debug_var; ++extern int goacc_device_num; ++extern char *goacc_device_type; + + enum gomp_task_kind + { ++ /* Implicit task. */ + GOMP_TASK_IMPLICIT, +- GOMP_TASK_IFFALSE, ++ /* Undeferred task. */ ++ GOMP_TASK_UNDEFERRED, ++ /* Task created by GOMP_task and waiting to be run. */ + GOMP_TASK_WAITING, +- GOMP_TASK_TIED ++ /* Task currently executing or scheduled and about to execute. */ ++ GOMP_TASK_TIED, ++ /* Used for target tasks that have vars mapped and async run started, ++ but not yet completed. Once that completes, they will be readded ++ into the queues as GOMP_TASK_WAITING in order to perform the var ++ unmapping. 
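
/* Not part of the patch, and speculative -- one way to read the
   "flattened" doacross representation above: when each dimension can
   be given a disjoint bit range (shift_counts[]), a whole sink vector
   packs into a single integer, so posting or waiting on an iteration
   touches one value instead of ncounts of them.  Hypothetical
   helper.  */
static unsigned long long
flatten_sink (const unsigned long long *counts, const unsigned int *shifts,
              unsigned int ncounts)
{
  unsigned long long v = 0;
  for (unsigned int i = 0; i < ncounts; i++)
    v |= counts[i] << shifts[i];
  return v;
}
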
*/ ++ GOMP_TASK_ASYNC_RUNNING + }; + +-struct gomp_task; +-struct gomp_taskgroup; +-struct htab; +- + struct gomp_task_depend_entry + { ++ /* Address of dependency. */ + void *addr; + struct gomp_task_depend_entry *next; + struct gomp_task_depend_entry *prev; ++ /* Task that provides the dependency in ADDR. */ + struct gomp_task *task; ++ /* Depend entry is of type "IN". */ + bool is_in; + bool redundant; + bool redundant_out; +@@ -290,8 +406,8 @@ struct gomp_taskwait + { + bool in_taskwait; + bool in_depend_wait; ++ /* Number of tasks we are waiting for. */ + size_t n_depend; +- struct gomp_task *last_parent_depends_on; + gomp_sem_t taskwait_sem; + }; + +@@ -299,20 +415,31 @@ struct gomp_taskwait + + struct gomp_task + { ++ /* Parent of this task. */ + struct gomp_task *parent; +- struct gomp_task *children; +- struct gomp_task *next_child; +- struct gomp_task *prev_child; +- struct gomp_task *next_queue; +- struct gomp_task *prev_queue; +- struct gomp_task *next_taskgroup; +- struct gomp_task *prev_taskgroup; ++ /* Children of this task. */ ++ struct priority_queue children_queue; ++ /* Taskgroup this task belongs in. */ + struct gomp_taskgroup *taskgroup; ++ /* Tasks that depend on this task. */ + struct gomp_dependers_vec *dependers; + struct htab *depend_hash; + struct gomp_taskwait *taskwait; ++ /* Number of items in DEPEND. */ + size_t depend_count; ++ /* Number of tasks this task depends on. Once this counter reaches ++ 0, we have no unsatisfied dependencies, and this task can be put ++ into the various queues to be scheduled. */ + size_t num_dependees; ++ ++ /* Priority of this task. */ ++ int priority; ++ /* The priority node for this task in each of the different queues. ++ We put this here to avoid allocating space for each priority ++ node. Then we play offsetof() games to convert between pnode[] ++ entries and the gomp_task in which they reside. */ ++ struct priority_node pnode[3]; ++ + struct gomp_task_icv icv; + void (*fn) (void *); + void *fn_data; +@@ -320,20 +447,58 @@ struct gomp_task + bool in_tied_task; + bool final_task; + bool copy_ctors_done; ++ /* Set for undeferred tasks with unsatisfied dependencies which ++ block further execution of their parent until the dependencies ++ are satisfied. */ + bool parent_depends_on; ++ /* Dependencies provided and/or needed for this task. DEPEND_COUNT ++ is the number of items available. */ + struct gomp_task_depend_entry depend[]; + }; + ++/* This structure describes a single #pragma omp taskgroup. */ ++ + struct gomp_taskgroup + { + struct gomp_taskgroup *prev; +- struct gomp_task *children; ++ /* Queue of tasks that belong in this taskgroup. */ ++ struct priority_queue taskgroup_queue; + bool in_taskgroup_wait; + bool cancelled; + gomp_sem_t taskgroup_sem; + size_t num_children; + }; + ++/* Various state of OpenMP async offloading tasks. */ ++enum gomp_target_task_state ++{ ++ GOMP_TARGET_TASK_DATA, ++ GOMP_TARGET_TASK_BEFORE_MAP, ++ GOMP_TARGET_TASK_FALLBACK, ++ GOMP_TARGET_TASK_READY_TO_RUN, ++ GOMP_TARGET_TASK_RUNNING, ++ GOMP_TARGET_TASK_FINISHED ++}; ++ ++/* This structure describes a target task. */ ++ ++struct gomp_target_task ++{ ++ struct gomp_device_descr *devicep; ++ void (*fn) (void *); ++ size_t mapnum; ++ size_t *sizes; ++ unsigned short *kinds; ++ unsigned int flags; ++ enum gomp_target_task_state state; ++ struct target_mem_desc *tgt; ++ struct gomp_task *task; ++ struct gomp_team *team; ++ /* Device-specific target arguments. 
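
/* Not part of the patch -- the offsetof() games the comment above
   refers to: each task embeds one priority node per queue it can sit
   on, and a node pointer converts back to its task by subtracting the
   node's offset, so queue membership costs no extra allocations.
   Stand-in types.  */
#include <stddef.h>

enum pq_type { Q_CHILDREN, Q_TASKGROUP, Q_TEAM };

struct node { struct node *next, *prev; };

struct task
{
  int priority;
  struct node pnode[3];         /* one node per queue */
};

static struct node *
task_to_node (struct task *t, enum pq_type q)
{
  return &t->pnode[q];
}

static struct task *
node_to_task (struct node *n, enum pq_type q)
{
  size_t off = offsetof (struct task, pnode)
               + (size_t) q * sizeof (struct node);
  return (struct task *) ((char *) n - off);
}
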
*/ ++ void **args; ++ void *hostaddrs[]; ++}; ++ + /* This structure describes a "team" of threads. These are the threads + that are spawned by a PARALLEL constructs, as well as the work sharing + constructs that the team encounters. */ +@@ -396,7 +561,8 @@ struct gomp_team + struct gomp_work_share work_shares[8]; + + gomp_mutex_t task_lock; +- struct gomp_task *task_queue; ++ /* Scheduled tasks. */ ++ struct priority_queue task_queue; + /* Number of all GOMP_TASK_{WAITING,TIED} tasks in the team. */ + unsigned int task_count; + /* Number of GOMP_TASK_WAITING tasks currently waiting to be scheduled. */ +@@ -451,6 +617,9 @@ struct gomp_thread_pool + struct gomp_thread **threads; + unsigned threads_size; + unsigned threads_used; ++ /* The last team is used for non-nested teams to delay their destruction to ++ make sure all the threads in the team move on to the pool's barrier before ++ the team's barrier is destroyed. */ + struct gomp_team *last_team; + /* Number of threads running in this contention group. */ + unsigned long threads_busy; +@@ -519,23 +688,7 @@ extern bool gomp_affinity_same_place (vo + extern bool gomp_affinity_finalize_place_list (bool); + extern bool gomp_affinity_init_level (int, unsigned long, bool); + extern void gomp_affinity_print_place (void *); +- +-/* alloc.c */ +- +-extern void *gomp_malloc (size_t) __attribute__((malloc)); +-extern void *gomp_malloc_cleared (size_t) __attribute__((malloc)); +-extern void *gomp_realloc (void *, size_t); +- +-/* Avoid conflicting prototypes of alloca() in system headers by using +- GCC's builtin alloca(). */ +-#define gomp_alloca(x) __builtin_alloca(x) +- +-/* error.c */ +- +-extern void gomp_error (const char *, ...) +- __attribute__((format (printf, 1, 2))); +-extern void gomp_fatal (const char *, ...) +- __attribute__((noreturn, format (printf, 1, 2))); ++extern void gomp_get_place_proc_ids_8 (int, int64_t *); + + /* iter.c */ + +@@ -572,6 +725,9 @@ extern void gomp_ordered_next (void); + extern void gomp_ordered_static_init (void); + extern void gomp_ordered_static_next (void); + extern void gomp_ordered_sync (void); ++extern void gomp_doacross_init (unsigned, long *, long); ++extern void gomp_doacross_ull_init (unsigned, unsigned long long *, ++ unsigned long long); + + /* parallel.c */ + +@@ -588,6 +744,12 @@ extern void gomp_init_task (struct gomp_ + struct gomp_task_icv *); + extern void gomp_end_task (void); + extern void gomp_barrier_handle_tasks (gomp_barrier_state_t); ++extern void gomp_task_maybe_wait_for_dependencies (void **); ++extern bool gomp_create_target_task (struct gomp_device_descr *, ++ void (*) (void *), size_t, void **, ++ size_t *, unsigned short *, unsigned int, ++ void **, void **, ++ enum gomp_target_task_state); + + static void inline + gomp_finish_task (struct gomp_task *task) +@@ -606,7 +768,213 @@ extern void gomp_free_thread (void *); + + /* target.c */ + ++extern void gomp_init_targets_once (void); + extern int gomp_get_num_devices (void); ++extern bool gomp_target_task_fn (void *); ++ ++/* Splay tree definitions. */ ++typedef struct splay_tree_node_s *splay_tree_node; ++typedef struct splay_tree_s *splay_tree; ++typedef struct splay_tree_key_s *splay_tree_key; ++ ++struct target_var_desc { ++ /* Splay key. */ ++ splay_tree_key key; ++ /* True if data should be copied from device to host at the end. */ ++ bool copy_from; ++ /* True if data always should be copied from device to host at the end. */ ++ bool always_copy_from; ++ /* Relative offset against key host_start. 
*/
++ uintptr_t offset;
++ /* Actual length. */
++ uintptr_t length;
++};
++
++struct target_mem_desc {
++ /* Reference count. */
++ uintptr_t refcount;
++ /* All the splay nodes allocated together. */
++ splay_tree_node array;
++ /* Start of the target region. */
++ uintptr_t tgt_start;
++ /* End of the target region. */
++ uintptr_t tgt_end;
++ /* Handle to free. */
++ void *to_free;
++ /* Previous target_mem_desc. */
++ struct target_mem_desc *prev;
++ /* Number of items in following list. */
++ size_t list_count;
++
++ /* Corresponding target device descriptor. */
++ struct gomp_device_descr *device_descr;
++
++ /* List of target items to remove (or decrease refcount)
++ at the end of region. */
++ struct target_var_desc list[];
++};
++
++/* Special value for refcount - infinity. */
++#define REFCOUNT_INFINITY (~(uintptr_t) 0)
++/* Special value for refcount - tgt_offset contains target address of the
++ artificial pointer to "omp declare target link" object. */
++#define REFCOUNT_LINK (~(uintptr_t) 1)
++
++struct splay_tree_key_s {
++ /* Address of the host object. */
++ uintptr_t host_start;
++ /* Address immediately after the host object. */
++ uintptr_t host_end;
++ /* Descriptor of the target memory. */
++ struct target_mem_desc *tgt;
++ /* Offset from tgt->tgt_start to the start of the target object. */
++ uintptr_t tgt_offset;
++ /* Reference count. */
++ uintptr_t refcount;
++ /* Pointer to the original mapping of "omp declare target link" object. */
++ splay_tree_key link_key;
++};
++
++/* The comparison function. */
++
++static inline int
++splay_compare (splay_tree_key x, splay_tree_key y)
++{
++ if (x->host_start == x->host_end
++ && y->host_start == y->host_end)
++ return 0;
++ if (x->host_end <= y->host_start)
++ return -1;
++ if (x->host_start >= y->host_end)
++ return 1;
++ return 0;
++}
++
++#include "splay-tree.h"
++
++typedef struct acc_dispatch_t
++{
++ /* This is a linked list of data mapped using the
++ acc_map_data/acc_unmap_data or "acc enter data"/"acc exit data" pragmas.
++ Unlike mapped_data in the goacc_thread struct, unmapping can
++ happen out-of-order with respect to mapping. */
++ /* This is guarded by the lock in the "outer" struct gomp_device_descr. */
++ struct target_mem_desc *data_environ;
++
++ /* Execute. */
++ void (*exec_func) (void (*) (void *), size_t, void **, void **, int,
++ unsigned *, void *);
++
++ /* Async cleanup callback registration. */
++ void (*register_async_cleanup_func) (void *, int);
++
++ /* Asynchronous routines. */
++ int (*async_test_func) (int);
++ int (*async_test_all_func) (void);
++ void (*async_wait_func) (int);
++ void (*async_wait_async_func) (int, int);
++ void (*async_wait_all_func) (void);
++ void (*async_wait_all_async_func) (int);
++ void (*async_set_async_func) (int);
++
++ /* Create/destroy TLS data. */
++ void *(*create_thread_data_func) (int);
++ void (*destroy_thread_data_func) (void *);
++
++ /* NVIDIA target specific routines. */
++ struct {
++ void *(*get_current_device_func) (void);
++ void *(*get_current_context_func) (void);
++ void *(*get_stream_func) (int);
++ int (*set_stream_func) (int, void *);
++ } cuda;
++} acc_dispatch_t;
++
++/* Various state of the accelerator device. */
++enum gomp_device_state
++{
++ GOMP_DEVICE_UNINITIALIZED,
++ GOMP_DEVICE_INITIALIZED,
++ GOMP_DEVICE_FINALIZED
++};
++
++/* This structure describes an accelerator device. 
++ It contains the name of the corresponding libgomp plugin, function handlers
++ for interaction with the device, the ID number of the device, and
++ information about mapped memory. */
++struct gomp_device_descr
++{
++ /* Immutable data, which is only set during initialization, and which is not
++ guarded by the lock. */
++
++ /* The name of the device. */
++ const char *name;
++
++ /* Capabilities of the device (supports OpenACC, OpenMP). */
++ unsigned int capabilities;
++
++ /* This is the ID number of the device among devices of the same type. */
++ int target_id;
++
++ /* This is the TYPE of device. */
++ enum offload_target_type type;
++
++ /* Function handlers. */
++ const char *(*get_name_func) (void);
++ unsigned int (*get_caps_func) (void);
++ int (*get_type_func) (void);
++ int (*get_num_devices_func) (void);
++ bool (*init_device_func) (int);
++ bool (*fini_device_func) (int);
++ unsigned (*version_func) (void);
++ int (*load_image_func) (int, unsigned, const void *, struct addr_pair **);
++ bool (*unload_image_func) (int, unsigned, const void *);
++ void *(*alloc_func) (int, size_t);
++ bool (*free_func) (int, void *);
++ bool (*dev2host_func) (int, void *, const void *, size_t);
++ bool (*host2dev_func) (int, void *, const void *, size_t);
++ bool (*dev2dev_func) (int, void *, const void *, size_t);
++ bool (*can_run_func) (void *);
++ void (*run_func) (int, void *, void *, void **);
++ void (*async_run_func) (int, void *, void *, void **, void *);
++
++ /* Splay tree containing information about mapped memory regions. */
++ struct splay_tree_s mem_map;
++
++ /* Mutex for the mutable data. */
++ gomp_mutex_t lock;
++
++ /* Current state of the device. OpenACC allows moving from the INITIALIZED
++ state back to the UNINITIALIZED state. OpenMP only allows moving from
++ INITIALIZED to FINALIZED (at program shutdown). */
++ enum gomp_device_state state;
++
++ /* OpenACC-specific data and functions. */
++ /* This is mutable because of its mutable data_environ and target_data
++ members. */
++ acc_dispatch_t openacc;
++};
++
++/* Kind of the pragma for which gomp_map_vars () is called. 
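++ Roughly: GOMP_MAP_VARS_TARGET for "omp target", GOMP_MAP_VARS_DATA for
++ "omp target data", GOMP_MAP_VARS_ENTER_DATA for "omp target enter data",
++ and GOMP_MAP_VARS_OPENACC for the OpenACC entry points.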
*/
++enum gomp_map_vars_kind
++{
++ GOMP_MAP_VARS_OPENACC,
++ GOMP_MAP_VARS_TARGET,
++ GOMP_MAP_VARS_DATA,
++ GOMP_MAP_VARS_ENTER_DATA
++};
++
++extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *);
++extern void gomp_acc_remove_pointer (void *, bool, int, int);
++
++extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *,
++ size_t, void **, void **,
++ size_t *, void *, bool,
++ enum gomp_map_vars_kind);
++extern void gomp_unmap_vars (struct target_mem_desc *, bool);
++extern void gomp_init_device (struct gomp_device_descr *);
++extern void gomp_free_memmap (struct splay_tree_s *);
++extern void gomp_unload_device (struct gomp_device_descr *);
+
+ /* work.c */
+
+@@ -646,8 +1014,28 @@ typedef enum omp_proc_bind_t
+ omp_proc_bind_spread = 4
+ } omp_proc_bind_t;
+
++typedef enum omp_lock_hint_t
++{
++ omp_lock_hint_none = 0,
++ omp_lock_hint_uncontended = 1,
++ omp_lock_hint_contended = 2,
++ omp_lock_hint_nonspeculative = 4,
++ omp_lock_hint_speculative = 8
++} omp_lock_hint_t;
++
++extern void omp_init_lock_with_hint (omp_lock_t *, omp_lock_hint_t)
++ __GOMP_NOTHROW;
++extern void omp_init_nest_lock_with_hint (omp_nest_lock_t *, omp_lock_hint_t)
++ __GOMP_NOTHROW;
++
+ extern int omp_get_cancellation (void) __GOMP_NOTHROW;
+ extern omp_proc_bind_t omp_get_proc_bind (void) __GOMP_NOTHROW;
++extern int omp_get_num_places (void) __GOMP_NOTHROW;
++extern int omp_get_place_num_procs (int) __GOMP_NOTHROW;
++extern void omp_get_place_proc_ids (int, int *) __GOMP_NOTHROW;
++extern int omp_get_place_num (void) __GOMP_NOTHROW;
++extern int omp_get_partition_num_places (void) __GOMP_NOTHROW;
++extern void omp_get_partition_place_nums (int *) __GOMP_NOTHROW;
+
+ extern void omp_set_default_device (int) __GOMP_NOTHROW;
+ extern int omp_get_default_device (void) __GOMP_NOTHROW;
+@@ -656,6 +1044,24 @@ extern int omp_get_num_teams (void) __GO
+ extern int omp_get_team_num (void) __GOMP_NOTHROW;
+
+ extern int omp_is_initial_device (void) __GOMP_NOTHROW;
++extern int omp_get_initial_device (void) __GOMP_NOTHROW;
++extern int omp_get_max_task_priority (void) __GOMP_NOTHROW;
++
++extern void *omp_target_alloc (__SIZE_TYPE__, int) __GOMP_NOTHROW;
++extern void omp_target_free (void *, int) __GOMP_NOTHROW;
++extern int omp_target_is_present (void *, int) __GOMP_NOTHROW;
++extern int omp_target_memcpy (void *, void *, __SIZE_TYPE__, __SIZE_TYPE__,
++ __SIZE_TYPE__, int, int) __GOMP_NOTHROW;
++extern int omp_target_memcpy_rect (void *, void *, __SIZE_TYPE__, int,
++ const __SIZE_TYPE__ *,
++ const __SIZE_TYPE__ *,
++ const __SIZE_TYPE__ *,
++ const __SIZE_TYPE__ *,
++ const __SIZE_TYPE__ *, int, int)
++ __GOMP_NOTHROW;
++extern int omp_target_associate_ptr (void *, void *, __SIZE_TYPE__,
++ __SIZE_TYPE__, int) __GOMP_NOTHROW;
++extern int omp_target_disassociate_ptr (void *, int) __GOMP_NOTHROW;
+
+ #if !defined (HAVE_ATTRIBUTE_VISIBILITY) \
+ || !defined (HAVE_ATTRIBUTE_ALIAS) \
+@@ -728,4 +1134,34 @@ extern int gomp_test_nest_lock_25 (omp_n
+ # define ialias_call(fn) fn
+ #endif
+
++/* Helper function for priority_node_to_task() and
++ task_to_priority_node().
++
++ Return the offset from a task to its priority_node entry. The
++ priority_node entry has a type of TYPE. */
++
++static inline size_t
++priority_queue_offset (enum priority_queue_type type)
++{
++ return offsetof (struct gomp_task, pnode[(int) type]);
++}
++
++/* Return the task associated with a priority NODE of type TYPE. 
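++ This inverts task_to_priority_node below: converting a task to its
++ node and back again yields the original task pointer.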
*/ ++ ++static inline struct gomp_task * ++priority_node_to_task (enum priority_queue_type type, ++ struct priority_node *node) ++{ ++ return (struct gomp_task *) ((char *) node - priority_queue_offset (type)); ++} ++ ++/* Return the priority node of type TYPE for a given TASK. */ ++ ++static inline struct priority_node * ++task_to_priority_node (enum priority_queue_type type, ++ struct gomp_task *task) ++{ ++ return (struct priority_node *) ((char *) task ++ + priority_queue_offset (type)); ++} + #endif /* LIBGOMP_H */ +--- libgomp/env.c.jj 2014-05-15 10:56:32.420522486 +0200 ++++ libgomp/env.c 2016-07-13 16:57:04.437535335 +0200 +@@ -27,6 +27,8 @@ + + #include "libgomp.h" + #include "libgomp_f.h" ++#include "oacc-int.h" ++#include "gomp-constants.h" + #include + #include + #include +@@ -56,7 +58,7 @@ struct gomp_task_icv gomp_global_icv = { + .nthreads_var = 1, + .thread_limit_var = UINT_MAX, + .run_sched_var = GFS_DYNAMIC, +- .run_sched_modifier = 1, ++ .run_sched_chunk_size = 1, + .default_device_var = 0, + .dyn_var = false, + .nest_var = false, +@@ -66,6 +68,7 @@ struct gomp_task_icv gomp_global_icv = { + + unsigned long gomp_max_active_levels_var = INT_MAX; + bool gomp_cancel_var = false; ++int gomp_max_task_priority_var = 0; + #ifndef HAVE_SYNC_BUILTINS + gomp_mutex_t gomp_managed_threads_lock; + #endif +@@ -76,6 +79,9 @@ char *gomp_bind_var_list; + unsigned long gomp_bind_var_list_len; + void **gomp_places_list; + unsigned long gomp_places_list_len; ++int gomp_debug_var; ++char *goacc_device_type; ++int goacc_device_num; + + /* Parse the OMP_SCHEDULE environment variable. */ + +@@ -118,7 +124,7 @@ parse_schedule (void) + ++env; + if (*env == '\0') + { +- gomp_global_icv.run_sched_modifier ++ gomp_global_icv.run_sched_chunk_size + = gomp_global_icv.run_sched_var != GFS_STATIC; + return; + } +@@ -144,7 +150,7 @@ parse_schedule (void) + + if (value == 0 && gomp_global_icv.run_sched_var != GFS_STATIC) + value = 1; +- gomp_global_icv.run_sched_modifier = value; ++ gomp_global_icv.run_sched_chunk_size = value; + return; + + unknown: +@@ -1011,6 +1017,16 @@ parse_affinity (bool ignore) + return false; + } + ++static void ++parse_acc_device_type (void) ++{ ++ const char *env = getenv ("ACC_DEVICE_TYPE"); ++ ++ if (env && *env != '\0') ++ goacc_device_type = strdup (env); ++ else ++ goacc_device_type = NULL; ++} + + static void + handle_omp_display_env (unsigned long stacksize, int wait_policy) +@@ -1054,7 +1070,7 @@ handle_omp_display_env (unsigned long st + + fputs ("\nOPENMP DISPLAY ENVIRONMENT BEGIN\n", stderr); + +- fputs (" _OPENMP = '201307'\n", stderr); ++ fputs (" _OPENMP = '201511'\n", stderr); + fprintf (stderr, " OMP_DYNAMIC = '%s'\n", + gomp_global_icv.dyn_var ? "TRUE" : "FALSE"); + fprintf (stderr, " OMP_NESTED = '%s'\n", +@@ -1142,6 +1158,8 @@ handle_omp_display_env (unsigned long st + gomp_cancel_var ? 
"TRUE" : "FALSE"); + fprintf (stderr, " OMP_DEFAULT_DEVICE = '%d'\n", + gomp_global_icv.default_device_var); ++ fprintf (stderr, " OMP_MAX_TASK_PRIORITY = '%d'\n", ++ gomp_max_task_priority_var); + + if (verbose) + { +@@ -1174,6 +1192,7 @@ initialize_env (void) + parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var); + parse_boolean ("OMP_CANCELLATION", &gomp_cancel_var); + parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true); ++ parse_int ("OMP_MAX_TASK_PRIORITY", &gomp_max_task_priority_var, true); + parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var, + true); + if (parse_unsigned_long ("OMP_THREAD_LIMIT", &thread_limit_var, false)) +@@ -1181,6 +1200,7 @@ initialize_env (void) + gomp_global_icv.thread_limit_var + = thread_limit_var > INT_MAX ? UINT_MAX : thread_limit_var; + } ++ parse_int ("GOMP_DEBUG", &gomp_debug_var, true); + #ifndef HAVE_SYNC_BUILTINS + gomp_mutex_init (&gomp_managed_threads_lock); + #endif +@@ -1271,6 +1291,15 @@ initialize_env (void) + } + + handle_omp_display_env (stacksize, wait_policy); ++ ++ /* OpenACC. */ ++ ++ if (!parse_int ("ACC_DEVICE_NUM", &goacc_device_num, true)) ++ goacc_device_num = 0; ++ ++ parse_acc_device_type (); ++ ++ goacc_runtime_initialize (); + } + + +@@ -1312,21 +1341,21 @@ omp_get_nested (void) + } + + void +-omp_set_schedule (omp_sched_t kind, int modifier) ++omp_set_schedule (omp_sched_t kind, int chunk_size) + { + struct gomp_task_icv *icv = gomp_icv (true); + switch (kind) + { + case omp_sched_static: +- if (modifier < 1) +- modifier = 0; +- icv->run_sched_modifier = modifier; ++ if (chunk_size < 1) ++ chunk_size = 0; ++ icv->run_sched_chunk_size = chunk_size; + break; + case omp_sched_dynamic: + case omp_sched_guided: +- if (modifier < 1) +- modifier = 1; +- icv->run_sched_modifier = modifier; ++ if (chunk_size < 1) ++ chunk_size = 1; ++ icv->run_sched_chunk_size = chunk_size; + break; + case omp_sched_auto: + break; +@@ -1337,11 +1366,11 @@ omp_set_schedule (omp_sched_t kind, int + } + + void +-omp_get_schedule (omp_sched_t *kind, int *modifier) ++omp_get_schedule (omp_sched_t *kind, int *chunk_size) + { + struct gomp_task_icv *icv = gomp_icv (false); + *kind = icv->run_sched_var; +- *modifier = icv->run_sched_modifier; ++ *chunk_size = icv->run_sched_chunk_size; + } + + int +@@ -1377,6 +1406,12 @@ omp_get_cancellation (void) + return gomp_cancel_var; + } + ++int ++omp_get_max_task_priority (void) ++{ ++ return gomp_max_task_priority_var; ++} ++ + omp_proc_bind_t + omp_get_proc_bind (void) + { +@@ -1425,6 +1460,59 @@ omp_is_initial_device (void) + return 1; + } + ++int ++omp_get_initial_device (void) ++{ ++ return GOMP_DEVICE_HOST_FALLBACK; ++} ++ ++int ++omp_get_num_places (void) ++{ ++ return gomp_places_list_len; ++} ++ ++int ++omp_get_place_num (void) ++{ ++ if (gomp_places_list == NULL) ++ return -1; ++ ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->place == 0) ++ gomp_init_affinity (); ++ ++ return (int) thr->place - 1; ++} ++ ++int ++omp_get_partition_num_places (void) ++{ ++ if (gomp_places_list == NULL) ++ return 0; ++ ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->place == 0) ++ gomp_init_affinity (); ++ ++ return thr->ts.place_partition_len; ++} ++ ++void ++omp_get_partition_place_nums (int *place_nums) ++{ ++ if (gomp_places_list == NULL) ++ return; ++ ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->place == 0) ++ gomp_init_affinity (); ++ ++ unsigned int i; ++ for (i = 0; i < thr->ts.place_partition_len; i++) ++ *place_nums++ = 
thr->ts.place_partition_off + i; ++} ++ + ialias (omp_set_dynamic) + ialias (omp_set_nested) + ialias (omp_set_num_threads) +@@ -1444,3 +1532,9 @@ ialias (omp_get_num_devices) + ialias (omp_get_num_teams) + ialias (omp_get_team_num) + ialias (omp_is_initial_device) ++ialias (omp_get_initial_device) ++ialias (omp_get_max_task_priority) ++ialias (omp_get_num_places) ++ialias (omp_get_place_num) ++ialias (omp_get_partition_num_places) ++ialias (omp_get_partition_place_nums) +--- libgomp/openacc.h.jj 2016-07-13 16:57:04.432535397 +0200 ++++ libgomp/openacc.h 2016-07-13 16:57:04.432535397 +0200 +@@ -0,0 +1,131 @@ ++/* OpenACC Runtime Library User-facing Declarations ++ ++ Copyright (C) 2013-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++#ifndef _OPENACC_H ++#define _OPENACC_H 1 ++ ++/* The OpenACC standard is silent on whether or not including ++ might or must not include other header files. We chose to include ++ some. */ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#if __cplusplus >= 201103 ++# define __GOACC_NOTHROW noexcept ++#elif __cplusplus ++# define __GOACC_NOTHROW throw () ++#else /* Not C++ */ ++# define __GOACC_NOTHROW __attribute__ ((__nothrow__)) ++#endif ++ ++/* Types */ ++typedef enum acc_device_t { ++ /* Keep in sync with include/gomp-constants.h. */ ++ acc_device_none = 0, ++ acc_device_default = 1, ++ acc_device_host = 2, ++ /* acc_device_host_nonshm = 3 removed. */ ++ acc_device_not_host = 4, ++ acc_device_nvidia = 5, ++ _ACC_device_hwm, ++ /* Ensure enumeration is layout compatible with int. */ ++ _ACC_highest = __INT_MAX__, ++ _ACC_neg = -1 ++} acc_device_t; ++ ++typedef enum acc_async_t { ++ /* Keep in sync with include/gomp-constants.h. 
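++ acc_async_sync requests synchronous execution, while acc_async_noval
++ selects the implementation's default asynchronous activity queue.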
*/ ++ acc_async_noval = -1, ++ acc_async_sync = -2 ++} acc_async_t; ++ ++int acc_get_num_devices (acc_device_t) __GOACC_NOTHROW; ++void acc_set_device_type (acc_device_t) __GOACC_NOTHROW; ++acc_device_t acc_get_device_type (void) __GOACC_NOTHROW; ++void acc_set_device_num (int, acc_device_t) __GOACC_NOTHROW; ++int acc_get_device_num (acc_device_t) __GOACC_NOTHROW; ++int acc_async_test (int) __GOACC_NOTHROW; ++int acc_async_test_all (void) __GOACC_NOTHROW; ++void acc_wait (int) __GOACC_NOTHROW; ++void acc_wait_async (int, int) __GOACC_NOTHROW; ++void acc_wait_all (void) __GOACC_NOTHROW; ++void acc_wait_all_async (int) __GOACC_NOTHROW; ++void acc_init (acc_device_t) __GOACC_NOTHROW; ++void acc_shutdown (acc_device_t) __GOACC_NOTHROW; ++#ifdef __cplusplus ++int acc_on_device (int __arg) __GOACC_NOTHROW; ++#else ++int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW; ++#endif ++void *acc_malloc (size_t) __GOACC_NOTHROW; ++void acc_free (void *) __GOACC_NOTHROW; ++/* Some of these would be more correct with const qualifiers, but ++ the standard specifies otherwise. */ ++void *acc_copyin (void *, size_t) __GOACC_NOTHROW; ++void *acc_present_or_copyin (void *, size_t) __GOACC_NOTHROW; ++void *acc_create (void *, size_t) __GOACC_NOTHROW; ++void *acc_present_or_create (void *, size_t) __GOACC_NOTHROW; ++void acc_copyout (void *, size_t) __GOACC_NOTHROW; ++void acc_delete (void *, size_t) __GOACC_NOTHROW; ++void acc_update_device (void *, size_t) __GOACC_NOTHROW; ++void acc_update_self (void *, size_t) __GOACC_NOTHROW; ++void acc_map_data (void *, void *, size_t) __GOACC_NOTHROW; ++void acc_unmap_data (void *) __GOACC_NOTHROW; ++void *acc_deviceptr (void *) __GOACC_NOTHROW; ++void *acc_hostptr (void *) __GOACC_NOTHROW; ++int acc_is_present (void *, size_t) __GOACC_NOTHROW; ++void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW; ++void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW; ++ ++/* Old names. OpenACC does not specify whether these can or must ++ not be macros, inlines or aliases for the new names. */ ++#define acc_pcreate acc_present_or_create ++#define acc_pcopyin acc_present_or_copyin ++ ++/* CUDA-specific routines. */ ++void *acc_get_current_cuda_device (void) __GOACC_NOTHROW; ++void *acc_get_current_cuda_context (void) __GOACC_NOTHROW; ++void *acc_get_cuda_stream (int) __GOACC_NOTHROW; ++int acc_set_cuda_stream (int, void *) __GOACC_NOTHROW; ++ ++#ifdef __cplusplus ++} ++ ++/* Forwarding function with correctly typed arg. */ ++ ++#pragma acc routine seq ++inline int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW ++{ ++ return acc_on_device ((int) __arg); ++} ++#endif ++ ++#endif /* _OPENACC_H */ +--- libgomp/config/linux/doacross.h.jj 2016-07-13 16:57:18.902355979 +0200 ++++ libgomp/config/linux/doacross.h 2016-07-13 16:57:18.902355979 +0200 +@@ -0,0 +1,57 @@ ++/* Copyright (C) 2015-2016 Free Software Foundation, Inc. ++ Contributed by Jakub Jelinek . ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. 
++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* This is a Linux specific implementation of doacross spinning. */ ++ ++#ifndef GOMP_DOACROSS_H ++#define GOMP_DOACROSS_H 1 ++ ++#include "libgomp.h" ++#include ++#include "wait.h" ++ ++#ifdef HAVE_ATTRIBUTE_VISIBILITY ++# pragma GCC visibility push(hidden) ++#endif ++ ++static inline void doacross_spin (unsigned long *addr, unsigned long expected, ++ unsigned long cur) ++{ ++ /* FIXME: back off depending on how large expected - cur is. */ ++ do ++ { ++ cpu_relax (); ++ cur = __atomic_load_n (addr, MEMMODEL_RELAXED); ++ if (expected < cur) ++ return; ++ } ++ while (1); ++} ++ ++#ifdef HAVE_ATTRIBUTE_VISIBILITY ++# pragma GCC visibility pop ++#endif ++ ++#endif /* GOMP_DOACROSS_H */ +--- libgomp/config/posix/doacross.h.jj 2016-07-13 16:57:18.903355966 +0200 ++++ libgomp/config/posix/doacross.h 2016-07-13 16:57:18.903355966 +0200 +@@ -0,0 +1,62 @@ ++/* Copyright (C) 2015-2016 Free Software Foundation, Inc. ++ Contributed by Jakub Jelinek . ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* This is a generic implementation of doacross spinning. */ ++ ++#ifndef GOMP_DOACROSS_H ++#define GOMP_DOACROSS_H 1 ++ ++#include "libgomp.h" ++#include ++ ++#ifdef HAVE_ATTRIBUTE_VISIBILITY ++# pragma GCC visibility push(hidden) ++#endif ++ ++static inline void ++cpu_relax (void) ++{ ++ __asm volatile ("" : : : "memory"); ++} ++ ++static inline void doacross_spin (unsigned long *addr, unsigned long expected, ++ unsigned long cur) ++{ ++ /* FIXME: back off depending on how large expected - cur is. */ ++ do ++ { ++ cpu_relax (); ++ cur = __atomic_load_n (addr, MEMMODEL_RELAXED); ++ if (expected < cur) ++ return; ++ } ++ while (1); ++} ++ ++#ifdef HAVE_ATTRIBUTE_VISIBILITY ++# pragma GCC visibility pop ++#endif ++ ++#endif /* GOMP_DOACROSS_H */ +--- libgomp/splay-tree.c.jj 2016-07-13 16:57:18.919355768 +0200 ++++ libgomp/splay-tree.c 2016-07-13 16:57:18.919355768 +0200 +@@ -0,0 +1,238 @@ ++/* A splay-tree datatype. ++ Copyright (C) 1998-2016 Free Software Foundation, Inc. ++ Contributed by Mark Mitchell (mark@markmitchell.com). ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). 
++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* The splay tree code copied from include/splay-tree.h and adjusted, ++ so that all the data lives directly in splay_tree_node_s structure ++ and no extra allocations are needed. */ ++ ++/* For an easily readable description of splay-trees, see: ++ ++ Lewis, Harry R. and Denenberg, Larry. Data Structures and Their ++ Algorithms. Harper-Collins, Inc. 1991. ++ ++ The major feature of splay trees is that all basic tree operations ++ are amortized O(log n) time for a tree with n nodes. */ ++ ++#include "libgomp.h" ++ ++/* Rotate the edge joining the left child N with its parent P. PP is the ++ grandparents' pointer to P. */ ++ ++static inline void ++rotate_left (splay_tree_node *pp, splay_tree_node p, splay_tree_node n) ++{ ++ splay_tree_node tmp; ++ tmp = n->right; ++ n->right = p; ++ p->left = tmp; ++ *pp = n; ++} ++ ++/* Rotate the edge joining the right child N with its parent P. PP is the ++ grandparents' pointer to P. */ ++ ++static inline void ++rotate_right (splay_tree_node *pp, splay_tree_node p, splay_tree_node n) ++{ ++ splay_tree_node tmp; ++ tmp = n->left; ++ n->left = p; ++ p->right = tmp; ++ *pp = n; ++} ++ ++/* Bottom up splay of KEY. */ ++ ++static void ++splay_tree_splay (splay_tree sp, splay_tree_key key) ++{ ++ if (sp->root == NULL) ++ return; ++ ++ do { ++ int cmp1, cmp2; ++ splay_tree_node n, c; ++ ++ n = sp->root; ++ cmp1 = splay_compare (key, &n->key); ++ ++ /* Found. */ ++ if (cmp1 == 0) ++ return; ++ ++ /* Left or right? If no child, then we're done. */ ++ if (cmp1 < 0) ++ c = n->left; ++ else ++ c = n->right; ++ if (!c) ++ return; ++ ++ /* Next one left or right? If found or no child, we're done ++ after one rotation. */ ++ cmp2 = splay_compare (key, &c->key); ++ if (cmp2 == 0 ++ || (cmp2 < 0 && !c->left) ++ || (cmp2 > 0 && !c->right)) ++ { ++ if (cmp1 < 0) ++ rotate_left (&sp->root, n, c); ++ else ++ rotate_right (&sp->root, n, c); ++ return; ++ } ++ ++ /* Now we have the four cases of double-rotation. */ ++ if (cmp1 < 0 && cmp2 < 0) ++ { ++ rotate_left (&n->left, c, c->left); ++ rotate_left (&sp->root, n, n->left); ++ } ++ else if (cmp1 > 0 && cmp2 > 0) ++ { ++ rotate_right (&n->right, c, c->right); ++ rotate_right (&sp->root, n, n->right); ++ } ++ else if (cmp1 < 0 && cmp2 > 0) ++ { ++ rotate_right (&n->left, c, c->right); ++ rotate_left (&sp->root, n, n->left); ++ } ++ else if (cmp1 > 0 && cmp2 < 0) ++ { ++ rotate_left (&n->right, c, c->left); ++ rotate_right (&sp->root, n, n->right); ++ } ++ } while (1); ++} ++ ++/* Insert a new NODE into SP. The NODE shouldn't exist in the tree. 
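++ Otherwise gomp_fatal is called on the duplicate key. A hypothetical
++ caller would therefore probe first and only allocate on a miss:
++
++   if (splay_tree_lookup (sp, &key) == NULL)
++     {
++       splay_tree_node n = gomp_malloc (sizeof (*n));
++       n->key = key;
++       splay_tree_insert (sp, n);
++     }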
*/ ++ ++attribute_hidden void ++splay_tree_insert (splay_tree sp, splay_tree_node node) ++{ ++ int comparison = 0; ++ ++ splay_tree_splay (sp, &node->key); ++ ++ if (sp->root) ++ comparison = splay_compare (&sp->root->key, &node->key); ++ ++ if (sp->root && comparison == 0) ++ gomp_fatal ("Duplicate node"); ++ else ++ { ++ /* Insert it at the root. */ ++ if (sp->root == NULL) ++ node->left = node->right = NULL; ++ else if (comparison < 0) ++ { ++ node->left = sp->root; ++ node->right = node->left->right; ++ node->left->right = NULL; ++ } ++ else ++ { ++ node->right = sp->root; ++ node->left = node->right->left; ++ node->right->left = NULL; ++ } ++ ++ sp->root = node; ++ } ++} ++ ++/* Remove node with KEY from SP. It is not an error if it did not exist. */ ++ ++attribute_hidden void ++splay_tree_remove (splay_tree sp, splay_tree_key key) ++{ ++ splay_tree_splay (sp, key); ++ ++ if (sp->root && splay_compare (&sp->root->key, key) == 0) ++ { ++ splay_tree_node left, right; ++ ++ left = sp->root->left; ++ right = sp->root->right; ++ ++ /* One of the children is now the root. Doesn't matter much ++ which, so long as we preserve the properties of the tree. */ ++ if (left) ++ { ++ sp->root = left; ++ ++ /* If there was a right child as well, hang it off the ++ right-most leaf of the left child. */ ++ if (right) ++ { ++ while (left->right) ++ left = left->right; ++ left->right = right; ++ } ++ } ++ else ++ sp->root = right; ++ } ++} ++ ++/* Lookup KEY in SP, returning NODE if present, and NULL ++ otherwise. */ ++ ++attribute_hidden splay_tree_key ++splay_tree_lookup (splay_tree sp, splay_tree_key key) ++{ ++ splay_tree_splay (sp, key); ++ ++ if (sp->root && splay_compare (&sp->root->key, key) == 0) ++ return &sp->root->key; ++ else ++ return NULL; ++} ++ ++/* Helper function for splay_tree_foreach. ++ ++ Run FUNC on every node in KEY. */ ++ ++static void ++splay_tree_foreach_internal (splay_tree_node node, splay_tree_callback func, ++ void *data) ++{ ++ if (!node) ++ return; ++ func (&node->key, data); ++ splay_tree_foreach_internal (node->left, func, data); ++ /* Yeah, whatever. GCC can fix my tail recursion. */ ++ splay_tree_foreach_internal (node->right, func, data); ++} ++ ++/* Run FUNC on each of the nodes in SP. */ ++ ++attribute_hidden void ++splay_tree_foreach (splay_tree sp, splay_tree_callback func, void *data) ++{ ++ splay_tree_foreach_internal (sp->root, func, data); ++} +--- libgomp/libgomp-plugin.c.jj 2016-07-13 16:57:04.435535360 +0200 ++++ libgomp/libgomp-plugin.c 2016-07-13 16:57:04.435535360 +0200 +@@ -0,0 +1,80 @@ ++/* Copyright (C) 2014-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. 
++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* Exported (non-hidden) functions exposing libgomp interface for plugins. */ ++ ++#include ++ ++#include "libgomp.h" ++#include "libgomp-plugin.h" ++ ++void * ++GOMP_PLUGIN_malloc (size_t size) ++{ ++ return gomp_malloc (size); ++} ++ ++void * ++GOMP_PLUGIN_malloc_cleared (size_t size) ++{ ++ return gomp_malloc_cleared (size); ++} ++ ++void * ++GOMP_PLUGIN_realloc (void *ptr, size_t size) ++{ ++ return gomp_realloc (ptr, size); ++} ++ ++void ++GOMP_PLUGIN_debug (int kind, const char *msg, ...) ++{ ++ va_list ap; ++ ++ va_start (ap, msg); ++ gomp_vdebug (kind, msg, ap); ++ va_end (ap); ++} ++ ++void ++GOMP_PLUGIN_error (const char *msg, ...) ++{ ++ va_list ap; ++ ++ va_start (ap, msg); ++ gomp_verror (msg, ap); ++ va_end (ap); ++} ++ ++void ++GOMP_PLUGIN_fatal (const char *msg, ...) ++{ ++ va_list ap; ++ ++ va_start (ap, msg); ++ gomp_vfatal (msg, ap); ++ va_end (ap); ++} +--- libgomp/libgomp-plugin.h.jj 2016-07-13 16:57:04.438535323 +0200 ++++ libgomp/libgomp-plugin.h 2016-07-13 16:57:04.438535323 +0200 +@@ -0,0 +1,80 @@ ++/* Copyright (C) 2014-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* An interface to various libgomp-internal functions for use by plugins. */ ++ ++#ifndef LIBGOMP_PLUGIN_H ++#define LIBGOMP_PLUGIN_H 1 ++ ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* Capabilities of offloading devices. */ ++#define GOMP_OFFLOAD_CAP_SHARED_MEM (1 << 0) ++#define GOMP_OFFLOAD_CAP_NATIVE_EXEC (1 << 1) ++#define GOMP_OFFLOAD_CAP_OPENMP_400 (1 << 2) ++#define GOMP_OFFLOAD_CAP_OPENACC_200 (1 << 3) ++ ++/* Type of offload target device. Keep in sync with include/gomp-constants.h. */ ++enum offload_target_type ++{ ++ OFFLOAD_TARGET_TYPE_HOST = 2, ++ /* OFFLOAD_TARGET_TYPE_HOST_NONSHM = 3 removed. */ ++ OFFLOAD_TARGET_TYPE_NVIDIA_PTX = 5, ++ OFFLOAD_TARGET_TYPE_INTEL_MIC = 6, ++ OFFLOAD_TARGET_TYPE_HSA = 7 ++}; ++ ++/* Auxiliary struct, used for transferring pairs of addresses from plugin ++ to libgomp. */ ++struct addr_pair ++{ ++ uintptr_t start; ++ uintptr_t end; ++}; ++ ++/* Miscellaneous functions. 
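++ Plugins allocate through these wrappers so that failures are reported
++ through libgomp; e.g. a hypothetical load_image hook could build its
++ address table with:
++
++   struct addr_pair *table
++     = GOMP_PLUGIN_malloc (n * sizeof (struct addr_pair));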
*/ ++extern void *GOMP_PLUGIN_malloc (size_t) __attribute__ ((malloc)); ++extern void *GOMP_PLUGIN_malloc_cleared (size_t) __attribute__ ((malloc)); ++extern void *GOMP_PLUGIN_realloc (void *, size_t); ++void GOMP_PLUGIN_target_task_completion (void *); ++ ++extern void GOMP_PLUGIN_debug (int, const char *, ...) ++ __attribute__ ((format (printf, 2, 3))); ++extern void GOMP_PLUGIN_error (const char *, ...) ++ __attribute__ ((format (printf, 1, 2))); ++extern void GOMP_PLUGIN_fatal (const char *, ...) ++ __attribute__ ((noreturn, format (printf, 1, 2))); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif +--- libgomp/oacc-async.c.jj 2016-07-13 16:57:13.488423109 +0200 ++++ libgomp/oacc-async.c 2016-07-13 16:57:13.488423109 +0200 +@@ -0,0 +1,107 @@ ++/* OpenACC Runtime Library Definitions. ++ ++ Copyright (C) 2013-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . 
*/ ++ ++#include ++#include "openacc.h" ++#include "libgomp.h" ++#include "oacc-int.h" ++ ++int ++acc_async_test (int async) ++{ ++ if (async < acc_async_sync) ++ gomp_fatal ("invalid async argument: %d", async); ++ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (!thr || !thr->dev) ++ gomp_fatal ("no device active"); ++ ++ return thr->dev->openacc.async_test_func (async); ++} ++ ++int ++acc_async_test_all (void) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (!thr || !thr->dev) ++ gomp_fatal ("no device active"); ++ ++ return thr->dev->openacc.async_test_all_func (); ++} ++ ++void ++acc_wait (int async) ++{ ++ if (async < acc_async_sync) ++ gomp_fatal ("invalid async argument: %d", async); ++ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (!thr || !thr->dev) ++ gomp_fatal ("no device active"); ++ ++ thr->dev->openacc.async_wait_func (async); ++} ++ ++void ++acc_wait_async (int async1, int async2) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (!thr || !thr->dev) ++ gomp_fatal ("no device active"); ++ ++ thr->dev->openacc.async_wait_async_func (async1, async2); ++} ++ ++void ++acc_wait_all (void) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (!thr || !thr->dev) ++ gomp_fatal ("no device active"); ++ ++ thr->dev->openacc.async_wait_all_func (); ++} ++ ++void ++acc_wait_all_async (int async) ++{ ++ if (async < acc_async_sync) ++ gomp_fatal ("invalid async argument: %d", async); ++ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (!thr || !thr->dev) ++ gomp_fatal ("no device active"); ++ ++ thr->dev->openacc.async_wait_all_async_func (async); ++} +--- libgomp/splay-tree.h.jj 2016-07-13 16:57:18.934355582 +0200 ++++ libgomp/splay-tree.h 2016-07-13 16:57:18.934355582 +0200 +@@ -0,0 +1,130 @@ ++/* A splay-tree datatype. ++ Copyright (C) 1998-2016 Free Software Foundation, Inc. ++ Contributed by Mark Mitchell (mark@markmitchell.com). ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* The splay tree code copied from include/splay-tree.h and adjusted, ++ so that all the data lives directly in splay_tree_node_s structure ++ and no extra allocations are needed. ++ ++ Files including this header should before including it add: ++typedef struct splay_tree_node_s *splay_tree_node; ++typedef struct splay_tree_s *splay_tree; ++typedef struct splay_tree_key_s *splay_tree_key; ++ define splay_tree_key_s structure, and define ++ splay_compare inline function. 
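++ A hypothetical client mapping plain address ranges could thus do:
++
++   typedef struct splay_tree_node_s *splay_tree_node;
++   typedef struct splay_tree_s *splay_tree;
++   typedef struct splay_tree_key_s *splay_tree_key;
++   struct splay_tree_key_s { uintptr_t start; };
++   static inline int
++   splay_compare (splay_tree_key x, splay_tree_key y)
++   {
++     return x->start < y->start ? -1 : x->start > y->start ? 1 : 0;
++   }
++   #include "splay-tree.h"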
++ ++ Alternatively, they can define splay_tree_prefix macro before ++ including this header and then all the above types, the ++ splay_compare function and the splay_tree_{lookup,insert_remove} ++ function will be prefixed by that prefix. If splay_tree_prefix ++ macro is defined, this header must be included twice: once where ++ you need the header file definitions, and once where you need the ++ .c implementation routines. In the latter case, you must also ++ define the macro splay_tree_c. See the include of splay-tree.h in ++ priority_queue.[hc] for an example. */ ++ ++/* For an easily readable description of splay-trees, see: ++ ++ Lewis, Harry R. and Denenberg, Larry. Data Structures and Their ++ Algorithms. Harper-Collins, Inc. 1991. ++ ++ The major feature of splay trees is that all basic tree operations ++ are amortized O(log n) time for a tree with n nodes. */ ++ ++#ifdef splay_tree_prefix ++# define splay_tree_name_1(prefix, name) prefix ## _ ## name ++# define splay_tree_name(prefix, name) splay_tree_name_1 (prefix, name) ++# define splay_tree_node_s \ ++ splay_tree_name (splay_tree_prefix, splay_tree_node_s) ++# define splay_tree_s \ ++ splay_tree_name (splay_tree_prefix, splay_tree_s) ++# define splay_tree_key_s \ ++ splay_tree_name (splay_tree_prefix, splay_tree_key_s) ++# define splay_tree_node \ ++ splay_tree_name (splay_tree_prefix, splay_tree_node) ++# define splay_tree \ ++ splay_tree_name (splay_tree_prefix, splay_tree) ++# define splay_tree_key \ ++ splay_tree_name (splay_tree_prefix, splay_tree_key) ++# define splay_compare \ ++ splay_tree_name (splay_tree_prefix, splay_compare) ++# define splay_tree_lookup \ ++ splay_tree_name (splay_tree_prefix, splay_tree_lookup) ++# define splay_tree_insert \ ++ splay_tree_name (splay_tree_prefix, splay_tree_insert) ++# define splay_tree_remove \ ++ splay_tree_name (splay_tree_prefix, splay_tree_remove) ++# define splay_tree_foreach \ ++ splay_tree_name (splay_tree_prefix, splay_tree_foreach) ++# define splay_tree_callback \ ++ splay_tree_name (splay_tree_prefix, splay_tree_callback) ++#endif ++ ++#ifndef splay_tree_c ++/* Header file definitions and prototypes. */ ++ ++/* The nodes in the splay tree. */ ++struct splay_tree_node_s { ++ struct splay_tree_key_s key; ++ /* The left and right children, respectively. */ ++ splay_tree_node left; ++ splay_tree_node right; ++}; ++ ++/* The splay tree. 
*/ ++struct splay_tree_s { ++ splay_tree_node root; ++}; ++ ++typedef void (*splay_tree_callback) (splay_tree_key, void *); ++ ++extern splay_tree_key splay_tree_lookup (splay_tree, splay_tree_key); ++extern void splay_tree_insert (splay_tree, splay_tree_node); ++extern void splay_tree_remove (splay_tree, splay_tree_key); ++extern void splay_tree_foreach (splay_tree, splay_tree_callback, void *); ++#else /* splay_tree_c */ ++# ifdef splay_tree_prefix ++# include "splay-tree.c" ++# undef splay_tree_name_1 ++# undef splay_tree_name ++# undef splay_tree_node_s ++# undef splay_tree_s ++# undef splay_tree_key_s ++# undef splay_tree_node ++# undef splay_tree ++# undef splay_tree_key ++# undef splay_compare ++# undef splay_tree_lookup ++# undef splay_tree_insert ++# undef splay_tree_remove ++# undef splay_tree_foreach ++# undef splay_tree_callback ++# undef splay_tree_c ++# endif ++#endif /* #ifndef splay_tree_c */ ++ ++#ifdef splay_tree_prefix ++# undef splay_tree_prefix ++#endif +--- libgomp/oacc-plugin.c.jj 2016-07-13 16:57:13.481423196 +0200 ++++ libgomp/oacc-plugin.c 2016-07-14 15:40:21.653151873 +0200 +@@ -0,0 +1,44 @@ ++/* Copyright (C) 2014-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* Initialize and register OpenACC dispatch table from libgomp plugin. */ ++ ++#include "libgomp.h" ++#include "oacc-plugin.h" ++#include "oacc-int.h" ++ ++void ++GOMP_PLUGIN_async_unmap_vars (void *ptr, int async) ++{ ++} ++ ++/* Return the target-specific part of the TLS data for the current thread. */ ++ ++void * ++GOMP_PLUGIN_acc_thread (void) ++{ ++ return NULL; ++} +--- libgomp/oacc-init.c.jj 2016-07-13 16:57:04.423535509 +0200 ++++ libgomp/oacc-init.c 2016-07-14 19:06:41.679575688 +0200 +@@ -0,0 +1,640 @@ ++/* OpenACC Runtime initialization routines ++ ++ Copyright (C) 2013-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. 
++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++#include "libgomp.h" ++#include "oacc-int.h" ++#include "openacc.h" ++#include ++#include ++#include ++#include ++#include ++ ++/* This lock is used to protect access to cached_base_dev, dispatchers and ++ the (abstract) initialisation state of attached offloading devices. */ ++ ++static gomp_mutex_t acc_device_lock; ++ ++/* A cached version of the dispatcher for the global "current" accelerator type, ++ e.g. used as the default when creating new host threads. This is the ++ device-type equivalent of goacc_device_num (which specifies which device to ++ use out of potentially several of the same type). If there are several ++ devices of a given type, this points at the first one. */ ++ ++static struct gomp_device_descr *cached_base_dev = NULL; ++ ++#if defined HAVE_TLS || defined USE_EMUTLS ++__thread struct goacc_thread *goacc_tls_data; ++#else ++pthread_key_t goacc_tls_key; ++#endif ++static pthread_key_t goacc_cleanup_key; ++ ++static struct goacc_thread *goacc_threads; ++static gomp_mutex_t goacc_thread_lock; ++ ++/* An array of dispatchers for device types, indexed by the type. This array ++ only references "base" devices, and other instances of the same type are ++ found by simply indexing from each such device (which are stored linearly, ++ grouped by device in target.c:devices). */ ++static struct gomp_device_descr *dispatchers[_ACC_device_hwm] = { 0 }; ++ ++attribute_hidden void ++goacc_register (struct gomp_device_descr *disp) ++{ ++ /* Only register the 0th device here. */ ++ if (disp->target_id != 0) ++ return; ++ ++ gomp_mutex_lock (&acc_device_lock); ++ ++ assert (acc_device_type (disp->type) != acc_device_none ++ && acc_device_type (disp->type) != acc_device_default ++ && acc_device_type (disp->type) != acc_device_not_host); ++ assert (!dispatchers[disp->type]); ++ dispatchers[disp->type] = disp; ++ ++ gomp_mutex_unlock (&acc_device_lock); ++} ++ ++static const char * ++name_of_acc_device_t (enum acc_device_t type) ++{ ++ switch (type) ++ { ++ case acc_device_none: return "none"; ++ case acc_device_default: return "default"; ++ case acc_device_host: return "host"; ++ case acc_device_not_host: return "not_host"; ++ case acc_device_nvidia: return "nvidia"; ++ default: gomp_fatal ("unknown device type %u", (unsigned) type); ++ } ++} ++ ++/* ACC_DEVICE_LOCK must be held before calling this function. If FAIL_IS_ERROR ++ is true, this function raises an error if there are no devices of type D, ++ otherwise it returns NULL in that case. */ ++ ++static struct gomp_device_descr * ++resolve_device (acc_device_t d, bool fail_is_error) ++{ ++ acc_device_t d_arg = d; ++ ++ switch (d) ++ { ++ case acc_device_default: ++ { ++ if (goacc_device_type) ++ { ++ /* Lookup the named device. */ ++ if (!strcasecmp (goacc_device_type, "host")) ++ { ++ d = acc_device_host; ++ goto found; ++ } ++ ++ if (fail_is_error) ++ { ++ gomp_mutex_unlock (&acc_device_lock); ++ gomp_fatal ("device type %s not supported", goacc_device_type); ++ } ++ else ++ return NULL; ++ } ++ ++ /* No default device specified, so start scanning for any non-host ++ device that is available. 
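++ (In this patch the acc_device_not_host case below simply falls back
++ to acc_device_host when the request came from acc_device_default.)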
*/ ++ d = acc_device_not_host; ++ } ++ /* FALLTHROUGH */ ++ ++ case acc_device_not_host: ++ if (d_arg == acc_device_default) ++ { ++ d = acc_device_host; ++ goto found; ++ } ++ if (fail_is_error) ++ { ++ gomp_mutex_unlock (&acc_device_lock); ++ gomp_fatal ("no device found"); ++ } ++ else ++ return NULL; ++ break; ++ ++ case acc_device_host: ++ break; ++ ++ default: ++ if (d > _ACC_device_hwm) ++ { ++ if (fail_is_error) ++ goto unsupported_device; ++ else ++ return NULL; ++ } ++ break; ++ } ++ found: ++ ++ assert (d != acc_device_none ++ && d != acc_device_default ++ && d != acc_device_not_host); ++ ++ if (dispatchers[d] == NULL && fail_is_error) ++ { ++ unsupported_device: ++ gomp_mutex_unlock (&acc_device_lock); ++ gomp_fatal ("device type %s not supported", name_of_acc_device_t (d)); ++ } ++ ++ return dispatchers[d]; ++} ++ ++/* Emit a suitable error if no device of a particular type is available, or ++ the given device number is out-of-range. */ ++static void ++acc_dev_num_out_of_range (acc_device_t d, int ord, int ndevs) ++{ ++ if (ndevs == 0) ++ gomp_fatal ("no devices of type %s available", name_of_acc_device_t (d)); ++ else ++ gomp_fatal ("device %u out of range", ord); ++} ++ ++/* This is called when plugins have been initialized, and serves to call ++ (indirectly) the target's device_init hook. Calling multiple times without ++ an intervening acc_shutdown_1 call is an error. ACC_DEVICE_LOCK must be ++ held before calling this function. */ ++ ++static struct gomp_device_descr * ++acc_init_1 (acc_device_t d) ++{ ++ struct gomp_device_descr *base_dev, *acc_dev; ++ int ndevs; ++ ++ base_dev = resolve_device (d, true); ++ ++ ndevs = base_dev->get_num_devices_func (); ++ ++ if (ndevs <= 0 || goacc_device_num >= ndevs) ++ acc_dev_num_out_of_range (d, goacc_device_num, ndevs); ++ ++ acc_dev = &base_dev[goacc_device_num]; ++ ++ gomp_mutex_lock (&acc_dev->lock); ++ if (acc_dev->state == GOMP_DEVICE_INITIALIZED) ++ { ++ gomp_mutex_unlock (&acc_dev->lock); ++ gomp_fatal ("device already active"); ++ } ++ ++ gomp_init_device (acc_dev); ++ gomp_mutex_unlock (&acc_dev->lock); ++ ++ return base_dev; ++} ++ ++/* ACC_DEVICE_LOCK must be held before calling this function. */ ++ ++static void ++acc_shutdown_1 (acc_device_t d) ++{ ++ struct gomp_device_descr *base_dev; ++ struct goacc_thread *walk; ++ int ndevs, i; ++ bool devices_active = false; ++ ++ /* Get the base device for this device type. */ ++ base_dev = resolve_device (d, true); ++ ++ ndevs = base_dev->get_num_devices_func (); ++ ++ gomp_mutex_lock (&goacc_thread_lock); ++ ++ /* Free target-specific TLS data and close all devices. */ ++ for (walk = goacc_threads; walk != NULL; walk = walk->next) ++ { ++ if (walk->target_tls) ++ base_dev->openacc.destroy_thread_data_func (walk->target_tls); ++ ++ walk->target_tls = NULL; ++ ++ /* Similarly, if this happens then user code has done something weird. */ ++ if (walk->saved_bound_dev) ++ { ++ gomp_mutex_unlock (&goacc_thread_lock); ++ gomp_fatal ("shutdown during host fallback"); ++ } ++ ++ if (walk->dev) ++ { ++ gomp_mutex_lock (&walk->dev->lock); ++ gomp_free_memmap (&walk->dev->mem_map); ++ gomp_mutex_unlock (&walk->dev->lock); ++ ++ walk->dev = NULL; ++ walk->base_dev = NULL; ++ } ++ } ++ ++ gomp_mutex_unlock (&goacc_thread_lock); ++ ++ /* Close all the devices of this type that have been opened. 
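++ Each one is returned to the GOMP_DEVICE_UNINITIALIZED state so that
++ a later acc_init can initialize it again.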
*/ ++ bool ret = true; ++ for (i = 0; i < ndevs; i++) ++ { ++ struct gomp_device_descr *acc_dev = &base_dev[i]; ++ gomp_mutex_lock (&acc_dev->lock); ++ if (acc_dev->state == GOMP_DEVICE_INITIALIZED) ++ { ++ devices_active = true; ++ ret &= acc_dev->fini_device_func (acc_dev->target_id); ++ acc_dev->state = GOMP_DEVICE_UNINITIALIZED; ++ } ++ gomp_mutex_unlock (&acc_dev->lock); ++ } ++ ++ if (!ret) ++ gomp_fatal ("device finalization failed"); ++ ++ if (!devices_active) ++ gomp_fatal ("no device initialized"); ++} ++ ++static struct goacc_thread * ++goacc_new_thread (void) ++{ ++ struct goacc_thread *thr = gomp_malloc (sizeof (struct gomp_thread)); ++ ++#if defined HAVE_TLS || defined USE_EMUTLS ++ goacc_tls_data = thr; ++#else ++ pthread_setspecific (goacc_tls_key, thr); ++#endif ++ ++ pthread_setspecific (goacc_cleanup_key, thr); ++ ++ gomp_mutex_lock (&goacc_thread_lock); ++ thr->next = goacc_threads; ++ goacc_threads = thr; ++ gomp_mutex_unlock (&goacc_thread_lock); ++ ++ return thr; ++} ++ ++static void ++goacc_destroy_thread (void *data) ++{ ++ struct goacc_thread *thr = data, *walk, *prev; ++ ++ gomp_mutex_lock (&goacc_thread_lock); ++ ++ if (thr) ++ { ++ struct gomp_device_descr *acc_dev = thr->dev; ++ ++ if (acc_dev && thr->target_tls) ++ { ++ acc_dev->openacc.destroy_thread_data_func (thr->target_tls); ++ thr->target_tls = NULL; ++ } ++ ++ assert (!thr->mapped_data); ++ ++ /* Remove from thread list. */ ++ for (prev = NULL, walk = goacc_threads; walk; ++ prev = walk, walk = walk->next) ++ if (walk == thr) ++ { ++ if (prev == NULL) ++ goacc_threads = walk->next; ++ else ++ prev->next = walk->next; ++ ++ free (thr); ++ ++ break; ++ } ++ ++ assert (walk); ++ } ++ ++ gomp_mutex_unlock (&goacc_thread_lock); ++} ++ ++/* Use the ORD'th device instance for the current host thread (or -1 for the ++ current global default). The device (and the runtime) must be initialised ++ before calling this function. */ ++ ++void ++goacc_attach_host_thread_to_device (int ord) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ struct gomp_device_descr *acc_dev = NULL, *base_dev = NULL; ++ int num_devices; ++ ++ if (thr && thr->dev && (thr->dev->target_id == ord || ord < 0)) ++ return; ++ ++ if (ord < 0) ++ ord = goacc_device_num; ++ ++ /* Decide which type of device to use. If the current thread has a device ++ type already (e.g. set by acc_set_device_type), use that, else use the ++ global default. */ ++ if (thr && thr->base_dev) ++ base_dev = thr->base_dev; ++ else ++ { ++ assert (cached_base_dev); ++ base_dev = cached_base_dev; ++ } ++ ++ num_devices = base_dev->get_num_devices_func (); ++ if (num_devices <= 0 || ord >= num_devices) ++ acc_dev_num_out_of_range (acc_device_type (base_dev->type), ord, ++ num_devices); ++ ++ if (!thr) ++ thr = goacc_new_thread (); ++ ++ thr->base_dev = base_dev; ++ thr->dev = acc_dev = &base_dev[ord]; ++ thr->saved_bound_dev = NULL; ++ ++ thr->target_tls ++ = acc_dev->openacc.create_thread_data_func (ord); ++ ++ acc_dev->openacc.async_set_async_func (acc_async_sync); ++} ++ ++/* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of ++ init/shutdown is per-process or per-thread. We choose per-process. 
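++
++ A minimal hypothetical user-code sequence under this model:
++
++   acc_init (acc_device_default);
++   ...offloaded work...
++   acc_shutdown (acc_device_default);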
*/ ++ ++void ++acc_init (acc_device_t d) ++{ ++ gomp_mutex_lock (&acc_device_lock); ++ ++ cached_base_dev = acc_init_1 (d); ++ ++ gomp_mutex_unlock (&acc_device_lock); ++ ++ goacc_attach_host_thread_to_device (-1); ++} ++ ++ialias (acc_init) ++ ++void ++acc_shutdown (acc_device_t d) ++{ ++ gomp_mutex_lock (&acc_device_lock); ++ ++ acc_shutdown_1 (d); ++ ++ gomp_mutex_unlock (&acc_device_lock); ++} ++ ++ialias (acc_shutdown) ++ ++int ++acc_get_num_devices (acc_device_t d) ++{ ++ int n = 0; ++ struct gomp_device_descr *acc_dev; ++ ++ if (d == acc_device_none) ++ return 0; ++ ++ gomp_mutex_lock (&acc_device_lock); ++ acc_dev = resolve_device (d, false); ++ gomp_mutex_unlock (&acc_device_lock); ++ ++ if (!acc_dev) ++ return 0; ++ ++ n = acc_dev->get_num_devices_func (); ++ if (n < 0) ++ n = 0; ++ ++ return n; ++} ++ ++ialias (acc_get_num_devices) ++ ++/* Set the device type for the current thread only (using the current global ++ default device number), initialising that device if necessary. Also set the ++ default device type for new threads to D. */ ++ ++void ++acc_set_device_type (acc_device_t d) ++{ ++ struct gomp_device_descr *base_dev, *acc_dev; ++ struct goacc_thread *thr = goacc_thread (); ++ ++ gomp_mutex_lock (&acc_device_lock); ++ ++ cached_base_dev = base_dev = resolve_device (d, true); ++ acc_dev = &base_dev[goacc_device_num]; ++ ++ gomp_mutex_lock (&acc_dev->lock); ++ if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED) ++ gomp_init_device (acc_dev); ++ gomp_mutex_unlock (&acc_dev->lock); ++ ++ gomp_mutex_unlock (&acc_device_lock); ++ ++ /* We're changing device type: invalidate the current thread's dev and ++ base_dev pointers. */ ++ if (thr && thr->base_dev != base_dev) ++ { ++ thr->base_dev = thr->dev = NULL; ++ } ++ ++ goacc_attach_host_thread_to_device (-1); ++} ++ ++ialias (acc_set_device_type) ++ ++acc_device_t ++acc_get_device_type (void) ++{ ++ acc_device_t res = acc_device_none; ++ struct gomp_device_descr *dev; ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (thr && thr->base_dev) ++ res = acc_device_type (thr->base_dev->type); ++ else ++ { ++ gomp_mutex_lock (&acc_device_lock); ++ dev = resolve_device (acc_device_default, true); ++ gomp_mutex_unlock (&acc_device_lock); ++ res = acc_device_type (dev->type); ++ } ++ ++ assert (res != acc_device_default ++ && res != acc_device_not_host); ++ ++ return res; ++} ++ ++ialias (acc_get_device_type) ++ ++int ++acc_get_device_num (acc_device_t d) ++{ ++ const struct gomp_device_descr *dev; ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (d >= _ACC_device_hwm) ++ gomp_fatal ("unknown device type %u", (unsigned) d); ++ ++ gomp_mutex_lock (&acc_device_lock); ++ dev = resolve_device (d, true); ++ gomp_mutex_unlock (&acc_device_lock); ++ ++ if (thr && thr->base_dev == dev && thr->dev) ++ return thr->dev->target_id; ++ ++ return goacc_device_num; ++} ++ ++ialias (acc_get_device_num) ++ ++void ++acc_set_device_num (int ord, acc_device_t d) ++{ ++ struct gomp_device_descr *base_dev, *acc_dev; ++ int num_devices; ++ ++ if (ord < 0) ++ ord = goacc_device_num; ++ ++ if ((int) d == 0) ++ /* Set whatever device is being used by the current host thread to use ++ device instance ORD. It's unclear if this is supposed to affect other ++ host threads too (OpenACC 2.0 (3.2.4) acc_set_device_num). 
*/ ++ goacc_attach_host_thread_to_device (ord); ++ else ++ { ++ gomp_mutex_lock (&acc_device_lock); ++ ++ cached_base_dev = base_dev = resolve_device (d, true); ++ ++ num_devices = base_dev->get_num_devices_func (); ++ ++ if (num_devices <= 0 || ord >= num_devices) ++ acc_dev_num_out_of_range (d, ord, num_devices); ++ ++ acc_dev = &base_dev[ord]; ++ ++ gomp_mutex_lock (&acc_dev->lock); ++ if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED) ++ gomp_init_device (acc_dev); ++ gomp_mutex_unlock (&acc_dev->lock); ++ ++ gomp_mutex_unlock (&acc_device_lock); ++ ++ goacc_attach_host_thread_to_device (ord); ++ } ++ ++ goacc_device_num = ord; ++} ++ ++ialias (acc_set_device_num) ++ ++int ++acc_on_device (acc_device_t dev) ++{ ++ return dev == acc_device_host || dev == acc_device_none; ++} ++ ++ialias (acc_on_device) ++ ++attribute_hidden void ++goacc_runtime_initialize (void) ++{ ++ gomp_mutex_init (&acc_device_lock); ++ ++#if !(defined HAVE_TLS || defined USE_EMUTLS) ++ pthread_key_create (&goacc_tls_key, NULL); ++#endif ++ ++ pthread_key_create (&goacc_cleanup_key, goacc_destroy_thread); ++ ++ cached_base_dev = NULL; ++ ++ goacc_threads = NULL; ++ gomp_mutex_init (&goacc_thread_lock); ++ ++ /* Initialize and register the 'host' device type. */ ++ goacc_host_init (); ++} ++ ++/* Compiler helper functions */ ++ ++attribute_hidden void ++goacc_save_and_set_bind (acc_device_t d) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ assert (!thr->saved_bound_dev); ++ ++ thr->saved_bound_dev = thr->dev; ++ thr->dev = dispatchers[d]; ++} ++ ++attribute_hidden void ++goacc_restore_bind (void) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ thr->dev = thr->saved_bound_dev; ++ thr->saved_bound_dev = NULL; ++} ++ ++/* This is called from any OpenACC support function that may need to implicitly ++ initialize the libgomp runtime, either globally or from a new host thread. ++ On exit "goacc_thread" will return a valid & populated thread block. */ ++ ++attribute_hidden void ++goacc_lazy_initialize (void) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (thr && thr->dev) ++ return; ++ ++ if (!cached_base_dev) ++ acc_init (acc_device_default); ++ else ++ goacc_attach_host_thread_to_device (-1); ++} +--- libgomp/oacc-int.h.jj 2016-07-13 16:57:04.400535794 +0200 ++++ libgomp/oacc-int.h 2016-07-13 16:57:04.400535794 +0200 +@@ -0,0 +1,106 @@ ++/* OpenACC Runtime - internal declarations ++ ++ Copyright (C) 2013-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . 
*/ ++ ++/* This file contains data types and function declarations that are not ++ part of the official OpenACC user interface. There are declarations ++ in here that are part of the GNU OpenACC ABI, in that the compiler is ++ required to know about them and use them. ++ ++ The convention is that the all caps prefix "GOACC" is used group items ++ that are part of the external ABI, and the lower case prefix "goacc" ++ is used group items that are completely private to the library. */ ++ ++#ifndef OACC_INT_H ++#define OACC_INT_H 1 ++ ++#include "openacc.h" ++#include "config.h" ++#include ++#include ++#include ++ ++#ifdef HAVE_ATTRIBUTE_VISIBILITY ++# pragma GCC visibility push(hidden) ++#endif ++ ++static inline enum acc_device_t ++acc_device_type (enum offload_target_type type) ++{ ++ return (enum acc_device_t) type; ++} ++ ++struct goacc_thread ++{ ++ /* The base device for the current thread. */ ++ struct gomp_device_descr *base_dev; ++ ++ /* The device for the current thread. */ ++ struct gomp_device_descr *dev; ++ ++ struct gomp_device_descr *saved_bound_dev; ++ ++ /* This is a linked list of data mapped by the "acc data" pragma, following ++ strictly push/pop semantics according to lexical scope. */ ++ struct target_mem_desc *mapped_data; ++ ++ /* These structures form a list: this is the next thread in that list. */ ++ struct goacc_thread *next; ++ ++ /* Target-specific data (used by plugin). */ ++ void *target_tls; ++}; ++ ++#if defined HAVE_TLS || defined USE_EMUTLS ++extern __thread struct goacc_thread *goacc_tls_data; ++static inline struct goacc_thread * ++goacc_thread (void) ++{ ++ return goacc_tls_data; ++} ++#else ++extern pthread_key_t goacc_tls_key; ++static inline struct goacc_thread * ++goacc_thread (void) ++{ ++ return pthread_getspecific (goacc_tls_key); ++} ++#endif ++ ++void goacc_register (struct gomp_device_descr *) __GOACC_NOTHROW; ++void goacc_attach_host_thread_to_device (int); ++void goacc_runtime_initialize (void); ++void goacc_save_and_set_bind (acc_device_t); ++void goacc_restore_bind (void); ++void goacc_lazy_initialize (void); ++void goacc_host_init (void); ++ ++#ifdef HAVE_ATTRIBUTE_VISIBILITY ++# pragma GCC visibility pop ++#endif ++ ++#endif +--- libgomp/oacc-host.c.jj 2016-07-13 16:57:13.489423096 +0200 ++++ libgomp/oacc-host.c 2016-07-13 16:57:13.489423096 +0200 +@@ -0,0 +1,266 @@ ++/* OpenACC Runtime Library: acc_device_host. ++ ++ Copyright (C) 2013-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . 
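goacc_thread () above is the hot-path accessor: with native or emulated TLS it is a single thread-local load, otherwise it falls back to a pthread key lookup. A stripped-down illustration of the same dual-path pattern, with invented toy_* names (not part of libgomp; HAVE_TLS stands in for the configure-time check):

#include <pthread.h>

struct toy_state { int device; };

#ifdef HAVE_TLS
static __thread struct toy_state *toy_tls;
static inline struct toy_state *toy_get (void) { return toy_tls; }
static inline void toy_set (struct toy_state *s) { toy_tls = s; }
#else
static pthread_key_t toy_key;  /* assumed created once via pthread_key_create */
static inline struct toy_state *toy_get (void)
{ return pthread_getspecific (toy_key); }
static inline void toy_set (struct toy_state *s)
{ pthread_setspecific (toy_key, s); }
#endif
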
*/ ++ ++#include "libgomp.h" ++#include "oacc-int.h" ++#include "gomp-constants.h" ++ ++#include ++#include ++#include ++ ++static struct gomp_device_descr host_dispatch; ++ ++static const char * ++host_get_name (void) ++{ ++ return host_dispatch.name; ++} ++ ++static unsigned int ++host_get_caps (void) ++{ ++ return host_dispatch.capabilities; ++} ++ ++static int ++host_get_type (void) ++{ ++ return host_dispatch.type; ++} ++ ++static int ++host_get_num_devices (void) ++{ ++ return 1; ++} ++ ++static bool ++host_init_device (int n __attribute__ ((unused))) ++{ ++ return true; ++} ++ ++static bool ++host_fini_device (int n __attribute__ ((unused))) ++{ ++ return true; ++} ++ ++static unsigned ++host_version (void) ++{ ++ return GOMP_VERSION; ++} ++ ++static int ++host_load_image (int n __attribute__ ((unused)), ++ unsigned v __attribute__ ((unused)), ++ const void *t __attribute__ ((unused)), ++ struct addr_pair **r __attribute__ ((unused))) ++{ ++ return 0; ++} ++ ++static bool ++host_unload_image (int n __attribute__ ((unused)), ++ unsigned v __attribute__ ((unused)), ++ const void *t __attribute__ ((unused))) ++{ ++ return true; ++} ++ ++static void * ++host_alloc (int n __attribute__ ((unused)), size_t s) ++{ ++ return gomp_malloc (s); ++} ++ ++static bool ++host_free (int n __attribute__ ((unused)), void *p) ++{ ++ free (p); ++ return true; ++} ++ ++static bool ++host_dev2host (int n __attribute__ ((unused)), ++ void *h __attribute__ ((unused)), ++ const void *d __attribute__ ((unused)), ++ size_t s __attribute__ ((unused))) ++{ ++ return true; ++} ++ ++static bool ++host_host2dev (int n __attribute__ ((unused)), ++ void *d __attribute__ ((unused)), ++ const void *h __attribute__ ((unused)), ++ size_t s __attribute__ ((unused))) ++{ ++ return true; ++} ++ ++static void ++host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars, ++ void **args __attribute__((unused))) ++{ ++ void (*fn)(void *) = (void (*)(void *)) fn_ptr; ++ ++ fn (vars); ++} ++ ++static void ++host_openacc_exec (void (*fn) (void *), ++ size_t mapnum __attribute__ ((unused)), ++ void **hostaddrs, ++ void **devaddrs __attribute__ ((unused)), ++ int async __attribute__ ((unused)), ++ unsigned *dims __attribute ((unused)), ++ void *targ_mem_desc __attribute__ ((unused))) ++{ ++ fn (hostaddrs); ++} ++ ++static void ++host_openacc_register_async_cleanup (void *targ_mem_desc __attribute__ ((unused)), ++ int async __attribute__ ((unused))) ++{ ++} ++ ++static int ++host_openacc_async_test (int async __attribute__ ((unused))) ++{ ++ return 1; ++} ++ ++static int ++host_openacc_async_test_all (void) ++{ ++ return 1; ++} ++ ++static void ++host_openacc_async_wait (int async __attribute__ ((unused))) ++{ ++} ++ ++static void ++host_openacc_async_wait_async (int async1 __attribute__ ((unused)), ++ int async2 __attribute__ ((unused))) ++{ ++} ++ ++static void ++host_openacc_async_wait_all (void) ++{ ++} ++ ++static void ++host_openacc_async_wait_all_async (int async __attribute__ ((unused))) ++{ ++} ++ ++static void ++host_openacc_async_set_async (int async __attribute__ ((unused))) ++{ ++} ++ ++static void * ++host_openacc_create_thread_data (int ord __attribute__ ((unused))) ++{ ++ return NULL; ++} ++ ++static void ++host_openacc_destroy_thread_data (void *tls_data __attribute__ ((unused))) ++{ ++} ++ ++static struct gomp_device_descr host_dispatch = ++ { ++ .name = "host", ++ .capabilities = (GOMP_OFFLOAD_CAP_SHARED_MEM ++ | GOMP_OFFLOAD_CAP_NATIVE_EXEC ++ | GOMP_OFFLOAD_CAP_OPENACC_200), ++ .target_id = 0, ++ .type = 
OFFLOAD_TARGET_TYPE_HOST, ++ ++ .get_name_func = host_get_name, ++ .get_caps_func = host_get_caps, ++ .get_type_func = host_get_type, ++ .get_num_devices_func = host_get_num_devices, ++ .init_device_func = host_init_device, ++ .fini_device_func = host_fini_device, ++ .version_func = host_version, ++ .load_image_func = host_load_image, ++ .unload_image_func = host_unload_image, ++ .alloc_func = host_alloc, ++ .free_func = host_free, ++ .dev2host_func = host_dev2host, ++ .host2dev_func = host_host2dev, ++ .run_func = host_run, ++ ++ .mem_map = { NULL }, ++ /* .lock initilized in goacc_host_init. */ ++ .state = GOMP_DEVICE_UNINITIALIZED, ++ ++ .openacc = { ++ .data_environ = NULL, ++ ++ .exec_func = host_openacc_exec, ++ ++ .register_async_cleanup_func = host_openacc_register_async_cleanup, ++ ++ .async_test_func = host_openacc_async_test, ++ .async_test_all_func = host_openacc_async_test_all, ++ .async_wait_func = host_openacc_async_wait, ++ .async_wait_async_func = host_openacc_async_wait_async, ++ .async_wait_all_func = host_openacc_async_wait_all, ++ .async_wait_all_async_func = host_openacc_async_wait_all_async, ++ .async_set_async_func = host_openacc_async_set_async, ++ ++ .create_thread_data_func = host_openacc_create_thread_data, ++ .destroy_thread_data_func = host_openacc_destroy_thread_data, ++ ++ .cuda = { ++ .get_current_device_func = NULL, ++ .get_current_context_func = NULL, ++ .get_stream_func = NULL, ++ .set_stream_func = NULL, ++ } ++ } ++ }; ++ ++/* Initialize and register this device type. */ ++void ++goacc_host_init (void) ++{ ++ gomp_mutex_init (&host_dispatch.lock); ++ goacc_register (&host_dispatch); ++} +--- libgomp/oacc-parallel.c.jj 2016-07-13 16:57:04.399535807 +0200 ++++ libgomp/oacc-parallel.c 2016-07-14 18:53:06.694996381 +0200 +@@ -0,0 +1,241 @@ ++/* Copyright (C) 2013-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* This file handles OpenACC constructs. */ ++ ++#include "openacc.h" ++#include "libgomp.h" ++#include "libgomp_g.h" ++#include "gomp-constants.h" ++#include "oacc-int.h" ++#ifdef HAVE_INTTYPES_H ++# include /* For PRIu64. */ ++#endif ++#include ++#include ++#include ++ ++static void goacc_wait (int async, int num_waits, va_list *ap); ++ ++ ++/* Launch a possibly offloaded function on DEVICE. FN is the host fn ++ address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory ++ blocks to be copied to/from the device. Varadic arguments are ++ keyed optional parameters terminated with a zero. 
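host_dispatch above is the whole trick: a "device" is nothing but a table of function pointers, so generic libgomp code can call through alloc_func, free_func, run_func and so on without knowing which backend sits behind them. A toy rendering of the pattern, with toy_* names invented for the example:

#include <stdlib.h>
#include <stdbool.h>

struct toy_device
{
  const char *name;
  void *(*alloc_func) (size_t);
  bool (*free_func) (void *);
};

static void *toy_alloc (size_t s) { return malloc (s); }
static bool toy_free (void *p) { free (p); return true; }

static struct toy_device toy_dev =
  {
    .name = "toy",
    .alloc_func = toy_alloc,
    .free_func = toy_free,
  };

/* Generic code then only sees the table:
   void *p = toy_dev.alloc_func (16); toy_dev.free_func (p);  */
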
*/ ++ ++void ++GOACC_parallel_keyed (int device, void (*fn) (void *), ++ size_t mapnum, void **hostaddrs, size_t *sizes, ++ unsigned short *kinds, ...) ++{ ++ bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; ++ struct goacc_thread *thr; ++ struct gomp_device_descr *acc_dev; ++ ++#ifdef HAVE_INTTYPES_H ++ gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", ++ __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); ++#else ++ gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", ++ __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); ++#endif ++ goacc_lazy_initialize (); ++ ++ thr = goacc_thread (); ++ acc_dev = thr->dev; ++ ++ /* Host fallback if "if" clause is false or if the current device is set to ++ the host. */ ++ if (host_fallback) ++ { ++ goacc_save_and_set_bind (acc_device_host); ++ fn (hostaddrs); ++ goacc_restore_bind (); ++ return; ++ } ++ else if (acc_device_type (acc_dev->type) == acc_device_host) ++ { ++ fn (hostaddrs); ++ return; ++ } ++ ++ /* acc_device_host is the only supported device type. */ ++} ++ ++/* Legacy entry point, only provide host execution. */ ++ ++void ++GOACC_parallel (int device, void (*fn) (void *), ++ size_t mapnum, void **hostaddrs, size_t *sizes, ++ unsigned short *kinds, ++ int num_gangs, int num_workers, int vector_length, ++ int async, int num_waits, ...) ++{ ++ goacc_save_and_set_bind (acc_device_host); ++ fn (hostaddrs); ++ goacc_restore_bind (); ++} ++ ++void ++GOACC_data_start (int device, size_t mapnum, ++ void **hostaddrs, size_t *sizes, unsigned short *kinds) ++{ ++ goacc_lazy_initialize (); ++} ++ ++void ++GOACC_data_end (void) ++{ ++ gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); ++ gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); ++} ++ ++void ++GOACC_enter_exit_data (int device, size_t mapnum, ++ void **hostaddrs, size_t *sizes, unsigned short *kinds, ++ int async, int num_waits, ...) ++{ ++ goacc_lazy_initialize (); ++} ++ ++static void ++goacc_wait (int async, int num_waits, va_list *ap) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ struct gomp_device_descr *acc_dev = thr->dev; ++ ++ while (num_waits--) ++ { ++ int qid = va_arg (*ap, int); ++ ++ if (acc_async_test (qid)) ++ continue; ++ ++ if (async == acc_async_sync) ++ acc_wait (qid); ++ else if (qid == async) ++ ;/* If we're waiting on the same asynchronous queue as we're ++ launching on, the queue itself will order work as ++ required, so there's no need to wait explicitly. */ ++ else ++ acc_dev->openacc.async_wait_async_func (qid, async); ++ } ++} ++ ++void ++GOACC_update (int device, size_t mapnum, ++ void **hostaddrs, size_t *sizes, unsigned short *kinds, ++ int async, int num_waits, ...) ++{ ++ goacc_lazy_initialize (); ++} ++ ++void ++GOACC_wait (int async, int num_waits, ...) 
++{ ++ if (num_waits) ++ { ++ va_list ap; ++ ++ va_start (ap, num_waits); ++ goacc_wait (async, num_waits, &ap); ++ va_end (ap); ++ } ++ else if (async == acc_async_sync) ++ acc_wait_all (); ++ else if (async == acc_async_noval) ++ goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval); ++} ++ ++int ++GOACC_get_num_threads (void) ++{ ++ return 1; ++} ++ ++int ++GOACC_get_thread_num (void) ++{ ++ return 0; ++} ++ ++void ++GOACC_declare (int device, size_t mapnum, ++ void **hostaddrs, size_t *sizes, unsigned short *kinds) ++{ ++ int i; ++ ++ for (i = 0; i < mapnum; i++) ++ { ++ unsigned char kind = kinds[i] & 0xff; ++ ++ if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET) ++ continue; ++ ++ switch (kind) ++ { ++ case GOMP_MAP_FORCE_ALLOC: ++ case GOMP_MAP_FORCE_FROM: ++ case GOMP_MAP_FORCE_TO: ++ case GOMP_MAP_POINTER: ++ case GOMP_MAP_DELETE: ++ GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], ++ &kinds[i], 0, 0); ++ break; ++ ++ case GOMP_MAP_FORCE_DEVICEPTR: ++ break; ++ ++ case GOMP_MAP_ALLOC: ++ if (!acc_is_present (hostaddrs[i], sizes[i])) ++ GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], ++ &kinds[i], 0, 0); ++ break; ++ ++ case GOMP_MAP_TO: ++ GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], ++ &kinds[i], 0, 0); ++ ++ break; ++ ++ case GOMP_MAP_FROM: ++ kinds[i] = GOMP_MAP_FORCE_FROM; ++ GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], ++ &kinds[i], 0, 0); ++ break; ++ ++ case GOMP_MAP_FORCE_PRESENT: ++ if (!acc_is_present (hostaddrs[i], sizes[i])) ++ gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i], ++ (unsigned long) sizes[i]); ++ break; ++ ++ default: ++ assert (0); ++ break; ++ } ++ } ++} +--- libgomp/oacc-cuda.c.jj 2016-07-13 16:57:04.432535397 +0200 ++++ libgomp/oacc-cuda.c 2016-07-13 16:57:04.432535397 +0200 +@@ -0,0 +1,86 @@ ++/* OpenACC Runtime Library: CUDA support glue. ++ ++ Copyright (C) 2014-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . 
*/ ++ ++#include "openacc.h" ++#include "config.h" ++#include "libgomp.h" ++#include "oacc-int.h" ++ ++void * ++acc_get_current_cuda_device (void) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (thr && thr->dev && thr->dev->openacc.cuda.get_current_device_func) ++ return thr->dev->openacc.cuda.get_current_device_func (); ++ ++ return NULL; ++} ++ ++void * ++acc_get_current_cuda_context (void) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (thr && thr->dev && thr->dev->openacc.cuda.get_current_context_func) ++ return thr->dev->openacc.cuda.get_current_context_func (); ++ ++ return NULL; ++} ++ ++void * ++acc_get_cuda_stream (int async) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (async < 0) ++ return NULL; ++ ++ if (thr && thr->dev && thr->dev->openacc.cuda.get_stream_func) ++ return thr->dev->openacc.cuda.get_stream_func (async); ++ ++ return NULL; ++} ++ ++int ++acc_set_cuda_stream (int async, void *stream) ++{ ++ struct goacc_thread *thr; ++ ++ if (async < 0 || stream == NULL) ++ return 0; ++ ++ goacc_lazy_initialize (); ++ ++ thr = goacc_thread (); ++ ++ if (thr && thr->dev && thr->dev->openacc.cuda.set_stream_func) ++ return thr->dev->openacc.cuda.set_stream_func (async, stream); ++ ++ return -1; ++} +--- libgomp/openacc_lib.h.jj 2016-07-13 16:57:13.486423134 +0200 ++++ libgomp/openacc_lib.h 2016-07-13 16:57:13.486423134 +0200 +@@ -0,0 +1,382 @@ ++! OpenACC Runtime Library Definitions. -*- mode: fortran -*- ++ ++! Copyright (C) 2014-2016 Free Software Foundation, Inc. ++ ++! Contributed by Tobias Burnus ++! and Mentor Embedded. ++ ++! This file is part of the GNU Offloading and Multi Processing Library ++! (libgomp). ++ ++! Libgomp is free software; you can redistribute it and/or modify it ++! under the terms of the GNU General Public License as published by ++! the Free Software Foundation; either version 3, or (at your option) ++! any later version. ++ ++! Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++! FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++! more details. ++ ++! Under Section 7 of GPL version 3, you are granted additional ++! permissions described in the GCC Runtime Library Exception, version ++! 3.1, as published by the Free Software Foundation. ++ ++! You should have received a copy of the GNU General Public License and ++! a copy of the GCC Runtime Library Exception along with this program; ++! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++! . ++ ++! NOTE: Due to the use of dimension (..), the code only works when compiled ++! with -std=f2008ts/gnu/legacy but not with other standard settings. ++! Alternatively, the user can use the module version, which permits ++! compilation with -std=f95. ++ ++ integer, parameter :: acc_device_kind = 4 ++ ++! Keep in sync with include/gomp-constants.h. ++ integer (acc_device_kind), parameter :: acc_device_none = 0 ++ integer (acc_device_kind), parameter :: acc_device_default = 1 ++ integer (acc_device_kind), parameter :: acc_device_host = 2 ++! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 ++! removed. ++ integer (acc_device_kind), parameter :: acc_device_not_host = 4 ++ integer (acc_device_kind), parameter :: acc_device_nvidia = 5 ++ ++ integer, parameter :: acc_handle_kind = 4 ++ ++! Keep in sync with include/gomp-constants.h. 
++ integer (acc_handle_kind), parameter :: acc_async_noval = -1 ++ integer (acc_handle_kind), parameter :: acc_async_sync = -2 ++ ++ integer, parameter :: openacc_version = 201306 ++ ++ interface acc_get_num_devices ++ function acc_get_num_devices_h (d) ++ import acc_device_kind ++ integer acc_get_num_devices_h ++ integer (acc_device_kind) d ++ end function ++ end interface ++ ++ interface acc_set_device_type ++ subroutine acc_set_device_type_h (d) ++ import acc_device_kind ++ integer (acc_device_kind) d ++ end subroutine ++ end interface ++ ++ interface acc_get_device_type ++ function acc_get_device_type_h () ++ import acc_device_kind ++ integer (acc_device_kind) acc_get_device_type_h ++ end function ++ end interface ++ ++ interface acc_set_device_num ++ subroutine acc_set_device_num_h (n, d) ++ import acc_device_kind ++ integer n ++ integer (acc_device_kind) d ++ end subroutine ++ end interface ++ ++ interface acc_get_device_num ++ function acc_get_device_num_h (d) ++ import acc_device_kind ++ integer acc_get_device_num_h ++ integer (acc_device_kind) d ++ end function ++ end interface ++ ++ interface acc_async_test ++ function acc_async_test_h (a) ++ logical acc_async_test_h ++ integer a ++ end function ++ end interface ++ ++ interface acc_async_test_all ++ function acc_async_test_all_h () ++ logical acc_async_test_all_h ++ end function ++ end interface ++ ++ interface acc_wait ++ subroutine acc_wait_h (a) ++ integer a ++ end subroutine ++ end interface ++ ++ interface acc_wait_async ++ subroutine acc_wait_async_h (a1, a2) ++ integer a1, a2 ++ end subroutine ++ end interface ++ ++ interface acc_wait_all ++ subroutine acc_wait_all_h () ++ end subroutine ++ end interface ++ ++ interface acc_wait_all_async ++ subroutine acc_wait_all_async_h (a) ++ integer a ++ end subroutine ++ end interface ++ ++ interface acc_init ++ subroutine acc_init_h (devicetype) ++ import acc_device_kind ++ integer (acc_device_kind) devicetype ++ end subroutine ++ end interface ++ ++ interface acc_shutdown ++ subroutine acc_shutdown_h (devicetype) ++ import acc_device_kind ++ integer (acc_device_kind) devicetype ++ end subroutine ++ end interface ++ ++ interface acc_on_device ++ function acc_on_device_h (devicetype) ++ import acc_device_kind ++ logical acc_on_device_h ++ integer (acc_device_kind) devicetype ++ end function ++ end interface ++ ++ ! acc_malloc: Only available in C/C++ ++ ! 
acc_free: Only available in C/C++ ++ ++ interface acc_copyin ++ subroutine acc_copyin_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_copyin_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_copyin_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_present_or_copyin ++ subroutine acc_present_or_copyin_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_present_or_copyin_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_present_or_copyin_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_pcopyin ++ subroutine acc_pcopyin_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_pcopyin_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_pcopyin_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_create ++ subroutine acc_create_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_create_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_create_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_present_or_create ++ subroutine acc_present_or_create_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_present_or_create_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_present_or_create_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_pcreate ++ subroutine acc_pcreate_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_pcreate_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_pcreate_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_copyout ++ subroutine acc_copyout_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine 
++ ++ subroutine acc_copyout_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_copyout_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_delete ++ subroutine acc_delete_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_delete_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_delete_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_update_device ++ subroutine acc_update_device_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_update_device_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_update_device_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_update_self ++ subroutine acc_update_self_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_update_self_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_update_self_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ ! acc_map_data: Only available in C/C++ ++ ! acc_unmap_data: Only available in C/C++ ++ ! acc_deviceptr: Only available in C/C++ ++ ! acc_ostptr: Only available in C/C++ ++ ++ interface acc_is_present ++ function acc_is_present_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ logical acc_is_present_32_h ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end function ++ ++ function acc_is_present_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ logical acc_is_present_64_h ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end function ++ ++ function acc_is_present_array_h (a) ++ logical acc_is_present_array_h ++ type (*), dimension (..), contiguous :: a ++ end function ++ end interface ++ ++ ! acc_memcpy_to_device: Only available in C/C++ ++ ! acc_memcpy_from_device: Only available in C/C++ +--- libgomp/gomp-constants.h.jj 2016-07-14 16:02:47.212545826 +0200 ++++ libgomp/gomp-constants.h 2016-05-26 21:04:40.000000000 +0200 +@@ -0,0 +1,259 @@ ++/* Communication between GCC and libgomp. ++ ++ Copyright (C) 2014-2015 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. 
++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++#ifndef GOMP_CONSTANTS_H ++#define GOMP_CONSTANTS_H 1 ++ ++/* Memory mapping types. */ ++ ++/* One byte. */ ++#define GOMP_MAP_LAST (1 << 8) ++ ++#define GOMP_MAP_FLAG_TO (1 << 0) ++#define GOMP_MAP_FLAG_FROM (1 << 1) ++/* Special map kinds, enumerated starting here. */ ++#define GOMP_MAP_FLAG_SPECIAL_0 (1 << 2) ++#define GOMP_MAP_FLAG_SPECIAL_1 (1 << 3) ++#define GOMP_MAP_FLAG_SPECIAL_2 (1 << 4) ++#define GOMP_MAP_FLAG_SPECIAL (GOMP_MAP_FLAG_SPECIAL_1 \ ++ | GOMP_MAP_FLAG_SPECIAL_0) ++/* Flag to force a specific behavior (or else, trigger a run-time error). */ ++#define GOMP_MAP_FLAG_FORCE (1 << 7) ++ ++enum gomp_map_kind ++ { ++ /* If not already present, allocate. */ ++ GOMP_MAP_ALLOC = 0, ++ /* ..., and copy to device. */ ++ GOMP_MAP_TO = (GOMP_MAP_ALLOC | GOMP_MAP_FLAG_TO), ++ /* ..., and copy from device. */ ++ GOMP_MAP_FROM = (GOMP_MAP_ALLOC | GOMP_MAP_FLAG_FROM), ++ /* ..., and copy to and from device. */ ++ GOMP_MAP_TOFROM = (GOMP_MAP_TO | GOMP_MAP_FROM), ++ /* The following kind is an internal only map kind, used for pointer based ++ array sections. OMP_CLAUSE_SIZE for these is not the pointer size, ++ which is implicitly POINTER_SIZE_UNITS, but the bias. */ ++ GOMP_MAP_POINTER = (GOMP_MAP_FLAG_SPECIAL_0 | 0), ++ /* Also internal, behaves like GOMP_MAP_TO, but additionally any ++ GOMP_MAP_POINTER records consecutive after it which have addresses ++ falling into that range will not be ignored if GOMP_MAP_TO_PSET wasn't ++ mapped already. */ ++ GOMP_MAP_TO_PSET = (GOMP_MAP_FLAG_SPECIAL_0 | 1), ++ /* Must already be present. */ ++ GOMP_MAP_FORCE_PRESENT = (GOMP_MAP_FLAG_SPECIAL_0 | 2), ++ /* Deallocate a mapping, without copying from device. */ ++ GOMP_MAP_DELETE = (GOMP_MAP_FLAG_SPECIAL_0 | 3), ++ /* Is a device pointer. OMP_CLAUSE_SIZE for these is unused; is implicitly ++ POINTER_SIZE_UNITS. */ ++ GOMP_MAP_FORCE_DEVICEPTR = (GOMP_MAP_FLAG_SPECIAL_1 | 0), ++ /* Do not map, copy bits for firstprivate instead. */ ++ /* OpenACC device_resident. */ ++ GOMP_MAP_DEVICE_RESIDENT = (GOMP_MAP_FLAG_SPECIAL_1 | 1), ++ /* OpenACC link. */ ++ GOMP_MAP_LINK = (GOMP_MAP_FLAG_SPECIAL_1 | 2), ++ /* Allocate. */ ++ GOMP_MAP_FIRSTPRIVATE = (GOMP_MAP_FLAG_SPECIAL | 0), ++ /* Similarly, but store the value in the pointer rather than ++ pointed by the pointer. */ ++ GOMP_MAP_FIRSTPRIVATE_INT = (GOMP_MAP_FLAG_SPECIAL | 1), ++ /* Pointer translate host address into device address and copy that ++ back to host. */ ++ GOMP_MAP_USE_DEVICE_PTR = (GOMP_MAP_FLAG_SPECIAL | 2), ++ /* Allocate a zero length array section. Prefer next non-zero length ++ mapping over previous non-zero length mapping over zero length mapping ++ at the address. If not already mapped, do nothing (and pointer translate ++ to NULL). */ ++ GOMP_MAP_ZERO_LEN_ARRAY_SECTION = (GOMP_MAP_FLAG_SPECIAL | 3), ++ /* Allocate. 
*/ ++ GOMP_MAP_FORCE_ALLOC = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_ALLOC), ++ /* ..., and copy to device. */ ++ GOMP_MAP_FORCE_TO = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_TO), ++ /* ..., and copy from device. */ ++ GOMP_MAP_FORCE_FROM = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_FROM), ++ /* ..., and copy to and from device. */ ++ GOMP_MAP_FORCE_TOFROM = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_TOFROM), ++ /* If not already present, allocate. And unconditionally copy to ++ device. */ ++ GOMP_MAP_ALWAYS_TO = (GOMP_MAP_FLAG_SPECIAL_2 | GOMP_MAP_TO), ++ /* If not already present, allocate. And unconditionally copy from ++ device. */ ++ GOMP_MAP_ALWAYS_FROM = (GOMP_MAP_FLAG_SPECIAL_2 ++ | GOMP_MAP_FROM), ++ /* If not already present, allocate. And unconditionally copy to and from ++ device. */ ++ GOMP_MAP_ALWAYS_TOFROM = (GOMP_MAP_FLAG_SPECIAL_2 ++ | GOMP_MAP_TOFROM), ++ /* Map a sparse struct; the address is the base of the structure, alignment ++ it's required alignment, and size is the number of adjacent entries ++ that belong to the struct. The adjacent entries should be sorted by ++ increasing address, so it is easy to determine lowest needed address ++ (address of the first adjacent entry) and highest needed address ++ (address of the last adjacent entry plus its size). */ ++ GOMP_MAP_STRUCT = (GOMP_MAP_FLAG_SPECIAL_2 ++ | GOMP_MAP_FLAG_SPECIAL | 0), ++ /* On a location of a pointer/reference that is assumed to be already mapped ++ earlier, store the translated address of the preceeding mapping. ++ No refcount is bumped by this, and the store is done unconditionally. */ ++ GOMP_MAP_ALWAYS_POINTER = (GOMP_MAP_FLAG_SPECIAL_2 ++ | GOMP_MAP_FLAG_SPECIAL | 1), ++ /* Forced deallocation of zero length array section. */ ++ GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION ++ = (GOMP_MAP_FLAG_SPECIAL_2 ++ | GOMP_MAP_FLAG_SPECIAL | 3), ++ /* Decrement usage count and deallocate if zero. */ ++ GOMP_MAP_RELEASE = (GOMP_MAP_FLAG_SPECIAL_2 ++ | GOMP_MAP_DELETE), ++ ++ /* Internal to GCC, not used in libgomp. */ ++ /* Do not map, but pointer assign a pointer instead. */ ++ GOMP_MAP_FIRSTPRIVATE_POINTER = (GOMP_MAP_LAST | 1), ++ /* Do not map, but pointer assign a reference instead. */ ++ GOMP_MAP_FIRSTPRIVATE_REFERENCE = (GOMP_MAP_LAST | 2) ++ }; ++ ++#define GOMP_MAP_COPY_TO_P(X) \ ++ (!((X) & GOMP_MAP_FLAG_SPECIAL) \ ++ && ((X) & GOMP_MAP_FLAG_TO)) ++ ++#define GOMP_MAP_COPY_FROM_P(X) \ ++ (!((X) & GOMP_MAP_FLAG_SPECIAL) \ ++ && ((X) & GOMP_MAP_FLAG_FROM)) ++ ++#define GOMP_MAP_POINTER_P(X) \ ++ ((X) == GOMP_MAP_POINTER) ++ ++#define GOMP_MAP_ALWAYS_TO_P(X) \ ++ (((X) == GOMP_MAP_ALWAYS_TO) || ((X) == GOMP_MAP_ALWAYS_TOFROM)) ++ ++#define GOMP_MAP_ALWAYS_FROM_P(X) \ ++ (((X) == GOMP_MAP_ALWAYS_FROM) || ((X) == GOMP_MAP_ALWAYS_TOFROM)) ++ ++#define GOMP_MAP_ALWAYS_P(X) \ ++ (GOMP_MAP_ALWAYS_TO_P (X) || ((X) == GOMP_MAP_ALWAYS_FROM)) ++ ++ ++/* Asynchronous behavior. Keep in sync with ++ libgomp/{openacc.h,openacc.f90,openacc_lib.h}:acc_async_t. */ ++ ++#define GOMP_ASYNC_NOVAL -1 ++#define GOMP_ASYNC_SYNC -2 ++ ++ ++/* Device codes. Keep in sync with ++ libgomp/{openacc.h,openacc.f90,openacc_lib.h}:acc_device_t as well as ++ libgomp/libgomp-plugin.h. */ ++#define GOMP_DEVICE_NONE 0 ++#define GOMP_DEVICE_DEFAULT 1 ++#define GOMP_DEVICE_HOST 2 ++/* #define GOMP_DEVICE_HOST_NONSHM 3 removed. */ ++#define GOMP_DEVICE_NOT_HOST 4 ++#define GOMP_DEVICE_NVIDIA_PTX 5 ++#define GOMP_DEVICE_INTEL_MIC 6 ++#define GOMP_DEVICE_HSA 7 ++ ++#define GOMP_DEVICE_ICV -1 ++#define GOMP_DEVICE_HOST_FALLBACK -2 ++ ++/* GOMP_task/GOMP_taskloop* flags argument. 
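Because every map kind is composed from the flag bits defined above, the encodings can be spot-checked mechanically. A few C11 _Static_assert examples of values implied by these definitions (a sanity sketch, not part of the patch):

#include "gomp-constants.h"

_Static_assert (GOMP_MAP_TOFROM == 0x03, "ALLOC | FLAG_TO | FLAG_FROM");
_Static_assert (GOMP_MAP_FORCE_TO == 0x81, "FORCE is bit 7");
_Static_assert (GOMP_MAP_ALWAYS_TO == 0x11, "SPECIAL_2 is bit 4");
_Static_assert (GOMP_MAP_COPY_TO_P (GOMP_MAP_FORCE_TO),
                "no SPECIAL bits, TO bit set");
_Static_assert (!GOMP_MAP_COPY_TO_P (GOMP_MAP_TO_PSET),
                "TO_PSET lives in the SPECIAL_0 space");
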
*/ ++#define GOMP_TASK_FLAG_UNTIED (1 << 0) ++#define GOMP_TASK_FLAG_FINAL (1 << 1) ++#define GOMP_TASK_FLAG_MERGEABLE (1 << 2) ++#define GOMP_TASK_FLAG_DEPEND (1 << 3) ++#define GOMP_TASK_FLAG_PRIORITY (1 << 4) ++#define GOMP_TASK_FLAG_UP (1 << 8) ++#define GOMP_TASK_FLAG_GRAINSIZE (1 << 9) ++#define GOMP_TASK_FLAG_IF (1 << 10) ++#define GOMP_TASK_FLAG_NOGROUP (1 << 11) ++ ++/* GOMP_target{_ext,update_ext,enter_exit_data} flags argument. */ ++#define GOMP_TARGET_FLAG_NOWAIT (1 << 0) ++#define GOMP_TARGET_FLAG_EXIT_DATA (1 << 1) ++/* Internal to libgomp. */ ++#define GOMP_TARGET_FLAG_UPDATE (1U << 31) ++ ++/* Versions of libgomp and device-specific plugins. GOMP_VERSION ++ should be incremented whenever an ABI-incompatible change is introduced ++ to the plugin interface defined in libgomp/libgomp.h. */ ++#define GOMP_VERSION 1 ++#define GOMP_VERSION_NVIDIA_PTX 1 ++#define GOMP_VERSION_INTEL_MIC 0 ++#define GOMP_VERSION_HSA 0 ++ ++#define GOMP_VERSION_PACK(LIB, DEV) (((LIB) << 16) | (DEV)) ++#define GOMP_VERSION_LIB(PACK) (((PACK) >> 16) & 0xffff) ++#define GOMP_VERSION_DEV(PACK) ((PACK) & 0xffff) ++ ++#define GOMP_DIM_GANG 0 ++#define GOMP_DIM_WORKER 1 ++#define GOMP_DIM_VECTOR 2 ++#define GOMP_DIM_MAX 3 ++#define GOMP_DIM_MASK(X) (1u << (X)) ++ ++/* Varadic launch arguments. End of list is marked by a zero. */ ++#define GOMP_LAUNCH_DIM 1 /* Launch dimensions, op = mask */ ++#define GOMP_LAUNCH_ASYNC 2 /* Async, op = cst val if not MAX */ ++#define GOMP_LAUNCH_WAIT 3 /* Waits, op = num waits. */ ++#define GOMP_LAUNCH_CODE_SHIFT 28 ++#define GOMP_LAUNCH_DEVICE_SHIFT 16 ++#define GOMP_LAUNCH_OP_SHIFT 0 ++#define GOMP_LAUNCH_PACK(CODE,DEVICE,OP) \ ++ (((CODE) << GOMP_LAUNCH_CODE_SHIFT) \ ++ | ((DEVICE) << GOMP_LAUNCH_DEVICE_SHIFT) \ ++ | ((OP) << GOMP_LAUNCH_OP_SHIFT)) ++#define GOMP_LAUNCH_CODE(X) (((X) >> GOMP_LAUNCH_CODE_SHIFT) & 0xf) ++#define GOMP_LAUNCH_DEVICE(X) (((X) >> GOMP_LAUNCH_DEVICE_SHIFT) & 0xfff) ++#define GOMP_LAUNCH_OP(X) (((X) >> GOMP_LAUNCH_OP_SHIFT) & 0xffff) ++#define GOMP_LAUNCH_OP_MAX 0xffff ++ ++/* Bitmask to apply in order to find out the intended device of a target ++ argument. */ ++#define GOMP_TARGET_ARG_DEVICE_MASK ((1 << 7) - 1) ++/* The target argument is significant for all devices. */ ++#define GOMP_TARGET_ARG_DEVICE_ALL 0 ++ ++/* Flag set when the subsequent element in the device-specific argument ++ values. */ ++#define GOMP_TARGET_ARG_SUBSEQUENT_PARAM (1 << 7) ++ ++/* Bitmask to apply to a target argument to find out the value identifier. */ ++#define GOMP_TARGET_ARG_ID_MASK (((1 << 8) - 1) << 8) ++/* Target argument index of NUM_TEAMS. */ ++#define GOMP_TARGET_ARG_NUM_TEAMS (1 << 8) ++/* Target argument index of THREAD_LIMIT. */ ++#define GOMP_TARGET_ARG_THREAD_LIMIT (2 << 8) ++ ++/* If the value is directly embeded in target argument, it should be a 16-bit ++ at most and shifted by this many bits. */ ++#define GOMP_TARGET_ARG_VALUE_SHIFT 16 ++ ++/* HSA specific data structures. */ ++ ++/* Identifiers of device-specific target arguments. */ ++#define GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES (1 << 8) ++ ++#endif +--- libgomp/oacc-mem.c.jj 2016-07-13 16:57:04.433535385 +0200 ++++ libgomp/oacc-mem.c 2016-07-14 15:39:44.644631308 +0200 +@@ -0,0 +1,204 @@ ++/* OpenACC Runtime initialization routines ++ ++ Copyright (C) 2013-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). 
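The GOMP_VERSION_* and GOMP_LAUNCH_* macros above are plain bit-field packers; a couple of concrete values they produce, again as illustrative C11 spot-checks under these definitions:

#include "gomp-constants.h"

_Static_assert (GOMP_VERSION_PACK (1, 1) == 0x00010001, "lib << 16 | dev");
_Static_assert (GOMP_VERSION_LIB (0x00010001) == 1
                && GOMP_VERSION_DEV (0x00010001) == 1, "round-trips");
_Static_assert (GOMP_LAUNCH_PACK (GOMP_LAUNCH_DIM, 0,
                                  GOMP_DIM_MASK (GOMP_DIM_GANG))
                == 0x10000001,
                "code in bits 28-31, device in 16-27, op in 0-15");
_Static_assert (GOMP_LAUNCH_CODE (0x10000001) == GOMP_LAUNCH_DIM
                && GOMP_LAUNCH_OP (0x10000001) == 1, "round-trips");
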
++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++#include "openacc.h" ++#include "config.h" ++#include "libgomp.h" ++#include "gomp-constants.h" ++#include "oacc-int.h" ++#include ++#include ++#include ++ ++/* OpenACC is silent on how memory exhaustion is indicated. We return ++ NULL. */ ++ ++void * ++acc_malloc (size_t s) ++{ ++ if (!s) ++ return NULL; ++ ++ goacc_lazy_initialize (); ++ return malloc (s); ++} ++ ++/* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event ++ the device address is mapped. We choose to check if it mapped, ++ and if it is, to unmap it. */ ++void ++acc_free (void *d) ++{ ++ return free (d); ++} ++ ++void ++acc_memcpy_to_device (void *d, void *h, size_t s) ++{ ++ memmove (d, h, s); ++} ++ ++void ++acc_memcpy_from_device (void *h, void *d, size_t s) ++{ ++ memmove (h, d, s); ++} ++ ++/* Return the device pointer that corresponds to host data H. Or NULL ++ if no mapping. */ ++ ++void * ++acc_deviceptr (void *h) ++{ ++ goacc_lazy_initialize (); ++ return h; ++} ++ ++/* Return the host pointer that corresponds to device data D. Or NULL ++ if no mapping. */ ++ ++void * ++acc_hostptr (void *d) ++{ ++ goacc_lazy_initialize (); ++ return d; ++} ++ ++/* Return 1 if host data [H,+S] is present on the device. 
*/ ++ ++int ++acc_is_present (void *h, size_t s) ++{ ++ if (!s || !h) ++ return 0; ++ ++ goacc_lazy_initialize (); ++ return h != NULL; ++} ++ ++/* Create a mapping for host [H,+S] -> device [D,+S] */ ++ ++void ++acc_map_data (void *h, void *d, size_t s) ++{ ++ goacc_lazy_initialize (); ++ ++ if (d != h) ++ gomp_fatal ("cannot map data on shared-memory system"); ++} ++ ++void ++acc_unmap_data (void *h) ++{ ++} ++ ++#define FLAG_PRESENT (1 << 0) ++#define FLAG_CREATE (1 << 1) ++#define FLAG_COPY (1 << 2) ++ ++static void * ++present_create_copy (unsigned f, void *h, size_t s) ++{ ++ if (!h || !s) ++ gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s); ++ ++ goacc_lazy_initialize (); ++ return h; ++} ++ ++void * ++acc_create (void *h, size_t s) ++{ ++ return present_create_copy (FLAG_CREATE, h, s); ++} ++ ++void * ++acc_copyin (void *h, size_t s) ++{ ++ return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s); ++} ++ ++void * ++acc_present_or_create (void *h, size_t s) ++{ ++ return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s); ++} ++ ++void * ++acc_present_or_copyin (void *h, size_t s) ++{ ++ return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s); ++} ++ ++#define FLAG_COPYOUT (1 << 0) ++ ++static void ++delete_copyout (unsigned f, void *h, size_t s, const char *libfnname) ++{ ++} ++ ++void ++acc_delete (void *h , size_t s) ++{ ++ delete_copyout (0, h, s, __FUNCTION__); ++} ++ ++void ++acc_copyout (void *h, size_t s) ++{ ++ delete_copyout (FLAG_COPYOUT, h, s, __FUNCTION__); ++} ++ ++static void ++update_dev_host (int is_dev, void *h, size_t s) ++{ ++ goacc_lazy_initialize (); ++} ++ ++void ++acc_update_device (void *h, size_t s) ++{ ++ update_dev_host (1, h, s); ++} ++ ++void ++acc_update_self (void *h, size_t s) ++{ ++ update_dev_host (0, h, s); ++} ++ ++void ++gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, ++ void *kinds) ++{ ++} ++ ++void ++gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum) ++{ ++} +--- libgomp/oacc-plugin.h.jj 2016-07-13 16:57:13.487423121 +0200 ++++ libgomp/oacc-plugin.h 2016-07-13 16:57:13.487423121 +0200 +@@ -0,0 +1,33 @@ ++/* Copyright (C) 2014-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . 
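On this shared-memory, host-only build the data API above degenerates to identity mappings: device pointers are host pointers and "present" just means a non-NULL, non-empty range. A hypothetical caller, relying only on the behavior of the functions defined in this file:

#include <openacc.h>
#include <assert.h>

int
main (void)
{
  char *h = acc_malloc (64);                 /* plain malloc underneath */
  assert (acc_deviceptr (h) == (void *) h);  /* identity mapping */
  assert (acc_is_present (h, 64));           /* non-NULL => "present" */
  acc_update_device (h, 64);                 /* no-op on shared memory */
  acc_free (h);
  return 0;
}
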
*/ ++ ++#ifndef OACC_PLUGIN_H ++#define OACC_PLUGIN_H 1 ++ ++extern void GOMP_PLUGIN_async_unmap_vars (void *, int); ++extern void *GOMP_PLUGIN_acc_thread (void); ++ ++#endif +--- libgomp/taskloop.c.jj 2016-07-13 16:57:18.935355570 +0200 ++++ libgomp/taskloop.c 2016-07-13 16:57:18.935355570 +0200 +@@ -0,0 +1,340 @@ ++/* Copyright (C) 2015-2016 Free Software Foundation, Inc. ++ Contributed by Jakub Jelinek . ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* This file handles the taskloop construct. It is included twice, once ++ for the long and once for unsigned long long variant. */ ++ ++/* Called when encountering an explicit task directive. If IF_CLAUSE is ++ false, then we must not delay in executing the task. If UNTIED is true, ++ then the task may be executed by any member of the team. */ ++ ++void ++GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), ++ long arg_size, long arg_align, unsigned flags, ++ unsigned long num_tasks, int priority, ++ TYPE start, TYPE end, TYPE step) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ struct gomp_team *team = thr->ts.team; ++ ++#ifdef HAVE_BROKEN_POSIX_SEMAPHORES ++ /* If pthread_mutex_* is used for omp_*lock*, then each task must be ++ tied to one thread all the time. This means UNTIED tasks must be ++ tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN ++ might be running on different thread than FN. */ ++ if (cpyfn) ++ flags &= ~GOMP_TASK_FLAG_IF; ++ flags &= ~GOMP_TASK_FLAG_UNTIED; ++#endif ++ ++ /* If parallel or taskgroup has been cancelled, don't start new tasks. 
*/ ++ if (team && gomp_team_barrier_cancelled (&team->barrier)) ++ return; ++ ++#ifdef TYPE_is_long ++ TYPE s = step; ++ if (step > 0) ++ { ++ if (start >= end) ++ return; ++ s--; ++ } ++ else ++ { ++ if (start <= end) ++ return; ++ s++; ++ } ++ UTYPE n = (end - start + s) / step; ++#else ++ UTYPE n; ++ if (flags & GOMP_TASK_FLAG_UP) ++ { ++ if (start >= end) ++ return; ++ n = (end - start + step - 1) / step; ++ } ++ else ++ { ++ if (start <= end) ++ return; ++ n = (start - end - step - 1) / -step; ++ } ++#endif ++ ++ TYPE task_step = step; ++ unsigned long nfirst = n; ++ if (flags & GOMP_TASK_FLAG_GRAINSIZE) ++ { ++ unsigned long grainsize = num_tasks; ++#ifdef TYPE_is_long ++ num_tasks = n / grainsize; ++#else ++ UTYPE ndiv = n / grainsize; ++ num_tasks = ndiv; ++ if (num_tasks != ndiv) ++ num_tasks = ~0UL; ++#endif ++ if (num_tasks <= 1) ++ { ++ num_tasks = 1; ++ task_step = end - start; ++ } ++ else if (num_tasks >= grainsize ++#ifndef TYPE_is_long ++ && num_tasks != ~0UL ++#endif ++ ) ++ { ++ UTYPE mul = num_tasks * grainsize; ++ task_step = (TYPE) grainsize * step; ++ if (mul != n) ++ { ++ task_step += step; ++ nfirst = n - mul - 1; ++ } ++ } ++ else ++ { ++ UTYPE div = n / num_tasks; ++ UTYPE mod = n % num_tasks; ++ task_step = (TYPE) div * step; ++ if (mod) ++ { ++ task_step += step; ++ nfirst = mod - 1; ++ } ++ } ++ } ++ else ++ { ++ if (num_tasks == 0) ++ num_tasks = team ? team->nthreads : 1; ++ if (num_tasks >= n) ++ num_tasks = n; ++ else ++ { ++ UTYPE div = n / num_tasks; ++ UTYPE mod = n % num_tasks; ++ task_step = (TYPE) div * step; ++ if (mod) ++ { ++ task_step += step; ++ nfirst = mod - 1; ++ } ++ } ++ } ++ ++ if (flags & GOMP_TASK_FLAG_NOGROUP) ++ { ++ if (thr->task && thr->task->taskgroup && thr->task->taskgroup->cancelled) ++ return; ++ } ++ else ++ ialias_call (GOMP_taskgroup_start) (); ++ ++ if (priority > gomp_max_task_priority_var) ++ priority = gomp_max_task_priority_var; ++ ++ if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL ++ || (thr->task && thr->task->final_task) ++ || team->task_count + num_tasks > 64 * team->nthreads) ++ { ++ unsigned long i; ++ if (__builtin_expect (cpyfn != NULL, 0)) ++ { ++ struct gomp_task task[num_tasks]; ++ struct gomp_task *parent = thr->task; ++ arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1); ++ char buf[num_tasks * arg_size + arg_align - 1]; ++ char *arg = (char *) (((uintptr_t) buf + arg_align - 1) ++ & ~(uintptr_t) (arg_align - 1)); ++ char *orig_arg = arg; ++ for (i = 0; i < num_tasks; i++) ++ { ++ gomp_init_task (&task[i], parent, gomp_icv (false)); ++ task[i].priority = priority; ++ task[i].kind = GOMP_TASK_UNDEFERRED; ++ task[i].final_task = (thr->task && thr->task->final_task) ++ || (flags & GOMP_TASK_FLAG_FINAL); ++ if (thr->task) ++ { ++ task[i].in_tied_task = thr->task->in_tied_task; ++ task[i].taskgroup = thr->task->taskgroup; ++ } ++ thr->task = &task[i]; ++ cpyfn (arg, data); ++ arg += arg_size; ++ } ++ arg = orig_arg; ++ for (i = 0; i < num_tasks; i++) ++ { ++ thr->task = &task[i]; ++ ((TYPE *)arg)[0] = start; ++ start += task_step; ++ ((TYPE *)arg)[1] = start; ++ if (i == nfirst) ++ task_step -= step; ++ fn (arg); ++ arg += arg_size; ++ if (!priority_queue_empty_p (&task[i].children_queue, ++ MEMMODEL_RELAXED)) ++ { ++ gomp_mutex_lock (&team->task_lock); ++ gomp_clear_parent (&task[i].children_queue); ++ gomp_mutex_unlock (&team->task_lock); ++ } ++ gomp_end_task (); ++ } ++ } ++ else ++ for (i = 0; i < num_tasks; i++) ++ { ++ struct gomp_task task; ++ ++ gomp_init_task (&task, thr->task, gomp_icv 
(false)); ++ task.priority = priority; ++ task.kind = GOMP_TASK_UNDEFERRED; ++ task.final_task = (thr->task && thr->task->final_task) ++ || (flags & GOMP_TASK_FLAG_FINAL); ++ if (thr->task) ++ { ++ task.in_tied_task = thr->task->in_tied_task; ++ task.taskgroup = thr->task->taskgroup; ++ } ++ thr->task = &task; ++ ((TYPE *)data)[0] = start; ++ start += task_step; ++ ((TYPE *)data)[1] = start; ++ if (i == nfirst) ++ task_step -= step; ++ fn (data); ++ if (!priority_queue_empty_p (&task.children_queue, ++ MEMMODEL_RELAXED)) ++ { ++ gomp_mutex_lock (&team->task_lock); ++ gomp_clear_parent (&task.children_queue); ++ gomp_mutex_unlock (&team->task_lock); ++ } ++ gomp_end_task (); ++ } ++ } ++ else ++ { ++ struct gomp_task *tasks[num_tasks]; ++ struct gomp_task *parent = thr->task; ++ struct gomp_taskgroup *taskgroup = parent->taskgroup; ++ char *arg; ++ int do_wake; ++ unsigned long i; ++ ++ for (i = 0; i < num_tasks; i++) ++ { ++ struct gomp_task *task ++ = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1); ++ tasks[i] = task; ++ arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1) ++ & ~(uintptr_t) (arg_align - 1)); ++ gomp_init_task (task, parent, gomp_icv (false)); ++ task->priority = priority; ++ task->kind = GOMP_TASK_UNDEFERRED; ++ task->in_tied_task = parent->in_tied_task; ++ task->taskgroup = taskgroup; ++ thr->task = task; ++ if (cpyfn) ++ { ++ cpyfn (arg, data); ++ task->copy_ctors_done = true; ++ } ++ else ++ memcpy (arg, data, arg_size); ++ ((TYPE *)arg)[0] = start; ++ start += task_step; ++ ((TYPE *)arg)[1] = start; ++ if (i == nfirst) ++ task_step -= step; ++ thr->task = parent; ++ task->kind = GOMP_TASK_WAITING; ++ task->fn = fn; ++ task->fn_data = arg; ++ task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1; ++ } ++ gomp_mutex_lock (&team->task_lock); ++ /* If parallel or taskgroup has been cancelled, don't start new ++ tasks. 
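The chunking arithmetic above is easier to see with concrete numbers: n iterations are split so that the first n % num_tasks tasks each get one extra iteration, which is what the task_step/nfirst bookkeeping implements. A standalone sketch of the same computation (assuming the TYPE-is-long variant with a positive step; toy code, not libgomp internals):

#include <stdio.h>

int
main (void)
{
  long start = 0, end = 10, step = 1;
  unsigned long num_tasks = 3;
  unsigned long n = (end - start + step - 1) / step;  /* 10 iterations */
  long task_step = (long) (n / num_tasks) * step;     /* 3 */
  unsigned long nfirst = n;
  if (n % num_tasks)
    {
      task_step += step;                /* first tasks get 4 */
      nfirst = n % num_tasks - 1;       /* index of last "big" task */
    }
  for (unsigned long i = 0; i < num_tasks; i++)
    {
      printf ("task %lu: [%ld, %ld)\n", i, start, start + task_step);
      start += task_step;
      if (i == nfirst)
        task_step -= step;              /* drop back to 3 */
    }
  /* Prints [0,4) [4,7) [7,10): 10 iterations over 3 tasks.  */
  return 0;
}
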
*/ ++ if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier) ++ || (taskgroup && taskgroup->cancelled)) ++ && cpyfn == NULL, 0)) ++ { ++ gomp_mutex_unlock (&team->task_lock); ++ for (i = 0; i < num_tasks; i++) ++ { ++ gomp_finish_task (tasks[i]); ++ free (tasks[i]); ++ } ++ if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0) ++ ialias_call (GOMP_taskgroup_end) (); ++ return; ++ } ++ if (taskgroup) ++ taskgroup->num_children += num_tasks; ++ for (i = 0; i < num_tasks; i++) ++ { ++ struct gomp_task *task = tasks[i]; ++ priority_queue_insert (PQ_CHILDREN, &parent->children_queue, ++ task, priority, ++ PRIORITY_INSERT_BEGIN, ++ /*last_parent_depends_on=*/false, ++ task->parent_depends_on); ++ if (taskgroup) ++ priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, ++ task, priority, PRIORITY_INSERT_BEGIN, ++ /*last_parent_depends_on=*/false, ++ task->parent_depends_on); ++ priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority, ++ PRIORITY_INSERT_END, ++ /*last_parent_depends_on=*/false, ++ task->parent_depends_on); ++ ++team->task_count; ++ ++team->task_queued_count; ++ } ++ gomp_team_barrier_set_task_pending (&team->barrier); ++ if (team->task_running_count + !parent->in_tied_task ++ < team->nthreads) ++ { ++ do_wake = team->nthreads - team->task_running_count ++ - !parent->in_tied_task; ++ if ((unsigned long) do_wake > num_tasks) ++ do_wake = num_tasks; ++ } ++ else ++ do_wake = 0; ++ gomp_mutex_unlock (&team->task_lock); ++ if (do_wake) ++ gomp_team_barrier_wake (&team->barrier, do_wake); ++ } ++ if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0) ++ ialias_call (GOMP_taskgroup_end) (); ++} +--- libgomp/priority_queue.h.jj 2016-07-13 16:57:04.438535323 +0200 ++++ libgomp/priority_queue.h 2016-07-13 16:57:04.438535323 +0200 +@@ -0,0 +1,485 @@ ++/* Copyright (C) 2015-2016 Free Software Foundation, Inc. ++ Contributed by Aldy Hernandez . ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* Header file for a priority queue of GOMP tasks. */ ++ ++/* ?? Perhaps all the priority_tree_* functions are complex and rare ++ enough to go out-of-line and be moved to priority_queue.c. ?? */ ++ ++#ifndef _PRIORITY_QUEUE_H_ ++#define _PRIORITY_QUEUE_H_ ++ ++/* One task. */ ++ ++struct priority_node ++{ ++ /* Next and previous chains in a circular doubly linked list for ++ tasks within this task's priority. */ ++ struct priority_node *next, *prev; ++}; ++ ++/* All tasks within the same priority. */ ++ ++struct priority_list ++{ ++ /* Priority of the tasks in this set. */ ++ int priority; ++ ++ /* Tasks. 
*/ ++ struct priority_node *tasks; ++ ++ /* This points to the last of the higher priority WAITING tasks. ++ Remember that for the children queue, we have: ++ ++ parent_depends_on WAITING tasks. ++ !parent_depends_on WAITING tasks. ++ TIED tasks. ++ ++ This is a pointer to the last of the parent_depends_on WAITING ++ tasks which are, essentially, higher priority items within their ++ priority. */ ++ struct priority_node *last_parent_depends_on; ++}; ++ ++/* Another splay tree instantiation, for priority_list's. */ ++typedef struct prio_splay_tree_node_s *prio_splay_tree_node; ++typedef struct prio_splay_tree_s *prio_splay_tree; ++typedef struct prio_splay_tree_key_s *prio_splay_tree_key; ++struct prio_splay_tree_key_s { ++ /* This structure must only contain a priority_list, as we cast ++ prio_splay_tree_key to priority_list throughout. */ ++ struct priority_list l; ++}; ++#define splay_tree_prefix prio ++#include "splay-tree.h" ++ ++/* The entry point into a priority queue of tasks. ++ ++ There are two alternate implementations with which to store tasks: ++ as a balanced tree of sorts, or as a simple list of tasks. If ++ there are only priority-0 items (ROOT is NULL), we use the simple ++ list, otherwise (ROOT is non-NULL) we use the tree. */ ++ ++struct priority_queue ++{ ++ /* If t.root != NULL, this is a splay tree of priority_lists to hold ++ all tasks. This is only used if multiple priorities are in play, ++ otherwise we use the priority_list `l' below to hold all ++ (priority-0) tasks. */ ++ struct prio_splay_tree_s t; ++ ++ /* If T above is NULL, only priority-0 items exist, so keep them ++ in a simple list. */ ++ struct priority_list l; ++}; ++ ++enum priority_insert_type { ++ /* Insert at the beginning of a priority list. */ ++ PRIORITY_INSERT_BEGIN, ++ /* Insert at the end of a priority list. */ ++ PRIORITY_INSERT_END ++}; ++ ++/* Used to determine which queue a given priority node belongs in. ++ See pnode field of gomp_task. */ ++ ++enum priority_queue_type ++{ ++ PQ_TEAM, /* Node belongs in gomp_team's task_queue. */ ++ PQ_CHILDREN, /* Node belongs in parent's children_queue. */ ++ PQ_TASKGROUP, /* Node belongs in taskgroup->taskgroup_queue. */ ++ PQ_IGNORED = 999 ++}; ++ ++/* Priority queue implementation prototypes. */ ++ ++extern bool priority_queue_task_in_queue_p (enum priority_queue_type, ++ struct priority_queue *, ++ struct gomp_task *); ++extern void priority_queue_dump (enum priority_queue_type, ++ struct priority_queue *); ++extern void priority_queue_verify (enum priority_queue_type, ++ struct priority_queue *, bool); ++extern void priority_tree_remove (enum priority_queue_type, ++ struct priority_queue *, ++ struct priority_node *); ++extern struct gomp_task *priority_tree_next_task (enum priority_queue_type, ++ struct priority_queue *, ++ enum priority_queue_type, ++ struct priority_queue *, ++ bool *); ++ ++/* Return TRUE if there is more than one priority in HEAD. This is ++ used throughout to choose between the fast path (priority 0 only ++ items) and a world with multiple priorities. */ ++ ++static inline bool ++priority_queue_multi_p (struct priority_queue *head) ++{ ++ return __builtin_expect (head->t.root != NULL, 0); ++} ++ ++/* Initialize a priority queue. */ ++ ++static inline void ++priority_queue_init (struct priority_queue *head) ++{ ++ head->t.root = NULL; ++ /* To save a few microseconds, we don't initialize head->l.priority ++ to 0 here. It is implied that priority will be 0 if head->t.root ++ == NULL.
++ ++ priority_tree_insert() will fix this when we encounter multiple ++ priorities. */ ++ head->l.tasks = NULL; ++ head->l.last_parent_depends_on = NULL; ++} ++ ++static inline void ++priority_queue_free (struct priority_queue *head) ++{ ++ /* There's nothing to do, as tasks were freed as they were removed ++ in priority_queue_remove. */ ++} ++ ++/* Forward declarations. */ ++static inline size_t priority_queue_offset (enum priority_queue_type); ++static inline struct gomp_task *priority_node_to_task ++ (enum priority_queue_type, ++ struct priority_node *); ++static inline struct priority_node *task_to_priority_node ++ (enum priority_queue_type, ++ struct gomp_task *); ++ ++/* Return TRUE if priority queue HEAD is empty. ++ ++ MODEL IS MEMMODEL_ACQUIRE if we should use an acquire atomic to ++ read from the root of the queue, otherwise MEMMODEL_RELAXED if we ++ should use a plain load. */ ++ ++static inline _Bool ++priority_queue_empty_p (struct priority_queue *head, enum memmodel model) ++{ ++ /* Note: The acquire barriers on the loads here synchronize with ++ the write of a NULL in gomp_task_run_post_remove_parent. It is ++ not necessary that we synchronize with other non-NULL writes at ++ this point, but we must ensure that all writes to memory by a ++ child thread task work function are seen before we exit from ++ GOMP_taskwait. */ ++ if (priority_queue_multi_p (head)) ++ { ++ if (model == MEMMODEL_ACQUIRE) ++ return __atomic_load_n (&head->t.root, MEMMODEL_ACQUIRE) == NULL; ++ return head->t.root == NULL; ++ } ++ if (model == MEMMODEL_ACQUIRE) ++ return __atomic_load_n (&head->l.tasks, MEMMODEL_ACQUIRE) == NULL; ++ return head->l.tasks == NULL; ++} ++ ++/* Look for a given PRIORITY in HEAD. Return it if found, otherwise ++ return NULL. This only applies to the tree variant in HEAD. There ++ is no point in searching for priorities in HEAD->L. */ ++ ++static inline struct priority_list * ++priority_queue_lookup_priority (struct priority_queue *head, int priority) ++{ ++ if (head->t.root == NULL) ++ return NULL; ++ struct prio_splay_tree_key_s k; ++ k.l.priority = priority; ++ return (struct priority_list *) ++ prio_splay_tree_lookup (&head->t, &k); ++} ++ ++/* Insert task in DATA, with PRIORITY, in the priority list in LIST. ++ LIST contains items of type TYPE. ++ ++ If POS is PRIORITY_INSERT_BEGIN, the new task is inserted at the ++ top of its respective priority. If POS is PRIORITY_INSERT_END, the ++ task is inserted at the end of its priority. ++ ++ If ADJUST_PARENT_DEPENDS_ON is TRUE, LIST is a children queue, and ++ we must keep track of higher and lower priority WAITING tasks by ++ keeping the queue's last_parent_depends_on field accurate. This ++ only applies to the children queue, and the caller must ensure LIST ++ is a children queue in this case. ++ ++ If ADJUST_PARENT_DEPENDS_ON is TRUE, TASK_IS_PARENT_DEPENDS_ON is ++ set to the task's parent_depends_on field. If ++ ADJUST_PARENT_DEPENDS_ON is FALSE, this field is irrelevant. ++ ++ Return the new priority_node. 
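The MEMMODEL_ACQUIRE loads in priority_queue_empty_p above pair with the MEMMODEL_RELEASE store performed in priority_list_remove further down. Reduced to two threads, this is the classic publish/consume idiom; the following is a minimal standalone sketch of it using GCC's __atomic builtins (the worker and queue_head names are illustrative stand-ins, not libgomp code; build with -pthread):

#include <pthread.h>
#include <stdio.h>

static int result;                 /* stands in for a child task's stores */
static void *queue_head = &result; /* non-NULL while a task is queued */

static void *worker (void *arg)
{
  (void) arg;
  result = 42;                     /* the task work function's stores */
  /* Release store: RESULT is globally visible before the NULL is.  */
  __atomic_store_n (&queue_head, NULL, __ATOMIC_RELEASE);
  return NULL;
}

int main (void)
{
  pthread_t t;
  pthread_create (&t, NULL, worker, NULL);
  /* Acquire load: once NULL is observed, the task's stores are visible,
     which is what GOMP_taskwait relies on.  */
  while (__atomic_load_n (&queue_head, __ATOMIC_ACQUIRE) != NULL)
    ;
  printf ("%d\n", result);         /* always prints 42 */
  pthread_join (t, NULL);
  return 0;
}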
*/ ++ ++static inline void ++priority_list_insert (enum priority_queue_type type, ++ struct priority_list *list, ++ struct gomp_task *task, ++ int priority, ++ enum priority_insert_type pos, ++ bool adjust_parent_depends_on, ++ bool task_is_parent_depends_on) ++{ ++ struct priority_node *node = task_to_priority_node (type, task); ++ if (list->tasks) ++ { ++ /* If we are keeping track of higher/lower priority items, ++ but this is a lower priority WAITING task ++ (parent_depends_on != NULL), put it after all ready to ++ run tasks. See the comment in ++ priority_queue_upgrade_task for a visual on how tasks ++ should be organized. */ ++ if (adjust_parent_depends_on ++ && pos == PRIORITY_INSERT_BEGIN ++ && list->last_parent_depends_on ++ && !task_is_parent_depends_on) ++ { ++ struct priority_node *last_parent_depends_on ++ = list->last_parent_depends_on; ++ node->next = last_parent_depends_on->next; ++ node->prev = last_parent_depends_on; ++ } ++ /* Otherwise, put it at the top/bottom of the queue. */ ++ else ++ { ++ node->next = list->tasks; ++ node->prev = list->tasks->prev; ++ if (pos == PRIORITY_INSERT_BEGIN) ++ list->tasks = node; ++ } ++ node->next->prev = node; ++ node->prev->next = node; ++ } ++ else ++ { ++ node->next = node; ++ node->prev = node; ++ list->tasks = node; ++ } ++ if (adjust_parent_depends_on ++ && list->last_parent_depends_on == NULL ++ && task_is_parent_depends_on) ++ list->last_parent_depends_on = node; ++} ++ ++/* Tree version of priority_list_insert. */ ++ ++static inline void ++priority_tree_insert (enum priority_queue_type type, ++ struct priority_queue *head, ++ struct gomp_task *task, ++ int priority, ++ enum priority_insert_type pos, ++ bool adjust_parent_depends_on, ++ bool task_is_parent_depends_on) ++{ ++ if (__builtin_expect (head->t.root == NULL, 0)) ++ { ++ /* The first time around, transfer any priority 0 items to the ++ tree. */ ++ if (head->l.tasks != NULL) ++ { ++ prio_splay_tree_node k = gomp_malloc (sizeof (*k)); ++ k->left = NULL; ++ k->right = NULL; ++ k->key.l.priority = 0; ++ k->key.l.tasks = head->l.tasks; ++ k->key.l.last_parent_depends_on = head->l.last_parent_depends_on; ++ prio_splay_tree_insert (&head->t, k); ++ head->l.tasks = NULL; ++ } ++ } ++ struct priority_list *list ++ = priority_queue_lookup_priority (head, priority); ++ if (!list) ++ { ++ prio_splay_tree_node k = gomp_malloc (sizeof (*k)); ++ k->left = NULL; ++ k->right = NULL; ++ k->key.l.priority = priority; ++ k->key.l.tasks = NULL; ++ k->key.l.last_parent_depends_on = NULL; ++ prio_splay_tree_insert (&head->t, k); ++ list = &k->key.l; ++ } ++ priority_list_insert (type, list, task, priority, pos, ++ adjust_parent_depends_on, ++ task_is_parent_depends_on); ++} ++ ++/* Generic version of priority_*_insert. 
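priority_list_insert above leans on the standard invariant of a circular doubly linked list: splicing a node in front of the current head makes it last in FIFO order, and additionally advancing the head pointer makes it first. A standalone sketch of just that invariant (simplified stand-in types, not the libgomp structures):

#include <stdio.h>

struct node { struct node *next, *prev; int id; };

/* AT_BEGIN mirrors PRIORITY_INSERT_BEGIN: the node also becomes the
   head, so it is dequeued first.  Otherwise it lands at the tail,
   which in a circular list is head->prev.  */
static void
list_insert (struct node **head, struct node *n, int at_begin)
{
  if (*head == NULL)
    {
      n->next = n->prev = n;
      *head = n;
      return;
    }
  n->next = *head;
  n->prev = (*head)->prev;
  n->next->prev = n;
  n->prev->next = n;
  if (at_begin)
    *head = n;
}

int main (void)
{
  struct node a = { .id = 1 }, b = { .id = 2 }, c = { .id = 3 };
  struct node *head = NULL;
  list_insert (&head, &a, 0);
  list_insert (&head, &b, 0);      /* END: queued after a */
  list_insert (&head, &c, 1);      /* BEGIN: jumps ahead of a */
  struct node *p = head;
  do
    {
      printf ("%d ", p->id);
      p = p->next;
    }
  while (p != head);
  printf ("\n");                   /* prints: 3 1 2 */
  return 0;
}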
*/ ++ ++static inline void ++priority_queue_insert (enum priority_queue_type type, ++ struct priority_queue *head, ++ struct gomp_task *task, ++ int priority, ++ enum priority_insert_type pos, ++ bool adjust_parent_depends_on, ++ bool task_is_parent_depends_on) ++{ ++#if _LIBGOMP_CHECKING_ ++ if (priority_queue_task_in_queue_p (type, head, task)) ++ gomp_fatal ("Attempt to insert existing task %p", task); ++#endif ++ if (priority_queue_multi_p (head) || __builtin_expect (priority > 0, 0)) ++ priority_tree_insert (type, head, task, priority, pos, ++ adjust_parent_depends_on, ++ task_is_parent_depends_on); ++ else ++ priority_list_insert (type, &head->l, task, priority, pos, ++ adjust_parent_depends_on, ++ task_is_parent_depends_on); ++} ++ ++/* If multiple priorities are in play, return the highest priority ++ task from within Q1 and Q2, while giving preference to tasks from ++ Q1. If the returned task is chosen from Q1, *Q1_CHOSEN_P is set to ++ TRUE, otherwise it is set to FALSE. ++ ++ If multiple priorities are not in play (only 0 priorities are ++ available), the next task is chosen exclusively from Q1. ++ ++ As a special case, Q2 can be NULL, in which case, we just choose ++ the highest priority WAITING task in Q1. This is an optimization ++ to speed up looking through only one queue. ++ ++ We assume Q1 has at least one item. */ ++ ++static inline struct gomp_task * ++priority_queue_next_task (enum priority_queue_type t1, ++ struct priority_queue *q1, ++ enum priority_queue_type t2, ++ struct priority_queue *q2, ++ bool *q1_chosen_p) ++{ ++#if _LIBGOMP_CHECKING_ ++ if (priority_queue_empty_p (q1, MEMMODEL_RELAXED)) ++ gomp_fatal ("priority_queue_next_task: Q1 is empty"); ++#endif ++ if (priority_queue_multi_p (q1)) ++ { ++ struct gomp_task *t ++ = priority_tree_next_task (t1, q1, t2, q2, q1_chosen_p); ++ /* If T is NULL, there are no WAITING tasks in Q1. In which ++ case, return any old (non-waiting) task which will cause the ++ caller to do the right thing when checking T->KIND == ++ GOMP_TASK_WAITING. */ ++ if (!t) ++ { ++#if _LIBGOMP_CHECKING_ ++ if (*q1_chosen_p == false) ++ gomp_fatal ("priority_queue_next_task inconsistency"); ++#endif ++ return priority_node_to_task (t1, q1->t.root->key.l.tasks); ++ } ++ return t; ++ } ++ else ++ { ++ *q1_chosen_p = true; ++ return priority_node_to_task (t1, q1->l.tasks); ++ } ++} ++ ++/* Remove NODE from LIST. ++ ++ If we are removing the one and only item in the list, and MODEL is ++ MEMMODEL_RELEASE, use an atomic release to clear the list. ++ ++ If the list becomes empty after the remove, return TRUE. */ ++ ++static inline bool ++priority_list_remove (struct priority_list *list, ++ struct priority_node *node, ++ enum memmodel model) ++{ ++ bool empty = false; ++ node->prev->next = node->next; ++ node->next->prev = node->prev; ++ if (list->tasks == node) ++ { ++ if (node->next != node) ++ list->tasks = node->next; ++ else ++ { ++ /* We access task->children in GOMP_taskwait outside of ++ the task lock mutex region, so need a release barrier ++ here to ensure memory written by child_task->fn above ++ is flushed before the NULL is written. */ ++ if (model == MEMMODEL_RELEASE) ++ __atomic_store_n (&list->tasks, NULL, MEMMODEL_RELEASE); ++ else ++ list->tasks = NULL; ++ empty = true; ++ goto remove_out; ++ } ++ } ++remove_out: ++#if _LIBGOMP_CHECKING_ ++ memset (node, 0xaf, sizeof (*node)); ++#endif ++ return empty; ++} ++ ++/* This is the generic version of priority_list_remove. ++ ++ Remove NODE from priority queue HEAD. 
HEAD contains tasks of type TYPE. ++ ++ If we are removing the one and only item in the priority queue and ++ MODEL is MEMMODEL_RELEASE, use an atomic release to clear the queue. ++ ++ If the queue becomes empty after the remove, return TRUE. */ ++ ++static inline bool ++priority_queue_remove (enum priority_queue_type type, ++ struct priority_queue *head, ++ struct gomp_task *task, ++ enum memmodel model) ++{ ++#if _LIBGOMP_CHECKING_ ++ if (!priority_queue_task_in_queue_p (type, head, task)) ++ gomp_fatal ("Attempt to remove missing task %p", task); ++#endif ++ if (priority_queue_multi_p (head)) ++ { ++ priority_tree_remove (type, head, task_to_priority_node (type, task)); ++ if (head->t.root == NULL) ++ { ++ if (model == MEMMODEL_RELEASE) ++ /* Errr, we store NULL twice, the alternative would be to ++ use an atomic release directly in the splay tree ++ routines. Worth it? */ ++ __atomic_store_n (&head->t.root, NULL, MEMMODEL_RELEASE); ++ return true; ++ } ++ return false; ++ } ++ else ++ return priority_list_remove (&head->l, ++ task_to_priority_node (type, task), model); ++} ++ ++#endif /* _PRIORITY_QUEUE_H_ */ +--- libgomp/priority_queue.c.jj 2016-07-13 16:57:04.435535360 +0200 ++++ libgomp/priority_queue.c 2016-07-13 16:57:04.435535360 +0200 +@@ -0,0 +1,300 @@ ++/* Copyright (C) 2015-2016 Free Software Foundation, Inc. ++ Contributed by Aldy Hernandez . ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* Priority queue implementation of GOMP tasks. */ ++ ++#include "libgomp.h" ++ ++#if _LIBGOMP_CHECKING_ ++#include ++ ++/* Sanity check to verify whether a TASK is in LIST. Return TRUE if ++ found, FALSE otherwise. ++ ++ TYPE is the type of priority queue this task resides in. */ ++ ++static inline bool ++priority_queue_task_in_list_p (enum priority_queue_type type, ++ struct priority_list *list, ++ struct gomp_task *task) ++{ ++ struct priority_node *p = list->tasks; ++ do ++ { ++ if (priority_node_to_task (type, p) == task) ++ return true; ++ p = p->next; ++ } ++ while (p != list->tasks); ++ return false; ++} ++ ++/* Tree version of priority_queue_task_in_list_p. */ ++ ++static inline bool ++priority_queue_task_in_tree_p (enum priority_queue_type type, ++ struct priority_queue *head, ++ struct gomp_task *task) ++{ ++ struct priority_list *list ++ = priority_queue_lookup_priority (head, task->priority); ++ if (!list) ++ return false; ++ return priority_queue_task_in_list_p (type, list, task); ++} ++ ++/* Generic version of priority_queue_task_in_list_p that works for ++ trees or lists. 
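All of the routines in this header and in priority_queue.c below share one two-level shape: a fast path over a single priority-0 list, plus a tree of per-priority lists once any nonzero priority appears. A reduced working model of that shape (a fixed array stands in for the splay tree; every name here is illustrative):

#include <stdio.h>

#define MAXPRIO 8

struct pq
{
  int fast_only;            /* like t.root == NULL: only priority 0 */
  int counts[MAXPRIO];      /* stands in for the per-priority lists */
};

static void
pq_insert (struct pq *q, int prio)
{
  if (prio != 0)
    q->fast_only = 0;       /* first nonzero priority leaves the fast path */
  q->counts[prio]++;
}

/* Highest priority holding a task, or -1; mirrors how the real code
   either peeks at the one list or walks toward the rightmost tree node.  */
static int
pq_next (struct pq *q)
{
  if (q->fast_only)
    return q->counts[0] ? 0 : -1;
  for (int p = MAXPRIO - 1; p >= 0; p--)
    if (q->counts[p])
      return p;
  return -1;
}

int main (void)
{
  struct pq q = { .fast_only = 1 };
  pq_insert (&q, 0);
  printf ("%d\n", pq_next (&q));   /* 0, via the fast path */
  pq_insert (&q, 3);
  pq_insert (&q, 1);
  printf ("%d\n", pq_next (&q));   /* 3, highest priority wins */
  return 0;
}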
*/ ++ ++bool ++priority_queue_task_in_queue_p (enum priority_queue_type type, ++ struct priority_queue *head, ++ struct gomp_task *task) ++{ ++ if (priority_queue_empty_p (head, MEMMODEL_RELAXED)) ++ return false; ++ if (priority_queue_multi_p (head)) ++ return priority_queue_task_in_tree_p (type, head, task); ++ else ++ return priority_queue_task_in_list_p (type, &head->l, task); ++} ++ ++/* Sanity check LIST to make sure the tasks therein are in the right ++ order. LIST is a priority list of type TYPE. ++ ++ The expected order is that GOMP_TASK_WAITING tasks come before ++ GOMP_TASK_TIED/GOMP_TASK_ASYNC_RUNNING ones. ++ ++ If CHECK_DEPS is TRUE, we also check that parent_depends_on WAITING ++ tasks come before !parent_depends_on WAITING tasks. This is only ++ applicable to the children queue, and the caller is expected to ++ ensure that we are verifying the children queue. */ ++ ++static void ++priority_list_verify (enum priority_queue_type type, ++ struct priority_list *list, bool check_deps) ++{ ++ bool seen_tied = false; ++ bool seen_plain_waiting = false; ++ struct priority_node *p = list->tasks; ++ while (1) ++ { ++ struct gomp_task *t = priority_node_to_task (type, p); ++ if (seen_tied && t->kind == GOMP_TASK_WAITING) ++ gomp_fatal ("priority_queue_verify: WAITING task after TIED"); ++ if (t->kind >= GOMP_TASK_TIED) ++ seen_tied = true; ++ else if (check_deps && t->kind == GOMP_TASK_WAITING) ++ { ++ if (t->parent_depends_on) ++ { ++ if (seen_plain_waiting) ++ gomp_fatal ("priority_queue_verify: " ++ "parent_depends_on after !parent_depends_on"); ++ } ++ else ++ seen_plain_waiting = true; ++ } ++ p = p->next; ++ if (p == list->tasks) ++ break; ++ } ++} ++ ++/* Callback type for priority_tree_verify_callback. */ ++struct cbtype ++{ ++ enum priority_queue_type type; ++ bool check_deps; ++}; ++ ++/* Verify every task in NODE. ++ ++ Callback for splay_tree_foreach. */ ++ ++static void ++priority_tree_verify_callback (prio_splay_tree_key key, void *data) ++{ ++ struct cbtype *cb = (struct cbtype *) data; ++ priority_list_verify (cb->type, &key->l, cb->check_deps); ++} ++ ++/* Generic version of priority_list_verify. ++ ++ Sanity check HEAD to make sure the tasks therein are in the right ++ order. The priority_queue holds tasks of type TYPE. ++ ++ If CHECK_DEPS is TRUE, we also check that parent_depends_on WAITING ++ tasks come before !parent_depends_on WAITING tasks. This is only ++ applicable to the children queue, and the caller is expected to ++ ensure that we are verifying the children queue. */ ++ ++void ++priority_queue_verify (enum priority_queue_type type, ++ struct priority_queue *head, bool check_deps) ++{ ++ if (priority_queue_empty_p (head, MEMMODEL_RELAXED)) ++ return; ++ if (priority_queue_multi_p (head)) ++ { ++ struct cbtype cb = { type, check_deps }; ++ prio_splay_tree_foreach (&head->t, ++ priority_tree_verify_callback, &cb); ++ } ++ else ++ priority_list_verify (type, &head->l, check_deps); ++} ++#endif /* _LIBGOMP_CHECKING_ */ ++ ++/* Remove NODE from priority queue HEAD, wherever it may be inside the ++ tree. HEAD contains tasks of type TYPE. */ ++ ++void ++priority_tree_remove (enum priority_queue_type type, ++ struct priority_queue *head, ++ struct priority_node *node) ++{ ++ /* ?? The only reason this function is not inlined is because we ++ need to find the priority within gomp_task (which has not been ++ completely defined in the header file). 
If the lack of inlining ++ is a concern, we could pass the priority number as a ++ parameter, or we could move this to libgomp.h. */ ++ int priority = priority_node_to_task (type, node)->priority; ++ ++ /* ?? We could avoid this lookup by keeping a pointer to the key in ++ the priority_node. */ ++ struct priority_list *list ++ = priority_queue_lookup_priority (head, priority); ++#if _LIBGOMP_CHECKING_ ++ if (!list) ++ gomp_fatal ("Unable to find priority %d", priority); ++#endif ++ /* If NODE was the last in its priority, clean up the priority. */ ++ if (priority_list_remove (list, node, MEMMODEL_RELAXED)) ++ { ++ prio_splay_tree_remove (&head->t, (prio_splay_tree_key) list); ++ list->tasks = NULL; ++#if _LIBGOMP_CHECKING_ ++ memset (list, 0xaf, sizeof (*list)); ++#endif ++ free (list); ++ } ++} ++ ++/* Return the highest priority WAITING task in a splay tree NODE. If ++ there are no WAITING tasks available, return NULL. ++ ++ NODE is a priority list containing tasks of type TYPE. ++ ++ The right most node in a tree contains the highest priority. ++ Recurse down to find such a node. If the task at that max node is ++ not WAITING, bubble back up and look at the remaining tasks ++ in-order. */ ++ ++static struct gomp_task * ++priority_tree_next_task_1 (enum priority_queue_type type, ++ prio_splay_tree_node node) ++{ ++ again: ++ if (!node) ++ return NULL; ++ struct gomp_task *ret = priority_tree_next_task_1 (type, node->right); ++ if (ret) ++ return ret; ++ ret = priority_node_to_task (type, node->key.l.tasks); ++ if (ret->kind == GOMP_TASK_WAITING) ++ return ret; ++ node = node->left; ++ goto again; ++} ++ ++/* Return the highest priority WAITING task from within Q1 and Q2, ++ while giving preference to tasks from Q1. Q1 is a queue containing ++ items of type TYPE1. Q2 is a queue containing items of type TYPE2. ++ ++ Since we are mostly interested in Q1, if there are no WAITING tasks ++ in Q1, we don't bother checking Q2, and just return NULL. ++ ++ As a special case, Q2 can be NULL, in which case, we just choose ++ the highest priority WAITING task in Q1. This is an optimization ++ to speed up looking through only one queue. ++ ++ If the returned task is chosen from Q1, *Q1_CHOSEN_P is set to ++ TRUE, otherwise it is set to FALSE. */ ++ ++struct gomp_task * ++priority_tree_next_task (enum priority_queue_type type1, ++ struct priority_queue *q1, ++ enum priority_queue_type type2, ++ struct priority_queue *q2, ++ bool *q1_chosen_p) ++{ ++ struct gomp_task *t1 = priority_tree_next_task_1 (type1, q1->t.root); ++ if (!t1 ++ /* Special optimization when only searching through one queue. */ ++ || !q2) ++ { ++ *q1_chosen_p = true; ++ return t1; ++ } ++ struct gomp_task *t2 = priority_tree_next_task_1 (type2, q2->t.root); ++ if (!t2 || t1->priority > t2->priority) ++ { ++ *q1_chosen_p = true; ++ return t1; ++ } ++ if (t2->priority > t1->priority) ++ { ++ *q1_chosen_p = false; ++ return t2; ++ } ++ /* If we get here, the priorities are the same, so we must look at ++ parent_depends_on to make our decision. */ ++#if _LIBGOMP_CHECKING_ ++ if (t1 != t2) ++ gomp_fatal ("priority_tree_next_task: t1 != t2"); ++#endif ++ if (t2->parent_depends_on && !t1->parent_depends_on) ++ { ++ *q1_chosen_p = false; ++ return t2; ++ } ++ *q1_chosen_p = true; ++ return t1; ++} ++ ++/* Priority splay trees comparison function. */ ++static inline int ++prio_splay_compare (prio_splay_tree_key x, prio_splay_tree_key y) ++{ ++ if (x->l.priority == y->l.priority) ++ return 0; ++ return x->l.priority < y->l.priority ? 
-1 : 1; ++} ++ ++/* Define another splay tree instantiation, for priority_list's. */ ++#define splay_tree_prefix prio ++#define splay_tree_c ++#include "splay-tree.h" +--- libgomp/openacc.f90.jj 2016-07-13 16:57:04.434535373 +0200 ++++ libgomp/openacc.f90 2016-07-14 19:01:54.901230875 +0200 +@@ -0,0 +1,911 @@ ++! OpenACC Runtime Library Definitions. ++ ++! Copyright (C) 2014-2016 Free Software Foundation, Inc. ++ ++! Contributed by Tobias Burnus ++! and Mentor Embedded. ++ ++! This file is part of the GNU Offloading and Multi Processing Library ++! (libgomp). ++ ++! Libgomp is free software; you can redistribute it and/or modify it ++! under the terms of the GNU General Public License as published by ++! the Free Software Foundation; either version 3, or (at your option) ++! any later version. ++ ++! Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++! FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++! more details. ++ ++! Under Section 7 of GPL version 3, you are granted additional ++! permissions described in the GCC Runtime Library Exception, version ++! 3.1, as published by the Free Software Foundation. ++ ++! You should have received a copy of the GNU General Public License and ++! a copy of the GCC Runtime Library Exception along with this program; ++! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++! . ++ ++module openacc_kinds ++ use iso_fortran_env, only: int32 ++ implicit none ++ ++ private :: int32 ++ public :: acc_device_kind ++ ++ integer, parameter :: acc_device_kind = int32 ++ ++ public :: acc_device_none, acc_device_default, acc_device_host ++ public :: acc_device_not_host, acc_device_nvidia ++ ++ ! Keep in sync with include/gomp-constants.h. ++ integer (acc_device_kind), parameter :: acc_device_none = 0 ++ integer (acc_device_kind), parameter :: acc_device_default = 1 ++ integer (acc_device_kind), parameter :: acc_device_host = 2 ++ ! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed. ++ integer (acc_device_kind), parameter :: acc_device_not_host = 4 ++ integer (acc_device_kind), parameter :: acc_device_nvidia = 5 ++ ++ public :: acc_handle_kind ++ ++ integer, parameter :: acc_handle_kind = int32 ++ ++ public :: acc_async_noval, acc_async_sync ++ ++ ! Keep in sync with include/gomp-constants.h. 
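The module being defined here gives Fortran the same entry points that C code reaches through openacc.h. For comparison, the equivalent calls on the C side look like the following (a sketch assuming a libgomp built with OpenACC support; compile with -fopenacc so libgomp is linked in):

#include <stdio.h>
#include <openacc.h>

int main (void)
{
  /* acc_device_nvidia and acc_device_host are the same handles the
     Fortran parameters above mirror.  */
  int n = acc_get_num_devices (acc_device_nvidia);
  printf ("nvidia devices: %d\n", n);
  printf ("on host: %d\n", acc_on_device (acc_device_host));
  return 0;
}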
++ integer (acc_handle_kind), parameter :: acc_async_noval = -1 ++ integer (acc_handle_kind), parameter :: acc_async_sync = -2 ++ ++end module ++ ++module openacc_internal ++ use openacc_kinds ++ implicit none ++ ++ interface ++ function acc_get_num_devices_h (d) ++ import ++ integer acc_get_num_devices_h ++ integer (acc_device_kind) d ++ end function ++ ++ subroutine acc_set_device_type_h (d) ++ import ++ integer (acc_device_kind) d ++ end subroutine ++ ++ function acc_get_device_type_h () ++ import ++ integer (acc_device_kind) acc_get_device_type_h ++ end function ++ ++ subroutine acc_set_device_num_h (n, d) ++ import ++ integer n ++ integer (acc_device_kind) d ++ end subroutine ++ ++ function acc_get_device_num_h (d) ++ import ++ integer acc_get_device_num_h ++ integer (acc_device_kind) d ++ end function ++ ++ function acc_async_test_h (a) ++ logical acc_async_test_h ++ integer a ++ end function ++ ++ function acc_async_test_all_h () ++ logical acc_async_test_all_h ++ end function ++ ++ subroutine acc_wait_h (a) ++ integer a ++ end subroutine ++ ++ subroutine acc_wait_async_h (a1, a2) ++ integer a1, a2 ++ end subroutine ++ ++ subroutine acc_wait_all_h () ++ end subroutine ++ ++ subroutine acc_wait_all_async_h (a) ++ integer a ++ end subroutine ++ ++ subroutine acc_init_h (d) ++ import ++ integer (acc_device_kind) d ++ end subroutine ++ ++ subroutine acc_shutdown_h (d) ++ import ++ integer (acc_device_kind) d ++ end subroutine ++ ++ function acc_on_device_h (d) ++ import ++ integer (acc_device_kind) d ++ logical acc_on_device_h ++ end function ++ ++ subroutine acc_copyin_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_copyin_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_copyin_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ ++ subroutine acc_present_or_copyin_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_present_or_copyin_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_present_or_copyin_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ ++ subroutine acc_create_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_create_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_create_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ ++ subroutine acc_present_or_create_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_present_or_create_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_present_or_create_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ ++ subroutine acc_copyout_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_copyout_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ type 
(*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_copyout_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ ++ subroutine acc_delete_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_delete_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_delete_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ ++ subroutine acc_update_device_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_update_device_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_update_device_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ ++ subroutine acc_update_self_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_update_self_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_update_self_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ ++ function acc_is_present_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ logical acc_is_present_32_h ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end function ++ ++ function acc_is_present_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ logical acc_is_present_64_h ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end function ++ ++ function acc_is_present_array_h (a) ++ logical acc_is_present_array_h ++ type (*), dimension (..), contiguous :: a ++ end function ++ end interface ++ ++ interface ++ function acc_get_num_devices_l (d) & ++ bind (C, name = "acc_get_num_devices") ++ use iso_c_binding, only: c_int ++ integer (c_int) :: acc_get_num_devices_l ++ integer (c_int), value :: d ++ end function ++ ++ subroutine acc_set_device_type_l (d) & ++ bind (C, name = "acc_set_device_type") ++ use iso_c_binding, only: c_int ++ integer (c_int), value :: d ++ end subroutine ++ ++ function acc_get_device_type_l () & ++ bind (C, name = "acc_get_device_type") ++ use iso_c_binding, only: c_int ++ integer (c_int) :: acc_get_device_type_l ++ end function ++ ++ subroutine acc_set_device_num_l (n, d) & ++ bind (C, name = "acc_set_device_num") ++ use iso_c_binding, only: c_int ++ integer (c_int), value :: n, d ++ end subroutine ++ ++ function acc_get_device_num_l (d) & ++ bind (C, name = "acc_get_device_num") ++ use iso_c_binding, only: c_int ++ integer (c_int) :: acc_get_device_num_l ++ integer (c_int), value :: d ++ end function ++ ++ function acc_async_test_l (a) & ++ bind (C, name = "acc_async_test") ++ use iso_c_binding, only: c_int ++ integer (c_int) :: acc_async_test_l ++ integer (c_int), value :: a ++ end function ++ ++ function acc_async_test_all_l () & ++ bind (C, name = "acc_async_test_all") ++ use iso_c_binding, only: c_int ++ integer (c_int) :: acc_async_test_all_l ++ end function ++ ++ subroutine acc_wait_l (a) & ++ bind (C, name = "acc_wait") ++ use iso_c_binding, only: c_int ++ integer (c_int), value :: a ++ end subroutine ++ ++ subroutine acc_wait_async_l (a1, a2) & ++ bind (C, name = "acc_wait_async") ++ use 
iso_c_binding, only: c_int ++ integer (c_int), value :: a1, a2 ++ end subroutine ++ ++ subroutine acc_wait_all_l () & ++ bind (C, name = "acc_wait_all") ++ use iso_c_binding, only: c_int ++ end subroutine ++ ++ subroutine acc_wait_all_async_l (a) & ++ bind (C, name = "acc_wait_all_async") ++ use iso_c_binding, only: c_int ++ integer (c_int), value :: a ++ end subroutine ++ ++ subroutine acc_init_l (d) & ++ bind (C, name = "acc_init") ++ use iso_c_binding, only: c_int ++ integer (c_int), value :: d ++ end subroutine ++ ++ subroutine acc_shutdown_l (d) & ++ bind (C, name = "acc_shutdown") ++ use iso_c_binding, only: c_int ++ integer (c_int), value :: d ++ end subroutine ++ ++ function acc_on_device_l (d) & ++ bind (C, name = "acc_on_device") ++ use iso_c_binding, only: c_int ++ integer (c_int) :: acc_on_device_l ++ integer (c_int), value :: d ++ end function ++ ++ subroutine acc_copyin_l (a, len) & ++ bind (C, name = "acc_copyin") ++ use iso_c_binding, only: c_size_t ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end subroutine ++ ++ subroutine acc_present_or_copyin_l (a, len) & ++ bind (C, name = "acc_present_or_copyin") ++ use iso_c_binding, only: c_size_t ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end subroutine ++ ++ subroutine acc_create_l (a, len) & ++ bind (C, name = "acc_create") ++ use iso_c_binding, only: c_size_t ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end subroutine ++ ++ subroutine acc_present_or_create_l (a, len) & ++ bind (C, name = "acc_present_or_create") ++ use iso_c_binding, only: c_size_t ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end subroutine ++ ++ subroutine acc_copyout_l (a, len) & ++ bind (C, name = "acc_copyout") ++ use iso_c_binding, only: c_size_t ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end subroutine ++ ++ subroutine acc_delete_l (a, len) & ++ bind (C, name = "acc_delete") ++ use iso_c_binding, only: c_size_t ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end subroutine ++ ++ subroutine acc_update_device_l (a, len) & ++ bind (C, name = "acc_update_device") ++ use iso_c_binding, only: c_size_t ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end subroutine ++ ++ subroutine acc_update_self_l (a, len) & ++ bind (C, name = "acc_update_self") ++ use iso_c_binding, only: c_size_t ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end subroutine ++ ++ function acc_is_present_l (a, len) & ++ bind (C, name = "acc_is_present") ++ use iso_c_binding, only: c_int32_t, c_size_t ++ integer (c_int32_t) :: acc_is_present_l ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end function ++ end interface ++end module ++ ++module openacc ++ use openacc_kinds ++ use openacc_internal ++ implicit none ++ ++ public :: openacc_version ++ ++ public :: acc_get_num_devices, acc_set_device_type, acc_get_device_type ++ public :: acc_set_device_num, acc_get_device_num, acc_async_test ++ public :: acc_async_test_all, acc_wait, acc_wait_async, acc_wait_all ++ public :: acc_wait_all_async, acc_init, acc_shutdown, acc_on_device ++ public :: acc_copyin, acc_present_or_copyin, acc_pcopyin, acc_create ++ public :: acc_present_or_create, acc_pcreate, acc_copyout, acc_delete ++ public :: acc_update_device, acc_update_self, acc_is_present ++ ++ integer, parameter :: openacc_version = 201306 ++ ++ interface acc_get_num_devices ++ procedure :: acc_get_num_devices_h ++ end 
interface ++ ++ interface acc_set_device_type ++ procedure :: acc_set_device_type_h ++ end interface ++ ++ interface acc_get_device_type ++ procedure :: acc_get_device_type_h ++ end interface ++ ++ interface acc_set_device_num ++ procedure :: acc_set_device_num_h ++ end interface ++ ++ interface acc_get_device_num ++ procedure :: acc_get_device_num_h ++ end interface ++ ++ interface acc_async_test ++ procedure :: acc_async_test_h ++ end interface ++ ++ interface acc_async_test_all ++ procedure :: acc_async_test_all_h ++ end interface ++ ++ interface acc_wait ++ procedure :: acc_wait_h ++ end interface ++ ++ interface acc_wait_async ++ procedure :: acc_wait_async_h ++ end interface ++ ++ interface acc_wait_all ++ procedure :: acc_wait_all_h ++ end interface ++ ++ interface acc_wait_all_async ++ procedure :: acc_wait_all_async_h ++ end interface ++ ++ interface acc_init ++ procedure :: acc_init_h ++ end interface ++ ++ interface acc_shutdown ++ procedure :: acc_shutdown_h ++ end interface ++ ++ interface acc_on_device ++ procedure :: acc_on_device_h ++ end interface ++ ++ ! acc_malloc: Only available in C/C++ ++ ! acc_free: Only available in C/C++ ++ ++ ! As vendor extension, the following code supports both 32bit and 64bit ++ ! arguments for "size"; the OpenACC standard only permits default-kind ++ ! integers, which are of kind 4 (i.e. 32 bits). ++ ! Additionally, the two-argument version also takes arrays as argument. ++ ! and the one argument version also scalars. Note that the code assumes ++ ! that the arrays are contiguous. ++ ++ interface acc_copyin ++ procedure :: acc_copyin_32_h ++ procedure :: acc_copyin_64_h ++ procedure :: acc_copyin_array_h ++ end interface ++ ++ interface acc_present_or_copyin ++ procedure :: acc_present_or_copyin_32_h ++ procedure :: acc_present_or_copyin_64_h ++ procedure :: acc_present_or_copyin_array_h ++ end interface ++ ++ interface acc_pcopyin ++ procedure :: acc_present_or_copyin_32_h ++ procedure :: acc_present_or_copyin_64_h ++ procedure :: acc_present_or_copyin_array_h ++ end interface ++ ++ interface acc_create ++ procedure :: acc_create_32_h ++ procedure :: acc_create_64_h ++ procedure :: acc_create_array_h ++ end interface ++ ++ interface acc_present_or_create ++ procedure :: acc_present_or_create_32_h ++ procedure :: acc_present_or_create_64_h ++ procedure :: acc_present_or_create_array_h ++ end interface ++ ++ interface acc_pcreate ++ procedure :: acc_present_or_create_32_h ++ procedure :: acc_present_or_create_64_h ++ procedure :: acc_present_or_create_array_h ++ end interface ++ ++ interface acc_copyout ++ procedure :: acc_copyout_32_h ++ procedure :: acc_copyout_64_h ++ procedure :: acc_copyout_array_h ++ end interface ++ ++ interface acc_delete ++ procedure :: acc_delete_32_h ++ procedure :: acc_delete_64_h ++ procedure :: acc_delete_array_h ++ end interface ++ ++ interface acc_update_device ++ procedure :: acc_update_device_32_h ++ procedure :: acc_update_device_64_h ++ procedure :: acc_update_device_array_h ++ end interface ++ ++ interface acc_update_self ++ procedure :: acc_update_self_32_h ++ procedure :: acc_update_self_64_h ++ procedure :: acc_update_self_array_h ++ end interface ++ ++ ! acc_map_data: Only available in C/C++ ++ ! acc_unmap_data: Only available in C/C++ ++ ! acc_deviceptr: Only available in C/C++ ++ ! acc_hostptr: Only available in C/C++ ++ ++ interface acc_is_present ++ procedure :: acc_is_present_32_h ++ procedure :: acc_is_present_64_h ++ procedure :: acc_is_present_array_h ++ end interface ++ ++ ! 
acc_memcpy_to_device: Only available in C/C++ ++ ! acc_memcpy_from_device: Only available in C/C++ ++ ++end module ++ ++function acc_get_num_devices_h (d) ++ use openacc_internal, only: acc_get_num_devices_l ++ use openacc_kinds ++ integer acc_get_num_devices_h ++ integer (acc_device_kind) d ++ acc_get_num_devices_h = acc_get_num_devices_l (d) ++end function ++ ++subroutine acc_set_device_type_h (d) ++ use openacc_internal, only: acc_set_device_type_l ++ use openacc_kinds ++ integer (acc_device_kind) d ++ call acc_set_device_type_l (d) ++end subroutine ++ ++function acc_get_device_type_h () ++ use openacc_internal, only: acc_get_device_type_l ++ use openacc_kinds ++ integer (acc_device_kind) acc_get_device_type_h ++ acc_get_device_type_h = acc_get_device_type_l () ++end function ++ ++subroutine acc_set_device_num_h (n, d) ++ use openacc_internal, only: acc_set_device_num_l ++ use openacc_kinds ++ integer n ++ integer (acc_device_kind) d ++ call acc_set_device_num_l (n, d) ++end subroutine ++ ++function acc_get_device_num_h (d) ++ use openacc_internal, only: acc_get_device_num_l ++ use openacc_kinds ++ integer acc_get_device_num_h ++ integer (acc_device_kind) d ++ acc_get_device_num_h = acc_get_device_num_l (d) ++end function ++ ++function acc_async_test_h (a) ++ use openacc_internal, only: acc_async_test_l ++ logical acc_async_test_h ++ integer a ++ if (acc_async_test_l (a) .eq. 1) then ++ acc_async_test_h = .TRUE. ++ else ++ acc_async_test_h = .FALSE. ++ end if ++end function ++ ++function acc_async_test_all_h () ++ use openacc_internal, only: acc_async_test_all_l ++ logical acc_async_test_all_h ++ if (acc_async_test_all_l () .eq. 1) then ++ acc_async_test_all_h = .TRUE. ++ else ++ acc_async_test_all_h = .FALSE. ++ end if ++end function ++ ++subroutine acc_wait_h (a) ++ use openacc_internal, only: acc_wait_l ++ integer a ++ call acc_wait_l (a) ++end subroutine ++ ++subroutine acc_wait_async_h (a1, a2) ++ use openacc_internal, only: acc_wait_async_l ++ integer a1, a2 ++ call acc_wait_async_l (a1, a2) ++end subroutine ++ ++subroutine acc_wait_all_h () ++ use openacc_internal, only: acc_wait_all_l ++ call acc_wait_all_l () ++end subroutine ++ ++subroutine acc_wait_all_async_h (a) ++ use openacc_internal, only: acc_wait_all_async_l ++ integer a ++ call acc_wait_all_async_l (a) ++end subroutine ++ ++subroutine acc_init_h (d) ++ use openacc_internal, only: acc_init_l ++ use openacc_kinds ++ integer (acc_device_kind) d ++ call acc_init_l (d) ++end subroutine ++ ++subroutine acc_shutdown_h (d) ++ use openacc_internal, only: acc_shutdown_l ++ use openacc_kinds ++ integer (acc_device_kind) d ++ call acc_shutdown_l (d) ++end subroutine ++ ++function acc_on_device_h (d) ++ use openacc_internal, only: acc_on_device_l ++ use openacc_kinds ++ integer (acc_device_kind) d ++ logical acc_on_device_h ++ if (acc_on_device_l (d) .eq. 1) then ++ acc_on_device_h = .TRUE. ++ else ++ acc_on_device_h = .FALSE. 
++ end if ++end function ++ ++subroutine acc_copyin_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_copyin_l ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ call acc_copyin_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_copyin_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_copyin_l ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ call acc_copyin_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_copyin_array_h (a) ++ use openacc_internal, only: acc_copyin_l ++ type (*), dimension (..), contiguous :: a ++ call acc_copyin_l (a, sizeof (a)) ++end subroutine ++ ++subroutine acc_present_or_copyin_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_present_or_copyin_l ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ call acc_present_or_copyin_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_present_or_copyin_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_present_or_copyin_l ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ call acc_present_or_copyin_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_present_or_copyin_array_h (a) ++ use openacc_internal, only: acc_present_or_copyin_l ++ type (*), dimension (..), contiguous :: a ++ call acc_present_or_copyin_l (a, sizeof (a)) ++end subroutine ++ ++subroutine acc_create_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_create_l ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ call acc_create_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_create_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_create_l ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ call acc_create_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_create_array_h (a) ++ use openacc_internal, only: acc_create_l ++ type (*), dimension (..), contiguous :: a ++ call acc_create_l (a, sizeof (a)) ++end subroutine ++ ++subroutine acc_present_or_create_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_present_or_create_l ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ call acc_present_or_create_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_present_or_create_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_present_or_create_l ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ call acc_present_or_create_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_present_or_create_array_h (a) ++ use openacc_internal, only: acc_present_or_create_l ++ type (*), dimension (..), contiguous :: a ++ call acc_present_or_create_l (a, sizeof (a)) ++end subroutine ++ ++subroutine acc_copyout_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_copyout_l ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ call acc_copyout_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_copyout_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_copyout_l ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ call acc_copyout_l (a, int (len, kind = 
c_size_t)) ++end subroutine ++ ++subroutine acc_copyout_array_h (a) ++ use openacc_internal, only: acc_copyout_l ++ type (*), dimension (..), contiguous :: a ++ call acc_copyout_l (a, sizeof (a)) ++end subroutine ++ ++subroutine acc_delete_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_delete_l ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ call acc_delete_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_delete_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_delete_l ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ call acc_delete_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_delete_array_h (a) ++ use openacc_internal, only: acc_delete_l ++ type (*), dimension (..), contiguous :: a ++ call acc_delete_l (a, sizeof (a)) ++end subroutine ++ ++subroutine acc_update_device_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_update_device_l ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ call acc_update_device_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_update_device_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_update_device_l ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ call acc_update_device_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_update_device_array_h (a) ++ use openacc_internal, only: acc_update_device_l ++ type (*), dimension (..), contiguous :: a ++ call acc_update_device_l (a, sizeof (a)) ++end subroutine ++ ++subroutine acc_update_self_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_update_self_l ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ call acc_update_self_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_update_self_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_update_self_l ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ call acc_update_self_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_update_self_array_h (a) ++ use openacc_internal, only: acc_update_self_l ++ type (*), dimension (..), contiguous :: a ++ call acc_update_self_l (a, sizeof (a)) ++end subroutine ++ ++function acc_is_present_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_is_present_l ++ logical acc_is_present_32_h ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ if (acc_is_present_l (a, int (len, kind = c_size_t)) .eq. 1) then ++ acc_is_present_32_h = .TRUE. ++ else ++ acc_is_present_32_h = .FALSE. ++ end if ++end function ++ ++function acc_is_present_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_is_present_l ++ logical acc_is_present_64_h ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ if (acc_is_present_l (a, int (len, kind = c_size_t)) .eq. 1) then ++ acc_is_present_64_h = .TRUE. ++ else ++ acc_is_present_64_h = .FALSE. 
++ end if ++end function ++ ++function acc_is_present_array_h (a) ++ use openacc_internal, only: acc_is_present_l ++ logical acc_is_present_array_h ++ type (*), dimension (..), contiguous :: a ++ acc_is_present_array_h = acc_is_present_l (a, sizeof (a)) == 1 ++end function diff --git a/gcc48-libgomp-omp_h-multilib.patch b/gcc48-libgomp-omp_h-multilib.patch new file mode 100644 index 0000000..d0e98d1 --- /dev/null +++ b/gcc48-libgomp-omp_h-multilib.patch @@ -0,0 +1,17 @@ +2008-06-09 Jakub Jelinek + + * omp.h.in (omp_nest_lock_t): Fix up for Linux multilibs. + +--- libgomp/omp.h.in.jj 2008-06-09 13:34:05.000000000 +0200 ++++ libgomp/omp.h.in 2008-06-09 13:34:48.000000000 +0200 +@@ -42,8 +42,8 @@ typedef struct + + typedef struct + { +- unsigned char _x[@OMP_NEST_LOCK_SIZE@] +- __attribute__((__aligned__(@OMP_NEST_LOCK_ALIGN@))); ++ unsigned char _x[8 + sizeof (void *)] ++ __attribute__((__aligned__(sizeof (void *)))); + } omp_nest_lock_t; + #endif + diff --git a/gcc48-libtool-no-rpath.patch b/gcc48-libtool-no-rpath.patch new file mode 100644 index 0000000..466c661 --- /dev/null +++ b/gcc48-libtool-no-rpath.patch @@ -0,0 +1,27 @@ +libtool sucks. +--- ltmain.sh.jj 2007-12-07 14:53:21.000000000 +0100 ++++ ltmain.sh 2008-09-05 21:51:48.000000000 +0200 +@@ -5394,6 +5394,7 @@ EOF + rpath="$finalize_rpath" + test "$mode" != relink && rpath="$compile_rpath$rpath" + for libdir in $rpath; do ++ case "$libdir" in /usr/lib|/usr/lib64|/usr/lib/../lib|/usr/lib/../lib64) continue;; esac + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then +@@ -6071,6 +6072,7 @@ EOF + rpath= + hardcode_libdirs= + for libdir in $compile_rpath $finalize_rpath; do ++ case "$libdir" in /usr/lib|/usr/lib64|/usr/lib/../lib|/usr/lib/../lib64) continue;; esac + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then +@@ -6120,6 +6122,7 @@ EOF + rpath= + hardcode_libdirs= + for libdir in $finalize_rpath; do ++ case "$libdir" in /usr/lib|/usr/lib64|/usr/lib/../lib|/usr/lib/../lib64) continue;; esac + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then diff --git a/gcc48-no-add-needed.patch b/gcc48-no-add-needed.patch new file mode 100644 index 0000000..b6ca777 --- /dev/null +++ b/gcc48-no-add-needed.patch @@ -0,0 +1,50 @@ +2010-02-08 Roland McGrath + + * config/rs6000/sysv4.h (LINK_EH_SPEC): Pass --no-add-needed to the + linker. + * config/gnu-user.h (LINK_EH_SPEC): Likewise. + * config/alpha/elf.h (LINK_EH_SPEC): Likewise. + * config/ia64/linux.h (LINK_EH_SPEC): Likewise. + +--- gcc/config/alpha/elf.h.jj 2011-01-03 12:52:31.118056764 +0100 ++++ gcc/config/alpha/elf.h 2011-01-04 18:14:10.931874160 +0100 +@@ -165,5 +165,5 @@ extern int alpha_this_gpdisp_sequence_nu + I imagine that other systems will catch up. In the meantime, it + doesn't harm to make sure that the data exists to be used later. */ + #if defined(HAVE_LD_EH_FRAME_HDR) +-#define LINK_EH_SPEC "%{!static:--eh-frame-hdr} " ++#define LINK_EH_SPEC "--no-add-needed %{!static:--eh-frame-hdr} " + #endif +--- gcc/config/ia64/linux.h.jj 2011-01-03 13:02:11.462994522 +0100 ++++ gcc/config/ia64/linux.h 2011-01-04 18:14:10.931874160 +0100 +@@ -77,7 +77,7 @@ do { \ + Signalize that because we have fde-glibc, we don't need all C shared libs + linked against -lgcc_s. 
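The omp.h.in hunk above replaces the configure-time @OMP_NEST_LOCK_SIZE@ and @OMP_NEST_LOCK_ALIGN@ substitutions with an expression that is correct for every multilib, so one installed header serves both -m32 and -m64. A quick self-check of the resulting layout (illustrative test program, not part of the patch):

#include <stdio.h>

/* The multilib-safe layout from the omp.h.in hunk: 8 bytes of lock
   state plus one pointer, aligned to the pointer size.  */
typedef struct
{
  unsigned char _x[8 + sizeof (void *)]
    __attribute__((__aligned__(sizeof (void *))));
} omp_nest_lock_t;

int main (void)
{
  /* Prints size=16 align=8 on LP64 and size=12 align=4 on ILP32.  */
  printf ("size=%zu align=%zu\n",
          sizeof (omp_nest_lock_t), __alignof__ (omp_nest_lock_t));
  return 0;
}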
*/ + #undef LINK_EH_SPEC +-#define LINK_EH_SPEC "" ++#define LINK_EH_SPEC "--no-add-needed " + + /* Put all *tf routines in libgcc. */ + #undef LIBGCC2_HAS_TF_MODE +--- gcc/config/gnu-user.h.jj 2011-01-03 12:53:03.739057299 +0100 ++++ gcc/config/gnu-user.h 2011-01-04 18:14:10.932814884 +0100 +@@ -82,7 +82,7 @@ see the files COPYING3 and COPYING.RUNTI + #define LIB_SPEC GNU_USER_TARGET_LIB_SPEC + + #if defined(HAVE_LD_EH_FRAME_HDR) +-#define LINK_EH_SPEC "%{!static:--eh-frame-hdr} " ++#define LINK_EH_SPEC "--no-add-needed %{!static:--eh-frame-hdr} " + #endif + + #undef LINK_GCC_C_SEQUENCE_SPEC +--- gcc/config/rs6000/sysv4.h.jj 2011-01-03 13:02:18.255994215 +0100 ++++ gcc/config/rs6000/sysv4.h 2011-01-04 18:14:10.933888871 +0100 +@@ -820,7 +820,7 @@ extern int fixuplabelno; + -dynamic-linker " GNU_USER_DYNAMIC_LINKER "}}" + + #if defined(HAVE_LD_EH_FRAME_HDR) +-# define LINK_EH_SPEC "%{!static:--eh-frame-hdr} " ++# define LINK_EH_SPEC "--no-add-needed %{!static:--eh-frame-hdr} " + #endif + + #define CPP_OS_LINUX_SPEC "-D__unix__ -D__gnu_linux__ -D__linux__ \ diff --git a/gcc48-ppc32-retaddr.patch b/gcc48-ppc32-retaddr.patch new file mode 100644 index 0000000..e906dad --- /dev/null +++ b/gcc48-ppc32-retaddr.patch @@ -0,0 +1,87 @@ +2005-11-28 Jakub Jelinek + + * config/rs6000/rs6000.c (rs6000_return_addr): If COUNT == 0, + read word RETURN_ADDRESS_OFFSET bytes above arg_pointer_rtx + instead of doing an extran indirection from frame_pointer_rtx. + + * gcc.dg/20051128-1.c: New test. + +--- gcc/config/rs6000/rs6000.c.jj 2005-11-26 14:38:01.000000000 +0100 ++++ gcc/config/rs6000/rs6000.c 2005-11-28 20:32:18.000000000 +0100 +@@ -21423,18 +21423,22 @@ rs6000_return_addr (int count, rtx frame + if (count != 0 + || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic)) + { ++ rtx x; + cfun->machine->ra_needs_full_frame = 1; + +- return +- gen_rtx_MEM +- (Pmode, +- memory_address +- (Pmode, +- plus_constant (Pmode, +- copy_to_reg +- (gen_rtx_MEM (Pmode, +- memory_address (Pmode, frame))), +- RETURN_ADDRESS_OFFSET))); ++ if (count == 0) ++ { ++ gcc_assert (frame == frame_pointer_rtx); ++ x = arg_pointer_rtx; ++ } ++ else ++ { ++ x = memory_address (Pmode, frame); ++ x = copy_to_reg (gen_rtx_MEM (Pmode, x)); ++ } ++ ++ x = plus_constant (Pmode, x, RETURN_ADDRESS_OFFSET); ++ return gen_rtx_MEM (Pmode, memory_address (Pmode, x)); + } + + cfun->machine->ra_need_lr = 1; +--- gcc/testsuite/gcc.dg/20051128-1.c.jj 2005-10-10 11:21:41.096999000 +0200 ++++ gcc/testsuite/gcc.dg/20051128-1.c 2005-11-28 12:30:57.000000000 +0100 +@@ -0,0 +1,41 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -fpic" } */ ++ ++extern void exit (int); ++extern void abort (void); ++ ++int b; ++ ++struct A ++{ ++ void *pad[147]; ++ void *ra, *h; ++ long o; ++}; ++ ++void ++__attribute__((noinline)) ++foo (struct A *a, void *x) ++{ ++ __builtin_memset (a, 0, sizeof (a)); ++ if (!b) ++ exit (0); ++} ++ ++void ++__attribute__((noinline)) ++bar (void) ++{ ++ struct A a; ++ ++ __builtin_unwind_init (); ++ foo (&a, __builtin_return_address (0)); ++} ++ ++int ++main (void) ++{ ++ bar (); ++ abort (); ++ return 0; ++} diff --git a/gcc48-pr28865.patch b/gcc48-pr28865.patch new file mode 100644 index 0000000..8e07e94 --- /dev/null +++ b/gcc48-pr28865.patch @@ -0,0 +1,190 @@ +2014-01-16 Nick Clifton + + PR middle-end/28865 + * varasm.c (output_constant): Return the number of bytes actually + emitted. + (output_constructor_array_range): Update the field size with the + number of bytes emitted by output_constant. 
+ (output_constructor_regular_field): Likewise. Also do not + complain if the total number of bytes emitted is now greater + than the expected fieldpos. + * output.h (output_constant): Update prototype and descriptive + comment. + + * gcc.c-torture/compile/pr28865.c: New. + * gcc.c-torture/execute/pr28865.c: New. + +--- gcc/varasm.c (revision 206660) ++++ gcc/varasm.c (revision 206661) +@@ -4474,8 +4474,10 @@ static unsigned HOST_WIDE_INT + This includes the pseudo-op such as ".int" or ".byte", and a newline. + Assumes output_addressed_constants has been done on EXP already. + +- Generate exactly SIZE bytes of assembler data, padding at the end +- with zeros if necessary. SIZE must always be specified. ++ Generate at least SIZE bytes of assembler data, padding at the end ++ with zeros if necessary. SIZE must always be specified. The returned ++ value is the actual number of bytes of assembler data generated, which ++ may be bigger than SIZE if the object contains a variable length field. + + SIZE is important for structure constructors, + since trailing members may have been omitted from the constructor. +@@ -4490,14 +4492,14 @@ static unsigned HOST_WIDE_INT + + ALIGN is the alignment of the data in bits. */ + +-void ++unsigned HOST_WIDE_INT + output_constant (tree exp, unsigned HOST_WIDE_INT size, unsigned int align) + { + enum tree_code code; + unsigned HOST_WIDE_INT thissize; + + if (size == 0 || flag_syntax_only) +- return; ++ return size; + + /* See if we're trying to initialize a pointer in a non-default mode + to the address of some declaration somewhere. If the target says +@@ -4562,7 +4564,7 @@ output_constant (tree exp, unsigned HOST + && vec_safe_is_empty (CONSTRUCTOR_ELTS (exp))) + { + assemble_zeros (size); +- return; ++ return size; + } + + if (TREE_CODE (exp) == FDESC_EXPR) +@@ -4574,7 +4576,7 @@ output_constant (tree exp, unsigned HOST + #else + gcc_unreachable (); + #endif +- return; ++ return size; + } + + /* Now output the underlying data. If we've handling the padding, return. +@@ -4612,8 +4614,7 @@ output_constant (tree exp, unsigned HOST + switch (TREE_CODE (exp)) + { + case CONSTRUCTOR: +- output_constructor (exp, size, align, NULL); +- return; ++ return output_constructor (exp, size, align, NULL); + case STRING_CST: + thissize = MIN ((unsigned HOST_WIDE_INT)TREE_STRING_LENGTH (exp), + size); +@@ -4648,11 +4649,10 @@ output_constant (tree exp, unsigned HOST + case RECORD_TYPE: + case UNION_TYPE: + gcc_assert (TREE_CODE (exp) == CONSTRUCTOR); +- output_constructor (exp, size, align, NULL); +- return; ++ return output_constructor (exp, size, align, NULL); + + case ERROR_MARK: +- return; ++ return 0; + + default: + gcc_unreachable (); +@@ -4660,6 +4660,8 @@ output_constant (tree exp, unsigned HOST + + if (size > thissize) + assemble_zeros (size - thissize); ++ ++ return size; + } + + +@@ -4759,7 +4761,7 @@ output_constructor_array_range (oc_local + if (local->val == NULL_TREE) + assemble_zeros (fieldsize); + else +- output_constant (local->val, fieldsize, align2); ++ fieldsize = output_constant (local->val, fieldsize, align2); + + /* Count its size. */ + local->total_bytes += fieldsize; +@@ -4808,9 +4810,8 @@ output_constructor_regular_field (oc_loc + Note no alignment needed in an array, since that is guaranteed + if each element has the proper size. 
*/ + if ((local->field != NULL_TREE || local->index != NULL_TREE) +- && fieldpos != local->total_bytes) ++ && fieldpos > local->total_bytes) + { +- gcc_assert (fieldpos >= local->total_bytes); + assemble_zeros (fieldpos - local->total_bytes); + local->total_bytes = fieldpos; + } +@@ -4847,7 +4848,7 @@ output_constructor_regular_field (oc_loc + if (local->val == NULL_TREE) + assemble_zeros (fieldsize); + else +- output_constant (local->val, fieldsize, align2); ++ fieldsize = output_constant (local->val, fieldsize, align2); + + /* Count its size. */ + local->total_bytes += fieldsize; +--- gcc/output.h (revision 206660) ++++ gcc/output.h (revision 206661) +@@ -294,11 +294,13 @@ extern void output_quoted_string (FILE * + This includes the pseudo-op such as ".int" or ".byte", and a newline. + Assumes output_addressed_constants has been done on EXP already. + +- Generate exactly SIZE bytes of assembler data, padding at the end +- with zeros if necessary. SIZE must always be specified. ++ Generate at least SIZE bytes of assembler data, padding at the end ++ with zeros if necessary. SIZE must always be specified. The returned ++ value is the actual number of bytes of assembler data generated, which ++ may be bigger than SIZE if the object contains a variable length field. + + ALIGN is the alignment in bits that may be assumed for the data. */ +-extern void output_constant (tree, unsigned HOST_WIDE_INT, unsigned int); ++extern unsigned HOST_WIDE_INT output_constant (tree, unsigned HOST_WIDE_INT, unsigned int); + + /* When outputting delayed branch sequences, this rtx holds the + sequence being output. It is null when no delayed branch +--- gcc/testsuite/gcc.c-torture/execute/pr28865.c (revision 0) ++++ gcc/testsuite/gcc.c-torture/execute/pr28865.c (revision 206661) +@@ -0,0 +1,21 @@ ++struct A { int a; char b[]; }; ++union B { struct A a; char b[sizeof (struct A) + 31]; }; ++union B b = { { 1, "123456789012345678901234567890" } }; ++union B c = { { 2, "123456789012345678901234567890" } }; ++ ++__attribute__((noinline, noclone)) void ++foo (int *x[2]) ++{ ++ x[0] = &b.a.a; ++ x[1] = &c.a.a; ++} ++ ++int ++main () ++{ ++ int *x[2]; ++ foo (x); ++ if (*x[0] != 1 || *x[1] != 2) ++ __builtin_abort (); ++ return 0; ++} +--- gcc/testsuite/gcc.c-torture/compile/pr28865.c (revision 0) ++++ gcc/testsuite/gcc.c-torture/compile/pr28865.c (revision 206661) +@@ -0,0 +1,16 @@ ++struct var_len ++{ ++ int field1; ++ const char field2[]; ++}; ++ ++/* Note - strictly speaking this array declaration is illegal ++ since each element has a variable length. GCC allows it ++ (for the moment) because it is used in existing code, such ++ as glibc. */ ++static const struct var_len var_array[] = ++{ ++ { 1, "Long exposure noise reduction" }, ++ { 2, "Shutter/AE lock buttons" }, ++ { 3, "Mirror lockup" } ++}; diff --git a/gcc48-pr38757.patch b/gcc48-pr38757.patch new file mode 100644 index 0000000..4206584 --- /dev/null +++ b/gcc48-pr38757.patch @@ -0,0 +1,106 @@ +2009-03-18 Jakub Jelinek + + PR debug/38757 + * langhooks.h (struct lang_hooks): Add source_language langhook. + * langhooks-def.h (LANG_HOOKS_SOURCE_LANGUAGE): Define to NULL. + (LANG_HOOKS_INITIALIZER): Add LANG_HOOKS_SOURCE_LANGUAGE. + * dwarf2out.c (add_prototyped_attribute): Add DW_AT_prototyped + also for DW_LANG_{C,C99,ObjC}. + (gen_compile_unit_die): Use lang_hooks.source_language () to + determine if DW_LANG_C99 or DW_LANG_C89 should be returned. +c/ + * c-lang.c (c_source_language): New function. + (LANG_HOOKS_SOURCE_LANGUAGE): Define.
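A quick way to see the new langhook from the outside, as an illustrative sketch rather than part of the patch: compile the C99 snippet below with gcc -g -std=c99 and the compile unit DIE should carry DW_AT_language = DW_LANG_C99, while -std=c89 keeps DW_LANG_C89 (readelf --debug-dump=info shows the attribute; the file name foo.c is made up).

/* foo.c -- minimal C99 translation unit for inspecting DW_AT_language.  */
int
main (void)
{
  for (int i = 0; i < 3; i++)   /* C99: declaration in the for-init.  */
    ;
  return 0;
}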
+ +--- gcc/langhooks.h.jj 2011-01-03 12:53:05.125745450 +0100 ++++ gcc/langhooks.h 2011-01-04 17:59:43.166744926 +0100 +@@ -467,6 +467,10 @@ struct lang_hooks + gimplification. */ + bool deep_unsharing; + ++ /* Return year of the source language standard version if the FE supports ++ multiple versions of the standard. */ ++ int (*source_language) (void); ++ + /* Whenever you add entries here, make sure you adjust langhooks-def.h + and langhooks.c accordingly. */ + }; +--- gcc/langhooks-def.h.jj 2011-01-03 12:53:05.000000000 +0100 ++++ gcc/langhooks-def.h 2011-01-04 18:00:44.858851030 +0100 +@@ -118,6 +118,7 @@ extern void lhd_omp_firstprivatize_type_ + #define LANG_HOOKS_BLOCK_MAY_FALLTHRU hook_bool_const_tree_true + #define LANG_HOOKS_EH_USE_CXA_END_CLEANUP false + #define LANG_HOOKS_DEEP_UNSHARING false ++#define LANG_HOOKS_SOURCE_LANGUAGE NULL + + /* Attribute hooks. */ + #define LANG_HOOKS_ATTRIBUTE_TABLE NULL +@@ -303,7 +304,8 @@ extern void lhd_end_section (void); + LANG_HOOKS_EH_PROTECT_CLEANUP_ACTIONS, \ + LANG_HOOKS_BLOCK_MAY_FALLTHRU, \ + LANG_HOOKS_EH_USE_CXA_END_CLEANUP, \ +- LANG_HOOKS_DEEP_UNSHARING \ ++ LANG_HOOKS_DEEP_UNSHARING, \ ++ LANG_HOOKS_SOURCE_LANGUAGE \ + } + + #endif /* GCC_LANG_HOOKS_DEF_H */ +--- gcc/c/c-lang.c.jj 2011-01-03 12:53:05.376056936 +0100 ++++ gcc/c/c-lang.c 2011-01-04 17:59:43.167743798 +0100 +@@ -36,6 +36,12 @@ along with GCC; see the file COPYING3. + + enum c_language_kind c_language = clk_c; + ++static int ++c_source_language (void) ++{ ++ return flag_isoc99 ? 1999 : 1989; ++} ++ + /* Lang hooks common to C and ObjC are declared in c-objc-common.h; + consequently, there should be very few hooks below. */ + +@@ -45,6 +51,8 @@ enum c_language_kind c_language = clk_c; + #define LANG_HOOKS_INIT c_objc_common_init + #undef LANG_HOOKS_INIT_TS + #define LANG_HOOKS_INIT_TS c_common_init_ts ++#undef LANG_HOOKS_SOURCE_LANGUAGE ++#define LANG_HOOKS_SOURCE_LANGUAGE c_source_language + + /* Each front end provides its own lang hook initializer. */ + struct lang_hooks lang_hooks = LANG_HOOKS_INITIALIZER; +--- gcc/dwarf2out.c.jj 2011-01-03 12:53:05.102056475 +0100 ++++ gcc/dwarf2out.c 2011-01-04 18:03:14.534151763 +0100 +@@ -16109,9 +16109,18 @@ add_bit_size_attribute (dw_die_ref die, + static inline void + add_prototyped_attribute (dw_die_ref die, tree func_type) + { +- if (get_AT_unsigned (comp_unit_die (), DW_AT_language) == DW_LANG_C89 +- && prototype_p (func_type)) +- add_AT_flag (die, DW_AT_prototyped, 1); ++ switch (get_AT_unsigned (comp_unit_die (), DW_AT_language)) ++ { ++ case DW_LANG_C: ++ case DW_LANG_C89: ++ case DW_LANG_C99: ++ case DW_LANG_ObjC: ++ if (prototype_p (func_type)) ++ add_AT_flag (die, DW_AT_prototyped, 1); ++ break; ++ default: ++ break; ++ } + } + + /* Add an 'abstract_origin' attribute below a given DIE. The DIE is found +@@ -18915,6 +18924,10 @@ gen_compile_unit_die (const char *filena + if (strcmp (language_string, "GNU Go") == 0) + language = DW_LANG_Go; + } ++ else if (strcmp (language_string, "GNU C") == 0 ++ && lang_hooks.source_language ++ && lang_hooks.source_language () >= 1999) ++ language = DW_LANG_C99; + } + /* Use a degraded Fortran setting in strict DWARF2 so is_fortran works. 
*/ + else if (strcmp (language_string, "GNU Fortran") == 0) diff --git a/gcc48-pr52714.patch b/gcc48-pr52714.patch new file mode 100644 index 0000000..2ea553d --- /dev/null +++ b/gcc48-pr52714.patch @@ -0,0 +1,76 @@ +2014-02-27 Jeff Law + + PR rtl-optimization/52714 + * combine.c (try_combine): When splitting an unrecognized PARALLEL + into two independent simple sets, if I3 is a jump, ensure the + pattern we place into I3 is a (set (pc) ...) + + * gcc.c-torture/compile/pr52714.c: New test. + +2016-06-15 Jakub Jelinek + + * gcc.c-torture/compile/20160615-1.c: New test. + +--- gcc/combine.c (revision 208203) ++++ gcc/combine.c (revision 208204) +@@ -3706,6 +3706,9 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx + #ifdef HAVE_cc0 + && !reg_referenced_p (cc0_rtx, XVECEXP (newpat, 0, 0)) + #endif ++ /* If I3 is a jump, ensure that set0 is a jump so that ++ we do not create invalid RTL. */ ++ && (!JUMP_P (i3) || SET_DEST (XVECEXP (newpat, 0, 0)) == pc_rtx) + ) + { + newi2pat = XVECEXP (newpat, 0, 1); +@@ -3716,6 +3719,9 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx + #ifdef HAVE_cc0 + && !reg_referenced_p (cc0_rtx, XVECEXP (newpat, 0, 1)) + #endif ++ /* If I3 is a jump, ensure that set1 is a jump so that ++ we do not create invalid RTL. */ ++ && (!JUMP_P (i3) || SET_DEST (XVECEXP (newpat, 0, 1)) == pc_rtx) + ) + { + newi2pat = XVECEXP (newpat, 0, 0); +--- gcc/testsuite/gcc.c-torture/compile/pr52714.c (revision 0) ++++ gcc/testsuite/gcc.c-torture/compile/pr52714.c (revision 208204) +@@ -0,0 +1,25 @@ ++ ++int __re_compile_fastmap(unsigned char *p) ++{ ++ unsigned char **stack; ++ unsigned size; ++ unsigned avail; ++ ++ stack = __builtin_alloca(5 * sizeof(unsigned char*)); ++ if (stack == 0) ++ return -2; ++ size = 5; ++ avail = 0; ++ ++ for (;;) { ++ switch (*p++) { ++ case 0: ++ if (avail == size) ++ return -2; ++ stack[avail++] = p; ++ } ++ } ++ ++ return 0; ++} ++ +--- gcc/testsuite/gcc.c-torture/compile/20160615-1.c.jj 2016-06-15 11:17:54.690689056 +0200 ++++ gcc/testsuite/gcc.c-torture/compile/20160615-1.c 2016-06-15 11:17:48.811765657 +0200 +@@ -0,0 +1,10 @@ ++int a; ++void bar (int, unsigned, unsigned); ++ ++void ++foo (unsigned x) ++{ ++ unsigned b = a ? x : 0; ++ if (x || b) ++ bar (0, x, b); ++} diff --git a/gcc48-pr53477.patch b/gcc48-pr53477.patch new file mode 100644 index 0000000..70d5d56 --- /dev/null +++ b/gcc48-pr53477.patch @@ -0,0 +1,131 @@ +2013-08-20 Phil Muldoon + + PR libstdc++/53477 + http://sourceware.org/bugzilla/show_bug.cgi?id=15195 + + * python/libstdcxx/v6/printers.py (Printer.__call__): If a value + is a reference, fetch referenced value. + (RxPrinter.invoke): Ditto. + * testsuite/libstdc++-prettyprinters/cxx11.cc (main): Add -O0 + flag. Add referenced value tests. 
+ +--- libstdc++-v3/python/libstdcxx/v6/printers.py (revision 201887) ++++ libstdc++-v3/python/libstdcxx/v6/printers.py (revision 201888) +@@ -786,6 +786,11 @@ class RxPrinter(object): + def invoke(self, value): + if not self.enabled: + return None ++ ++ if value.type.code == gdb.TYPE_CODE_REF: ++ if hasattr(gdb.Value,"referenced_value"): ++ value = value.referenced_value() ++ + return self.function(self.name, value) + + # A pretty-printer that conforms to the "PrettyPrinter" protocol from +@@ -841,6 +846,11 @@ class Printer(object): + return None + + basename = match.group(1) ++ ++ if val.type.code == gdb.TYPE_CODE_REF: ++ if hasattr(gdb.Value,"referenced_value"): ++ val = val.referenced_value() ++ + if basename in self.lookup: + return self.lookup[basename].invoke(val) + +--- libstdc++-v3/testsuite/libstdc++-prettyprinters/cxx11.cc (revision 201887) ++++ libstdc++-v3/testsuite/libstdc++-prettyprinters/cxx11.cc (revision 201888) +@@ -1,5 +1,5 @@ + // { dg-do run } +-// { dg-options "-std=gnu++11 -g" } ++// { dg-options "-std=gnu++11 -g -O0" } + + // Copyright (C) 2011-2013 Free Software Foundation, Inc. + // +@@ -24,6 +24,8 @@ + #include + #include + ++typedef std::tuple ExTuple; ++ + template + void + placeholder(const T &s) +@@ -62,43 +64,75 @@ main() + std::forward_list efl; + // { dg-final { note-test efl "empty std::forward_list" } } + ++ std::forward_list &refl = efl; ++// { dg-final { note-test refl "empty std::forward_list" } } ++ + std::forward_list fl; + fl.push_front(2); + fl.push_front(1); + // { dg-final { note-test fl {std::forward_list = {[0] = 1, [1] = 2}} } } + ++ std::forward_list &rfl = fl; ++// { dg-final { note-test rfl {std::forward_list = {[0] = 1, [1] = 2}} } } ++ + std::unordered_map eum; + // { dg-final { note-test eum "std::unordered_map with 0 elements" } } ++ std::unordered_map &reum = eum; ++// { dg-final { note-test reum "std::unordered_map with 0 elements" } } ++ + std::unordered_multimap eumm; + // { dg-final { note-test eumm "std::unordered_multimap with 0 elements" } } ++ std::unordered_multimap &reumm = eumm; ++// { dg-final { note-test reumm "std::unordered_multimap with 0 elements" } } ++ + std::unordered_set eus; + // { dg-final { note-test eus "std::unordered_set with 0 elements" } } ++ std::unordered_set &reus = eus; ++// { dg-final { note-test reus "std::unordered_set with 0 elements" } } ++ + std::unordered_multiset eums; + // { dg-final { note-test eums "std::unordered_multiset with 0 elements" } } ++ std::unordered_multiset &reums = eums; ++// { dg-final { note-test reums "std::unordered_multiset with 0 elements" } } + + std::unordered_map uom; + uom[5] = "three"; + uom[3] = "seven"; + // { dg-final { note-test uom {std::unordered_map with 2 elements = {[3] = "seven", [5] = "three"}} } } + ++ std::unordered_map &ruom = uom; ++// { dg-final { note-test ruom {std::unordered_map with 2 elements = {[3] = "seven", [5] = "three"}} } } ++ + std::unordered_multimap uomm; + uomm.insert(std::pair (5, "three")); + uomm.insert(std::pair (5, "seven")); + // { dg-final { note-test uomm {std::unordered_multimap with 2 elements = {[5] = "seven", [5] = "three"}} } } ++ std::unordered_multimap &ruomm = uomm; ++// { dg-final { note-test ruomm {std::unordered_multimap with 2 elements = {[5] = "seven", [5] = "three"}} } } + + std::unordered_set uos; + uos.insert(5); + // { dg-final { note-test uos {std::unordered_set with 1 elements = {[0] = 5}} } } ++ std::unordered_set &ruos = uos; ++// { dg-final { note-test ruos {std::unordered_set with 1 elements = {[0] = 5}} 
} } + + std::unordered_multiset uoms; + uoms.insert(5); + // { dg-final { note-test uoms {std::unordered_multiset with 1 elements = {[0] = 5}} } } ++ std::unordered_multiset &ruoms = uoms; ++// { dg-final { note-test ruoms {std::unordered_multiset with 1 elements = {[0] = 5}} } } + + std::unique_ptr uptr (new datum); + uptr->s = "hi bob"; + uptr->i = 23; + // { dg-final { regexp-test uptr {std::unique_ptr.datum. containing 0x.*} } } ++ std::unique_ptr &ruptr = uptr; ++// { dg-final { regexp-test ruptr {std::unique_ptr.datum. containing 0x.*} } } + ++ ExTuple tpl(6,7); ++// { dg-final { note-test tpl {std::tuple containing = {[1] = 6, [2] = 7}} } } ++ ExTuple &rtpl = tpl; ++// { dg-final { note-test rtpl {std::tuple containing = {[1] = 6, [2] = 7}} } } + placeholder(""); // Mark SPOT + use(efl); + use(fl); diff --git a/gcc48-pr56564.patch b/gcc48-pr56564.patch new file mode 100644 index 0000000..d81e02d --- /dev/null +++ b/gcc48-pr56564.patch @@ -0,0 +1,654 @@ +2013-06-19 Igor Zamyatin + + * gcc.dg/tree-ssa/loop-19.c: Add -fno-common. + +2013-06-12 Jakub Jelinek + + PR target/56564 + * varasm.c (decl_binds_to_current_def_p): Call binds_local_p + target hook even for !TREE_PUBLIC decls. If no resolution info + is available, return false for common and external decls. + + * gcc.target/i386/pr56564-1.c: Skip on darwin, mingw and cygwin. + * gcc.target/i386/pr56564-3.c: Likewise. + +2013-06-11 Jakub Jelinek + + PR target/56564 + * varasm.c (get_variable_align): Move #endif to the right place. + +2013-06-10 Jakub Jelinek + + PR target/56564 + * varasm.c (align_variable): Don't use DATA_ALIGNMENT or + CONSTANT_ALIGNMENT if !decl_binds_to_current_def_p (decl). + Use DATA_ABI_ALIGNMENT for that case instead if defined. + (get_variable_align): New function. + (get_variable_section, emit_bss, emit_common, + assemble_variable_contents, place_block_symbol): Use + get_variable_align instead of DECL_ALIGN. + (assemble_noswitch_variable): Add align argument, use it + instead of DECL_ALIGN. + (assemble_variable): Adjust caller. Use get_variable_align + instead of DECL_ALIGN. + * config/i386/i386.h (DATA_ALIGNMENT): Adjust x86_data_alignment + caller. + (DATA_ABI_ALIGNMENT): Define. + * config/i386/i386-protos.h (x86_data_alignment): Adjust prototype. + * config/i386/i386.c (x86_data_alignment): Add opt argument. If + opt is false, only return the psABI mandated alignment increase. + * config/c6x/c6x.h (DATA_ALIGNMENT): Renamed to... + (DATA_ABI_ALIGNMENT): ... this. + * config/mmix/mmix.h (DATA_ALIGNMENT): Renamed to... + (DATA_ABI_ALIGNMENT): ... this. + * config/mmix/mmix.c (mmix_data_alignment): Adjust function comment. + * config/s390/s390.h (DATA_ALIGNMENT): Renamed to... + (DATA_ABI_ALIGNMENT): ... this. + * doc/tm.texi.in (DATA_ABI_ALIGNMENT): Document. + * doc/tm.texi: Regenerated. + + * gcc.target/i386/pr56564-1.c: New test. + * gcc.target/i386/pr56564-2.c: New test. + * gcc.target/i386/pr56564-3.c: New test. + * gcc.target/i386/pr56564-4.c: New test. + * gcc.target/i386/avx256-unaligned-load-4.c: Add -fno-common. + * gcc.target/i386/avx256-unaligned-store-1.c: Likewise. + * gcc.target/i386/avx256-unaligned-store-3.c: Likewise. + * gcc.target/i386/avx256-unaligned-store-4.c: Likewise. + * gcc.target/i386/vect-sizes-1.c: Likewise. + * gcc.target/i386/memcpy-1.c: Likewise. + * gcc.dg/vect/costmodel/i386/costmodel-vect-31.c (tmp): Initialize. + * gcc.dg/vect/costmodel/x86_64/costmodel-vect-31.c (tmp): Likewise. 
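The split this patch makes is between alignment the psABI guarantees for every definition (DATA_ABI_ALIGNMENT, assumable even for interposable symbols) and alignment GCC adds purely as an optimization (DATA_ALIGNMENT, assumable only when the reference binds to the current definition). A sketch of the consequence, mirroring the new pr56564 testcases; assume x86-64, -O3 -fpic, and made-up symbol names:

/* The x86-64 psABI requires 16-byte alignment for an array of 16 or
   more bytes, so t_low_bits may be folded to 0 even though t can be
   interposed.  No such mandate exists for the struct, so its 16-byte
   alignment is optimization-only and s_low_bits must remain a runtime
   test: an interposing definition of s aligned to only 8 bytes would
   be ABI-valid.  */
char t[16] = { 7 };
struct S { long a, b; } s = { 5, 6 };

int
t_low_bits (void)
{
  return (int) ((__UINTPTR_TYPE__) &t[0] & 15);  /* foldable to 0 */
}

int
s_low_bits (void)
{
  return (int) ((__UINTPTR_TYPE__) &s & 15);     /* stays a runtime test */
}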
+ +--- gcc/doc/tm.texi.in (revision 199897) ++++ gcc/doc/tm.texi.in (revision 199898) +@@ -1062,6 +1062,15 @@ arrays to be word-aligned so that @code{ + constants to character arrays can be done inline. + @end defmac + ++@defmac DATA_ABI_ALIGNMENT (@var{type}, @var{basic-align}) ++Similar to @code{DATA_ALIGNMENT}, but for the cases where the ABI mandates ++some alignment increase, instead of optimization only purposes. E.g.@ ++AMD x86-64 psABI says that variables with array type larger than 15 bytes ++must be aligned to 16 byte boundaries. ++ ++If this macro is not defined, then @var{basic-align} is used. ++@end defmac ++ + @defmac CONSTANT_ALIGNMENT (@var{constant}, @var{basic-align}) + If defined, a C expression to compute the alignment given to a constant + that is being placed in memory. @var{constant} is the constant and +--- gcc/doc/tm.texi (revision 199897) ++++ gcc/doc/tm.texi (revision 199898) +@@ -1078,6 +1078,15 @@ arrays to be word-aligned so that @code{ + constants to character arrays can be done inline. + @end defmac + ++@defmac DATA_ABI_ALIGNMENT (@var{type}, @var{basic-align}) ++Similar to @code{DATA_ALIGNMENT}, but for the cases where the ABI mandates ++some alignment increase, instead of optimization only purposes. E.g.@ ++AMD x86-64 psABI says that variables with array type larger than 15 bytes ++must be aligned to 16 byte boundaries. ++ ++If this macro is not defined, then @var{basic-align} is used. ++@end defmac ++ + @defmac CONSTANT_ALIGNMENT (@var{constant}, @var{basic-align}) + If defined, a C expression to compute the alignment given to a constant + that is being placed in memory. @var{constant} is the constant and +--- gcc/varasm.c (revision 199897) ++++ gcc/varasm.c (revision 199984) +@@ -966,13 +966,80 @@ align_variable (tree decl, bool dont_out + align = MAX_OFILE_ALIGNMENT; + } + +- /* On some machines, it is good to increase alignment sometimes. */ + if (! DECL_USER_ALIGN (decl)) + { ++#ifdef DATA_ABI_ALIGNMENT ++ unsigned int data_abi_align ++ = DATA_ABI_ALIGNMENT (TREE_TYPE (decl), align); ++ /* For backwards compatibility, don't assume the ABI alignment for ++ TLS variables. */ ++ if (! DECL_THREAD_LOCAL_P (decl) || data_abi_align <= BITS_PER_WORD) ++ align = data_abi_align; ++#endif ++ ++ /* On some machines, it is good to increase alignment sometimes. ++ But as DECL_ALIGN is used both for actually emitting the variable ++ and for code accessing the variable as guaranteed alignment, we ++ can only increase the alignment if it is a performance optimization ++ if the references to it must bind to the current definition. */ ++ if (decl_binds_to_current_def_p (decl)) ++ { ++#ifdef DATA_ALIGNMENT ++ unsigned int data_align = DATA_ALIGNMENT (TREE_TYPE (decl), align); ++ /* Don't increase alignment too much for TLS variables - TLS space ++ is too precious. */ ++ if (! DECL_THREAD_LOCAL_P (decl) || data_align <= BITS_PER_WORD) ++ align = data_align; ++#endif ++#ifdef CONSTANT_ALIGNMENT ++ if (DECL_INITIAL (decl) != 0 ++ && DECL_INITIAL (decl) != error_mark_node) ++ { ++ unsigned int const_align ++ = CONSTANT_ALIGNMENT (DECL_INITIAL (decl), align); ++ /* Don't increase alignment too much for TLS variables - TLS ++ space is too precious. */ ++ if (! DECL_THREAD_LOCAL_P (decl) || const_align <= BITS_PER_WORD) ++ align = const_align; ++ } ++#endif ++ } ++ } ++ ++ /* Reset the alignment in case we have made it tighter, so we can benefit ++ from it in get_pointer_alignment. 
*/ ++ DECL_ALIGN (decl) = align; ++} ++ ++/* Return DECL_ALIGN (decl), possibly increased for optimization purposes ++ beyond what align_variable returned. */ ++ ++static unsigned int ++get_variable_align (tree decl) ++{ ++ unsigned int align = DECL_ALIGN (decl); ++ ++ /* For user aligned vars or static vars align_variable already did ++ everything. */ ++ if (DECL_USER_ALIGN (decl) || !TREE_PUBLIC (decl)) ++ return align; ++ ++#ifdef DATA_ABI_ALIGNMENT ++ if (DECL_THREAD_LOCAL_P (decl)) ++ align = DATA_ABI_ALIGNMENT (TREE_TYPE (decl), align); ++#endif ++ ++ /* For decls that bind to the current definition, align_variable ++ did also everything, except for not assuming ABI required alignment ++ of TLS variables. For other vars, increase the alignment here ++ as an optimization. */ ++ if (!decl_binds_to_current_def_p (decl)) ++ { ++ /* On some machines, it is good to increase alignment sometimes. */ + #ifdef DATA_ALIGNMENT + unsigned int data_align = DATA_ALIGNMENT (TREE_TYPE (decl), align); + /* Don't increase alignment too much for TLS variables - TLS space +- is too precious. */ ++ is too precious. */ + if (! DECL_THREAD_LOCAL_P (decl) || data_align <= BITS_PER_WORD) + align = data_align; + #endif +@@ -989,9 +1056,7 @@ align_variable (tree decl, bool dont_out + #endif + } + +- /* Reset the alignment in case we have made it tighter, so we can benefit +- from it in get_pointer_alignment. */ +- DECL_ALIGN (decl) = align; ++ return align; + } + + /* Return the section into which the given VAR_DECL or CONST_DECL +@@ -1043,7 +1108,8 @@ get_variable_section (tree decl, bool pr + return bss_noswitch_section; + } + +- return targetm.asm_out.select_section (decl, reloc, DECL_ALIGN (decl)); ++ return targetm.asm_out.select_section (decl, reloc, ++ get_variable_align (decl)); + } + + /* Return the block into which object_block DECL should be placed. */ +@@ -1780,7 +1846,8 @@ emit_bss (tree decl ATTRIBUTE_UNUSED, + unsigned HOST_WIDE_INT rounded ATTRIBUTE_UNUSED) + { + #if defined ASM_OUTPUT_ALIGNED_BSS +- ASM_OUTPUT_ALIGNED_BSS (asm_out_file, decl, name, size, DECL_ALIGN (decl)); ++ ASM_OUTPUT_ALIGNED_BSS (asm_out_file, decl, name, size, ++ get_variable_align (decl)); + return true; + #endif + } +@@ -1796,10 +1863,11 @@ emit_common (tree decl ATTRIBUTE_UNUSED, + { + #if defined ASM_OUTPUT_ALIGNED_DECL_COMMON + ASM_OUTPUT_ALIGNED_DECL_COMMON (asm_out_file, decl, name, +- size, DECL_ALIGN (decl)); ++ size, get_variable_align (decl)); + return true; + #elif defined ASM_OUTPUT_ALIGNED_COMMON +- ASM_OUTPUT_ALIGNED_COMMON (asm_out_file, name, size, DECL_ALIGN (decl)); ++ ASM_OUTPUT_ALIGNED_COMMON (asm_out_file, name, size, ++ get_variable_align (decl)); + return true; + #else + ASM_OUTPUT_COMMON (asm_out_file, name, size, rounded); +@@ -1828,7 +1896,8 @@ emit_tls_common (tree decl ATTRIBUTE_UNU + NAME is the name of DECL's SYMBOL_REF. 
*/ + + static void +-assemble_noswitch_variable (tree decl, const char *name, section *sect) ++assemble_noswitch_variable (tree decl, const char *name, section *sect, ++ unsigned int align) + { + unsigned HOST_WIDE_INT size, rounded; + +@@ -1850,7 +1919,7 @@ assemble_noswitch_variable (tree decl, c + * (BIGGEST_ALIGNMENT / BITS_PER_UNIT)); + + if (!sect->noswitch.callback (decl, name, size, rounded) +- && (unsigned HOST_WIDE_INT) DECL_ALIGN_UNIT (decl) > rounded) ++ && (unsigned HOST_WIDE_INT) (align / BITS_PER_UNIT) > rounded) + warning (0, "requested alignment for %q+D is greater than " + "implemented alignment of %wu", decl, rounded); + } +@@ -1880,7 +1949,7 @@ assemble_variable_contents (tree decl, c + /* Output the actual data. */ + output_constant (DECL_INITIAL (decl), + tree_low_cst (DECL_SIZE_UNIT (decl), 1), +- DECL_ALIGN (decl)); ++ get_variable_align (decl)); + else + /* Leave space for it. */ + assemble_zeros (tree_low_cst (DECL_SIZE_UNIT (decl), 1)); +@@ -1904,6 +1973,7 @@ assemble_variable (tree decl, int top_le + const char *name; + rtx decl_rtl, symbol; + section *sect; ++ unsigned int align; + bool asan_protected = false; + + /* This function is supposed to handle VARIABLES. Ensure we have one. */ +@@ -2003,6 +2073,8 @@ assemble_variable (tree decl, int top_le + + set_mem_align (decl_rtl, DECL_ALIGN (decl)); + ++ align = get_variable_align (decl); ++ + if (TREE_PUBLIC (decl)) + maybe_assemble_visibility (decl); + +@@ -2032,12 +2104,12 @@ assemble_variable (tree decl, int top_le + place_block_symbol (symbol); + } + else if (SECTION_STYLE (sect) == SECTION_NOSWITCH) +- assemble_noswitch_variable (decl, name, sect); ++ assemble_noswitch_variable (decl, name, sect, align); + else + { + switch_to_section (sect); +- if (DECL_ALIGN (decl) > BITS_PER_UNIT) +- ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (DECL_ALIGN_UNIT (decl))); ++ if (align > BITS_PER_UNIT) ++ ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT)); + assemble_variable_contents (decl, name, dont_output_data); + if (asan_protected) + { +@@ -6709,10 +6781,10 @@ bool + decl_binds_to_current_def_p (tree decl) + { + gcc_assert (DECL_P (decl)); +- if (!TREE_PUBLIC (decl)) +- return true; + if (!targetm.binds_local_p (decl)) + return false; ++ if (!TREE_PUBLIC (decl)) ++ return true; + /* When resolution is available, just use it. */ + if (TREE_CODE (decl) == VAR_DECL + && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))) +@@ -6730,10 +6802,20 @@ decl_binds_to_current_def_p (tree decl) + return resolution_to_local_definition_p (node->symbol.resolution); + } + /* Otherwise we have to assume the worst for DECL_WEAK (hidden weaks +- binds locally but still can be overwritten). ++ binds locally but still can be overwritten), DECL_COMMON (can be merged ++ with a non-common definition somewhere in the same module) or ++ DECL_EXTERNAL. + This rely on fact that binds_local_p behave as decl_replaceable_p + for all other declaration types. 
*/ +- return !DECL_WEAK (decl); ++ if (DECL_WEAK (decl)) ++ return false; ++ if (DECL_COMMON (decl) ++ && (DECL_INITIAL (decl) == NULL ++ || DECL_INITIAL (decl) == error_mark_node)) ++ return false; ++ if (DECL_EXTERNAL (decl)) ++ return false; ++ return true; + } + + /* A replaceable function or variable is one which may be replaced +@@ -6959,7 +7041,7 @@ place_block_symbol (rtx symbol) + else + { + decl = SYMBOL_REF_DECL (symbol); +- alignment = DECL_ALIGN (decl); ++ alignment = get_variable_align (decl); + size = tree_low_cst (DECL_SIZE_UNIT (decl), 1); + if (flag_asan && asan_protect_global (decl)) + { +--- gcc/config/s390/s390.h (revision 199897) ++++ gcc/config/s390/s390.h (revision 199898) +@@ -221,7 +221,7 @@ enum processor_flags + + /* Alignment on even addresses for LARL instruction. */ + #define CONSTANT_ALIGNMENT(EXP, ALIGN) (ALIGN) < 16 ? 16 : (ALIGN) +-#define DATA_ALIGNMENT(TYPE, ALIGN) (ALIGN) < 16 ? 16 : (ALIGN) ++#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) (ALIGN) < 16 ? 16 : (ALIGN) + + /* Alignment is not required by the hardware. */ + #define STRICT_ALIGNMENT 0 +--- gcc/config/i386/i386.h (revision 199897) ++++ gcc/config/i386/i386.h (revision 199898) +@@ -859,7 +859,18 @@ enum target_cpu_default + cause character arrays to be word-aligned so that `strcpy' calls + that copy constants to character arrays can be done inline. */ + +-#define DATA_ALIGNMENT(TYPE, ALIGN) ix86_data_alignment ((TYPE), (ALIGN)) ++#define DATA_ALIGNMENT(TYPE, ALIGN) \ ++ ix86_data_alignment ((TYPE), (ALIGN), true) ++ ++/* Similar to DATA_ALIGNMENT, but for the cases where the ABI mandates ++ some alignment increase, instead of optimization only purposes. E.g. ++ AMD x86-64 psABI says that variables with array type larger than 15 bytes ++ must be aligned to 16 byte boundaries. ++ ++ If this macro is not defined, then ALIGN is used. */ ++ ++#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) \ ++ ix86_data_alignment ((TYPE), (ALIGN), false) + + /* If defined, a C expression to compute the alignment for a local + variable. TYPE is the data type, and ALIGN is the alignment that +--- gcc/config/i386/i386-protos.h (revision 199897) ++++ gcc/config/i386/i386-protos.h (revision 199898) +@@ -207,7 +207,7 @@ extern void init_cumulative_args (CUMULA + #endif /* RTX_CODE */ + + #ifdef TREE_CODE +-extern int ix86_data_alignment (tree, int); ++extern int ix86_data_alignment (tree, int, bool); + extern unsigned int ix86_local_alignment (tree, enum machine_mode, + unsigned int); + extern unsigned int ix86_minimum_alignment (tree, enum machine_mode, +--- gcc/config/i386/i386.c (revision 199897) ++++ gcc/config/i386/i386.c (revision 199898) +@@ -25292,12 +25292,13 @@ ix86_constant_alignment (tree exp, int a + instead of that alignment to align the object. */ + + int +-ix86_data_alignment (tree type, int align) ++ix86_data_alignment (tree type, int align, bool opt) + { + int max_align + = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT); + +- if (AGGREGATE_TYPE_P (type) ++ if (opt ++ && AGGREGATE_TYPE_P (type) + && TYPE_SIZE (type) + && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST + && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align +@@ -25309,14 +25310,17 @@ ix86_data_alignment (tree type, int alig + to 16byte boundary. */ + if (TARGET_64BIT) + { +- if (AGGREGATE_TYPE_P (type) +- && TYPE_SIZE (type) +- && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST +- && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128 +- || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) ++ if ((opt ? 
AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE) ++ && TYPE_SIZE (type) ++ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST ++ && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128 ++ || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) + return 128; + } + ++ if (!opt) ++ return align; ++ + if (TREE_CODE (type) == ARRAY_TYPE) + { + if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) +--- gcc/config/c6x/c6x.h (revision 199897) ++++ gcc/config/c6x/c6x.h (revision 199898) +@@ -134,7 +134,7 @@ extern c6x_cpu_t c6x_arch; + Really only externally visible arrays must be aligned this way, as + only those are directly visible from another compilation unit. But + we don't have that information available here. */ +-#define DATA_ALIGNMENT(TYPE, ALIGN) \ ++#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) \ + (((ALIGN) < BITS_PER_UNIT * 8 && TREE_CODE (TYPE) == ARRAY_TYPE) \ + ? BITS_PER_UNIT * 8 : (ALIGN)) + +--- gcc/config/mmix/mmix.h (revision 199897) ++++ gcc/config/mmix/mmix.h (revision 199898) +@@ -164,7 +164,7 @@ struct GTY(()) machine_function + /* Copied from elfos.h. */ + #define MAX_OFILE_ALIGNMENT (32768 * 8) + +-#define DATA_ALIGNMENT(TYPE, BASIC_ALIGN) \ ++#define DATA_ABI_ALIGNMENT(TYPE, BASIC_ALIGN) \ + mmix_data_alignment (TYPE, BASIC_ALIGN) + + #define CONSTANT_ALIGNMENT(CONSTANT, BASIC_ALIGN) \ +--- gcc/config/mmix/mmix.c (revision 199897) ++++ gcc/config/mmix/mmix.c (revision 199898) +@@ -313,7 +313,7 @@ mmix_init_machine_status (void) + return ggc_alloc_cleared_machine_function (); + } + +-/* DATA_ALIGNMENT. ++/* DATA_ABI_ALIGNMENT. + We have trouble getting the address of stuff that is located at other + than 32-bit alignments (GETA requirements), so try to give everything + at least 32-bit alignment. */ +--- gcc/testsuite/gcc.target/i386/memcpy-1.c (revision 199897) ++++ gcc/testsuite/gcc.target/i386/memcpy-1.c (revision 199898) +@@ -1,6 +1,6 @@ + /* { dg-do compile } */ + /* { dg-require-effective-target ia32 } */ +-/* { dg-options "-O2 -march=pentiumpro -minline-all-stringops" } */ ++/* { dg-options "-O2 -march=pentiumpro -minline-all-stringops -fno-common" } */ + /* { dg-final { scan-assembler "rep" } } */ + /* { dg-final { scan-assembler "movs" } } */ + /* { dg-final { scan-assembler-not "test" } } */ +--- gcc/testsuite/gcc.target/i386/vect-sizes-1.c (revision 199897) ++++ gcc/testsuite/gcc.target/i386/vect-sizes-1.c (revision 199898) +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O3 -ffast-math -mavx -mtune=generic" } */ ++/* { dg-options "-O3 -ffast-math -mavx -mtune=generic -fno-common" } */ + + double a[1024]; + +--- gcc/testsuite/gcc.target/i386/avx256-unaligned-load-4.c (revision 199897) ++++ gcc/testsuite/gcc.target/i386/avx256-unaligned-load-4.c (revision 199898) +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O3 -dp -mavx -mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store" } */ ++/* { dg-options "-O3 -dp -mavx -mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store -fno-common" } */ + + #define N 1024 + +--- gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c (revision 199897) ++++ gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c (revision 199898) +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */ ++/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store -fno-common" } */ + + #define N 1024 + +--- gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c (revision 199897) ++++ 
gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c (revision 199898) +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store -mtune=generic" } */ ++/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store -mtune=generic -fno-common" } */ + + #define N 1024 + +--- gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c (revision 199897) ++++ gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c (revision 199898) +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O3 -dp -mavx -mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store" } */ ++/* { dg-options "-O3 -dp -mavx -mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store -fno-common" } */ + + #define N 1024 + +--- gcc/testsuite/gcc.target/i386/pr56564-1.c (revision 0) ++++ gcc/testsuite/gcc.target/i386/pr56564-1.c (revision 199985) +@@ -0,0 +1,26 @@ ++/* PR target/56564 */ ++/* { dg-do compile { target { fpic && lp64 } } } */ ++/* { dg-skip-if "No symbol interposition for PIC" { *-*-mingw* *-*-cygwin* *-*-darwin* } } */ ++/* { dg-options "-O3 -fpic -fdump-tree-optimized" } */ ++ ++struct S { long a, b; } s = { 5, 6 }; ++char t[16] = { 7 }; ++ ++int ++foo (void) ++{ ++ return ((__UINTPTR_TYPE__) &s) & 15; ++} ++ ++int ++bar (void) ++{ ++ return ((__UINTPTR_TYPE__) &t[0]) & 15; ++} ++ ++/* { dg-final { scan-tree-dump-times "&s" 1 "optimized" } } */ ++/* { dg-final { scan-tree-dump-times "&t" 0 "optimized" } } */ ++/* { dg-final { scan-tree-dump-times "return 0" 1 "optimized" } } */ ++/* { dg-final { scan-assembler ".align\[ \t]*16\[^:]*\[\n\r]s:" { target { *-*-linux* } } } } */ ++/* { dg-final { scan-assembler ".align\[ \t]*16\[^:]*\[\n\r]t:" { target { *-*-linux* } } } } */ ++/* { dg-final { cleanup-tree-dump "optimized" } } */ +--- gcc/testsuite/gcc.target/i386/pr56564-2.c (revision 0) ++++ gcc/testsuite/gcc.target/i386/pr56564-2.c (revision 199898) +@@ -0,0 +1,25 @@ ++/* PR target/56564 */ ++/* { dg-do compile { target { *-*-linux* && lp64 } } } */ ++/* { dg-options "-O3 -fno-pic -fdump-tree-optimized" } */ ++ ++struct S { long a, b; } s = { 5, 6 }; ++char t[16] = { 7 }; ++ ++int ++foo (void) ++{ ++ return ((__UINTPTR_TYPE__) &s) & 15; ++} ++ ++int ++bar (void) ++{ ++ return ((__UINTPTR_TYPE__) &t[0]) & 15; ++} ++ ++/* { dg-final { scan-tree-dump-times "&s" 0 "optimized" } } */ ++/* { dg-final { scan-tree-dump-times "&t" 0 "optimized" } } */ ++/* { dg-final { scan-tree-dump-times "return 0" 2 "optimized" } } */ ++/* { dg-final { scan-assembler ".align\[ \t]*16\[^:]*\[\n\r]s:" { target { *-*-linux* } } } } */ ++/* { dg-final { scan-assembler ".align\[ \t]*16\[^:]*\[\n\r]t:" { target { *-*-linux* } } } } */ ++/* { dg-final { cleanup-tree-dump "optimized" } } */ +--- gcc/testsuite/gcc.target/i386/pr56564-3.c (revision 0) ++++ gcc/testsuite/gcc.target/i386/pr56564-3.c (revision 199985) +@@ -0,0 +1,29 @@ ++/* PR target/56564 */ ++/* { dg-do compile { target { fpic && lp64 } } } */ ++/* { dg-skip-if "No symbol interposition for PIC" { *-*-mingw* *-*-cygwin* *-*-darwin* } } */ ++/* { dg-options "-O3 -fpic -fdump-tree-optimized" } */ ++ ++__thread struct S { long a, b; } s = { 5, 6 }; ++__thread char t[16] = { 7 }; ++ ++int ++foo (void) ++{ ++ return ((__UINTPTR_TYPE__) &s) & 15; ++} ++ ++/* For backwards compatibility we don't assume that t must ++ be aligned to 16 bytes, but align it anyway. 
*/ ++ ++int ++bar (void) ++{ ++ return ((__UINTPTR_TYPE__) &t[0]) & 15; ++} ++ ++/* { dg-final { scan-tree-dump-times "&s" 1 "optimized" } } */ ++/* { dg-final { scan-tree-dump-times "&t" 1 "optimized" } } */ ++/* { dg-final { scan-tree-dump-times "return 0" 0 "optimized" } } */ ++/* { dg-final { scan-assembler-not ".align\[ \t]*16\[^:]*\[\n\r]s:" { target { *-*-linux* } } } } */ ++/* { dg-final { scan-assembler ".align\[ \t]*16\[^:]*\[\n\r]t:" { target { *-*-linux* } } } } */ ++/* { dg-final { cleanup-tree-dump "optimized" } } */ +--- gcc/testsuite/gcc.target/i386/pr56564-4.c (revision 0) ++++ gcc/testsuite/gcc.target/i386/pr56564-4.c (revision 199898) +@@ -0,0 +1,22 @@ ++/* PR target/56564 */ ++/* { dg-do compile { target { *-*-linux* && lp64 } } } */ ++/* { dg-options "-O3 -fno-pic -fdump-tree-optimized" } */ ++ ++__thread struct S { long a, b; } s = { 5, 6 }; ++__thread char t[16] = { 7 }; ++ ++int ++foo (void) ++{ ++ return ((__UINTPTR_TYPE__) &s) & 15; ++} ++ ++int ++bar (void) ++{ ++ return ((__UINTPTR_TYPE__) &t[0]) & 15; ++} ++ ++/* { dg-final { scan-assembler-not ".align\[ \t]*16\[^:]*\[\n\r]s:" { target { *-*-linux* } } } } */ ++/* { dg-final { scan-assembler ".align\[ \t]*16\[^:]*\[\n\r]t:" { target { *-*-linux* } } } } */ ++/* { dg-final { cleanup-tree-dump "optimized" } } */ +--- gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-31.c (revision 199897) ++++ gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-31.c (revision 199898) +@@ -18,7 +18,7 @@ struct s{ + struct t e; /* unaligned (offset 2N+4N+4 B) */ + }; + +-struct s tmp; ++struct s tmp = { 1 }; + + int main1 () + { +--- gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-31.c (revision 199897) ++++ gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-31.c (revision 199898) +@@ -18,7 +18,7 @@ struct s{ + struct t e; /* unaligned (offset 2N+4N+4 B) */ + }; + +-struct s tmp; ++struct s tmp = { 1 }; + + int main1 () + { +--- gcc/testsuite/gcc.dg/tree-ssa/loop-19.c (revision 200212) ++++ gcc/testsuite/gcc.dg/tree-ssa/loop-19.c (revision 200213) +@@ -6,7 +6,7 @@ + + /* { dg-do compile { target { i?86-*-* || { x86_64-*-* || powerpc_hard_double } } } } */ + /* { dg-require-effective-target nonpic } */ +-/* { dg-options "-O3 -fno-tree-loop-distribute-patterns -fno-prefetch-loop-arrays -fdump-tree-optimized" } */ ++/* { dg-options "-O3 -fno-tree-loop-distribute-patterns -fno-prefetch-loop-arrays -fdump-tree-optimized -fno-common" } */ + + # define N 2000000 + double a[N],c[N]; diff --git a/gcc48-pr60010.patch b/gcc48-pr60010.patch new file mode 100644 index 0000000..0baa553 --- /dev/null +++ b/gcc48-pr60010.patch @@ -0,0 +1,16 @@ +2014-02-14 Kyle McMartin + + PR pch/60010 + * config/host-linux.c (TRY_EMPTY_VM_SPACE): Define for AArch64. 
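For context, an illustrative sketch (not GCC code; the function name and error handling are made up) of what TRY_EMPTY_VM_SPACE is for: the PCH machinery tries to map a precompiled header back at the fixed address it was written from, so the pointers serialized inside it remain valid, and the macro names an address range expected to be unused on each host.

#include <sys/mman.h>
#include <stddef.h>

void *
try_map_pch (size_t size)
{
  void *hint = (void *) 0x1000000000;   /* the value the patch picks for aarch64 */
  void *p = mmap (hint, size, PROT_READ | PROT_WRITE,
                  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p == hint)
    return p;                   /* pointers inside the PCH stay valid */
  if (p != MAP_FAILED)
    munmap (p, size);           /* kernel ignored the hint: give up */
  return NULL;
}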
+ +--- gcc/config/host-linux.c (revision 207784) ++++ gcc/config/host-linux.c (revision 207785) +@@ -86,6 +86,8 @@ + # define TRY_EMPTY_VM_SPACE 0x60000000 +#elif defined(__mc68000__) + # define TRY_EMPTY_VM_SPACE 0x40000000 ++#elif defined(__aarch64__) ++# define TRY_EMPTY_VM_SPACE 0x1000000000 +#elif defined(__ARM_EABI__) + # define TRY_EMPTY_VM_SPACE 0x60000000 +#elif defined(__mips__) && defined(__LP64__) diff --git a/gcc48-pr62258.patch b/gcc48-pr62258.patch new file mode 100644 index 0000000..4589214 --- /dev/null +++ b/gcc48-pr62258.patch @@ -0,0 +1,87 @@ +2015-09-03 Jonathan Wakely + + Backport from mainline + 2015-04-27 Dmitry Prokoptsev + Michael Hanselmann + + PR libstdc++/62258 + * libsupc++/eh_ptr.cc (rethrow_exception): Increment count of + uncaught exceptions. + * testsuite/18_support/exception_ptr/62258.cc: New. + +--- libstdc++-v3/libsupc++/eh_ptr.cc (revision 227455) ++++ libstdc++-v3/libsupc++/eh_ptr.cc (revision 227456) +@@ -245,6 +245,9 @@ std::rethrow_exception(std::exception_pt + __GXX_INIT_DEPENDENT_EXCEPTION_CLASS(dep->unwindHeader.exception_class); + dep->unwindHeader.exception_cleanup = __gxx_dependent_exception_cleanup; + ++ __cxa_eh_globals *globals = __cxa_get_globals (); ++ globals->uncaughtExceptions += 1; ++ + #ifdef _GLIBCXX_SJLJ_EXCEPTIONS + _Unwind_SjLj_RaiseException (&dep->unwindHeader); + #else +--- libstdc++-v3/testsuite/18_support/exception_ptr/62258.cc (revision 0) ++++ libstdc++-v3/testsuite/18_support/exception_ptr/62258.cc (revision 227456) +@@ -0,0 +1,61 @@ ++// { dg-options "-std=gnu++11" } ++// { dg-require-atomic-builtins "" } ++ ++// Copyright (C) 2015 Free Software Foundation, Inc. ++// ++// This file is part of the GNU ISO C++ Library. This library is free ++// software; you can redistribute it and/or modify it under the ++// terms of the GNU General Public License as published by the ++// Free Software Foundation; either version 3, or (at your option) ++// any later version. ++ ++// This library is distributed in the hope that it will be useful, ++// but WITHOUT ANY WARRANTY; without even the implied warranty of ++// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++// GNU General Public License for more details. ++ ++// You should have received a copy of the GNU General Public License along ++// with this library; see the file COPYING3. If not see ++// . ++ ++// PR libstdc++/62258 ++ ++#include ++#include ++ ++struct check_on_destruct ++{ ++ ~check_on_destruct(); ++}; ++ ++check_on_destruct::~check_on_destruct() ++{ ++ VERIFY(std::uncaught_exception()); ++} ++ ++int main () ++{ ++ VERIFY(!std::uncaught_exception()); ++ ++ try ++ { ++ check_on_destruct check; ++ ++ try ++ { ++ throw 1; ++ } ++ catch (...) ++ { ++ VERIFY(!std::uncaught_exception()); ++ ++ std::rethrow_exception(std::current_exception()); ++ } ++ } ++ catch (...) ++ { ++ VERIFY(!std::uncaught_exception()); ++ } ++ ++ VERIFY(!std::uncaught_exception()); ++} diff --git a/gcc48-pr63293.patch b/gcc48-pr63293.patch new file mode 100644 index 0000000..4b11a8c --- /dev/null +++ b/gcc48-pr63293.patch @@ -0,0 +1,60 @@ +2014-11-04 Jiong Wang + Wilco Dijkstra + + PR target/63293 + * config/aarch64/aarch64.c (aarch64_expand_epilogue): Add barriers before + stack adjustment. + +--- gcc/config/aarch64/aarch64.c (revision 217090) ++++ gcc/config/aarch64/aarch64.c (revision 217091) +@@ -1989,6 +1989,9 @@ aarch64_expand_epilogue (bool for_sibcal + rtx insn; + rtx cfa_reg; + rtx cfi_ops = NULL; ++ /* We need to add memory barrier to prevent read from deallocated stack.
*/ ++ bool need_barrier_p = (get_frame_size () != 0 ++ || cfun->machine->saved_varargs_size); + + aarch64_layout_frame (); + original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size; +@@ -2030,6 +2033,9 @@ aarch64_expand_epilogue (bool for_sibcal + if (frame_pointer_needed + && (crtl->outgoing_args_size || cfun->calls_alloca)) + { ++ if (cfun->calls_alloca) ++ emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx)); ++ + insn = emit_insn (gen_add3_insn (stack_pointer_rtx, + hard_frame_pointer_rtx, + GEN_INT (- fp_offset))); +@@ -2048,6 +2054,9 @@ aarch64_expand_epilogue (bool for_sibcal + /* Restore the frame pointer and lr if the frame pointer is needed. */ + if (offset > 0) + { ++ if (need_barrier_p && (!frame_pointer_needed || !fp_offset)) ++ emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx)); ++ + if (frame_pointer_needed) + { + rtx mem_fp, mem_lr; +@@ -2067,6 +2076,10 @@ aarch64_expand_epilogue (bool for_sibcal + + UNITS_PER_WORD)); + emit_insn (gen_load_pairdi (reg_fp, mem_fp, reg_lr, mem_lr)); + ++ if (need_barrier_p) ++ emit_insn (gen_stack_tie (stack_pointer_rtx, ++ stack_pointer_rtx)); ++ + insn = emit_insn (gen_add2_insn (stack_pointer_rtx, + GEN_INT (offset))); + } +@@ -2128,6 +2141,9 @@ aarch64_expand_epilogue (bool for_sibcal + + if (frame_size > -1) + { ++ if (need_barrier_p) ++ emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx)); ++ + if (frame_size >= 0x1000000) + { + rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM); diff --git a/gcc48-pr65142.patch b/gcc48-pr65142.patch new file mode 100644 index 0000000..367ec16 --- /dev/null +++ b/gcc48-pr65142.patch @@ -0,0 +1,23 @@ +2016-06-01 Jakub Jelinek + + Backported from mainline + 2015-10-02 Jonathan Wakely + + PR libstdc++/65142 + * src/c++11/random.cc (random_device::_M_getval()): Check read result. + +--- libstdc++-v3/src/c++11/random.cc (revision 228423) ++++ libstdc++-v3/src/c++11/random.cc (revision 228424) +@@ -126,8 +126,10 @@ namespace std _GLIBCXX_VISIBILITY(defaul + #endif + + result_type __ret; +- std::fread(reinterpret_cast(&__ret), sizeof(result_type), +- 1, _M_file); ++ const size_t e = std::fread(reinterpret_cast(&__ret), ++ sizeof(result_type), 1, _M_file); ++ if (e != 1) ++ std::__throw_runtime_error(__N("random_device could not be read")); + return __ret; + } + diff --git a/gcc48-pr66731.patch b/gcc48-pr66731.patch new file mode 100644 index 0000000..daf20b3 --- /dev/null +++ b/gcc48-pr66731.patch @@ -0,0 +1,111 @@ +2015-08-04 Szabolcs Nagy + + Backport from mainline: + 2015-07-06 Szabolcs Nagy + + PR target/66731 + * config/aarch64/aarch64.md (fnmul3): Handle -frounding-math. + + * gcc.target/aarch64/fnmul-1.c: New. + * gcc.target/aarch64/fnmul-2.c: New. + * gcc.target/aarch64/fnmul-3.c: New. + * gcc.target/aarch64/fnmul-4.c: New. 
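The !flag_rounding_math guard exists because fnmul negates after the product has been rounded once, i.e. it computes -(a*b) exactly, whereas (-a)*b rounds the negated infinite-precision product; the two agree under round-to-nearest but differ under directed rounding. A runnable, target-independent demo (the file name and values are illustrative; build with gcc -O2 -frounding-math demo.c -lm):

#include <fenv.h>
#include <stdio.h>

int
main (void)
{
  /* (1 + 2^-52) squared is inexact, so the rounding direction shows.  */
  volatile double a = 0x1.0000000000001p0;
  volatile double b = 0x1.0000000000001p0;
  fesetround (FE_UPWARD);
  volatile double neg_of_mul = -(a * b);  /* what fnmul would produce */
  volatile double mul_of_neg = (-a) * b;  /* rounded toward +infinity */
  printf ("%a\n%a\n", neg_of_mul, mul_of_neg);
  return neg_of_mul == mul_of_neg;        /* exits 0: the results differ */
}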
+ +--- gcc/config/aarch64/aarch64.md (revision 226591) ++++ gcc/config/aarch64/aarch64.md (revision 226592) +@@ -3101,6 +3101,17 @@ + (mult:GPF + (neg:GPF (match_operand:GPF 1 "register_operand" "w")) + (match_operand:GPF 2 "register_operand" "w")))] ++ "TARGET_FLOAT && !flag_rounding_math" ++ "fnmul\\t%0, %1, %2" ++ [(set_attr "v8type" "fmul") ++ (set_attr "mode" "")] ++) ++ ++(define_insn "*fnmul3" ++ [(set (match_operand:GPF 0 "register_operand" "=w") ++ (neg:GPF (mult:GPF ++ (match_operand:GPF 1 "register_operand" "w") ++ (match_operand:GPF 2 "register_operand" "w"))))] + "TARGET_FLOAT" + "fnmul\\t%0, %1, %2" + [(set_attr "v8type" "fmul") +--- gcc/testsuite/gcc.target/aarch64/fnmul-1.c (nonexistent) ++++ gcc/testsuite/gcc.target/aarch64/fnmul-1.c (revision 226592) +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++double ++foo_d (double a, double b) ++{ ++ /* { dg-final { scan-assembler "fnmul\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" } } */ ++ return -a * b; ++} ++ ++float ++foo_s (float a, float b) ++{ ++ /* { dg-final { scan-assembler "fnmul\\ts\[0-9\]+, s\[0-9\]+, s\[0-9\]+" } } */ ++ return -a * b; ++} +--- gcc/testsuite/gcc.target/aarch64/fnmul-2.c (nonexistent) ++++ gcc/testsuite/gcc.target/aarch64/fnmul-2.c (revision 226592) +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -frounding-math" } */ ++ ++double ++foo_d (double a, double b) ++{ ++ /* { dg-final { scan-assembler "fneg\\td\[0-9\]+, d\[0-9\]+" } } */ ++ /* { dg-final { scan-assembler "fmul\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" } } */ ++ return -a * b; ++} ++ ++float ++foo_s (float a, float b) ++{ ++ /* { dg-final { scan-assembler "fneg\\ts\[0-9\]+, s\[0-9\]+" } } */ ++ /* { dg-final { scan-assembler "fmul\\ts\[0-9\]+, s\[0-9\]+, s\[0-9\]+" } } */ ++ return -a * b; ++} +--- gcc/testsuite/gcc.target/aarch64/fnmul-3.c (nonexistent) ++++ gcc/testsuite/gcc.target/aarch64/fnmul-3.c (revision 226592) +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++double ++foo_d (double a, double b) ++{ ++ /* { dg-final { scan-assembler "fnmul\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" } } */ ++ return -(a * b); ++} ++ ++float ++foo_s (float a, float b) ++{ ++ /* { dg-final { scan-assembler "fnmul\\ts\[0-9\]+, s\[0-9\]+, s\[0-9\]+" } } */ ++ return -(a * b); ++} +--- gcc/testsuite/gcc.target/aarch64/fnmul-4.c (nonexistent) ++++ gcc/testsuite/gcc.target/aarch64/fnmul-4.c (revision 226592) +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -frounding-math" } */ ++ ++double ++foo_d (double a, double b) ++{ ++ /* { dg-final { scan-assembler "fnmul\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" } } */ ++ return -(a * b); ++} ++ ++float ++foo_s (float a, float b) ++{ ++ /* { dg-final { scan-assembler "fnmul\\ts\[0-9\]+, s\[0-9\]+, s\[0-9\]+" } } */ ++ return -(a * b); ++} diff --git a/gcc48-pr66840.patch b/gcc48-pr66840.patch new file mode 100644 index 0000000..721ee03 --- /dev/null +++ b/gcc48-pr66840.patch @@ -0,0 +1,16 @@ +2015-07-14 Matthias Klose + + PR target/66840 + * config/rs6000/t-rs6000 (TM_H): Add rs6000-cpus.def. + +diff -Nrup a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000 +--- /gcc/config/rs6000/t-rs6000 2013-08-14 05:55:11.000000000 -0600 ++++ gcc/config/rs6000/t-rs6000 2018-04-18 12:09:30.614737081 -0600 +@@ -19,6 +19,7 @@ + # . 
+ + TM_H += $(srcdir)/config/rs6000/rs6000-builtin.def ++TM_H += $(srcdir)/config/rs6000/rs6000-cpus.def + + rs6000.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(REGS_H) hard-reg-set.h \ diff --git a/gcc48-pr67281.patch b/gcc48-pr67281.patch new file mode 100644 index 0000000..9637e08 --- /dev/null +++ b/gcc48-pr67281.patch @@ -0,0 +1,348 @@ +2015-10-14 Peter Bergner + Torvald Riegel + + PR target/67281 + * config/rs6000/htm.md (UNSPEC_HTM_FENCE): New. + (tabort, tabortc, tabortci, tbegin, tcheck, tend, + trechkpt, treclaim, tsr, ttest): Rename define_insns from this... + (*tabort, *tabortc, *tabortci, *tbegin, *tcheck, *tend, + *trechkpt, *treclaim, *tsr, *ttest): ...to this. Add memory barrier. + (tabort, tabortc, tabortci, tbegin, tcheck, tend, + trechkpt, treclaim, tsr, ttest): New define_expands. + * config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Define + __TM_FENCE__ for htm. + * doc/extend.texi: Update documentation for htm builtins. + +2015-08-03 Peter Bergner + + * config/rs6000/htm.md (tabort.): Restrict the source operand to + using a base register. + + * gcc.target/powerpc/htm-tabort-no-r0.c: New test. + +--- gcc/doc/extend.texi (revision 228826) ++++ gcc/doc/extend.texi (revision 228827) +@@ -16092,6 +16092,28 @@ unsigned int __builtin_tresume (void) + unsigned int __builtin_tsuspend (void) + @end smallexample + ++Note that the semantics of the above HTM builtins are required to mimic ++the locking semantics used for critical sections. Builtins that are used ++to create a new transaction or restart a suspended transaction must have ++lock acquisition like semantics while those builtins that end or suspend a ++transaction must have lock release like semantics. Specifically, this must ++mimic lock semantics as specified by C++11, for example: Lock acquisition is ++as-if an execution of __atomic_exchange_n(&globallock,1,__ATOMIC_ACQUIRE) ++that returns 0, and lock release is as-if an execution of ++__atomic_store(&globallock,0,__ATOMIC_RELEASE), with globallock being an ++implicit implementation-defined lock used for all transactions. The HTM ++instructions associated with the builtins inherently provide the ++correct acquisition and release hardware barriers required. However, ++the compiler must also be prohibited from moving loads and stores across ++the builtins in a way that would violate their semantics. This has been ++accomplished by adding memory barriers to the associated HTM instructions ++(which is a conservative approach to provide acquire and release semantics). ++Earlier versions of the compiler did not treat the HTM instructions as ++memory barriers. A @code{__TM_FENCE__} macro has been added, which can ++be used to determine whether the current compiler treats HTM instructions ++as memory barriers or not. This allows the user to explicitly add memory ++barriers to their code when using an older version of the compiler. ++ + The following set of built-in functions are available to gain access + to the HTM specific special purpose registers.
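The __TM_FENCE__ macro described above is intended for exactly this kind of guard. A hedged usage sketch, assuming powerpc64 with -mhtm; the names are invented, and the lock fallback a real program needs is elided:

/* On compilers that define __TM_FENCE__, the HTM builtins themselves
   act as memory barriers; on older compilers, keep the compiler from
   scheduling loads and stores across the transaction boundary.  */
#ifdef __TM_FENCE__
# define TM_COMPILER_BARRIER() ((void) 0)
#else
# define TM_COMPILER_BARRIER() __asm__ __volatile__ ("" ::: "memory")
#endif

static long counter;

void
increment_transactionally (void)
{
  if (__builtin_tbegin (0))
    {
      TM_COMPILER_BARRIER ();
      counter++;                    /* transactional update */
      TM_COMPILER_BARRIER ();
      __builtin_tend (0);
    }
  /* On failure a real program would fall back to taking a lock.  */
}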
+ +--- gcc/config/rs6000/htm.md (revision 226531) ++++ gcc/config/rs6000/htm.md (revision 228827) +@@ -27,6 +27,14 @@ (define_constants + ]) + + ;; ++;; UNSPEC usage ++;; ++ ++(define_c_enum "unspec" ++ [UNSPEC_HTM_FENCE ++ ]) ++ ++;; + ;; UNSPEC_VOLATILE usage + ;; + +@@ -45,96 +53,223 @@ (define_c_enum "unspecv" + UNSPECV_HTM_MTSPR + ]) + ++(define_expand "tabort" ++ [(parallel ++ [(set (match_operand:CC 1 "cc_reg_operand" "=x") ++ (unspec_volatile:CC [(match_operand:SI 0 "base_reg_operand" "b")] ++ UNSPECV_HTM_TABORT)) ++ (set (match_dup 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))])] ++ "TARGET_HTM" ++{ ++ operands[2] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); ++ MEM_VOLATILE_P (operands[2]) = 1; ++}) + +-(define_insn "tabort" ++(define_insn "*tabort" + [(set (match_operand:CC 1 "cc_reg_operand" "=x") +- (unspec_volatile:CC [(match_operand:SI 0 "gpc_reg_operand" "r")] +- UNSPECV_HTM_TABORT))] ++ (unspec_volatile:CC [(match_operand:SI 0 "base_reg_operand" "b")] ++ UNSPECV_HTM_TABORT)) ++ (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "tabort. %0" + [(set_attr "type" "htm") + (set_attr "length" "4")]) + +-(define_insn "tabortc" ++(define_expand "tabortc" ++ [(parallel ++ [(set (match_operand:CC 3 "cc_reg_operand" "=x") ++ (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n") ++ (match_operand:GPR 1 "gpc_reg_operand" "r") ++ (match_operand:GPR 2 "gpc_reg_operand" "r")] ++ UNSPECV_HTM_TABORTXC)) ++ (set (match_dup 4) (unspec:BLK [(match_dup 4)] UNSPEC_HTM_FENCE))])] ++ "TARGET_HTM" ++{ ++ operands[4] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); ++ MEM_VOLATILE_P (operands[4]) = 1; ++}) ++ ++(define_insn "*tabortc" + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n") + (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "gpc_reg_operand" "r")] +- UNSPECV_HTM_TABORTXC))] ++ UNSPECV_HTM_TABORTXC)) ++ (set (match_operand:BLK 4) (unspec:BLK [(match_dup 4)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "tabortc. %0,%1,%2" + [(set_attr "type" "htm") + (set_attr "length" "4")]) + +-(define_insn "tabortci" ++(define_expand "tabortci" ++ [(parallel ++ [(set (match_operand:CC 3 "cc_reg_operand" "=x") ++ (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n") ++ (match_operand:GPR 1 "gpc_reg_operand" "r") ++ (match_operand 2 "s5bit_cint_operand" "n")] ++ UNSPECV_HTM_TABORTXCI)) ++ (set (match_dup 4) (unspec:BLK [(match_dup 4)] UNSPEC_HTM_FENCE))])] ++ "TARGET_HTM" ++{ ++ operands[4] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); ++ MEM_VOLATILE_P (operands[4]) = 1; ++}) ++ ++(define_insn "*tabortci" + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n") + (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand 2 "s5bit_cint_operand" "n")] +- UNSPECV_HTM_TABORTXCI))] ++ UNSPECV_HTM_TABORTXCI)) ++ (set (match_operand:BLK 4) (unspec:BLK [(match_dup 4)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "tabortci. 
%0,%1,%2" + [(set_attr "type" "htm") + (set_attr "length" "4")]) + +-(define_insn "tbegin" ++(define_expand "tbegin" ++ [(parallel ++ [(set (match_operand:CC 1 "cc_reg_operand" "=x") ++ (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] ++ UNSPECV_HTM_TBEGIN)) ++ (set (match_dup 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))])] ++ "TARGET_HTM" ++{ ++ operands[2] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); ++ MEM_VOLATILE_P (operands[2]) = 1; ++}) ++ ++(define_insn "*tbegin" + [(set (match_operand:CC 1 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] +- UNSPECV_HTM_TBEGIN))] ++ UNSPECV_HTM_TBEGIN)) ++ (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "tbegin. %0" + [(set_attr "type" "htm") + (set_attr "length" "4")]) + +-(define_insn "tcheck" ++(define_expand "tcheck" ++ [(parallel ++ [(set (match_operand:CC 0 "cc_reg_operand" "=y") ++ (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TCHECK)) ++ (set (match_dup 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))])] ++ "TARGET_HTM" ++{ ++ operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); ++ MEM_VOLATILE_P (operands[1]) = 1; ++}) ++ ++(define_insn "*tcheck" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") +- (unspec_volatile:CC [(const_int 0)] +- UNSPECV_HTM_TCHECK))] ++ (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TCHECK)) ++ (set (match_operand:BLK 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "tcheck %0" + [(set_attr "type" "htm") + (set_attr "length" "4")]) + +-(define_insn "tend" ++(define_expand "tend" ++ [(parallel ++ [(set (match_operand:CC 1 "cc_reg_operand" "=x") ++ (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] ++ UNSPECV_HTM_TEND)) ++ (set (match_dup 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))])] ++ "TARGET_HTM" ++{ ++ operands[2] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); ++ MEM_VOLATILE_P (operands[2]) = 1; ++}) ++ ++(define_insn "*tend" + [(set (match_operand:CC 1 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] +- UNSPECV_HTM_TEND))] ++ UNSPECV_HTM_TEND)) ++ (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "tend. %0" + [(set_attr "type" "htm") + (set_attr "length" "4")]) + +-(define_insn "trechkpt" ++(define_expand "trechkpt" ++ [(parallel ++ [(set (match_operand:CC 0 "cc_reg_operand" "=x") ++ (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TRECHKPT)) ++ (set (match_dup 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))])] ++ "TARGET_HTM" ++{ ++ operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); ++ MEM_VOLATILE_P (operands[1]) = 1; ++}) ++ ++(define_insn "*trechkpt" + [(set (match_operand:CC 0 "cc_reg_operand" "=x") +- (unspec_volatile:CC [(const_int 0)] +- UNSPECV_HTM_TRECHKPT))] ++ (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TRECHKPT)) ++ (set (match_operand:BLK 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "trechkpt." 
+ [(set_attr "type" "htm") + (set_attr "length" "4")]) + +-(define_insn "treclaim" ++(define_expand "treclaim" ++ [(parallel ++ [(set (match_operand:CC 1 "cc_reg_operand" "=x") ++ (unspec_volatile:CC [(match_operand:SI 0 "gpc_reg_operand" "r")] ++ UNSPECV_HTM_TRECLAIM)) ++ (set (match_dup 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))])] ++ "TARGET_HTM" ++{ ++ operands[2] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); ++ MEM_VOLATILE_P (operands[2]) = 1; ++}) ++ ++(define_insn "*treclaim" + [(set (match_operand:CC 1 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand:SI 0 "gpc_reg_operand" "r")] +- UNSPECV_HTM_TRECLAIM))] ++ UNSPECV_HTM_TRECLAIM)) ++ (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "treclaim. %0" + [(set_attr "type" "htm") + (set_attr "length" "4")]) + +-(define_insn "tsr" ++(define_expand "tsr" ++ [(parallel ++ [(set (match_operand:CC 1 "cc_reg_operand" "=x") ++ (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] ++ UNSPECV_HTM_TSR)) ++ (set (match_dup 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))])] ++ "TARGET_HTM" ++{ ++ operands[2] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); ++ MEM_VOLATILE_P (operands[2]) = 1; ++}) ++ ++(define_insn "*tsr" + [(set (match_operand:CC 1 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] +- UNSPECV_HTM_TSR))] ++ UNSPECV_HTM_TSR)) ++ (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "tsr. %0" + [(set_attr "type" "htm") + (set_attr "length" "4")]) + +-(define_insn "ttest" ++(define_expand "ttest" ++ [(parallel ++ [(set (match_operand:CC 0 "cc_reg_operand" "=x") ++ (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TTEST)) ++ (set (match_dup 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))])] ++ "TARGET_HTM" ++{ ++ operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); ++ MEM_VOLATILE_P (operands[1]) = 1; ++}) ++ ++(define_insn "*ttest" + [(set (match_operand:CC 0 "cc_reg_operand" "=x") +- (unspec_volatile:CC [(const_int 0)] +- UNSPECV_HTM_TTEST))] ++ (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TTEST)) ++ (set (match_operand:BLK 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "tabortwci. 0,1,0" + [(set_attr "type" "htm") +--- gcc/config/rs6000/rs6000-c.c (revision 228826) ++++ gcc/config/rs6000/rs6000-c.c (revision 228827) +@@ -372,7 +372,11 @@ rs6000_target_modify_macros (bool define + if ((flags & OPTION_MASK_VSX) != 0) + rs6000_define_or_undefine_macro (define_p, "__VSX__"); + if ((flags & OPTION_MASK_HTM) != 0) +- rs6000_define_or_undefine_macro (define_p, "__HTM__"); ++ { ++ rs6000_define_or_undefine_macro (define_p, "__HTM__"); ++ /* Tell the user that our HTM insn patterns act as memory barriers. 
*/ ++ rs6000_define_or_undefine_macro (define_p, "__TM_FENCE__"); ++ } + if ((flags & OPTION_MASK_P8_VECTOR) != 0) + rs6000_define_or_undefine_macro (define_p, "__POWER8_VECTOR__"); + if ((flags & OPTION_MASK_QUAD_MEMORY) != 0) +--- gcc/testsuite/gcc.target/powerpc/htm-tabort-no-r0.c (revision 0) ++++ gcc/testsuite/gcc.target/powerpc/htm-tabort-no-r0.c (revision 226532) +@@ -0,0 +1,12 @@ ++/* { dg-do compile { target { powerpc*-*-* } } } */ ++/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ ++/* { dg-require-effective-target powerpc_htm_ok } */ ++/* { dg-options "-O2 -mhtm -ffixed-r3 -ffixed-r4 -ffixed-r5 -ffixed-r6 -ffixed-r7 -ffixed-r8 -ffixed-r9 -ffixed-r10 -ffixed-r11 -ffixed-r12" } */ ++ ++/* { dg-final { scan-assembler-not "tabort\\.\[ \t\]0" } } */ ++ ++int ++foo (void) ++{ ++ return __builtin_tabort (10); ++} diff --git a/gcc48-pr68184.patch b/gcc48-pr68184.patch new file mode 100644 index 0000000..445aefd --- /dev/null +++ b/gcc48-pr68184.patch @@ -0,0 +1,59 @@ +2017-02-28 Jakub Jelinek + + Backport from mainline + 2015-12-02 Jan Hubicka + + PR ipa/68184 + * cgraphunit.c (cgraph_node::analyze): Set can_throw_external. + + * g++.dg/torture/pr68184.C: New testcase. + +--- gcc/cgraphunit.c.jj 2014-09-10 09:15:51.000000000 +0200 ++++ gcc/cgraphunit.c 2017-02-28 08:24:44.387385510 +0100 +@@ -626,8 +626,10 @@ cgraph_analyze_function (struct cgraph_n + } + else if (node->thunk.thunk_p) + { +- cgraph_create_edge (node, cgraph_get_node (node->thunk.alias), +- NULL, 0, CGRAPH_FREQ_BASE); ++ struct cgraph_node *t = cgraph_get_node (node->thunk.alias); ++ cgraph_create_edge (node, t, NULL, 0, ++ CGRAPH_FREQ_BASE)->can_throw_external ++ = !TREE_NOTHROW (t->symbol.decl); + } + else if (node->dispatcher_function) + { +--- gcc/testsuite/g++.dg/torture/pr68184.C.jj 2017-02-28 08:26:09.205246069 +0100 ++++ gcc/testsuite/g++.dg/torture/pr68184.C 2015-12-03 16:39:34.589010321 +0100 +@@ -0,0 +1,31 @@ ++// { dg-do run } ++namespace { ++struct IFoo { virtual void foo() = 0; }; ++struct IBar { virtual void bar() = 0; }; ++ ++struct FooBar : private IBar, private IFoo ++{ ++ void call_foo() ++ { ++ try ++ { ++ static_cast<IFoo*>(this)->foo(); ++ } ++ catch( ... ) {} ++ } ++ void foo() { throw 1; } ++ void bar() {} ++}; ++ ++void test() ++{ ++ FooBar foobar; ++ foobar.call_foo(); ++} ++} ++int main() ++{ ++ test(); ++ return 0; ++} ++ diff --git a/gcc48-pr68680.patch b/gcc48-pr68680.patch new file mode 100644 index 0000000..59f6ffe --- /dev/null +++ b/gcc48-pr68680.patch @@ -0,0 +1,46 @@ +2015-12-04 Jakub Jelinek + + PR tree-optimization/68680 + * calls.c (special_function_p): Return ECF_MAY_BE_ALLOCA for + BUILT_IN_ALLOCA{,_WITH_ALIGN}. + + * gcc.target/i386/pr68680.c: New test. + +--- gcc/calls.c (revision 231278) ++++ gcc/calls.c (revision 231279) +@@ -564,6 +564,17 @@ special_function_p (const_tree fndecl, i + flags |= ECF_NORETURN; + } + ++ if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL) ++ switch (DECL_FUNCTION_CODE (fndecl)) ++ { ++ case BUILT_IN_ALLOCA: ++ case BUILT_IN_ALLOCA_WITH_ALIGN: ++ flags |= ECF_MAY_BE_ALLOCA; ++ break; ++ default: ++ break; ++ } ++ + return flags; + } + +--- gcc/testsuite/gcc.target/i386/pr68680.c (revision 0) ++++ gcc/testsuite/gcc.target/i386/pr68680.c (revision 231279) +@@ -0,0 +1,15 @@ ++/* PR tree-optimization/68680 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-protector-strong" } */ ++ ++int foo (char *); ++ ++int ++bar (unsigned long x) ++{ ++ char a[x]; ++ return foo (a); ++} ++ ++/* Verify that this function is stack protected. 
*/ ++/* { dg-final { scan-assembler "stack_chk_fail" } } */ diff --git a/gcc48-pr69116.patch b/gcc48-pr69116.patch new file mode 100644 index 0000000..9b93c79 --- /dev/null +++ b/gcc48-pr69116.patch @@ -0,0 +1,92 @@ +2016-02-10 Jonathan Wakely + + PR libstdc++/69116 + * include/bits/valarray_before.h (__fun, __fun_with_valarray): Only + define result_type for types which can be safely used with valarrays. + * testsuite/26_numerics/valarray/69116.cc: New. + +--- libstdc++-v3/include/bits/valarray_before.h (revision 233264) ++++ libstdc++-v3/include/bits/valarray_before.h (revision 233265) +@@ -331,14 +331,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION + { return pow(__x, __y); } + }; + ++ template<typename _Tp, bool _IsValidValarrayValue = !__is_abstract(_Tp)> ++ struct __fun_with_valarray ++ { ++ typedef _Tp result_type; ++ }; ++ ++ template<typename _Tp> ++ struct __fun_with_valarray<_Tp, false> ++ { ++ // No result type defined for invalid value types. ++ }; + + // We need these bits in order to recover the return type of + // some functions/operators now that we're no longer using + // function templates. + template<typename _Tp> +- struct __fun ++ struct __fun : __fun_with_valarray<_Tp> + { +- typedef _Tp result_type; + }; + + // several specializations for relational operators. +--- libstdc++-v3/testsuite/26_numerics/valarray/69116.cc (nonexistent) ++++ libstdc++-v3/testsuite/26_numerics/valarray/69116.cc (revision 233265) +@@ -0,0 +1,53 @@ ++// Copyright (C) 2016 Free Software Foundation, Inc. ++// ++// This file is part of the GNU ISO C++ Library. This library is free ++// software; you can redistribute it and/or modify it under the ++// terms of the GNU General Public License as published by the ++// Free Software Foundation; either version 3, or (at your option) ++// any later version. ++ ++// This library is distributed in the hope that it will be useful, ++// but WITHOUT ANY WARRANTY; without even the implied warranty of ++// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++// GNU General Public License for more details. ++ ++// You should have received a copy of the GNU General Public License along ++// with this library; see the file COPYING3. If not see ++// <http://www.gnu.org/licenses/>. ++ ++// { dg-do compile } ++// { dg-options "-std=gnu++98" } ++ ++// libstdc++/69116 ++ ++#include <valarray> ++#include <exception> ++ ++template<typename T> ++ void foo(const T&) { } ++ ++struct X : std::exception // makes namespace std an associated namespace ++{ ++ virtual void pure() = 0; ++ ++ typedef void(*func_type)(const X&); ++ ++ void operator+(func_type) const; ++ void operator-(func_type) const; ++ void operator*(func_type) const; ++ void operator/(func_type) const; ++ void operator%(func_type) const; ++ void operator<<(func_type) const; ++ void operator>>(func_type) const; ++}; ++ ++void foo(X& x) ++{ ++ x + foo; ++ x - foo; ++ x * foo; ++ x / foo; ++ x % foo; ++ x << foo; ++ x >> foo; ++} diff --git a/gcc48-pr69644.patch b/gcc48-pr69644.patch new file mode 100644 index 0000000..d6731e5 --- /dev/null +++ b/gcc48-pr69644.patch @@ -0,0 +1,51 @@ +2016-02-04 Jakub Jelinek + + Backported from mainline + 2016-02-03 Jakub Jelinek + + PR target/69644 + * config/rs6000/rs6000.c (rs6000_expand_atomic_compare_and_swap): + Force oldval into register if it does not satisfy reg_or_short_operand + predicate. Fix up formatting. + + * gcc.dg/pr69644.c: New test. 
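+
+[Editorial note, not part of the upstream ChangeLog: reg_or_short_operand only
+accepts a register or a constant that fits in a signed 16-bit immediate, and
+0x8000 does not fit, so oldval has to be copied into a register before the
+compare-and-swap is expanded.  A minimal C trigger, the same shape as the new
+test below:
+
+  unsigned short x = 0x8000;             /* illustrative only */
+  if (!__sync_bool_compare_and_swap (&x, 0x8000, 0) || x)
+    __builtin_abort ();
+]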
+ +--- gcc/config/rs6000/rs6000.c ++++ gcc/config/rs6000/rs6000.c +@@ -20263,6 +20263,9 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[]) + else if (reg_overlap_mentioned_p (retval, oldval)) + oldval = copy_to_reg (oldval); + ++ if (mode != TImode && !reg_or_short_operand (oldval, mode)) ++ oldval = copy_to_mode_reg (mode, oldval); ++ + mem = rs6000_pre_atomic_barrier (mem, mod_s); + + label1 = NULL_RTX; +@@ -20277,10 +20280,8 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[]) + + x = retval; + if (mask) +- { +- x = expand_simple_binop (SImode, AND, retval, mask, +- NULL_RTX, 1, OPTAB_LIB_WIDEN); +- } ++ x = expand_simple_binop (SImode, AND, retval, mask, ++ NULL_RTX, 1, OPTAB_LIB_WIDEN); + + cond = gen_reg_rtx (CCmode); + /* If we have TImode, synthesize a comparison. */ +--- /dev/null ++++ gcc/testsuite/gcc.dg/pr69644.c +@@ -0,0 +1,11 @@ ++/* PR target/69644 */ ++/* { dg-do compile } */ ++ ++int ++main () ++{ ++ unsigned short x = 0x8000; ++ if (!__sync_bool_compare_and_swap (&x, 0x8000, 0) || x) ++ __builtin_abort (); ++ return 0; ++} diff --git a/gcc48-pr70549.patch b/gcc48-pr70549.patch new file mode 100644 index 0000000..429893f --- /dev/null +++ b/gcc48-pr70549.patch @@ -0,0 +1,61 @@ +2017-03-08 Bernd Schmidt + + PR target/70549 + * config/aarch64/aarch64.c (aarch64_secondary_reload): Reload + CORE_REGS rclass constants in [SD]Fmode through FP_REGS. + + * g++.dg/opt/pr70549.C: New test. + +--- gcc/config/aarch64/aarch64.c.jj 2017-03-08 15:50:55.000000000 +0100 ++++ gcc/config/aarch64/aarch64.c 2017-03-08 16:01:15.426080172 +0100 +@@ -3846,8 +3846,13 @@ aarch64_secondary_reload (bool in_p ATTR + && GET_MODE_SIZE (mode) == 16 && MEM_P (x)) + return FP_REGS; + ++ if (rclass == CORE_REGS ++ && (mode == SFmode || mode == DFmode) ++ && CONSTANT_P (x)) ++ return FP_REGS; ++ + if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x)) +- return CORE_REGS; ++ return CORE_REGS; + + return NO_REGS; + } +--- gcc/testsuite/g++.dg/opt/pr70549.C.jj 2017-03-08 16:02:45.104918249 +0100 ++++ gcc/testsuite/g++.dg/opt/pr70549.C 2017-03-08 16:02:14.000000000 +0100 +@@ -0,0 +1,33 @@ ++// PR target/70549 ++// { dg-do compile } ++// { dg-options "-O2" } ++// { dg-additional-options "-fPIC" { target fpic } } ++ ++struct A { float x; float y; }; ++A a, b, c; ++int d, e; ++A bar (); ++void foo (A, A); ++inline A operator/ (A, A p2) { if (p2.x) return a; } ++struct B { A dval; }; ++int baz (A, B, A, int); ++ ++void ++test () ++{ ++ B q; ++ A f, g, h, k; ++ h.x = 1.0; ++ f = h; ++ struct A i, j = f; ++ do { ++ i = bar (); ++ g = i / j; ++ foo (g, c); ++ int l = baz (k, q, b, e); ++ if (l) ++ goto cleanup; ++ j = i; ++ } while (d); ++cleanup:; ++} diff --git a/gcc48-pr72717.patch b/gcc48-pr72717.patch new file mode 100644 index 0000000..0aa4d2e --- /dev/null +++ b/gcc48-pr72717.patch @@ -0,0 +1,87 @@ +2016-12-13 Michael Meissner + + Backport from mainline + 2016-12-07 Michael Meissner + + PR target/72717 + * config/rs6000/rs6000.c (rs6000_expand_vector_init): If the + V2DImode elements are SUBREG's convert the result into DImode + rather than failing in emit_move_insn. 
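+
+[Editorial note, not part of the upstream ChangeLog: initializing a V2DImode
+vector from elements that are SUBREGs of a narrower mode made emit_move_insn
+fail; the fix converts such elements to DImode first.  A minimal C trigger,
+the same shape as the new test below:
+
+  typedef long V __attribute__ ((__vector_size__ (32)));
+  void shift (V *p, V *q) { V v = *q; *p = v << v[0]; }  /* illustrative only */
+]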
+ +--- gcc/testsuite/gcc.target/powerpc/pr72717.c (nonexistent) ++++ gcc/testsuite/gcc.target/powerpc/pr72717.c (revision 243626) +@@ -0,0 +1,18 @@ ++/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ ++/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ ++/* { dg-require-effective-target powerpc_p8vector_ok } */ ++/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ ++/* { dg-options "-mcpu=power8 -O2" } */ ++ ++typedef long V __attribute__((__vector_size__(32))); ++ ++extern void foo (V *, V*); ++ ++/* This test generated a failure in emit_move_insn. */ ++ ++void ++foo(V *p, V *q) ++{ ++ V v = *q; ++ *p = v << v[0]; ++} +--- gcc/config/rs6000/rs6000.c (revision 243625) ++++ gcc/config/rs6000/rs6000.c (revision 243626) +@@ -6667,25 +6667,43 @@ + /* Double word values on VSX can use xxpermdi or lxvdsx. */ + if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode)) + { +- rtx op0 = XVECEXP (vals, 0, 0); +- rtx op1 = XVECEXP (vals, 0, 1); ++ rtx op[2]; ++ size_t i; ++ size_t num_elements = (all_same) ? 1 : 2; ++ for (i = 0; i < num_elements; i++) ++ { ++ op[i] = XVECEXP (vals, 0, i); ++ /* Just in case there is a SUBREG with a smaller mode, do a ++ conversion. */ ++ if (GET_MODE (op[i]) != inner_mode) ++ { ++ rtx tmp = gen_reg_rtx (inner_mode); ++ convert_move (tmp, op[i], 0); ++ op[i] = tmp; ++ } ++ /* Allow load with splat double word. */ ++ else if (MEM_P (op[i])) ++ { ++ if (!all_same) ++ op[i] = force_reg (inner_mode, op[i]); ++ } ++ else if (!REG_P (op[i])) ++ op[i] = force_reg (inner_mode, op[i]); ++ } ++ + if (all_same) + { +- if (!MEM_P (op0) && !REG_P (op0)) +- op0 = force_reg (inner_mode, op0); + if (mode == V2DFmode) +- emit_insn (gen_vsx_splat_v2df (target, op0)); ++ emit_insn (gen_vsx_splat_v2df (target, op[0])); + else +- emit_insn (gen_vsx_splat_v2di (target, op0)); ++ emit_insn (gen_vsx_splat_v2di (target, op[0])); + } + else + { +- op0 = force_reg (inner_mode, op0); +- op1 = force_reg (inner_mode, op1); + if (mode == V2DFmode) +- emit_insn (gen_vsx_concat_v2df (target, op0, op1)); ++ emit_insn (gen_vsx_concat_v2df (target, op[0], op[1])); + else +- emit_insn (gen_vsx_concat_v2di (target, op0, op1)); ++ emit_insn (gen_vsx_concat_v2di (target, op[0], op[1])); + } + return; + } diff --git a/gcc48-pr72747.patch b/gcc48-pr72747.patch new file mode 100644 index 0000000..4216ada --- /dev/null +++ b/gcc48-pr72747.patch @@ -0,0 +1,90 @@ +2016-11-02 Will Schmidt + + Backport from trunk + 2016-10-26 Will Schmidt + + PR middle-end/72747 + * gimplify.c (gimplify_init_constructor): Move emit of constructor + assignment to earlier in the if/else logic. + + * c-c++-common/pr72747-1.c: New test. + * c-c++-common/pr72747-2.c: Likewise. + +--- gcc/gimplify.c (revision 241792) ++++ gcc/gimplify.c (revision 241793) +@@ -4273,24 +4273,23 @@ gimplify_init_constructor (tree *expr_p, + + if (ret == GS_ERROR) + return GS_ERROR; +- else if (want_value) ++ /* If we have gimplified both sides of the initializer but have ++ not emitted an assignment, do so now. 
*/ ++ if (*expr_p) ++ { ++ tree lhs = TREE_OPERAND (*expr_p, 0); ++ tree rhs = TREE_OPERAND (*expr_p, 1); ++ gimple init = gimple_build_assign (lhs, rhs); ++ gimplify_seq_add_stmt (pre_p, init); ++ } ++ if (want_value) + { + *expr_p = object; + return GS_OK; + } + else + { +- /* If we have gimplified both sides of the initializer but have +- not emitted an assignment, do so now. */ +- if (*expr_p) +- { +- tree lhs = TREE_OPERAND (*expr_p, 0); +- tree rhs = TREE_OPERAND (*expr_p, 1); +- gimple init = gimple_build_assign (lhs, rhs); +- gimplify_seq_add_stmt (pre_p, init); +- *expr_p = NULL; +- } +- ++ *expr_p = NULL; + return GS_ALL_DONE; + } + } +--- gcc/testsuite/c-c++-common/pr72747-1.c (nonexistent) ++++ gcc/testsuite/c-c++-common/pr72747-1.c (revision 241793) +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target powerpc_altivec_ok } */ ++/* { dg-options "-maltivec -fdump-tree-gimple" } */ ++ ++/* PR 72747: Test that cascaded definition is happening for constant vectors. */ ++ ++#include <altivec.h> ++ ++int main (int argc, char *argv[]) ++{ ++ __vector int v1,v2; ++ v1 = v2 = vec_splats ((int) 42); ++ return 0; ++} ++/* { dg-final { scan-tree-dump-times " v2 = { 42, 42, 42, 42 }" 1 "gimple" } } */ ++ +--- gcc/testsuite/c-c++-common/pr72747-2.c (nonexistent) ++++ gcc/testsuite/c-c++-common/pr72747-2.c (revision 241793) +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target powerpc_altivec_ok } */ ++/* { dg-options "-c -maltivec -fdump-tree-gimple" } */ ++ ++/* PR 72747: test that cascaded definition is happening for non constants. */ ++ ++void foo () ++{ ++ extern int i; ++ __vector int v,w; ++ v = w = (vector int) { i }; ++} ++ ++int main (int argc, char *argv[]) ++{ ++ return 0; ++} ++/* { dg-final { scan-tree-dump-times " w = " 1 "gimple" } } */ diff --git a/gcc48-pr72863.patch b/gcc48-pr72863.patch new file mode 100644 index 0000000..87547f7 --- /dev/null +++ b/gcc48-pr72863.patch @@ -0,0 +1,73 @@ +2016-08-25 Bill Schmidt + + Backport from mainline (minus test for POWER9 support) + 2016-08-11 Bill Schmidt + + PR target/72863 + * vsx.md (vsx_load_<mode>): For P8LE, emit swaps at expand time. + (vsx_store_<mode>): Likewise. + + * gcc.target/powerpc/pr72863.c: New test. + +--- gcc/config/rs6000/vsx.md (revision 239761) ++++ gcc/config/rs6000/vsx.md (revision 239762) +@@ -716,13 +716,27 @@ (define_expand "vsx_load_<mode>" + [(set (match_operand:VSX_M 0 "vsx_register_operand" "") + (match_operand:VSX_M 1 "memory_operand" ""))] + "VECTOR_MEM_VSX_P (<MODE>mode)" +- "") ++{ ++ /* Expand to swaps if needed, prior to swap optimization. */ ++ if (!BYTES_BIG_ENDIAN) ++ { ++ rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode); ++ DONE; ++ } ++}) + + (define_expand "vsx_store_<mode>" + [(set (match_operand:VSX_M 0 "memory_operand" "") + (match_operand:VSX_M 1 "vsx_register_operand" ""))] + "VECTOR_MEM_VSX_P (<MODE>mode)" +- "") ++{ ++ /* Expand to swaps if needed, prior to swap optimization. */ ++ if (!BYTES_BIG_ENDIAN) ++ { ++ rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode); ++ DONE; ++ } ++}) + + + ;; VSX vector floating point arithmetic instructions. 
The VSX scalar +--- gcc/testsuite/gcc.target/powerpc/pr72863.c (nonexistent) ++++ gcc/testsuite/gcc.target/powerpc/pr72863.c (revision 239762) +@@ -0,0 +1,27 @@ ++/* { dg-do compile { target { powerpc64le-*-* } } } */ ++/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ ++/* { dg-options "-mcpu=power8 -O3" } */ ++/* { dg-final { scan-assembler "lxvd2x" } } */ ++/* { dg-final { scan-assembler "stxvd2x" } } */ ++/* { dg-final { scan-assembler-not "xxpermdi" } } */ ++ ++#include <altivec.h> ++ ++extern unsigned char *src, *dst; ++ ++void b(void) ++{ ++ int i; ++ ++ unsigned char *s8 = src; ++ unsigned char *d8 = dst; ++ ++ for (i = 0; i < 100; i++) { ++ vector unsigned char vs = vec_vsx_ld(0, s8); ++ vector unsigned char vd = vec_vsx_ld(0, d8); ++ vector unsigned char vr = vec_xor(vs, vd); ++ vec_vsx_st(vr, 0, d8); ++ s8 += 16; ++ d8 += 16; ++ } ++} diff --git a/gcc48-pr77375.patch b/gcc48-pr77375.patch new file mode 100644 index 0000000..09158e2 --- /dev/null +++ b/gcc48-pr77375.patch @@ -0,0 +1,37 @@ +2017-05-30 Jakub Jelinek + + Backported from mainline + 2016-09-16 Jakub Jelinek + + PR c++/77375 + * class.c (check_bases): Set CLASSTYPE_HAS_MUTABLE if any + TYPE_HAS_MUTABLE_P for any bases. + + * g++.dg/cpp0x/mutable1.C: New test. + +--- gcc/cp/class.c 2017-10-17 17:27:32.287980595 +0200 ++++ gcc/cp/class.c 2017-10-17 17:29:11.104213281 +0200 +@@ -1479,6 +1479,8 @@ check_bases (tree t, + |= CLASSTYPE_CONTAINS_EMPTY_CLASS_P (basetype); + TYPE_HAS_COMPLEX_DFLT (t) |= (!TYPE_HAS_DEFAULT_CONSTRUCTOR (basetype) + || TYPE_HAS_COMPLEX_DFLT (basetype)); ++ if (TYPE_HAS_MUTABLE_P (basetype)) ++ CLASSTYPE_HAS_MUTABLE (t) = 1; + + /* A standard-layout class is a class that: + ... +--- /dev/null ++++ gcc/testsuite/g++.dg/cpp0x/mutable1.C +@@ -0,0 +1,12 @@ ++// PR c++/77375 ++// { dg-do run { target c++11 } } ++ ++struct Base { mutable int i; }; ++struct Derived : Base {}; ++const Derived foo{}; ++ ++int ++main () ++{ ++ foo.i = 42; ++} diff --git a/gcc48-pr77767.patch b/gcc48-pr77767.patch new file mode 100644 index 0000000..ba80ac1 --- /dev/null +++ b/gcc48-pr77767.patch @@ -0,0 +1,56 @@ +2017-05-30 Jakub Jelinek + + Backported from mainline + 2016-12-21 Jakub Jelinek + + PR c/77767 + * c-decl.c (grokdeclarator): If *expr is non-NULL, append expression + to *expr instead of overwriting it. + + * gcc.c-torture/execute/pr77767.c: New test. 
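+
+[Editorial note, not part of the upstream ChangeLog: the overwrite loses side
+effects when more than one declarator carries an expression, e.g. two
+variably modified parameter types; appending with a COMPOUND_EXPR keeps both.
+A minimal C trigger, the same shape as the new test below, in which both a++
+and c++ must be evaluated:
+
+  void foo (int a, int b[a++], int c, int d[c++]);  /* illustrative only */
+]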
+ +--- gcc/c/c-decl.c ++++ gcc/c/c-decl.c +@@ -5409,11 +5409,21 @@ grokdeclarator (const struct c_declarator *declarator, + if (TREE_CODE (type) == ERROR_MARK) + return error_mark_node; + if (expr == NULL) +- expr = &expr_dummy; ++ { ++ expr = &expr_dummy; ++ expr_dummy = NULL_TREE; ++ } + if (expr_const_operands == NULL) + expr_const_operands = &expr_const_operands_dummy; + +- *expr = declspecs->expr; ++ if (declspecs->expr) ++ { ++ if (*expr) ++ *expr = build2 (COMPOUND_EXPR, TREE_TYPE (declspecs->expr), *expr, ++ declspecs->expr); ++ else ++ *expr = declspecs->expr; ++ } + *expr_const_operands = declspecs->expr_const_operands; + + if (decl_context == FUNCDEF) +--- /dev/null ++++ gcc/testsuite/gcc.c-torture/execute/pr77767.c +@@ -0,0 +1,16 @@ ++/* PR c/77767 */ ++ ++void ++foo (int a, int b[a++], int c, int d[c++]) ++{ ++ if (a != 2 || c != 2) ++ __builtin_abort (); ++} ++ ++int ++main () ++{ ++ int e[10]; ++ foo (1, e, 1, e); ++ return 0; ++} diff --git a/gcc48-pr78064.patch b/gcc48-pr78064.patch new file mode 100644 index 0000000..0773493 --- /dev/null +++ b/gcc48-pr78064.patch @@ -0,0 +1,15 @@ +2016-10-24 Florian Weimer + + PR libgcc/78064 + * unwind-c.c: Include auto-target.h. + +--- libgcc/unwind-c.c (revision 241490) ++++ libgcc/unwind-c.c (revision 241491) +@@ -26,6 +26,7 @@ see the files COPYING3 and COPYING.RUNTI + + #include "tconfig.h" + #include "tsystem.h" ++#include "auto-target.h" + #include "unwind.h" + #define NO_SIZE_OF_ENCODED_VALUE + #include "unwind-pe.h" diff --git a/gcc48-pr78378.patch b/gcc48-pr78378.patch new file mode 100644 index 0000000..bc78b81 --- /dev/null +++ b/gcc48-pr78378.patch @@ -0,0 +1,43 @@ +2017-05-30 Jakub Jelinek + + Backported from mainline + 2016-11-16 Jakub Jelinek + + PR rtl-optimization/78378 + * combine.c (make_extraction): Use force_to_mode for non-{REG,MEM} + inner only if pos is 0. + + * gcc.c-torture/execute/pr78378.c: New test. + +--- gcc/combine.c ++++ gcc/combine.c +@@ -7342,6 +7342,7 @@ make_extraction (machine_mode mode, rtx inner, HOST_WIDE_INT pos, + if (tmode != BLKmode + && ((pos_rtx == 0 && (pos % BITS_PER_WORD) == 0 + && !MEM_P (inner) ++ && (pos == 0 || REG_P (inner)) + && (inner_mode == tmode + || !REG_P (inner) + || TRULY_NOOP_TRUNCATION_MODES_P (tmode, inner_mode) +--- /dev/null ++++ gcc/testsuite/gcc.c-torture/execute/pr78378.c +@@ -0,0 +1,18 @@ ++/* PR rtl-optimization/78378 */ ++ ++unsigned long long __attribute__ ((noinline, noclone)) ++foo (unsigned long long x) ++{ ++ x <<= 41; ++ x /= 232; ++ return 1 + (unsigned short) x; ++} ++ ++int ++main () ++{ ++ unsigned long long x = foo (1); ++ if (x != 0x2c24) ++ __builtin_abort(); ++ return 0; ++} + diff --git a/gcc48-pr78416.patch b/gcc48-pr78416.patch new file mode 100644 index 0000000..76e6e93 --- /dev/null +++ b/gcc48-pr78416.patch @@ -0,0 +1,108 @@ +2016-11-18 Jakub Jelinek + + PR middle-end/78416 + * expmed.c (expand_divmod): For modes wider than HWI, take into + account implicit 1 bits above msb for EXACT_POWER_OF_2_OR_ZERO_P. + + * gcc.dg/torture/pr78416.c: New test. + +--- gcc/expmed.c ++++ gcc/expmed.c +@@ -3844,7 +3844,15 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, + if (unsignedp) + ext_op1 &= GET_MODE_MASK (mode); + op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1) +- || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1)))); ++ /* If mode is wider than HWI and op1 has msb set, ++ then it has extra implicit 1 bits above it. 
*/ ++ && (GET_MODE_PRECISION (mode) <= HOST_BITS_PER_WIDE_INT ++ || INTVAL (op1) >= 0)) ++ || (! unsignedp ++ && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1) ++ && (GET_MODE_PRECISION (mode) ++ <= HOST_BITS_PER_WIDE_INT ++ || INTVAL (op1) < 0))); + } + + /* +@@ -3987,8 +3995,17 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, + op1_is_constant = CONST_INT_P (op1); + op1_is_pow2 = (op1_is_constant + && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) +- || (! unsignedp +- && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1)))))); ++ /* If mode is wider than HWI and op1 has msb set, ++ then it has extra implicit 1 bits above ++ it. */ ++ && (GET_MODE_PRECISION (compute_mode) ++ <= HOST_BITS_PER_WIDE_INT ++ || INTVAL (op1) >= 0)) ++ || (! unsignedp ++ && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1)) ++ && (GET_MODE_PRECISION (compute_mode) ++ <= HOST_BITS_PER_WIDE_INT ++ || INTVAL (op1) < 0)))); + } + + /* If one of the operands is a volatile MEM, copy it into a register. */ +@@ -4031,7 +4048,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, + unsigned HOST_WIDE_INT d = (INTVAL (op1) + & GET_MODE_MASK (compute_mode)); + +- if (EXACT_POWER_OF_2_OR_ZERO_P (d)) ++ if (EXACT_POWER_OF_2_OR_ZERO_P (d) ++ && (INTVAL (op1) >= 0 || size <= HOST_BITS_PER_WIDE_INT)) + { + pre_shift = floor_log2 (d); + if (rem_flag) +@@ -4179,6 +4197,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, + goto fail1; + } + else if (EXACT_POWER_OF_2_OR_ZERO_P (d) ++ && (size <= HOST_BITS_PER_WIDE_INT || d >= 0) + && (rem_flag + ? smod_pow2_cheap (speed, compute_mode) + : sdiv_pow2_cheap (speed, compute_mode)) +@@ -4192,7 +4211,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, + compute_mode) + != CODE_FOR_nothing))) + ; +- else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)) ++ else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d) ++ && (size <= HOST_BITS_PER_WIDE_INT ++ || abs_d != (unsigned HOST_WIDE_INT) d)) + { + if (rem_flag) + { +@@ -4504,7 +4525,10 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, + case CEIL_MOD_EXPR: + if (unsignedp) + { +- if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))) ++ if (op1_is_constant ++ && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) ++ && (size <= HOST_BITS_PER_WIDE_INT ++ || INTVAL (op1) >= 0)) + { + rtx t1, t2, t3; + unsigned HOST_WIDE_INT d = INTVAL (op1); +--- gcc/testsuite/gcc.dg/torture/pr78416.c ++++ gcc/testsuite/gcc.dg/torture/pr78416.c +@@ -0,0 +1,17 @@ ++/* PR middle-end/78416 */ ++/* { dg-do run { target int128 } } */ ++ ++int ++main () ++{ ++ unsigned __int128 x; ++ x = 0xFFFFFFFFFFFFFFFFULL; ++ x /= ~0x7FFFFFFFFFFFFFFFLL; ++ if (x != 0) ++ __builtin_abort (); ++ x = ~0x7FFFFFFFFFFFFFFELL; ++ x /= ~0x7FFFFFFFFFFFFFFFLL; ++ if (x != 1) ++ __builtin_abort (); ++ return 0; ++} diff --git a/gcc48-pr78796.patch b/gcc48-pr78796.patch new file mode 100644 index 0000000..6afd47a --- /dev/null +++ b/gcc48-pr78796.patch @@ -0,0 +1,92 @@ +2016-12-14 Wilco Dijkstra + Jakub Jelinek + + PR target/78796 + * config/aarch64/aarch64.c (aarch64_classify_symbol): Merge large + model checks into switch. + + * gcc.dg/tls/pr78796.c: New test. 
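+
+[Editorial note, not part of the upstream ChangeLog: with -mcmodel=large,
+aarch64_classify_symbol used to return SYMBOL_FORCE_TO_MEM before the TLS
+check could run, so __thread variables were mishandled under the large code
+model.  A minimal C trigger, the same shape as the new test below (compile
+with -O2 -mcmodel=large):
+
+  __thread int s;                        /* illustrative only */
+  int get_s (void) { return s; }
+]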
+ +--- gcc/config/aarch64/aarch64.c (revision 243645) ++++ gcc/config/aarch64/aarch64.c (revision 243646) +@@ -4986,6 +4986,9 @@ aarch64_classify_symbol (rtx x, + switch (aarch64_cmodel) + { + case AARCH64_CMODEL_LARGE: ++ if (aarch64_tls_symbol_p (x)) ++ return aarch64_classify_tls_symbol (x); ++ + return SYMBOL_FORCE_TO_MEM; + + case AARCH64_CMODEL_TINY: +--- gcc/testsuite/gcc.dg/tls/pr78796.c (nonexistent) ++++ gcc/testsuite/gcc.dg/tls/pr78796.c (revision 243646) +@@ -0,0 +1,32 @@ ++/* PR target/78796 */ ++/* { dg-do run } */ ++/* { dg-options "-O2" } */ ++/* { dg-additional-options "-mcmodel=large" { target aarch64-*-* } } */ ++/* { dg-require-effective-target tls } */ ++ ++struct S { int a, b, c, d, e; }; ++struct S t; ++__thread struct S s; ++ ++__attribute__((used, noinline, noclone)) void ++foo (int *x, int *y) ++{ ++ asm volatile ("" : : "g" (x), "g" (y) : "memory"); ++ if (*x != 1 || *y != 2) ++ __builtin_abort (); ++} ++ ++__attribute__((used, noinline, noclone)) void ++bar (void) ++{ ++ foo (&t.c, &s.c); ++} ++ ++int ++main () ++{ ++ t.c = 1; ++ s.c = 2; ++ bar (); ++ return 0; ++} diff --git a/gcc48-pr78875.patch b/gcc48-pr78875.patch new file mode 100644 index 0000000..12dd0eb --- /dev/null +++ b/gcc48-pr78875.patch @@ -0,0 +1,254 @@ +2017-01-17 Segher Boessenkool + + PR target/78875 + * config/rs6000/rs6000-opts.h (stack_protector_guard): New enum. + * config/rs6000/rs6000.c (rs6000_option_override_internal): Handle + the new options. + * config/rs6000/rs6000.md (stack_protect_set): Handle the new more + flexible settings. + (stack_protect_test): Ditto. + * config/rs6000/rs6000.opt (mstack-protector-guard=, + mstack-protector-guard-reg=, mstack-protector-guard-offset=): New + options. + * doc/invoke.texi (Option Summary) [RS/6000 and PowerPC Options]: + Add -mstack-protector-guard=, -mstack-protector-guard-reg=, and + -mstack-protector-guard-offset=. + (RS/6000 and PowerPC Options): Ditto. + + * gcc.target/powerpc/ssp-1.c: New testcase. + * gcc.target/powerpc/ssp-2.c: New testcase. + +--- gcc/config/rs6000/rs6000.opt (revision 244555) ++++ gcc/config/rs6000/rs6000.opt (revision 244556) +@@ -593,3 +593,31 @@ Allow float variables in upper registers + moptimize-swaps + Target Undocumented Var(rs6000_optimize_swaps) Init(1) Save + Analyze and remove doubleword swaps from VSX computations. ++ ++mstack-protector-guard= ++Target RejectNegative Joined Enum(stack_protector_guard) Var(rs6000_stack_protector_guard) Init(SSP_TLS) ++Use given stack-protector guard. ++ ++Enum ++Name(stack_protector_guard) Type(enum stack_protector_guard) ++Valid arguments to -mstack-protector-guard=: ++ ++EnumValue ++Enum(stack_protector_guard) String(tls) Value(SSP_TLS) ++ ++EnumValue ++Enum(stack_protector_guard) String(global) Value(SSP_GLOBAL) ++ ++mstack-protector-guard-reg= ++Target RejectNegative Joined Var(rs6000_stack_protector_guard_reg_str) ++Use the given base register for addressing the stack-protector guard. ++ ++TargetVariable ++int rs6000_stack_protector_guard_reg = 0 ++ ++mstack-protector-guard-offset= ++Target RejectNegative Joined Integer Var(rs6000_stack_protector_guard_offset_str) ++Use the given offset for addressing the stack-protector guard. 
++ ++TargetVariable ++long rs6000_stack_protector_guard_offset = 0 +--- gcc/config/rs6000/rs6000.c (revision 244555) ++++ gcc/config/rs6000/rs6000.c (revision 244556) +@@ -3727,6 +3727,54 @@ rs6000_option_override_internal (bool gl + atoi (rs6000_sched_insert_nops_str)); + } + ++ /* Handle stack protector */ ++ if (!global_options_set.x_rs6000_stack_protector_guard) ++#ifdef TARGET_THREAD_SSP_OFFSET ++ rs6000_stack_protector_guard = SSP_TLS; ++#else ++ rs6000_stack_protector_guard = SSP_GLOBAL; ++#endif ++ ++#ifdef TARGET_THREAD_SSP_OFFSET ++ rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET; ++ rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2; ++#endif ++ ++ if (global_options_set.x_rs6000_stack_protector_guard_offset_str) ++ { ++ char *endp; ++ const char *str = rs6000_stack_protector_guard_offset_str; ++ ++ errno = 0; ++ long offset = strtol (str, &endp, 0); ++ if (!*str || *endp || errno) ++ error ("%qs is not a valid number " ++ "in -mstack-protector-guard-offset=", str); ++ ++ if (!IN_RANGE (offset, -0x8000, 0x7fff) ++ || (TARGET_64BIT && (offset & 3))) ++ error ("%qs is not a valid offset " ++ "in -mstack-protector-guard-offset=", str); ++ ++ rs6000_stack_protector_guard_offset = offset; ++ } ++ ++ if (global_options_set.x_rs6000_stack_protector_guard_reg_str) ++ { ++ const char *str = rs6000_stack_protector_guard_reg_str; ++ int reg = decode_reg_name (str); ++ ++ if (!IN_RANGE (reg, 1, 31)) ++ error ("%qs is not a valid base register " ++ "in -mstack-protector-guard-reg=", str); ++ ++ rs6000_stack_protector_guard_reg = reg; ++ } ++ ++ if (rs6000_stack_protector_guard == SSP_TLS ++ && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31)) ++ error ("-mstack-protector-guard=tls needs a valid base register"); ++ + if (global_init_p) + { + #ifdef TARGET_REGNAMES +--- gcc/config/rs6000/rs6000.md (revision 244555) ++++ gcc/config/rs6000/rs6000.md (revision 244556) +@@ -13092,19 +13092,23 @@ + + + (define_expand "stack_protect_set" +- [(match_operand 0 "memory_operand" "") +- (match_operand 1 "memory_operand" "")] ++ [(match_operand 0 "memory_operand") ++ (match_operand 1 "memory_operand")] + "" + { +-#ifdef TARGET_THREAD_SSP_OFFSET +- rtx tlsreg = gen_rtx_REG (Pmode, TARGET_64BIT ? 13 : 2); +- rtx addr = gen_rtx_PLUS (Pmode, tlsreg, GEN_INT (TARGET_THREAD_SSP_OFFSET)); +- operands[1] = gen_rtx_MEM (Pmode, addr); +-#endif ++ if (rs6000_stack_protector_guard == SSP_TLS) ++ { ++ rtx reg = gen_rtx_REG (Pmode, rs6000_stack_protector_guard_reg); ++ rtx offset = GEN_INT (rs6000_stack_protector_guard_offset); ++ rtx addr = gen_rtx_PLUS (Pmode, reg, offset); ++ operands[1] = gen_rtx_MEM (Pmode, addr); ++ } ++ + if (TARGET_64BIT) + emit_insn (gen_stack_protect_setdi (operands[0], operands[1])); + else + emit_insn (gen_stack_protect_setsi (operands[0], operands[1])); ++ + DONE; + }) + +@@ -13127,21 +13131,26 @@ + (set_attr "length" "12")]) + + (define_expand "stack_protect_test" +- [(match_operand 0 "memory_operand" "") +- (match_operand 1 "memory_operand" "") +- (match_operand 2 "" "")] ++ [(match_operand 0 "memory_operand") ++ (match_operand 1 "memory_operand") ++ (match_operand 2 "")] + "" + { +- rtx test, op0, op1; +-#ifdef TARGET_THREAD_SSP_OFFSET +- rtx tlsreg = gen_rtx_REG (Pmode, TARGET_64BIT ? 
13 : 2); +- rtx addr = gen_rtx_PLUS (Pmode, tlsreg, GEN_INT (TARGET_THREAD_SSP_OFFSET)); +- operands[1] = gen_rtx_MEM (Pmode, addr); +-#endif +- op0 = operands[0]; +- op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, operands[1]), UNSPEC_SP_TEST); +- test = gen_rtx_EQ (VOIDmode, op0, op1); +- emit_jump_insn (gen_cbranchsi4 (test, op0, op1, operands[2])); ++ rtx guard = operands[1]; ++ ++ if (rs6000_stack_protector_guard == SSP_TLS) ++ { ++ rtx reg = gen_rtx_REG (Pmode, rs6000_stack_protector_guard_reg); ++ rtx offset = GEN_INT (rs6000_stack_protector_guard_offset); ++ rtx addr = gen_rtx_PLUS (Pmode, reg, offset); ++ guard = gen_rtx_MEM (Pmode, addr); ++ } ++ ++ operands[1] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, guard), UNSPEC_SP_TEST); ++ rtx test = gen_rtx_EQ (VOIDmode, operands[0], operands[1]); ++ rtx jump = gen_cbranchsi4 (test, operands[0], operands[1], operands[2]); ++ emit_jump_insn (jump); ++ + DONE; + }) + +--- gcc/config/rs6000/rs6000-opts.h (revision 244555) ++++ gcc/config/rs6000/rs6000-opts.h (revision 244556) +@@ -154,6 +154,12 @@ enum rs6000_vector { + VECTOR_OTHER /* Some other vector unit */ + }; + ++/* Where to get the canary for the stack protector. */ ++enum stack_protector_guard { ++ SSP_TLS, /* per-thread canary in TLS block */ ++ SSP_GLOBAL /* global canary */ ++}; ++ + /* No enumeration is defined to index the -mcpu= values (entries in + processor_target_table), with the type int being used instead, but + we need to distinguish the special "native" value. */ +--- gcc/doc/invoke.texi (revision 244555) ++++ gcc/doc/invoke.texi (revision 244556) +@@ -862,7 +862,9 @@ See RS/6000 and PowerPC Options. + -mcrypto -mno-crypto -mdirect-move -mno-direct-move @gol + -mquad-memory -mno-quad-memory @gol + -mquad-memory-atomic -mno-quad-memory-atomic @gol +--mcompat-align-parm -mno-compat-align-parm} ++-mcompat-align-parm -mno-compat-align-parm @gol ++-mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{reg} @gol ++-mstack-protector-guard-offset=@var{offset}} + + @emph{RX Options} + @gccoptlist{-m64bit-doubles -m32bit-doubles -fpu -nofpu@gol +@@ -18295,6 +18297,23 @@ GCC. + + In this version of the compiler, the @option{-mcompat-align-parm} + is the default, except when using the Linux ELFv2 ABI. ++ ++@item -mstack-protector-guard=@var{guard} ++@itemx -mstack-protector-guard-reg=@var{reg} ++@itemx -mstack-protector-guard-offset=@var{offset} ++@opindex mstack-protector-guard ++@opindex mstack-protector-guard-reg ++@opindex mstack-protector-guard-offset ++Generate stack protection code using canary at @var{guard}. Supported ++locations are @samp{global} for global canary or @samp{tls} for per-thread ++canary in the TLS block (the default with GNU libc version 2.4 or later). ++ ++With the latter choice the options ++@option{-mstack-protector-guard-reg=@var{reg}} and ++@option{-mstack-protector-guard-offset=@var{offset}} furthermore specify ++which register to use as base register for reading the canary, and from what ++offset from that base register. The default for those is as specified in the ++relevant ABI. 
+ @end table + + @node RX Options +--- gcc/testsuite/gcc.target/powerpc/ssp-1.c (nonexistent) ++++ gcc/testsuite/gcc.target/powerpc/ssp-1.c (revision 244562) +@@ -0,0 +1,6 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-protector-all -mstack-protector-guard=global" } */ ++ ++/* { dg-final { scan-assembler "__stack_chk_guard" } } */ ++ ++void f(void) { } +--- gcc/testsuite/gcc.target/powerpc/ssp-2.c (nonexistent) ++++ gcc/testsuite/gcc.target/powerpc/ssp-2.c (revision 244562) +@@ -0,0 +1,6 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-protector-all -mstack-protector-guard=tls -mstack-protector-guard-reg=r18 -mstack-protector-guard-offset=0x3038" } */ ++ ++/* { dg-final { scan-assembler {\m12344\(r?18\)} } } */ ++ ++void f(void) { } diff --git a/gcc48-pr79439.patch b/gcc48-pr79439.patch new file mode 100644 index 0000000..c593dfc --- /dev/null +++ b/gcc48-pr79439.patch @@ -0,0 +1,53 @@ +2017-03-01 Michael Meissner + + PR target/79439 + * config/rs6000/predicates.md (current_file_function_operand): Do + not allow self calls to be local if the function is replaceable. + + * gcc.target/powerpc/pr79439.c: New test. + +--- gcc/config/rs6000/predicates.md (revision 245812) ++++ gcc/config/rs6000/predicates.md (revision 245813) +@@ -1086,8 +1086,8 @@ + && ((DEFAULT_ABI != ABI_AIX + && DEFAULT_ABI != ABI_ELFv2) + || !SYMBOL_REF_EXTERNAL_P (op))) +- || (op == XEXP (DECL_RTL (current_function_decl), +- 0)))"))) ++ || (op == XEXP (DECL_RTL (current_function_decl), 0) ++ && !decl_replaceable_p (current_function_decl)))"))) + + ;; Return 1 if this operand is a valid input for a move insn. + (define_predicate "input_operand" +--- gcc/testsuite/gcc.target/powerpc/pr79439.c (nonexistent) ++++ gcc/testsuite/gcc.target/powerpc/pr79439.c (revision 245813) +@@ -0,0 +1,29 @@ ++/* { dg-do compile { target { powerpc64*-*-linux* && lp64 } } } */ ++/* { dg-options "-O2 -fpic" } */ ++ ++/* On the Linux 64-bit ABIs, we should not eliminate NOP in the 'rec' call if ++ -fpic is used because rec can be interposed at link time (since it is ++ external), and the recursive call should call the interposed function. The ++ Linux 32-bit ABIs do not require NOPs after the BL instruction. */ ++ ++int f (void); ++ ++void ++g (void) ++{ ++} ++ ++int ++rec (int a) ++{ ++ int ret = 0; ++ if (a > 10 && f ()) ++ ret += rec (a - 1); ++ g (); ++ return a + ret; ++} ++ ++/* { dg-final { scan-assembler-times {\mbl f\M} 1 } } */ ++/* { dg-final { scan-assembler-times {\mbl g\M} 2 } } */ ++/* { dg-final { scan-assembler-times {\mbl rec\M} 1 } } */ ++/* { dg-final { scan-assembler-times {\mnop\M} 4 } } */ diff --git a/gcc48-pr79969.patch b/gcc48-pr79969.patch new file mode 100644 index 0000000..53a4ed7 --- /dev/null +++ b/gcc48-pr79969.patch @@ -0,0 +1,43 @@ +2017-03-09 Jakub Jelinek + + PR c/79969 + * c-decl.c (start_enum): Adjust DECL_SOURCE_LOCATION of + TYPE_STUB_DECL. + + * gcc.dg/debug/dwarf2/enum-loc1.c: New test. + +--- gcc/c/c-decl.c.jj 2017-03-05 22:39:45.000000000 +0100 ++++ gcc/c/c-decl.c 2017-03-09 08:19:33.100042166 +0100 +@@ -8201,6 +8201,10 @@ start_enum (location_t loc, struct c_enu + enumtype = make_node (ENUMERAL_TYPE); + pushtag (loc, name, enumtype); + } ++ /* Update type location to the one of the definition, instead of e.g. ++ a forward declaration. 
*/ ++ else if (TYPE_STUB_DECL (enumtype)) ++ DECL_SOURCE_LOCATION (TYPE_STUB_DECL (enumtype)) = loc; + + if (C_TYPE_BEING_DEFINED (enumtype)) + error_at (loc, "nested redefinition of %<enum %E%>", name); +--- gcc/testsuite/gcc.dg/debug/dwarf2/enum-loc1.c.jj 2017-03-09 08:09:30.742037844 +0100 ++++ gcc/testsuite/gcc.dg/debug/dwarf2/enum-loc1.c 2017-03-09 08:16:45.202268438 +0100 +@@ -0,0 +1,19 @@ ++/* PR c/79969 */ ++/* { dg-do compile } */ ++/* { dg-options "-gdwarf-2 -dA -fno-merge-debug-strings" } */ ++ ++enum ENUMTAG; ++ ++enum ENUMTAG ++{ ++ B = 1, ++ C = 2 ++}; ++ ++void ++bar (void) ++{ ++ enum ENUMTAG a = C; ++} ++ ++/* { dg-final { scan-assembler "DW_TAG_enumeration_type\[^\\r\\n\]*\[\\r\\n\]+\[^\\r\\n\]*\"ENUMTAG\[^\\r\\n\]*DW_AT_name(\[^\\r\\n\]*\[\\r\\n\]+\[^\\r\\n\]*DW_AT_)*\[^\\r\\n\]*\[\\r\\n\]+\[^\\r\\n\]*\[^0-9a-fA-FxX](0x)?7\[^0-9a-fA-FxX]\[^\\r\\n\]*DW_AT_decl_line" } } */ diff --git a/gcc48-pr80129.patch b/gcc48-pr80129.patch new file mode 100644 index 0000000..34e160e --- /dev/null +++ b/gcc48-pr80129.patch @@ -0,0 +1,45 @@ +2017-05-30 Jakub Jelinek + + Backported from mainline + 2017-03-22 Jakub Jelinek + + PR c++/80129 + * gimplify.c (gimplify_modify_expr_rhs) <case COND_EXPR>: Clear + TREE_READONLY on result if writing it more than once. + + * g++.dg/torture/pr80129.C: New test. + +--- gcc/gimplify.c ++++ gcc/gimplify.c +@@ -4293,6 +4293,14 @@ gimplify_modify_expr_rhs (tree *expr_p, tree *from_p, tree *to_p, + if (ret != GS_ERROR) + ret = GS_OK; + ++ /* If we are going to write RESULT more than once, clear ++ TREE_READONLY flag, otherwise we might incorrectly promote ++ the variable to static const and initialize it at compile ++ time in one of the branches. */ ++ if (TREE_CODE (result) == VAR_DECL ++ && TREE_TYPE (TREE_OPERAND (cond, 1)) != void_type_node ++ && TREE_TYPE (TREE_OPERAND (cond, 2)) != void_type_node) ++ TREE_READONLY (result) = 0; + if (TREE_TYPE (TREE_OPERAND (cond, 1)) != void_type_node) + TREE_OPERAND (cond, 1) + = build2 (code, void_type_node, result, +--- /dev/null ++++ gcc/testsuite/g++.dg/torture/pr80129.C +@@ -0,0 +1,14 @@ ++// PR c++/80129 ++// { dg-do run } ++// { dg-options "-std=c++11" } ++ ++struct A { bool a; int b; }; ++ ++int ++main () ++{ ++ bool c = false; ++ const A x = c ? A {true, 1} : A {false, 0}; ++ if (x.a) ++ __builtin_abort (); ++} diff --git a/gcc48-pr80362.patch b/gcc48-pr80362.patch new file mode 100644 index 0000000..1137857 --- /dev/null +++ b/gcc48-pr80362.patch @@ -0,0 +1,38 @@ +2017-09-18 Richard Biener + + Backport from mainline + 2017-04-10 Richard Biener + + PR middle-end/80362 + * fold-const.c (fold_binary_loc): Look at unstripped ops when + looking for NEGATE_EXPR in -A / -B to A / B folding. + + * gcc.dg/torture/pr80362.c: New testcase. + +--- gcc/fold-const.c ++++ gcc/fold-const.c +@@ -11940,8 +11942,8 @@ fold_binary_loc (location_t loc, + /* Convert -A / -B to A / B when the type is signed and overflow is + undefined. 
*/ + if ((!INTEGRAL_TYPE_P (type) || TYPE_OVERFLOW_UNDEFINED (type)) +- && TREE_CODE (arg0) == NEGATE_EXPR +- && negate_expr_p (arg1)) ++ && TREE_CODE (op0) == NEGATE_EXPR ++ && negate_expr_p (op1)) + { + if (INTEGRAL_TYPE_P (type)) + fold_overflow_warning (("assuming signed overflow does not occur " +--- /dev/null ++++ gcc/testsuite/gcc.dg/torture/pr80362.c +@@ -0,0 +1,11 @@ ++/* { dg-do run } */ ++/* { dg-additional-options "-fstrict-overflow" } */ ++ ++int main() ++{ ++ signed char var_0, var_1 = -128; ++ var_0 = (signed char)(-var_1) / 3; ++ if (var_0 > 0) ++ __builtin_abort(); ++ return 0; ++} diff --git a/gcc48-pr80692.patch b/gcc48-pr80692.patch new file mode 100644 index 0000000..2e3e352 --- /dev/null +++ b/gcc48-pr80692.patch @@ -0,0 +1,53 @@ +2017-06-27 Segher Boessenkool + + Backport from trunk + 2017-05-17 Segher Boessenkool + + PR middle-end/80692 + * real.c (do_compare): Give decimal_do_compare preference over + comparing just the signs. + + * gcc.c-torture/execute/pr80692.c: New testcase. + +--- gcc/real.c ++++ gcc/real.c +@@ -950,12 +950,12 @@ do_compare (const REAL_VALUE_TYPE *a, const REAL_VALUE_TYPE *b, + gcc_unreachable (); + } + +- if (a->sign != b->sign) +- return -a->sign - -b->sign; +- + if (a->decimal || b->decimal) + return decimal_do_compare (a, b, nan_result); + ++ if (a->sign != b->sign) ++ return -a->sign - -b->sign; ++ + if (REAL_EXP (a) > REAL_EXP (b)) + ret = 1; + else if (REAL_EXP (a) < REAL_EXP (b)) +--- /dev/null ++++ gcc/testsuite/gcc.c-torture/execute/pr80692.c +@@ -0,0 +1,11 @@ ++int main () { ++ _Decimal64 d64 = -0.DD; ++ ++ if (d64 != 0.DD) ++ __builtin_abort (); ++ ++ if (d64 != -0.DD) ++ __builtin_abort (); ++ ++ return 0; ++} +--- /dev/null ++++ gcc/testsuite/gcc.c-torture/execute/pr80692.x +@@ -0,0 +1,7 @@ ++load_lib target-supports.exp ++ ++if { ! [check_effective_target_dfp] } { ++ return 1 ++} ++ ++return 0 diff --git a/gcc48-pr81395.patch b/gcc48-pr81395.patch new file mode 100644 index 0000000..b99fb74 --- /dev/null +++ b/gcc48-pr81395.patch @@ -0,0 +1,67 @@ +2017-07-18 Jonathan Wakely + + PR libstdc++/81395 + * include/bits/fstream.tcc (basic_filebuf::xsgetn): Don't set buffer + pointers for write mode after reading. + * testsuite/27_io/basic_filebuf/sgetn/char/81395.cc: New. + +--- libstdc++-v3/include/bits/fstream.tcc (revision 254017) ++++ libstdc++-v3/include/bits/fstream.tcc (revision 254018) +@@ -699,7 +699,7 @@ + + if (__n == 0) + { +- _M_set_buffer(0); ++ // Set _M_reading. Buffer is already in initial 'read' mode. + _M_reading = true; + } + else if (__len == 0) +--- libstdc++-v3/testsuite/27_io/basic_filebuf/sgetn/char/81395.cc (nonexistent) ++++ libstdc++-v3/testsuite/27_io/basic_filebuf/sgetn/char/81395.cc (revision 254018) +@@ -0,0 +1,46 @@ ++// Copyright (C) 2017 Free Software Foundation, Inc. ++// ++// This file is part of the GNU ISO C++ Library. This library is free ++// software; you can redistribute it and/or modify it under the ++// terms of the GNU General Public License as published by the ++// Free Software Foundation; either version 3, or (at your option) ++// any later version. ++ ++// This library is distributed in the hope that it will be useful, ++// but WITHOUT ANY WARRANTY; without even the implied warranty of ++// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++// GNU General Public License for more details. ++ ++// You should have received a copy of the GNU General Public License along ++// with this library; see the file COPYING3. If not see ++// <http://www.gnu.org/licenses/>. 
++ ++// { dg-require-fileio "" } ++ ++// PR libstdc++/81395 ++ ++#include <fstream> ++#include <cstring> // for std::memset ++#include <cstdio> // For BUFSIZ ++ ++using std::memset; ++ ++int main() ++{ ++ { ++ std::filebuf fb; ++ fb.open("test.txt", std::ios::out); ++ char data[BUFSIZ]; ++ memset(data, 'A', sizeof(data)); ++ fb.sputn(data, sizeof(data)); ++ } ++ ++ std::filebuf fb; ++ fb.open("test.txt", std::ios::in|std::ios::out); ++ char buf[BUFSIZ]; ++ memset(buf, 0, sizeof(buf)); ++ fb.sgetn(buf, sizeof(buf)); ++ // Switch from reading to writing without seeking first: ++ fb.sputn("B", 1); ++ fb.pubsync(); ++} diff --git a/gcc48-pr82274.patch b/gcc48-pr82274.patch new file mode 100644 index 0000000..5a330b9 --- /dev/null +++ b/gcc48-pr82274.patch @@ -0,0 +1,40 @@ +2017-10-13 Jakub Jelinek + + PR target/82274 + * libgcc2.c (__mulvDI3): If both operands have + the same highpart of -1 and the topmost bit of lowpart is 0, + multiplication overflows even if both lowparts are 0. + + * gcc.dg/pr82274-1.c: New test. + +--- libgcc/libgcc2.c 2017/10/13 16:50:13 253733 ++++ libgcc/libgcc2.c 2017/10/13 17:19:12 253734 +@@ -375,7 +375,8 @@ + } + else + { +- if (uu.s.high == (Wtype) -1 && vv.s.high == (Wtype) - 1) ++ if ((uu.s.high & vv.s.high) == (Wtype) -1 ++ && (uu.s.low | vv.s.low) != 0) + { + DWunion ww = {.ll = (UDWtype) (UWtype) uu.s.low + * (UDWtype) (UWtype) vv.s.low}; +--- /dev/null ++++ gcc/testsuite/gcc.dg/pr82274-1.c +@@ -0,0 +1,16 @@ ++/* PR target/82274 */ ++/* { dg-do run } */ ++/* { dg-shouldfail "trapv" } */ ++/* { dg-options "-ftrapv" } */ ++ ++int ++main () ++{ ++#ifdef __SIZEOF_INT128__ ++ volatile __int128 m = -(((__int128) 1) << (__CHAR_BIT__ * __SIZEOF_INT128__ / 2)); ++#else ++ volatile long long m = -(1LL << (__CHAR_BIT__ * __SIZEOF_LONG_LONG__ / 2)); ++#endif ++ m = m * m; ++ return 0; ++} diff --git a/gcc48-rh1180633.patch b/gcc48-rh1180633.patch new file mode 100644 index 0000000..f3898f9 --- /dev/null +++ b/gcc48-rh1180633.patch @@ -0,0 +1,338 @@ +2016-01-22 Torvald Riegel + + * beginend.cc (GTM::gtm_thread::serial_lock): Put on cacheline + boundary. + (htm_fastpath): Remove. + (gtm_thread::begin_transaction): Fix HTM fastpath. + (_ITM_commitTransaction): Adapt. + (_ITM_commitTransactionEH): Adapt. + * libitm/config/linux/rwlock.h (gtm_rwlock): Add htm_fastpath member + and accessors. + * libitm/config/posix/rwlock.h (gtm_rwlock): Likewise. + * libitm/config/posix/rwlock.cc (gtm_rwlock::gtm_rwlock): Adapt. + * libitm/libitm_i.h (htm_fastpath): Remove declaration. + * libitm/method-serial.cc (htm_mg): Adapt. + (gtm_thread::serialirr_mode): Adapt. + * libitm/query.cc (_ITM_inTransaction, _ITM_getTransactionId): Adapt. + +--- libitm/beginend.cc ++++ libitm/beginend.cc +@@ -32,7 +32,11 @@ using namespace GTM; + extern __thread gtm_thread_tls _gtm_thr_tls; + #endif + +-gtm_rwlock GTM::gtm_thread::serial_lock; ++// Put this at the start of a cacheline so that serial_lock's writers and ++// htm_fastpath fields are on the same cacheline, so that HW transactions ++// only have to pay one cacheline capacity to monitor both. ++gtm_rwlock GTM::gtm_thread::serial_lock ++ __attribute__((aligned(HW_CACHELINE_SIZE))); + gtm_thread *GTM::gtm_thread::list_of_threads = 0; + unsigned GTM::gtm_thread::number_of_threads = 0; + +@@ -54,9 +58,6 @@ static pthread_mutex_t global_tid_lock = PTHREAD_MUTEX_INITIALIZER; + static pthread_key_t thr_release_key; + static pthread_once_t thr_release_once = PTHREAD_ONCE_INIT; + +-// See gtm_thread::begin_transaction. 
+-uint32_t GTM::htm_fastpath = 0; +- + /* Allocate a transaction structure. */ + void * + GTM::gtm_thread::operator new (size_t s) +@@ -174,9 +175,11 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb) + // lock's writer flag and thus abort if another thread is or becomes a + // serial transaction. Therefore, if the fastpath is enabled, then a + // transaction is not executing as a HW transaction iff the serial lock is +- // write-locked. This allows us to use htm_fastpath and the serial lock's +- // writer flag to reliable determine whether the current thread runs a HW +- // transaction, and thus we do not need to maintain this information in ++ // write-locked. Also, HW transactions monitor the fastpath control ++ // variable, so that they will only execute if dispatch_htm is still the ++ // current method group. This allows us to use htm_fastpath and the serial ++ // lock's writers flag to reliable determine whether the current thread runs ++ // a HW transaction, and thus we do not need to maintain this information in + // per-thread state. + // If an uninstrumented code path is not available, we can still run + // instrumented code from a HW transaction because the HTM fastpath kicks +@@ -187,9 +190,14 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb) + // indeed in serial mode, and HW transactions should never need serial mode + // for any internal changes (e.g., they never abort visibly to the STM code + // and thus do not trigger the standard retry handling). +- if (likely(htm_fastpath && (prop & pr_hasNoAbort))) ++ if (likely(serial_lock.get_htm_fastpath() && (prop & pr_hasNoAbort))) + { +- for (uint32_t t = htm_fastpath; t; t--) ++ // Note that the snapshot of htm_fastpath that we take here could be ++ // outdated, and a different method group than dispatch_htm may have ++ // been chosen in the meantime. Therefore, take care not to touch ++ // anything besides the serial lock, which is independent of method ++ // groups. ++ for (uint32_t t = serial_lock.get_htm_fastpath(); t; t--) + { + uint32_t ret = htm_begin(); + if (htm_begin_success(ret)) + { + // We are executing a transaction now. + // Monitor the writer flag in the serial-mode lock, and abort + // if there is an active or waiting serial-mode transaction. ++ // Also checks that htm_fastpath is still nonzero and thus ++ // HW transactions are allowed to run. + // Note that this can also happen due to an enclosing + // serial-mode transaction; we handle this case below. +- if (unlikely(serial_lock.is_write_locked())) ++ if (unlikely(serial_lock.htm_fastpath_disabled())) + htm_abort(); + else + // We do not need to set a_saveLiveVariables because of HTM. +@@ -210,9 +220,12 @@ + // retrying the transaction will be successful. + if (!htm_abort_should_retry(ret)) + break; ++ // Check whether the HTM fastpath has been disabled. ++ if (!serial_lock.get_htm_fastpath()) ++ break; + // Wait until any concurrent serial-mode transactions have finished. + // This is an empty critical section, but won't be elided. +- if (serial_lock.is_write_locked()) ++ if (serial_lock.htm_fastpath_disabled()) + { + tx = gtm_thr(); + if (unlikely(tx == NULL)) +@@ -618,7 +631,7 @@ _ITM_commitTransaction(void) + // a serial-mode transaction. If we are, then there will be no other + // concurrent serial-mode transaction. 
+ // See gtm_thread::begin_transaction. +- if (likely(htm_fastpath && !gtm_thread::serial_lock.is_write_locked())) ++ if (likely(!gtm_thread::serial_lock.htm_fastpath_disabled())) + { + htm_commit(); + return; +@@ -634,7 +647,7 @@ _ITM_commitTransactionEH(void *exc_ptr) + { + #if defined(USE_HTM_FASTPATH) + // See _ITM_commitTransaction. +- if (likely(htm_fastpath && !gtm_thread::serial_lock.is_write_locked())) ++ if (likely(!gtm_thread::serial_lock.htm_fastpath_disabled())) + { + htm_commit(); + return; +--- libitm/config/linux/rwlock.h ++++ libitm/config/linux/rwlock.h +@@ -39,16 +39,29 @@ struct gtm_thread; + // + // In this implementation, writers are given highest priority access but + // read-to-write upgrades do not have a higher priority than writers. ++// ++// Do not change the layout of this class; it must remain a POD type with ++// standard layout, and the writers field must be first (i.e., so the ++// assembler code can assume that its address is equal to the address of the ++// respective instance of the class), and htm_fastpath must be second. + + class gtm_rwlock + { +- // TODO Put futexes on different cachelines? + std::atomic<int> writers; // Writers' futex. ++ // We put the HTM fastpath control variable here so that HTM fastpath ++ // transactions can check efficiently whether they are allowed to run. ++ // This must be accessed atomically because threads can load this value ++ // when they are neither a registered reader nor writer (i.e., when they ++ // attempt to execute the HTM fastpath). ++ std::atomic<uint32_t> htm_fastpath; ++ // TODO Put these futexes on different cachelines? (writers and htm_fastpath ++ // should remain on the same cacheline.) + std::atomic<int> writer_readers;// A confirmed writer waits here for readers. + std::atomic<int> readers; // Readers wait here for writers (iff true). + + public: +- gtm_rwlock() : writers(0), writer_readers(0), readers(0) {}; ++ gtm_rwlock() : writers(0), htm_fastpath(0), writer_readers(0), readers(0) ++ { } + + void read_lock (gtm_thread *tx); + void read_unlock (gtm_thread *tx); + +@@ -59,12 +72,28 @@ class gtm_rwlock + bool write_upgrade (gtm_thread *tx); + void write_upgrade_finish (gtm_thread *tx); + +- // Returns true iff there is a concurrent active or waiting writer. +- // This is primarily useful for simple HyTM approaches, and the value being +- // checked is loaded with memory_order_relaxed. +- bool is_write_locked() ++ // Returns true iff there is a concurrent active or waiting writer, or ++ // htm_fastpath is zero. This is primarily useful for simple HyTM ++ // approaches, and the values being checked are loaded with ++ // memory_order_relaxed. ++ bool htm_fastpath_disabled () ++ { ++ return writers.load (memory_order_relaxed) != 0 ++ || htm_fastpath.load (memory_order_relaxed) == 0; ++ } ++ ++ // This does not need to return an exact value, hence relaxed MO is ++ // sufficient. ++ uint32_t get_htm_fastpath () ++ { ++ return htm_fastpath.load (memory_order_relaxed); ++ } ++ // This must only be called while having acquired the write lock, and other ++ // threads do not need to load an exact value; hence relaxed MO is ++ // sufficient. 
++ void set_htm_fastpath (uint32_t val) + { +- return writers.load (memory_order_relaxed) != 0; ++ htm_fastpath.store (val, memory_order_relaxed); + } + + protected: +--- libitm/config/posix/rwlock.h ++++ libitm/config/posix/rwlock.h +@@ -44,19 +44,32 @@ struct gtm_thread; + // + // In this implementation, writers are given highest priority access but + // read-to-write upgrades do not have a higher priority than writers. ++// ++// Do not change the layout of this class; it must remain a POD type with ++// standard layout, and the summary field must be first (i.e., so the ++// assembler code can assume that its address is equal to the address of the ++// respective instance of the class), and htm_fastpath must be second. + + class gtm_rwlock + { +- pthread_mutex_t mutex; // Held if manipulating any field. +- pthread_cond_t c_readers; // Readers wait here +- pthread_cond_t c_writers; // Writers wait here for writers +- pthread_cond_t c_confirmed_writers; // Writers wait here for readers +- + static const unsigned a_writer = 1; // An active writer. + static const unsigned w_writer = 2; // The w_writers field != 0 + static const unsigned w_reader = 4; // The w_readers field != 0 + + std::atomic<unsigned int> summary; // Bitmask of the above. ++ ++ // We put the HTM fastpath control variable here so that HTM fastpath ++ // transactions can check efficiently whether they are allowed to run. ++ // This must be accessed atomically because threads can load this value ++ // when they are neither a registered reader nor writer (i.e., when they ++ // attempt to execute the HTM fastpath). ++ std::atomic<uint32_t> htm_fastpath; ++ ++ pthread_mutex_t mutex; // Held if manipulating any field. ++ pthread_cond_t c_readers; // Readers wait here ++ pthread_cond_t c_writers; // Writers wait here for writers ++ pthread_cond_t c_confirmed_writers; // Writers wait here for readers ++ + unsigned int a_readers; // Nr active readers as observed by a writer + unsigned int w_readers; // Nr waiting readers + unsigned int w_writers; // Nr waiting writers +@@ -74,12 +87,28 @@ class gtm_rwlock + bool write_upgrade (gtm_thread *tx); + void write_upgrade_finish (gtm_thread *tx); + +- // Returns true iff there is a concurrent active or waiting writer. +- // This is primarily useful for simple HyTM approaches, and the value being +- // checked is loaded with memory_order_relaxed. +- bool is_write_locked() ++ // Returns true iff there is a concurrent active or waiting writer, or ++ // htm_fastpath is zero. This is primarily useful for simple HyTM ++ // approaches, and the values being checked are loaded with ++ // memory_order_relaxed. ++ bool htm_fastpath_disabled () ++ { ++ return (summary.load (memory_order_relaxed) & (a_writer | w_writer)) ++ || htm_fastpath.load (memory_order_relaxed) == 0; ++ } ++ ++ // This does not need to return an exact value, hence relaxed MO is ++ // sufficient. ++ uint32_t get_htm_fastpath () ++ { ++ return htm_fastpath.load (memory_order_relaxed); ++ } ++ // This must only be called while having acquired the write lock, and other ++ // threads do not need to load an exact value; hence relaxed MO is ++ // sufficient. ++ void set_htm_fastpath (uint32_t val) + { +- return summary.load (memory_order_relaxed) & (a_writer | w_writer); ++ htm_fastpath.store (val, memory_order_relaxed); + } + + protected: +--- libitm/config/posix/rwlock.cc ++++ libitm/config/posix/rwlock.cc +@@ -30,11 +30,12 @@ namespace GTM HIDDEN { + // ??? Move this back to the header file when constexpr is implemented. 
+ + gtm_rwlock::gtm_rwlock() +- : mutex (PTHREAD_MUTEX_INITIALIZER), ++ : summary (0), ++ htm_fastpath (0), ++ mutex (PTHREAD_MUTEX_INITIALIZER), + c_readers (PTHREAD_COND_INITIALIZER), + c_writers (PTHREAD_COND_INITIALIZER), + c_confirmed_writers (PTHREAD_COND_INITIALIZER), +- summary (0), + a_readers (0), + w_readers (0), + w_writers (0) +--- libitm/libitm_i.h ++++ libitm/libitm_i.h +@@ -336,10 +336,6 @@ extern abi_dispatch *dispatch_htm(); + + extern gtm_cacheline_mask gtm_mask_stack(gtm_cacheline *, gtm_cacheline_mask); + +-// Control variable for the HTM fastpath that uses serial mode as fallback. +-// Non-zero if the HTM fastpath is enabled. See gtm_thread::begin_transaction. +-extern uint32_t htm_fastpath; +- + } // namespace GTM + + #endif // LIBITM_I_H +--- libitm/method-serial.cc ++++ libitm/method-serial.cc +@@ -222,13 +222,13 @@ struct htm_mg : public method_group + // Enable the HTM fastpath if the HW is available. The fastpath is + // initially disabled. + #ifdef USE_HTM_FASTPATH +- htm_fastpath = htm_init(); ++ gtm_thread::serial_lock.set_htm_fastpath(htm_init()); + #endif + } + virtual void fini() + { + // Disable the HTM fastpath. +- htm_fastpath = 0; ++ gtm_thread::serial_lock.set_htm_fastpath(0); + } + }; + +@@ -288,7 +288,7 @@ GTM::gtm_thread::serialirr_mode () + #if defined(USE_HTM_FASTPATH) + // HTM fastpath. If we are executing a HW transaction, don't go serial but + // continue. See gtm_thread::begin_transaction. +- if (likely(htm_fastpath && !gtm_thread::serial_lock.is_write_locked())) ++ if (likely(!gtm_thread::serial_lock.htm_fastpath_disabled())) + return; + #endif + +--- libitm/query.cc ++++ libitm/query.cc +@@ -49,7 +49,7 @@ _ITM_inTransaction (void) + // a transaction and thus we can't deduce this by looking at just the serial + // lock. This function isn't used in practice currently, so the easiest + // way to handle it is to just abort. +- if (htm_fastpath && htm_transaction_active()) ++ if (gtm_thread::serial_lock.get_htm_fastpath() && htm_transaction_active()) + htm_abort(); + #endif + struct gtm_thread *tx = gtm_thr(); +@@ -69,7 +69,7 @@ _ITM_getTransactionId (void) + { + #if defined(USE_HTM_FASTPATH) + // See ITM_inTransaction. +- if (htm_fastpath && htm_transaction_active()) ++ if (gtm_thread::serial_lock.get_htm_fastpath() && htm_transaction_active()) + htm_abort(); + #endif + struct gtm_thread *tx = gtm_thr(); diff --git a/gcc48-rh1243366.patch b/gcc48-rh1243366.patch new file mode 100644 index 0000000..da137f3 --- /dev/null +++ b/gcc48-rh1243366.patch @@ -0,0 +1,35 @@ +2015-07-15 Jonathan Wakely + + PR libstdc++/57394 + * include/std/streambuf (basic_streambuf(const basic_streambuf&)): + Fix initializer for _M_out_end. + (operator=(const basic_streambuf&)): Replace stub with actual + implementation. 
+ +--- libstdc++-v3/include/std/streambuf ++++ libstdc++-v3/include/std/streambuf +@@ -802,12 +802,22 @@ + basic_streambuf(const basic_streambuf& __sb) + : _M_in_beg(__sb._M_in_beg), _M_in_cur(__sb._M_in_cur), + _M_in_end(__sb._M_in_end), _M_out_beg(__sb._M_out_beg), +- _M_out_cur(__sb._M_out_cur), _M_out_end(__sb._M_out_cur), ++ _M_out_cur(__sb._M_out_cur), _M_out_end(__sb._M_out_end), + _M_buf_locale(__sb._M_buf_locale) + { } + + basic_streambuf& +- operator=(const basic_streambuf&) { return *this; }; ++ operator=(const basic_streambuf& __sb) ++ { ++ _M_in_beg = __sb._M_in_beg; ++ _M_in_cur = __sb._M_in_cur; ++ _M_in_end = __sb._M_in_end; ++ _M_out_beg = __sb._M_out_beg; ++ _M_out_cur = __sb._M_out_cur; ++ _M_out_end = __sb._M_out_end; ++ _M_buf_locale = __sb._M_buf_locale; ++ return *this; ++ }; + }; + + // Explicit specialization declarations, defined in src/streambuf.cc. diff --git a/gcc48-rh1278872.patch b/gcc48-rh1278872.patch new file mode 100644 index 0000000..ce82c3d --- /dev/null +++ b/gcc48-rh1278872.patch @@ -0,0 +1,78 @@ +2013-07-25 Sterling Augustine + + * dwarf2out.c (size_of_pubnames): Move code to... + (include_pubname_in_output): ...here. New. + (output_pubnames): Call include_pubname_in_output. Move assertion. + +--- gcc/dwarf2out.c (revision 201254) ++++ gcc/dwarf2out.c (revision 201255) +@@ -7806,6 +7806,30 @@ unmark_all_dies (dw_die_ref die) + unmark_all_dies (AT_ref (a)); + } + ++/* Calculate if the entry should appear in the final output file. It may be ++ from a pruned a type. */ ++ ++static bool ++include_pubname_in_output (vec *table, pubname_entry *p) ++{ ++ if (table == pubname_table) ++ { ++ /* Enumerator names are part of the pubname table, but the parent ++ DW_TAG_enumeration_type die may have been pruned. Don't output ++ them if that is the case. */ ++ if (p->die->die_tag == DW_TAG_enumerator && !p->die->die_mark) ++ return false; ++ ++ /* Everything else in the pubname table is included. */ ++ return true; ++ } ++ ++ /* The pubtypes table shouldn't include types that have been ++ pruned. */ ++ return (p->die->die_offset != 0 ++ || !flag_eliminate_unused_debug_types); ++} ++ + /* Return the size of the .debug_pubnames or .debug_pubtypes table + generated for the compilation unit. */ + +@@ -7818,9 +7842,7 @@ size_of_pubnames (vecdie->die_offset != 0 +- || !flag_eliminate_unused_debug_types) ++ if (include_pubname_in_output (names, p)) + size += strlen (p->name) + DWARF_OFFSET_SIZE + 1; + + size += DWARF_OFFSET_SIZE; +@@ -8999,22 +9021,14 @@ output_pubnames (vecdie->die_tag == DW_TAG_enumerator && !pub->die->die_mark) +- continue; +- +- /* We shouldn't see pubnames for DIEs outside of the main CU. */ +- if (names == pubname_table) +- gcc_assert (pub->die->die_mark); +- +- if (names != pubtype_table +- || pub->die->die_offset != 0 +- || !flag_eliminate_unused_debug_types) ++ if (include_pubname_in_output (names, pub)) + { + dw_offset die_offset = pub->die->die_offset; + ++ /* We shouldn't see pubnames for DIEs outside of the main CU. */ ++ if (names == pubname_table) ++ gcc_assert (pub->die->die_mark); ++ + /* If we're putting types in their own .debug_types sections, + the .debug_pubtypes table will still point to the compile + unit (not the type unit), so we want to use the offset of diff --git a/gcc48-rh1296211.patch b/gcc48-rh1296211.patch new file mode 100644 index 0000000..f1b084a --- /dev/null +++ b/gcc48-rh1296211.patch @@ -0,0 +1,14 @@ +2015-09-02 Alan Modra + + * config/rs6000/sysv4.h (LINK_SPEC): Delete link_target. 
+ +--- gcc/config/rs6000/sysv4.h (revision 227396) ++++ gcc/config/rs6000/sysv4.h (revision 227397) +@@ -574,7 +574,6 @@ ENDIAN_SELECT(" -mbig", " -mlittle", DEF + %{R*} \ + %(link_shlib) \ + %{!T*: %(link_start) } \ +-%(link_target) \ + %(link_os)" + + /* Shared libraries are not default. */ diff --git a/gcc48-rh1304449.patch b/gcc48-rh1304449.patch new file mode 100644 index 0000000..213ff0c --- /dev/null +++ b/gcc48-rh1304449.patch @@ -0,0 +1,496 @@ +2015-12-24 Kirill Yukhin + + * common/config/i386/i386-common.c (OPTION_MASK_ISA_PKU_SET): New. + (OPTION_MASK_ISA_PKU_UNSET): Ditto. + (ix86_handle_option): Handle OPT_mpku. + * config.gcc: Add pkuintrin.h to i[34567]86-*-* and x86_64-*-* + targets. + * config/i386/cpuid.h (host_detect_local_cpu): Detect PKU feature. + * config/i386/i386-c.c (ix86_target_macros_internal): Handle PKU ISA + flag. + * config/i386/i386.c (ix86_target_string): Add "-mpku" to + ix86_target_opts. + (ix86_option_override_internal): Define PTA_PKU, mention new key + in skylake-avx512. Handle new ISA bits. + (ix86_valid_target_attribute_inner_p): Add "pku". + (enum ix86_builtins): Add IX86_BUILTIN_RDPKRU and IX86_BUILTIN_WRPKRU. + (builtin_description bdesc_special_args[]): Add new built-ins. + * config/i386/i386.h (define TARGET_PKU): New. + (define TARGET_PKU_P): Ditto. + * config/i386/i386.md (define_c_enum "unspecv"): Add UNSPEC_PKU. + (define_expand "rdpkru"): New. + (define_insn "*rdpkru"): Ditto. + (define_expand "wrpkru"): Ditto. + (define_insn "*wrpkru"): Ditto. + * config/i386/i386.opt (mpku): Ditto. + * config/i386/pkuintrin.h: New file. + * config/i386/x86intrin.h: Include pkuintrin.h + * doc/extend.texi: Describe new built-ins. + * doc/invoke.texi: Describe new switches. + + * g++.dg/other/i386-2.C: Add -mpku. + * g++.dg/other/i386-3.C: Ditto. + * gcc.target/i386/rdpku-1.c: New test. + * gcc.target/i386/sse-12.c: Add -mpku. + * gcc.target/i386/sse-13.c: Ditto. + * gcc.target/i386/sse-22.c: Ditto. + * gcc.target/i386/sse-33.c: Ditto. + * gcc.target/i386/wrpku-1.c: New test. + +--- gcc/config.gcc (revision 231943) ++++ gcc/config.gcc (revision 231945) +@@ -368,7 +368,7 @@ i[34567]86-*-*) + lzcntintrin.h bmiintrin.h bmi2intrin.h tbmintrin.h + avx2intrin.h fmaintrin.h f16cintrin.h rtmintrin.h + xtestintrin.h rdseedintrin.h prfchwintrin.h adxintrin.h +- fxsrintrin.h xsaveintrin.h xsaveoptintrin.h" ++ fxsrintrin.h xsaveintrin.h xsaveoptintrin.h pkuintrin.h" + ;; + x86_64-*-*) + cpu_type=i386 +@@ -383,7 +383,7 @@ x86_64-*-*) + lzcntintrin.h bmiintrin.h tbmintrin.h bmi2intrin.h + avx2intrin.h fmaintrin.h f16cintrin.h rtmintrin.h + xtestintrin.h rdseedintrin.h prfchwintrin.h adxintrin.h +- fxsrintrin.h xsaveintrin.h xsaveoptintrin.h" ++ fxsrintrin.h xsaveintrin.h xsaveoptintrin.h pkuintrin.h" + need_64bit_hwint=yes + ;; + ia64-*-*) +--- gcc/common/config/i386/i386-common.c (revision 231943) ++++ gcc/common/config/i386/i386-common.c (revision 231945) +@@ -98,6 +98,7 @@ along with GCC; see the file COPYING3. + #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND + #define OPTION_MASK_ISA_F16C_SET \ + (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET) ++#define OPTION_MASK_ISA_PKU_SET OPTION_MASK_ISA_PKU + + /* Define a set of ISAs which aren't available when a given ISA is + disabled. MMX and SSE ISAs are handled separately. */ +@@ -164,6 +165,7 @@ along with GCC; see the file COPYING3. 
+ #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE + #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND + #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C ++#define OPTION_MASK_ISA_PKU_UNSET OPTION_MASK_ISA_PKU + + /* Implement TARGET_HANDLE_OPTION. */ + +@@ -659,6 +661,19 @@ ix86_handle_option (struct gcc_options * + } + return true; + ++ case OPT_mpku: ++ if (value) ++ { ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU_SET; ++ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_PKU_SET; ++ } ++ else ++ { ++ opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_PKU_UNSET; ++ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_PKU_UNSET; ++ } ++ return true; ++ + /* Comes from final.c -- no real reason to change it. */ + #define MAX_CODE_ALIGN 16 + +--- gcc/config/i386/i386.h (revision 231943) ++++ gcc/config/i386/i386.h (revision 231945) +@@ -80,6 +80,7 @@ see the files COPYING3 and COPYING.RUNTI + #define TARGET_FXSR TARGET_ISA_FXSR + #define TARGET_XSAVE TARGET_ISA_XSAVE + #define TARGET_XSAVEOPT TARGET_ISA_XSAVEOPT ++#define TARGET_PKU TARGET_ISA_PKU + + #define TARGET_LP64 TARGET_ABI_64 + #define TARGET_X32 TARGET_ABI_X32 +--- gcc/config/i386/i386.md (revision 231943) ++++ gcc/config/i386/i386.md (revision 231945) +@@ -224,6 +224,9 @@ (define_c_enum "unspecv" [ + UNSPECV_XTEST + + UNSPECV_NLGR ++ ++ ;; For RDPKRU and WRPKRU support ++ UNSPECV_PKU + ]) + + ;; Constants to represent rounding modes in the ROUND instruction +@@ -18289,6 +18292,48 @@ (define_insn "xtest_1" + [(set_attr "type" "other") + (set_attr "length" "3")]) + ++;; RDPKRU and WRPKRU ++ ++(define_expand "rdpkru" ++ [(parallel ++ [(set (match_operand:SI 0 "register_operand") ++ (unspec_volatile:SI [(match_dup 1)] UNSPECV_PKU)) ++ (set (match_dup 2) (const_int 0))])] ++ "TARGET_PKU" ++{ ++ operands[1] = force_reg (SImode, const0_rtx); ++ operands[2] = gen_reg_rtx (SImode); ++}) ++ ++(define_insn "*rdpkru" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (unspec_volatile:SI [(match_operand:SI 2 "register_operand" "c")] ++ UNSPECV_PKU)) ++ (set (match_operand:SI 1 "register_operand" "=d") ++ (const_int 0))] ++ "TARGET_PKU" ++ "rdpkru" ++ [(set_attr "type" "other")]) ++ ++(define_expand "wrpkru" ++ [(unspec_volatile:SI ++ [(match_operand:SI 0 "register_operand") ++ (match_dup 1) (match_dup 2)] UNSPECV_PKU)] ++ "TARGET_PKU" ++{ ++ operands[1] = force_reg (SImode, const0_rtx); ++ operands[2] = force_reg (SImode, const0_rtx); ++}) ++ ++(define_insn "*wrpkru" ++ [(unspec_volatile:SI ++ [(match_operand:SI 0 "register_operand" "a") ++ (match_operand:SI 1 "register_operand" "d") ++ (match_operand:SI 2 "register_operand" "c")] UNSPECV_PKU)] ++ "TARGET_PKU" ++ "wrpkru" ++ [(set_attr "type" "other")]) ++ + (include "mmx.md") + (include "sse.md") + (include "sync.md") +--- gcc/config/i386/pkuintrin.h (revision 0) ++++ gcc/config/i386/pkuintrin.h (revision 231945) +@@ -0,0 +1,45 @@ ++/* Copyright (C) 2015 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. 
++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++#if !defined _X86INTRIN_H_INCLUDED ++# error "Never use directly; include instead." ++#endif ++ ++#ifndef _PKUINTRIN_H_INCLUDED ++#define _PKUINTRIN_H_INCLUDED ++ ++extern __inline unsigned int ++__attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++_rdpkru_u32(void) ++{ ++ return __builtin_ia32_rdpkru (); ++} ++ ++extern __inline void ++__attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++_wrpkru(unsigned int key) ++{ ++ return __builtin_ia32_wrpkru (key); ++} ++ ++#endif /* _PKUINTRIN_H_INCLUDED */ +--- gcc/config/i386/cpuid.h (revision 231943) ++++ gcc/config/i386/cpuid.h (revision 231945) +@@ -74,6 +74,10 @@ + #define bit_RDSEED (1 << 18) + #define bit_ADX (1 << 19) + ++/* %ecx */ ++#define bit_PKU (1 << 3) ++#define bit_OSPKE (1 << 4) ++ + /* Extended State Enumeration Sub-leaf (%eax == 13, %ecx == 1) */ + #define bit_XSAVEOPT (1 << 0) + +--- gcc/config/i386/x86intrin.h (revision 231943) ++++ gcc/config/i386/x86intrin.h (revision 231945) +@@ -119,4 +119,8 @@ + + #include + ++#ifdef __PKU__ ++#include ++#endif ++ + #endif /* _X86INTRIN_H_INCLUDED */ +--- gcc/config/i386/i386-c.c (revision 231943) ++++ gcc/config/i386/i386-c.c (revision 231945) +@@ -348,6 +348,8 @@ ix86_target_macros_internal (HOST_WIDE_I + def_or_undef (parse_in, "__XSAVE__"); + if (isa_flag & OPTION_MASK_ISA_XSAVEOPT) + def_or_undef (parse_in, "__XSAVEOPT__"); ++ if (isa_flag & OPTION_MASK_ISA_PKU) ++ def_or_undef (parse_in, "__PKU__"); + if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE)) + def_or_undef (parse_in, "__SSE_MATH__"); + if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE2)) +--- gcc/config/i386/i386.opt (revision 231943) ++++ gcc/config/i386/i386.opt (revision 231945) +@@ -626,3 +626,7 @@ Split 32-byte AVX unaligned store + mrtm + Target Report Mask(ISA_RTM) Var(ix86_isa_flags) Save + Support RTM built-in functions and code generation ++ ++mpku ++Target Report Mask(ISA_PKU) Var(ix86_isa_flags) Save ++Support PKU built-in functions and code generation +--- gcc/config/i386/driver-i386.c (revision 231943) ++++ gcc/config/i386/driver-i386.c (revision 231945) +@@ -408,6 +408,7 @@ const char *host_detect_local_cpu (int a + unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0; + unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0; + unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0; ++ unsigned int has_pku = 0; + + bool arch; + +@@ -479,6 +480,8 @@ const char *host_detect_local_cpu (int a + has_fsgsbase = ebx & bit_FSGSBASE; + has_rdseed = ebx & bit_RDSEED; + has_adx = ebx & bit_ADX; ++ ++ has_pku = ecx & bit_OSPKE; + } + + if (max_level >= 13) +@@ -855,12 +858,13 @@ const char *host_detect_local_cpu (int a + const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr"; + const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave"; + const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt"; ++ const char *pku = has_pku ? 
" -mpku" : " -mno-pku"; + + options = concat (options, cx16, sahf, movbe, ase, pclmul, + popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2, + tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm, + hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx, +- fxsr, xsave, xsaveopt, NULL); ++ fxsr, xsave, xsaveopt, pku, NULL); + } + + done: +--- gcc/config/i386/i386.c (revision 231943) ++++ gcc/config/i386/i386.c (revision 231945) +@@ -2632,6 +2632,7 @@ ix86_target_string (HOST_WIDE_INT isa, i + { "-mrtm", OPTION_MASK_ISA_RTM }, + { "-mxsave", OPTION_MASK_ISA_XSAVE }, + { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT }, ++ { "-mpku", OPTION_MASK_ISA_PKU }, + }; + + /* Flag options. */ +@@ -2905,6 +2906,7 @@ ix86_option_override_internal (bool main + #define PTA_FXSR (HOST_WIDE_INT_1 << 37) + #define PTA_XSAVE (HOST_WIDE_INT_1 << 38) + #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39) ++#define PTA_PKU (HOST_WIDE_INT_1 << 60) + + /* if this reaches 64, need to widen struct pta flags below */ + +@@ -3429,6 +3431,9 @@ ix86_option_override_internal (bool main + if (processor_alias_table[i].flags & PTA_XSAVEOPT + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT)) + ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT; ++ if (processor_alias_table[i].flags & PTA_PKU ++ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU)) ++ ix86_isa_flags |= OPTION_MASK_ISA_PKU; + if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)) + x86_prefetch_sse = true; + +@@ -4220,6 +4225,7 @@ ix86_valid_target_attribute_inner_p (tre + IX86_ATTR_ISA ("fxsr", OPT_mfxsr), + IX86_ATTR_ISA ("xsave", OPT_mxsave), + IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt), ++ IX86_ATTR_ISA ("pku", OPT_mpku), + + /* enum options */ + IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), +@@ -27042,6 +27048,10 @@ enum ix86_builtins + IX86_BUILTIN_CPU_IS, + IX86_BUILTIN_CPU_SUPPORTS, + ++ /* PKU instructions. */ ++ IX86_BUILTIN_RDPKRU, ++ IX86_BUILTIN_WRPKRU, ++ + IX86_BUILTIN_MAX + }; + +@@ -27357,6 +27367,10 @@ static const struct builtin_description + { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID }, + { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID }, + { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID }, ++ ++ /* RDPKRU and WRPKRU. */ ++ { OPTION_MASK_ISA_PKU, CODE_FOR_rdpkru, "__builtin_ia32_rdpkru", IX86_BUILTIN_RDPKRU, UNKNOWN, (int) UNSIGNED_FTYPE_VOID }, ++ { OPTION_MASK_ISA_PKU, CODE_FOR_wrpkru, "__builtin_ia32_wrpkru", IX86_BUILTIN_WRPKRU, UNKNOWN, (int) VOID_FTYPE_UNSIGNED }, + }; + + /* Builtins with variable number of arguments. */ +--- gcc/doc/extend.texi (revision 231943) ++++ gcc/doc/extend.texi (revision 231945) +@@ -10996,6 +10996,13 @@ void __builtin_ia32_xabort (status) + int __builtin_ia32_xtest () + @end smallexample + ++The following built-in functions are available when @option{-mpku} is used. ++They generate reads and writes to PKRU. ++@smallexample ++void __builtin_ia32_wrpkru (unsigned int) ++unsigned int __builtin_ia32_rdpkru () ++@end smallexample ++ + @node X86 transactional memory intrinsics + @subsection X86 transaction memory intrinsics + +--- gcc/doc/invoke.texi (revision 231943) ++++ gcc/doc/invoke.texi (revision 231945) +@@ -645,7 +645,7 @@ Objective-C and Objective-C++ Dialects}. 
+ -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol + -mavx2 -maes -mpclmul -mfsgsbase -mrdrnd -mf16c -mfma @gol + -msse4a -m3dnow -mpopcnt -mabm -mbmi -mtbm -mfma4 -mxop -mlzcnt @gol +--mbmi2 -mrtm -mlwp -mthreads @gol ++-mbmi2 -mrtm -mlwp -mpku -mthreads @gol + -mno-align-stringops -minline-all-stringops @gol + -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol + -mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol +@@ -14326,6 +14326,8 @@ preferred alignment to @option{-mpreferr + @itemx -mlzcnt + @itemx -mno-lzcnt + @itemx -mrtm ++@itemx -mpku ++@itemx -mno-pku + @itemx -mtbm + @itemx -mno-tbm + @opindex mmmx +@@ -14336,7 +14338,7 @@ preferred alignment to @option{-mpreferr + @opindex mno-3dnow + These switches enable or disable the use of instructions in the MMX, SSE, + SSE2, SSE3, SSSE3, SSE4.1, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, F16C, +-FMA, SSE4A, FMA4, XOP, LWP, ABM, BMI, BMI2, LZCNT, RTM or 3DNow!@: ++FMA, SSE4A, FMA4, XOP, LWP, ABM, BMI, BMI2, LZCNT, RTM, PKU or 3DNow!@: + extended instruction sets. + These extensions are also available as built-in functions: see + @ref{X86 Built-in Functions}, for details of the functions enabled and +--- gcc/testsuite/gcc.target/i386/sse-12.c (revision 231943) ++++ gcc/testsuite/gcc.target/i386/sse-12.c (revision 231945) +@@ -3,7 +3,7 @@ + popcntintrin.h and mm_malloc.h are usable + with -O -std=c89 -pedantic-errors. */ + /* { dg-do compile } */ +-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt" } */ ++/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mpku" } */ + + #include + +--- gcc/testsuite/gcc.target/i386/sse-13.c (revision 231943) ++++ gcc/testsuite/gcc.target/i386/sse-13.c (revision 231945) +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt" } */ ++/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mpku" } */ + + #include + +--- gcc/testsuite/gcc.target/i386/sse-22.c (revision 231943) ++++ gcc/testsuite/gcc.target/i386/sse-22.c (revision 231945) +@@ -268,7 +268,7 @@ test_2 (_mm_clmulepi64_si128, __m128i, _ + + /* x86intrin.h (FMA4/XOP/LWP/BMI/BMI2/TBM/LZCNT/FMA). 
*/ + #ifdef DIFFERENT_PRAGMAS +-#pragma GCC target ("fma4,xop,lwp,bmi,bmi2,tbm,lzcnt,fma,rdseed,prfchw,adx,fxsr,xsaveopt") ++#pragma GCC target ("fma4,xop,lwp,bmi,bmi2,tbm,lzcnt,fma,rdseed,prfchw,adx,fxsr,xsaveopt,pku") + #endif + #include + /* xopintrin.h */ +--- gcc/testsuite/gcc.target/i386/sse-23.c (revision 231943) ++++ gcc/testsuite/gcc.target/i386/sse-23.c (revision 231945) +@@ -183,7 +183,7 @@ + /* rtmintrin.h */ + #define __builtin_ia32_xabort(M) __builtin_ia32_xabort(1) + +-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt") ++#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,pku") + #include + #include + #include +--- gcc/testsuite/gcc.target/i386/rdpku-1.c (revision 0) ++++ gcc/testsuite/gcc.target/i386/rdpku-1.c (revision 231945) +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mpku -O2" } */ ++/* { dg-final { scan-assembler "rdpkru\n" } } */ ++ ++#include ++ ++unsigned extern ++rdpku_test (void) ++{ ++ return _rdpkru_u32 (); ++} +--- gcc/testsuite/gcc.target/i386/wrpku-1.c (revision 0) ++++ gcc/testsuite/gcc.target/i386/wrpku-1.c (revision 231945) +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mpku -O2" } */ ++/* { dg-final { scan-assembler "wrpkru\n" } } */ ++ ++#include ++ ++void extern ++wrpku_test (unsigned int key) ++{ ++ _wrpkru (key); ++} +--- gcc/testsuite/g++.dg/other/i386-2.C (revision 231943) ++++ gcc/testsuite/g++.dg/other/i386-2.C (revision 231945) +@@ -1,9 +1,9 @@ + /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt" } */ ++/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mpku" } */ + + /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, + xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, +- popcntintrin.h, fmaintrin.h and mm_malloc.h.h are usable with ++ popcntintrin.h, fmaintrin.h, pkuintrin.h and mm_malloc.h.h are usable with + -O -pedantic-errors. */ + + #include +--- gcc/testsuite/g++.dg/other/i386-3.C (revision 231943) ++++ gcc/testsuite/g++.dg/other/i386-3.C (revision 231945) +@@ -1,9 +1,9 @@ + /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt" } */ ++/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mpku" } */ + + /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, + xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, +- popcntintrin.h, fmaintrin.h and mm_malloc.h are usable with ++ popcntintrin.h, fmaintrin.h, pkuintrin.h and mm_malloc.h are usable with + -O -fkeep-inline-functions. 
*/ + + #include diff --git a/gcc48-rh1312436.patch b/gcc48-rh1312436.patch new file mode 100644 index 0000000..6bf9313 --- /dev/null +++ b/gcc48-rh1312436.patch @@ -0,0 +1,76 @@ +2015-12-02 Pierre-Marie de Rodat + + * dwarf2out.c (dwar2out_var_location): Enhance pattern matching to get + the SYMBOL_REF they embed. + (gen_subprogram_die): Handle such calls. + +--- gcc/dwarf2out.c (revision 231184) ++++ gcc/dwarf2out.c (revision 231185) +@@ -18051,18 +18051,23 @@ gen_subprogram_die (tree decl, dw_die_re + } + if (mode == VOIDmode || mode == BLKmode) + continue; +- if (XEXP (XEXP (arg, 0), 0) == pc_rtx) ++ /* Get dynamic information about call target only if we ++ have no static information: we cannot generate both ++ DW_AT_abstract_origin and DW_AT_GNU_call_site_target ++ attributes. */ ++ if (ca_loc->symbol_ref == NULL_RTX) + { +- gcc_assert (ca_loc->symbol_ref == NULL_RTX); +- tloc = XEXP (XEXP (arg, 0), 1); +- continue; +- } +- else if (GET_CODE (XEXP (XEXP (arg, 0), 0)) == CLOBBER +- && XEXP (XEXP (XEXP (arg, 0), 0), 0) == pc_rtx) +- { +- gcc_assert (ca_loc->symbol_ref == NULL_RTX); +- tlocc = XEXP (XEXP (arg, 0), 1); +- continue; ++ if (XEXP (XEXP (arg, 0), 0) == pc_rtx) ++ { ++ tloc = XEXP (XEXP (arg, 0), 1); ++ continue; ++ } ++ else if (GET_CODE (XEXP (XEXP (arg, 0), 0)) == CLOBBER ++ && XEXP (XEXP (XEXP (arg, 0), 0), 0) == pc_rtx) ++ { ++ tlocc = XEXP (XEXP (arg, 0), 1); ++ continue; ++ } + } + reg = NULL; + if (REG_P (XEXP (XEXP (arg, 0), 0))) +@@ -20842,15 +20847,27 @@ dwarf2out_var_location (rtx loc_note) + if (!CALL_P (prev)) + prev = XVECEXP (PATTERN (prev), 0, 0); + ca_loc->tail_call_p = SIBLING_CALL_P (prev); ++ ++ /* Look for a SYMBOL_REF in the "prev" instruction. */ + x = get_call_rtx_from (PATTERN (prev)); + if (x) + { +- x = XEXP (XEXP (x, 0), 0); +- if (GET_CODE (x) == SYMBOL_REF +- && SYMBOL_REF_DECL (x) +- && TREE_CODE (SYMBOL_REF_DECL (x)) == FUNCTION_DECL) +- ca_loc->symbol_ref = x; ++ /* Try to get the call symbol, if any. */ ++ if (MEM_P (XEXP (x, 0))) ++ x = XEXP (x, 0); ++ /* First, look for a memory access to a symbol_ref. */ ++ if (GET_CODE (XEXP (x, 0)) == SYMBOL_REF ++ && SYMBOL_REF_DECL (XEXP (x, 0)) ++ && TREE_CODE (SYMBOL_REF_DECL (XEXP (x, 0))) == FUNCTION_DECL) ++ ca_loc->symbol_ref = XEXP (x, 0); ++ /* Otherwise, look at a compile-time known user-level function ++ declaration. */ ++ else if (MEM_P (x) ++ && MEM_EXPR (x) ++ && TREE_CODE (MEM_EXPR (x)) == FUNCTION_DECL) ++ ca_loc->symbol_ref = XEXP (DECL_RTL (MEM_EXPR (x)), 0); + } ++ + ca_loc->block = insn_scope (prev); + if (call_arg_locations) + call_arg_loc_last->next = ca_loc; diff --git a/gcc48-rh1312850.patch b/gcc48-rh1312850.patch new file mode 100644 index 0000000..841627f --- /dev/null +++ b/gcc48-rh1312850.patch @@ -0,0 +1,24 @@ +2016-05-26 Martin Sebor + Jakub Jelinek + + * asan/asan_rtl.cc (InitializeHighMemEnd): Backport part of upstream + r221457 fix and typo fix from r206158. + +--- libsanitizer/asan/asan_rtl.cc.jj 2013-03-04 12:44:18.000000000 +0100 ++++ libsanitizer/asan/asan_rtl.cc 2016-05-26 09:57:10.761973999 +0200 +@@ -308,11 +308,13 @@ static void InitializeHighMemEnd() { + # if defined(__powerpc64__) + // FIXME: + // On PowerPC64 we have two different address space layouts: 44- and 46-bit. +- // We somehow need to figure our which one we are using now and choose ++ // We somehow need to figure out which one we are using now and choose + // one of 0x00000fffffffffffUL and 0x00003fffffffffffUL. 
+ // Note that with 'ulimit -s unlimited' the stack is moved away from the top + // of the address space, so simply checking the stack address is not enough. +- kHighMemEnd = (1ULL << 44) - 1; // 0x00000fffffffffffUL ++ // This should (does) work for both PowerPC64 Endian modes. ++ kHighMemEnd = ++ (1ULL << (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1)) - 1; + # else + kHighMemEnd = (1ULL << 47) - 1; // 0x00007fffffffffffUL; + # endif diff --git a/gcc48-rh1344807.patch b/gcc48-rh1344807.patch new file mode 100644 index 0000000..130f558 --- /dev/null +++ b/gcc48-rh1344807.patch @@ -0,0 +1,37 @@ +2016-06-14 Jason Merrill + + * call.c (add_function_candidate): Be more careful about + ref-to-ptr conversion. + +2016-06-15 Jakub Jelinek + + * g++.dg/cpp0x/ref-qual17.C: New test. + +--- gcc/cp/call.c.jj 2014-08-06 10:45:03.260163142 +0200 ++++ gcc/cp/call.c 2016-06-15 11:15:06.663878423 +0200 +@@ -1975,7 +1975,9 @@ add_function_candidate (struct z_candida + bool rv = FUNCTION_RVALUE_QUALIFIED (TREE_TYPE (fn)); + parmtype = cp_build_reference_type (parmtype, rv); + if (TREE_CODE (arg) == CONVERT_EXPR +- && TYPE_PTR_P (TREE_TYPE (arg))) ++ && TYPE_PTR_P (TREE_TYPE (arg)) ++ && (TREE_CODE (TREE_TYPE (TREE_OPERAND (arg, 0))) ++ == REFERENCE_TYPE)) + /* Strip conversion from reference to pointer. */ + arg = TREE_OPERAND (arg, 0); + arg = build_fold_indirect_ref (arg); +--- gcc/testsuite/g++.dg/cpp0x/ref-qual17.C.jj 2016-06-15 11:12:57.692558903 +0200 ++++ gcc/testsuite/g++.dg/cpp0x/ref-qual17.C 2016-06-15 11:07:02.000000000 +0200 +@@ -0,0 +1,12 @@ ++// { dg-do compile { target c++11 } } ++ ++struct A ++{ ++ void foo () &; ++}; ++ ++void ++bar (__UINTPTR_TYPE__ a) ++{ ++ reinterpret_cast(a)->foo (); ++} diff --git a/gcc48-rh1369183.patch b/gcc48-rh1369183.patch new file mode 100644 index 0000000..7127d15 --- /dev/null +++ b/gcc48-rh1369183.patch @@ -0,0 +1,52 @@ +2017-03-07 Jakub Jelinek + + Partial backport + 2016-05-07 Fritz Reese + + PR fortran/56226 + * interface.c (gfc_compare_derived_types): Don't ICE if the + derived type or both types have no components. + + * gfortran.dg/rh1369183.f90: New test. + +--- gcc/fortran/interface.c.jj 2015-06-18 16:32:45.000000000 +0200 ++++ gcc/fortran/interface.c 2017-03-07 18:35:38.982302826 +0100 +@@ -418,6 +418,13 @@ gfc_compare_derived_types (gfc_symbol *d + && !(derived1->attr.is_bind_c && derived2->attr.is_bind_c)) + return 0; + ++ /* Protect against null components. */ ++ if (derived1->attr.zero_comp != derived2->attr.zero_comp) ++ return 0; ++ ++ if (derived1->attr.zero_comp) ++ return 1; ++ + dt1 = derived1->components; + dt2 = derived2->components; + +--- gcc/testsuite/gfortran.dg/rh1369183.f90.jj 2017-03-07 18:37:39.574775432 +0100 ++++ gcc/testsuite/gfortran.dg/rh1369183.f90 2017-03-07 18:38:38.423993194 +0100 +@@ -0,0 +1,22 @@ ++! { dg-do compile } ++ ++module mod1369183 ++ implicit none ++ contains ++ subroutine sub(test) ++ type test_t ++ sequence ++ integer(4) type ++ end type test_t ++ type(test_t),intent(inout) :: test ++ end subroutine sub ++end module mod1369183 ++subroutine rh1369183 ++ use mod1369183 ++ implicit none ++ type test_t ++ sequence ++ end type test_t ++ type(test_t) :: tst ++ call sub(tst) ! 
{ dg-error "Type mismatch in argument" } ++end subroutine rh1369183 diff --git a/gcc48-rh1402585.patch b/gcc48-rh1402585.patch new file mode 100644 index 0000000..3aa1cae --- /dev/null +++ b/gcc48-rh1402585.patch @@ -0,0 +1,32 @@ +--- gcc/reload.c (revision 235552) ++++ gcc/reload.c (working copy) +@@ -4054,14 +4054,14 @@ find_reloads (rtx insn, int replace, int + &XEXP (recog_data.operand[i], 0), (rtx*) 0, + base_reg_class (VOIDmode, as, MEM, SCRATCH), + address_mode, +- VOIDmode, 0, 0, i, RELOAD_FOR_INPUT); ++ VOIDmode, 0, 0, i, RELOAD_OTHER); + rld[operand_reloadnum[i]].inc + = GET_MODE_SIZE (GET_MODE (recog_data.operand[i])); + + /* If this operand is an output, we will have made any + reloads for its address as RELOAD_FOR_OUTPUT_ADDRESS, but + now we are treating part of the operand as an input, so +- we must change these to RELOAD_FOR_INPUT_ADDRESS. */ ++ we must change these to RELOAD_FOR_OTHER_ADDRESS. */ + + if (modified[i] == RELOAD_WRITE) + { +@@ -4070,10 +4070,10 @@ find_reloads (rtx insn, int replace, int + if (rld[j].opnum == i) + { + if (rld[j].when_needed == RELOAD_FOR_OUTPUT_ADDRESS) +- rld[j].when_needed = RELOAD_FOR_INPUT_ADDRESS; ++ rld[j].when_needed = RELOAD_FOR_OTHER_ADDRESS; + else if (rld[j].when_needed + == RELOAD_FOR_OUTADDR_ADDRESS) +- rld[j].when_needed = RELOAD_FOR_INPADDR_ADDRESS; ++ rld[j].when_needed = RELOAD_FOR_OTHER_ADDRESS; + } + } + } diff --git a/gcc48-rh1457969.patch b/gcc48-rh1457969.patch new file mode 100644 index 0000000..84178fe --- /dev/null +++ b/gcc48-rh1457969.patch @@ -0,0 +1,16 @@ +2014-01-15 Pat Haugen + + * config/rs6000/rs6000.c (rs6000_output_function_prologue): Check if + current procedure should be profiled. + +--- gcc/config/rs6000/rs6000.c ++++ gcc/config/rs6000/rs6000.c +@@ -23198,7 +23198,7 @@ rs6000_output_function_prologue (FILE *file, + /* Output -mprofile-kernel code. This needs to be done here instead of + in output_function_profile since it must go after the ELFv2 ABI + local entry point. */ +- if (TARGET_PROFILE_KERNEL) ++ if (TARGET_PROFILE_KERNEL && crtl->profile) + { + gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2); + gcc_assert (!TARGET_32BIT); diff --git a/gcc48-rh1468546.patch b/gcc48-rh1468546.patch new file mode 100644 index 0000000..2e8e616 --- /dev/null +++ b/gcc48-rh1468546.patch @@ -0,0 +1,163 @@ +2014-05-05 Andreas Krebbel + + * target.def: Add new target hook. + * doc/tm.texi: Regenerate. + * targhooks.h (default_keep_leaf_when_profiled): Add prototype. + * targhooks.c (default_keep_leaf_when_profiled): New function. + +2015-04-23 Anton Blanchard + + * config/rs6000/rs6000.c (rs6000_output_function_prologue): No + need for -mprofile-kernel to save LR to stack. + +2016-01-21 Anton Blanchard + Bill Schmidt + + PR target/63354 + * config/rs6000/linux64.h (TARGET_KEEP_LEAF_WHEN_PROFILED): New + #define. + * config/rs6000/rs6000.c (rs6000_keep_leaf_when_profiled): New + function. + + * gcc.target/powerpc/pr63354.c: New test. + +--- gcc/doc/tm.texi ++++ gcc/doc/tm.texi +@@ -4953,6 +4953,10 @@ Define this macro if the code for function profiling should come before + the function prologue. Normally, the profiling code comes after. + @end defmac + ++@deftypefn {Target Hook} bool TARGET_KEEP_LEAF_WHEN_PROFILED (void) ++This target hook returns true if the target wants the leaf flag for the current function to stay true even if it calls mcount. 
This might make sense for targets using the leaf flag only to determine whether a stack frame needs to be generated or not and for which the call to mcount is generated before the function prologue. ++@end deftypefn ++ + @node Tail Calls + @subsection Permitting tail calls + @cindex tail calls +--- gcc/doc/tm.texi.in ++++ gcc/doc/tm.texi.in +@@ -3963,6 +3963,8 @@ Define this macro if the code for function profiling should come before + the function prologue. Normally, the profiling code comes after. + @end defmac + ++@hook TARGET_KEEP_LEAF_WHEN_PROFILED ++ + @node Tail Calls + @subsection Permitting tail calls + @cindex tail calls +--- gcc/final.c ++++ gcc/final.c +@@ -4241,7 +4241,9 @@ leaf_function_p (void) + { + rtx insn; + +- if (crtl->profile || profile_arc_flag) ++ /* Some back-ends (e.g. s390) want leaf functions to stay leaf ++ functions even if they call mcount. */ ++ if (crtl->profile && !targetm.keep_leaf_when_profiled ()) + return 0; + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) +--- gcc/target.def ++++ gcc/target.def +@@ -2658,6 +2658,18 @@ The default version of this hook use the target macro\n\ + bool, (void), + default_profile_before_prologue) + ++/* Return true if a leaf function should stay leaf even with profiling ++ enabled. */ ++DEFHOOK ++(keep_leaf_when_profiled, ++ "This target hook returns true if the target wants the leaf flag for\ ++ the current function to stay true even if it calls mcount. This might\ ++ make sense for targets using the leaf flag only to determine whether a\ ++ stack frame needs to be generated or not and for which the call to\ ++ mcount is generated before the function prologue.", ++ bool, (void), ++ default_keep_leaf_when_profiled) ++ + /* Modify and return the identifier of a DECL's external name, + originally identified by ID, as required by the target, + (eg, append @nn to windows32 stdcall function names). +--- gcc/targhooks.c ++++ gcc/targhooks.c +@@ -1447,6 +1447,15 @@ default_get_reg_raw_mode (int regno) + return reg_raw_mode[regno]; + } + ++/* Return true if a leaf function should stay leaf even with profiling ++ enabled. */ ++ ++bool ++default_keep_leaf_when_profiled () ++{ ++ return false; ++} ++ + /* Return true if the state of option OPTION should be stored in PCH files + and checked by default_pch_valid_p. Store the option's current state + in STATE if so. */ +--- gcc/targhooks.h ++++ gcc/targhooks.h +@@ -188,6 +188,7 @@ extern section * default_function_sectio + bool startup, bool exit); + extern enum machine_mode default_dwarf_frame_reg_mode (int); + extern enum machine_mode default_get_reg_raw_mode(int); ++extern bool default_keep_leaf_when_profiled (); + + extern void *default_get_pch_validity (size_t *); + extern const char *default_pch_valid_p (const void *, size_t); +--- gcc/config/rs6000/rs6000.c ++++ gcc/config/rs6000/rs6000.c +@@ -24433,7 +24433,6 @@ rs6000_output_function_prologue (FILE *file, + gcc_assert (!TARGET_32BIT); + + asm_fprintf (file, "\tmflr %s\n", reg_names[0]); +- asm_fprintf (file, "\tstd %s,16(%s)\n", reg_names[0], reg_names[1]); + + /* In the ELFv2 ABI we have no compiler stack word. 
It must be + the resposibility of _mcount to preserve the static chain +--- gcc/config/rs6000/linux64.h ++++ gcc/config/rs6000/linux64.h +@@ -59,6 +59,9 @@ extern int dot_symbols; + + #define TARGET_PROFILE_KERNEL profile_kernel + ++#undef TARGET_KEEP_LEAF_WHEN_PROFILED ++#define TARGET_KEEP_LEAF_WHEN_PROFILED rs6000_keep_leaf_when_profiled ++ + #define TARGET_USES_LINUX64_OPT 1 + #ifdef HAVE_LD_LARGE_TOC + #undef TARGET_CMODEL +--- gcc/config/rs6000/rs6000.c ++++ gcc/config/rs6000/rs6000.c +@@ -26237,6 +26237,14 @@ rs6000_output_function_prologue (FILE *file, + rs6000_pic_labelno++; + } + ++/* -mprofile-kernel code calls mcount before the function prolog, ++ so a profiled leaf function should stay a leaf function. */ ++static bool ++rs6000_keep_leaf_when_profiled () ++{ ++ return TARGET_PROFILE_KERNEL; ++} ++ + /* Non-zero if vmx regs are restored before the frame pop, zero if + we restore after the pop when possible. */ + #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0 +--- /dev/null ++++ gcc/testsuite/gcc.target/powerpc/pr63354.c +@@ -0,0 +1,12 @@ ++/* Verify that we don't stack a frame for leaf functions when using ++ -pg -mprofile-kernel. */ ++ ++/* { dg-do compile { target { powerpc64*-*-* } } } */ ++/* { dg-options "-O2 -pg -mprofile-kernel" } */ ++/* { dg-require-effective-target lp64 } */ ++/* { dg-final { scan-assembler-not "mtlr" } } */ ++ ++int foo(void) ++{ ++ return 1; ++} diff --git a/gcc48-rh1469384.patch b/gcc48-rh1469384.patch new file mode 100644 index 0000000..c6b9ad5 --- /dev/null +++ b/gcc48-rh1469384.patch @@ -0,0 +1,223 @@ +2017-07-25 Jonathan Wakely + + PR libstdc++/53984 + * include/bits/basic_ios.h (basic_ios::_M_setstate): Adjust comment. + * include/bits/istream.tcc (basic_istream::sentry): Handle exceptions + during construction. + * include/std/istream: Adjust comments for formatted input functions + and unformatted input functions. + * testsuite/27_io/basic_fstream/53984.cc: New. + * testsuite/27_io/basic_istream/sentry/char/53984.cc: New. + +--- libstdc++-v3/include/bits/basic_ios.h ++++ libstdc++-v3/include/bits/basic_ios.h +@@ -157,8 +157,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION + setstate(iostate __state) + { this->clear(this->rdstate() | __state); } + +- // Flip the internal state on for the proper state bits, then re +- // throws the propagated exception if bit also set in ++ // Flip the internal state on for the proper state bits, then ++ // rethrows the propagated exception if bit also set in + // exceptions(). 
+ void + _M_setstate(iostate __state) +--- libstdc++-v3/include/bits/istream.tcc ++++ libstdc++-v3/include/bits/istream.tcc +@@ -48,28 +48,36 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION + { + ios_base::iostate __err = ios_base::goodbit; + if (__in.good()) +- { +- if (__in.tie()) +- __in.tie()->flush(); +- if (!__noskip && bool(__in.flags() & ios_base::skipws)) +- { +- const __int_type __eof = traits_type::eof(); +- __streambuf_type* __sb = __in.rdbuf(); +- __int_type __c = __sb->sgetc(); +- +- const __ctype_type& __ct = __check_facet(__in._M_ctype); +- while (!traits_type::eq_int_type(__c, __eof) +- && __ct.is(ctype_base::space, +- traits_type::to_char_type(__c))) +- __c = __sb->snextc(); ++ __try ++ { ++ if (__in.tie()) ++ __in.tie()->flush(); ++ if (!__noskip && bool(__in.flags() & ios_base::skipws)) ++ { ++ const __int_type __eof = traits_type::eof(); ++ __streambuf_type* __sb = __in.rdbuf(); ++ __int_type __c = __sb->sgetc(); ++ ++ const __ctype_type& __ct = __check_facet(__in._M_ctype); ++ while (!traits_type::eq_int_type(__c, __eof) ++ && __ct.is(ctype_base::space, ++ traits_type::to_char_type(__c))) ++ __c = __sb->snextc(); + +- // _GLIBCXX_RESOLVE_LIB_DEFECTS +- // 195. Should basic_istream::sentry's constructor ever +- // set eofbit? +- if (traits_type::eq_int_type(__c, __eof)) +- __err |= ios_base::eofbit; +- } +- } ++ // _GLIBCXX_RESOLVE_LIB_DEFECTS ++ // 195. Should basic_istream::sentry's constructor ever ++ // set eofbit? ++ if (traits_type::eq_int_type(__c, __eof)) ++ __err |= ios_base::eofbit; ++ } ++ } ++ __catch(__cxxabiv1::__forced_unwind&) ++ { ++ __in._M_setstate(ios_base::badbit); ++ __throw_exception_again; ++ } ++ __catch(...) ++ { __in._M_setstate(ios_base::badbit); } + + if (__in.good() && __err == ios_base::goodbit) + _M_ok = true; +--- libstdc++-v3/include/std/istream ++++ libstdc++-v3/include/std/istream +@@ -150,9 +150,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION + * whatever data is appropriate for the type of the argument. + * + * If an exception is thrown during extraction, ios_base::badbit +- * will be turned on in the stream's error state without causing an +- * ios_base::failure to be thrown. The original exception will then +- * be rethrown. ++ * will be turned on in the stream's error state (without causing an ++ * ios_base::failure to be thrown) and the original exception will ++ * be rethrown if badbit is set in the exceptions mask. + */ + + //@{ +@@ -286,9 +286,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION + * by gcount(). + * + * If an exception is thrown during extraction, ios_base::badbit +- * will be turned on in the stream's error state without causing an +- * ios_base::failure to be thrown. The original exception will then +- * be rethrown. ++ * will be turned on in the stream's error state (without causing an ++ * ios_base::failure to be thrown) and the original exception will ++ * be rethrown if badbit is set in the exceptions mask. + */ + + /** +--- /dev/null ++++ libstdc++-v3/testsuite/27_io/basic_fstream/53984.cc +@@ -0,0 +1,64 @@ ++// Copyright (C) 2017 Free Software Foundation, Inc. ++// ++// This file is part of the GNU ISO C++ Library. This library is free ++// software; you can redistribute it and/or modify it under the ++// terms of the GNU General Public License as published by the ++// Free Software Foundation; either version 3, or (at your option) ++// any later version. 
++ ++// This library is distributed in the hope that it will be useful, ++// but WITHOUT ANY WARRANTY; without even the implied warranty of ++// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++// GNU General Public License for more details. ++ ++// You should have received a copy of the GNU General Public License along ++// with this library; see the file COPYING3. If not see ++// . ++ ++// { dg-require-fileio "" } ++ ++// PR libstdc++/53984 ++ ++#include ++#include ++ ++void ++test01() ++{ ++ std::ifstream in("."); ++ if (in) ++ { ++ char c; ++ if (in.get(c)) ++ { ++ // Reading a directory doesn't produce an error on this target ++ // so the formatted input functions below wouldn't fail anyway ++ // (see PR libstdc++/81808). ++ return; ++ } ++ int x; ++ in.clear(); ++ // Formatted input function should set badbit, but not throw: ++ in >> x; ++ VERIFY( in.bad() ); ++ ++ in.clear(); ++ in.exceptions(std::ios::badbit); ++ try ++ { ++ // Formatted input function should set badbit, and throw: ++ in >> x; ++ VERIFY( false ); ++ } ++ catch (const std::exception&) ++ { ++ VERIFY( in.bad() ); ++ } ++ } ++} ++ ++int ++main() ++{ ++ test01(); ++} +--- /dev/null ++++ libstdc++-v3/testsuite/27_io/basic_istream/sentry/char/53984.cc +@@ -0,0 +1,41 @@ ++// Copyright (C) 2017 Free Software Foundation, Inc. ++// ++// This file is part of the GNU ISO C++ Library. This library is free ++// software; you can redistribute it and/or modify it under the ++// terms of the GNU General Public License as published by the ++// Free Software Foundation; either version 3, or (at your option) ++// any later version. ++ ++// This library is distributed in the hope that it will be useful, ++// but WITHOUT ANY WARRANTY; without even the implied warranty of ++// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++// GNU General Public License for more details. ++ ++// You should have received a copy of the GNU General Public License along ++// with this library; see the file COPYING3. If not see ++// . ++ ++#include ++#include ++#include ++ ++struct SB : std::streambuf ++{ ++ virtual int_type underflow() { throw 1; } ++}; ++ ++void ++test01() ++{ ++ SB sb; ++ std::istream is(&sb); ++ int i; ++ is >> i; ++ VERIFY( is.bad() ); ++} ++ ++int ++main() ++{ ++ test01(); ++} diff --git a/gcc48-rh1469697-1.patch b/gcc48-rh1469697-1.patch new file mode 100644 index 0000000..99a0a47 --- /dev/null +++ b/gcc48-rh1469697-1.patch @@ -0,0 +1,60 @@ +2015-10-02 Uros Bizjak + + * system.h (ROUND_UP): New macro definition. + (ROUND_DOWN): Ditto. + * ggc-page.c (ROUND_UP): Remove local macro definition. + (PAGE_ALIGN): Implement using ROUND_UP macro. + +2013-08-24 Marc Glisse + + PR other/57324 + * hwint.h (HOST_WIDE_INT_UC, HOST_WIDE_INT_1U, HOST_WIDE_INT_M1, + HOST_WIDE_INT_M1U): New macros. + + +diff --git a/gcc/ggc-page.c b/gcc/ggc-page.c +index 5b18468439d..4fb41b1112b 100644 +--- a/gcc/ggc-page.c ++++ b/gcc/ggc-page.c +@@ -216,10 +216,6 @@ static const size_t extra_order_size_table[] = { + + #define ROUND_UP_VALUE(x, f) ((f) - 1 - ((f) - 1 + (x)) % (f)) + +-/* Compute the smallest multiple of F that is >= X. */ +- +-#define ROUND_UP(x, f) (CEIL (x, f) * (f)) +- + /* Round X to next multiple of the page size */ + + #define PAGE_ALIGN(x) (((x) + G.pagesize - 1) & ~(G.pagesize - 1)) +diff --git a/gcc/hwint.h b/gcc/hwint.h +index da62fadcc9e..64b1805345d 100644 +--- a/gcc/hwint.h ++++ b/gcc/hwint.h +@@ -76,7 +76,9 @@ extern char sizeof_long_long_must_be_8[sizeof(long long) == 8 ? 
1 : -1]; + # endif + #endif + ++#define HOST_WIDE_INT_UC(X) HOST_WIDE_INT_C (X ## U) + #define HOST_WIDE_INT_1 HOST_WIDE_INT_C(1) ++#define HOST_WIDE_INT_1U HOST_WIDE_INT_UC(1) + + /* This is a magic identifier which allows GCC to figure out the type + of HOST_WIDE_INT for %wd specifier checks. You must issue this +diff --git a/gcc/system.h b/gcc/system.h +index 41cd565538a..8230d506fc3 100644 +--- a/gcc/system.h ++++ b/gcc/system.h +@@ -348,6 +348,12 @@ extern int errno; + /* Returns the least number N such that N * Y >= X. */ + #define CEIL(x,y) (((x) + (y) - 1) / (y)) + ++/* This macro rounds x up to the y boundary. */ ++#define ROUND_UP(x,y) (((x) + (y) - 1) & ~((y) - 1)) ++ ++/* This macro rounds x down to the y boundary. */ ++#define ROUND_DOWN(x,y) ((x) & ~((y) - 1)) ++ + #ifdef HAVE_SYS_WAIT_H + #include + #endif diff --git a/gcc48-rh1469697-10.patch b/gcc48-rh1469697-10.patch new file mode 100644 index 0000000..df9ae9f --- /dev/null +++ b/gcc48-rh1469697-10.patch @@ -0,0 +1,52 @@ +commit c22c3dee4bbf4a99b234307c63e4845052a15890 +Author: law +Date: Thu Sep 21 22:03:59 2017 +0000 + + * config/i386/i386.c (ix86_adjust_stack_and_probe_stack_clash): + Fix dump output if the only stack space is for pushed registers. + + * lib/target-supports.exp + (check_effective_target_frame_pointer_for_non_leaf): Add + case for x86 Solaris. + + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@253082 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +index a9072f58f50..d8a225195ae 100644 +--- a/gcc/config/i386/i386.c ++++ b/gcc/config/i386/i386.c +@@ -9856,7 +9856,16 @@ ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size) + no probes are needed. */ + if (!size) + { +- dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false); ++ struct ix86_frame frame; ++ ix86_compute_frame_layout (&frame); ++ ++ /* However, the allocation of space via pushes for register ++ saves could be viewed as allocating space, but without the ++ need to probe. */ ++ if (frame.nregs || frame.nsseregs || frame_pointer_needed) ++ dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); ++ else ++ dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false); + return; + } + +diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp +index f24c5c6e0ac..7c126e4122b 100644 +--- a/gcc/testsuite/lib/target-supports.exp ++++ b/gcc/testsuite/lib/target-supports.exp +@@ -5439,6 +5439,12 @@ proc check_effective_target_frame_pointer_for_non_leaf { } { + if { [istarget aarch*-*-*] } { + return 1 + } ++ ++ # Solaris/x86 defaults to -fno-omit-frame-pointer. ++ if { [istarget i?86-*-solaris*] || [istarget x86_64-*-solaris*] } { ++ return 1 ++ } ++ + return 0 + } + diff --git a/gcc48-rh1469697-11.patch b/gcc48-rh1469697-11.patch new file mode 100644 index 0000000..16332f1 --- /dev/null +++ b/gcc48-rh1469697-11.patch @@ -0,0 +1,573 @@ +commit 27d2a2d27f3e0060ade9a1a82ce2292aad6c6931 +Author: law +Date: Mon Sep 25 23:13:55 2017 +0000 + + * config/rs6000/rs6000-protos.h (output_probe_stack_range): Update + prototype for new argument. + * config/rs6000/rs6000.c (rs6000_emit_allocate_stack_1): New function, + mostly extracted from rs6000_emit_allocate_stack. + (rs6000_emit_probe_stack_range_stack_clash): New function. + (rs6000_emit_allocate_stack): Call + rs6000_emit_probe_stack_range_stack_clash as needed. + (rs6000_emit_probe_stack_range): Add additional argument + to call to gen_probe_stack_range{si,di}. + (output_probe_stack_range): New. 
+ (output_probe_stack_range_1): Renamed from output_probe_stack_range. + (output_probe_stack_range_stack_clash): New. + (rs6000_emit_prologue): Emit notes into dump file as requested. + * rs6000.md (allocate_stack): Handle -fstack-clash-protection. + (probe_stack_range): Operand 0 is now early-clobbered. + Add additional operand and pass it to output_probe_stack_range. + + * lib/target-supports.exp + (check_effective_target_supports_stack_clash_protection): Enable for + rs6000 and powerpc targets. + + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@253179 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h +index d4b93d9970d..cfb23ab80cc 100644 +--- a/gcc/config/rs6000/rs6000-protos.h ++++ b/gcc/config/rs6000/rs6000-protos.h +@@ -114,7 +114,7 @@ extern void rs6000_emit_sCOND (enum machine_mode, rtx[]); + extern void rs6000_emit_cbranch (enum machine_mode, rtx[]); + extern char * output_cbranch (rtx, const char *, int, rtx); + extern char * output_e500_flip_gt_bit (rtx, rtx); +-extern const char * output_probe_stack_range (rtx, rtx); ++extern const char * output_probe_stack_range (rtx, rtx, rtx); + extern rtx rs6000_emit_set_const (rtx, enum machine_mode, rtx, int); + extern int rs6000_emit_cmove (rtx, rtx, rtx, rtx); + extern int rs6000_emit_vector_cond_expr (rtx, rtx, rtx, rtx, rtx, rtx); +diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c +index a9052c6becf..c5d9988c1d9 100644 +--- a/gcc/config/rs6000/rs6000.c ++++ b/gcc/config/rs6000/rs6000.c +@@ -22320,6 +22320,220 @@ rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed) + emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p))); + } + ++/* Allocate SIZE_INT bytes on the stack using a store with update style insn ++ and set the appropriate attributes for the generated insn. Return the ++ first insn which adjusts the stack pointer or the last insn before ++ the stack adjustment loop. ++ ++ SIZE_INT is used to create the CFI note for the allocation. ++ ++ SIZE_RTX is an rtx containing the size of the adjustment. Note that ++ since stacks grow to lower addresses its runtime value is -SIZE_INT. ++ ++ ORIG_SP contains the backchain value that must be stored at *sp. */ ++ ++static rtx ++rs6000_emit_allocate_stack_1 (HOST_WIDE_INT size_int, rtx orig_sp) ++{ ++ rtx insn; ++ ++ rtx size_rtx = GEN_INT (-size_int); ++ if (size_int > 32767) ++ { ++ rtx tmp_reg = gen_rtx_REG (Pmode, 0); ++ /* Need a note here so that try_split doesn't get confused. 
*/ ++ if (get_last_insn () == NULL_RTX) ++ emit_note (NOTE_INSN_DELETED); ++ insn = emit_move_insn (tmp_reg, size_rtx); ++ try_split (PATTERN (insn), insn, 0); ++ size_rtx = tmp_reg; ++ } ++ ++ if (Pmode == SImode) ++ insn = emit_insn (gen_movsi_update_stack (stack_pointer_rtx, ++ stack_pointer_rtx, ++ size_rtx, ++ orig_sp)); ++ else ++ insn = emit_insn (gen_movdi_di_update_stack (stack_pointer_rtx, ++ stack_pointer_rtx, ++ size_rtx, ++ orig_sp)); ++ rtx par = PATTERN (insn); ++ gcc_assert (GET_CODE (par) == PARALLEL); ++ rtx set = XVECEXP (par, 0, 0); ++ gcc_assert (GET_CODE (set) == SET); ++ rtx mem = SET_DEST (set); ++ gcc_assert (MEM_P (mem)); ++ MEM_NOTRAP_P (mem) = 1; ++ set_mem_alias_set (mem, get_frame_alias_set ()); ++ ++ RTX_FRAME_RELATED_P (insn) = 1; ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, ++ gen_rtx_SET (VOIDmode, stack_pointer_rtx, ++ gen_rtx_PLUS (Pmode, ++ stack_pointer_rtx, ++ GEN_INT (-size_int)))); ++ ++ /* Emit a blockage to ensure the allocation/probing insns are ++ not optimized, combined, removed, etc. Add REG_STACK_CHECK ++ note for similar reasons. */ ++ if (flag_stack_clash_protection) ++ { ++ add_reg_note (insn, REG_STACK_CHECK, const0_rtx); ++ emit_insn (gen_blockage ()); ++ } ++ ++ return insn; ++} ++ ++static HOST_WIDE_INT ++get_stack_clash_protection_probe_interval (void) ++{ ++ return (HOST_WIDE_INT_1U ++ << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL)); ++} ++ ++static HOST_WIDE_INT ++get_stack_clash_protection_guard_size (void) ++{ ++ return (HOST_WIDE_INT_1U ++ << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE)); ++} ++ ++/* Allocate ORIG_SIZE bytes on the stack and probe the newly ++ allocated space every STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes. ++ ++ COPY_REG, if non-null, should contain a copy of the original ++ stack pointer at exit from this function. ++ ++ This is subtly different than the Ada probing in that it tries hard to ++ prevent attacks that jump the stack guard. Thus it is never allowed to ++ allocate more than STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes of stack ++ space without a suitable probe. */ ++static rtx ++rs6000_emit_probe_stack_range_stack_clash (HOST_WIDE_INT orig_size, ++ rtx copy_reg) ++{ ++ rtx orig_sp = copy_reg; ++ ++ HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval (); ++ ++ /* Round the size down to a multiple of PROBE_INTERVAL. */ ++ HOST_WIDE_INT rounded_size = ROUND_DOWN (orig_size, probe_interval); ++ ++ /* If explicitly requested, ++ or the rounded size is not the same as the original size ++ or the rounded size is greater than a page, ++ then we will need a copy of the original stack pointer. */ ++ if (rounded_size != orig_size ++ || rounded_size > probe_interval ++ || copy_reg) ++ { ++ /* If the caller did not request a copy of the incoming stack ++ pointer, then we use r0 to hold the copy. */ ++ if (!copy_reg) ++ orig_sp = gen_rtx_REG (Pmode, 0); ++ emit_move_insn (orig_sp, stack_pointer_rtx); ++ } ++ ++ /* There are three cases here. ++ ++ One is a single probe which is the most common and most efficiently ++ implemented as it does not have to have a copy of the original ++ stack pointer if there are no residuals. ++ ++ Second is unrolled allocation/probes which we use if there are just ++ a few of them. It needs to save the original stack pointer into a ++ temporary for use as a source register in the allocation/probe. ++ ++ Last is a loop. This is the most uncommon case and least efficient.
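++ For example, with a 4kB probe interval the unrolled form below covers ++ frames of up to eight intervals (32kB); larger frames use the loop.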
*/ ++ rtx retval = NULL; ++ if (rounded_size == probe_interval) ++ { ++ retval = rs6000_emit_allocate_stack_1 (probe_interval, stack_pointer_rtx); ++ ++ dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size); ++ } ++ else if (rounded_size <= 8 * probe_interval) ++ { ++ /* The ABI requires using the store with update insns to allocate ++ space and store the backchain into the stack ++ ++ So we save the current stack pointer into a temporary, then ++ emit the store-with-update insns to store the saved stack pointer ++ into the right location in each new page. */ ++ for (int i = 0; i < rounded_size; i += probe_interval) ++ { ++ rtx insn = rs6000_emit_allocate_stack_1 (probe_interval, orig_sp); ++ ++ /* Save the first stack adjustment in RETVAL. */ ++ if (i == 0) ++ retval = insn; ++ } ++ ++ dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size); ++ } ++ else ++ { ++ /* Compute the ending address. */ ++ rtx end_addr ++ = copy_reg ? gen_rtx_REG (Pmode, 0) : gen_rtx_REG (Pmode, 12); ++ rtx rs = GEN_INT (-rounded_size); ++ rtx insn; ++ if (add_operand (rs, Pmode)) ++ insn = emit_insn (gen_add3_insn (end_addr, stack_pointer_rtx, rs)); ++ else ++ { ++ emit_move_insn (end_addr, GEN_INT (-rounded_size)); ++ insn = emit_insn (gen_add3_insn (end_addr, end_addr, ++ stack_pointer_rtx)); ++ /* Describe the effect of INSN to the CFI engine. */ ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, ++ gen_rtx_SET (VOIDmode, end_addr, ++ gen_rtx_PLUS (Pmode, stack_pointer_rtx, ++ rs))); ++ } ++ RTX_FRAME_RELATED_P (insn) = 1; ++ ++ /* Emit the loop. */ ++ if (TARGET_64BIT) ++ retval = emit_insn (gen_probe_stack_rangedi (stack_pointer_rtx, ++ stack_pointer_rtx, orig_sp, ++ end_addr)); ++ else ++ retval = emit_insn (gen_probe_stack_rangesi (stack_pointer_rtx, ++ stack_pointer_rtx, orig_sp, ++ end_addr)); ++ RTX_FRAME_RELATED_P (retval) = 1; ++ /* Describe the effect of INSN to the CFI engine. */ ++ add_reg_note (retval, REG_FRAME_RELATED_EXPR, ++ gen_rtx_SET (VOIDmode, stack_pointer_rtx, end_addr)); ++ ++ /* Emit a blockage to ensure the allocation/probing insns are ++ not optimized, combined, removed, etc. Other cases handle this ++ within their call to rs6000_emit_allocate_stack_1. */ ++ emit_insn (gen_blockage ()); ++ ++ dump_stack_clash_frame_info (PROBE_LOOP, rounded_size != orig_size); ++ } ++ ++ if (orig_size != rounded_size) ++ { ++ /* Allocate (and implicitly probe) any residual space. */ ++ HOST_WIDE_INT residual = orig_size - rounded_size; ++ ++ rtx insn = rs6000_emit_allocate_stack_1 (residual, orig_sp); ++ ++ /* If the residual was the only allocation, then we can return the ++ allocating insn. */ ++ if (!retval) ++ retval = insn; ++ } ++ ++ return retval; ++} ++ + /* Emit the correct code for allocating stack space, as insns. + If COPY_REG, make sure a copy of the old frame is left there. + The generated code may use hard register 0 as a temporary. 
*/ +@@ -22331,7 +22545,6 @@ rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off) + rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); + rtx tmp_reg = gen_rtx_REG (Pmode, 0); + rtx todec = gen_int_mode (-size, Pmode); +- rtx par, set, mem; + + if (INTVAL (todec) != -size) + { +@@ -22368,6 +22581,22 @@ rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off) + warning (0, "stack limit expression is not supported"); + } + ++ if (flag_stack_clash_protection) ++ { ++ if (size < get_stack_clash_protection_guard_size ()) ++ dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); ++ else ++ { ++ rtx insn = rs6000_emit_probe_stack_range_stack_clash (size, copy_reg); ++ ++ /* If we asked for a copy with an offset, then we still need add in ++ the offset. */ ++ if (copy_reg && copy_off) ++ emit_insn (gen_add3_insn (copy_reg, copy_reg, GEN_INT (copy_off))); ++ return; ++ } ++ } ++ + if (copy_reg) + { + if (copy_off != 0) +@@ -22376,39 +22605,12 @@ rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off) + emit_move_insn (copy_reg, stack_reg); + } + +- if (size > 32767) +- { +- /* Need a note here so that try_split doesn't get confused. */ +- if (get_last_insn () == NULL_RTX) +- emit_note (NOTE_INSN_DELETED); +- insn = emit_move_insn (tmp_reg, todec); +- try_split (PATTERN (insn), insn, 0); +- todec = tmp_reg; +- } +- +- insn = emit_insn (TARGET_32BIT +- ? gen_movsi_update_stack (stack_reg, stack_reg, +- todec, stack_reg) +- : gen_movdi_di_update_stack (stack_reg, stack_reg, +- todec, stack_reg)); + /* Since we didn't use gen_frame_mem to generate the MEM, grab + it now and set the alias set/attributes. The above gen_*_update + calls will generate a PARALLEL with the MEM set being the first + operation. */ +- par = PATTERN (insn); +- gcc_assert (GET_CODE (par) == PARALLEL); +- set = XVECEXP (par, 0, 0); +- gcc_assert (GET_CODE (set) == SET); +- mem = SET_DEST (set); +- gcc_assert (MEM_P (mem)); +- MEM_NOTRAP_P (mem) = 1; +- set_mem_alias_set (mem, get_frame_alias_set ()); +- +- RTX_FRAME_RELATED_P (insn) = 1; +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, +- gen_rtx_SET (VOIDmode, stack_reg, +- gen_rtx_PLUS (Pmode, stack_reg, +- GEN_INT (-size)))); ++ insn = rs6000_emit_allocate_stack_1 (size, stack_reg); ++ return; + } + + #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP) +@@ -22490,9 +22692,9 @@ rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) + until it is equal to ROUNDED_SIZE. */ + + if (TARGET_64BIT) +- emit_insn (gen_probe_stack_rangedi (r12, r12, r0)); ++ emit_insn (gen_probe_stack_rangedi (r12, r12, stack_pointer_rtx, r0)); + else +- emit_insn (gen_probe_stack_rangesi (r12, r12, r0)); ++ emit_insn (gen_probe_stack_rangesi (r12, r12, stack_pointer_rtx, r0)); + + + /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time +@@ -22504,10 +22706,10 @@ rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) + } + + /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are +- absolute addresses. */ ++ addresses, not offsets. */ + +-const char * +-output_probe_stack_range (rtx reg1, rtx reg2) ++static const char * ++output_probe_stack_range_1 (rtx reg1, rtx reg2) + { + static int labelno = 0; + char loop_lab[32], end_lab[32]; +@@ -22546,6 +22748,63 @@ output_probe_stack_range (rtx reg1, rtx reg2) + return ""; + } + ++/* Probe a range of stack addresses from REG1 to REG3 inclusive. These are ++ addresses, not offsets. 
++ ++ REG2 contains the backchain that must be stored into *sp at each allocation. ++ ++ This is subtly different than the Ada probing above in that it tries hard ++ to prevent attacks that jump the stack guard. Thus, it is never allowed ++ to allocate more than PROBE_INTERVAL bytes of stack space without a ++ suitable probe. */ ++ ++static const char * ++output_probe_stack_range_stack_clash (rtx reg1, rtx reg2, rtx reg3) ++{ ++ static int labelno = 0; ++ char loop_lab[32]; ++ rtx xops[3]; ++ ++ HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval (); ++ ++ ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); ++ ++ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); ++ ++ /* This allocates and probes. */ ++ xops[0] = reg1; ++ xops[1] = reg2; ++ xops[2] = GEN_INT (-probe_interval); ++ if (TARGET_64BIT) ++ output_asm_insn ("stdu %1,%2(%0)", xops); ++ else ++ output_asm_insn ("stwu %1,%2(%0)", xops); ++ ++ /* Jump to LOOP_LAB if TEST_ADDR != LAST_ADDR. */ ++ xops[0] = reg1; ++ xops[1] = reg3; ++ if (TARGET_64BIT) ++ output_asm_insn ("cmpd 0,%0,%1", xops); ++ else ++ output_asm_insn ("cmpw 0,%0,%1", xops); ++ ++ fputs ("\tbne 0,", asm_out_file); ++ assemble_name_raw (asm_out_file, loop_lab); ++ fputc ('\n', asm_out_file); ++ ++ return ""; ++} ++ ++/* Wrapper around the output_probe_stack_range routines. */ ++const char * ++output_probe_stack_range (rtx reg1, rtx reg2, rtx reg3) ++{ ++ if (flag_stack_clash_protection) ++ return output_probe_stack_range_stack_clash (reg1, reg2, reg3); ++ else ++ return output_probe_stack_range_1 (reg1, reg3); ++} ++ + /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced + with (plus:P (reg 1) VAL), and with REG2 replaced with RREG if REG2 + is not NULL. It would be nice if dwarf2out_frame_debug_expr could +@@ -23857,6 +24116,13 @@ rs6000_emit_prologue (void) + } + } + ++ /* If we are emitting stack probes, but allocate no stack, then ++ just note that in the dump file. */ ++ if (flag_stack_clash_protection ++ && dump_file ++ && !info->push_p) ++ dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false); ++ + /* Update stack and set back pointer unless this is V.4, + for which it was done previously. */ + if (!WORLD_SAVE_P (info) && info->push_p +diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md +index cd197213480..3cd70e592c1 100644 +--- a/gcc/config/rs6000/rs6000.md ++++ b/gcc/config/rs6000/rs6000.md +@@ -11822,10 +11822,20 @@ + ;; + ;; First, an insn to allocate new stack space for dynamic use (e.g., alloca). + ;; We move the back-chain and decrement the stack pointer. +- ++;; ++;; Operand1 is more naturally reg_or_short_operand. However, for a large ++;; constant alloca, using that predicate will force the generic code to put ++;; the constant size into a register before calling the expander. ++;; ++;; As a result the expander would not have the constant size information ++;; in those cases and would have to generate less efficient code. ++;; ++;; Thus we allow reg_or_cint_operand instead so that the expander can see ++;; the constant size. The value is forced into a register if necessary. 
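++;; A constant size also lets the probe loop emitted below be rotated, ++;; since the iteration count is then known at expand time.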
++;; + (define_expand "allocate_stack" + [(set (match_operand 0 "gpc_reg_operand" "") +- (minus (reg 1) (match_operand 1 "reg_or_short_operand" ""))) ++ (minus (reg 1) (match_operand 1 "reg_or_cint_operand" ""))) + (set (reg 1) + (minus (reg 1) (match_dup 1)))] + "" +@@ -11835,6 +11845,15 @@ + rtx neg_op0; + rtx insn, par, set, mem; + ++ /* By allowing reg_or_cint_operand as the predicate we can get ++ better code for stack-clash-protection because we do not lose ++ size information. But the rest of the code expects the operand ++ to be reg_or_short_operand. If it isn't, then force it into ++ a register. */ ++ rtx orig_op1 = operands[1]; ++ if (!reg_or_short_operand (operands[1], Pmode)) ++ operands[1] = force_reg (Pmode, operands[1]); ++ + emit_move_insn (chain, stack_bot); + + /* Check stack bounds if necessary. */ +@@ -11847,6 +11866,51 @@ + emit_insn (gen_cond_trap (LTU, available, operands[1], const0_rtx)); + } + ++ /* Allocate and probe if requested. ++ This may look similar to the loop we use for prologue allocations, ++ but it is critically different. For the former we know the loop ++ will iterate, but do not know that generally here. The former ++ uses that knowledge to rotate the loop. Combining them would be ++ possible with some performance cost. */ ++ if (flag_stack_clash_protection) ++ { ++ rtx rounded_size, last_addr, residual; ++ HOST_WIDE_INT probe_interval; ++ compute_stack_clash_protection_loop_data (&rounded_size, &last_addr, ++ &residual, &probe_interval, ++ orig_op1); ++ ++ /* We do occasionally get in here with constant sizes, we might ++ as well do a reasonable job when we obviously can. */ ++ if (rounded_size != const0_rtx) ++ { ++ rtx loop_lab, end_loop; ++ bool rotated = CONST_INT_P (rounded_size); ++ ++ emit_stack_clash_protection_probe_loop_start (&loop_lab, &end_loop, ++ last_addr, rotated); ++ ++ if (Pmode == SImode) ++ emit_insn (gen_movsi_update_stack (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (-probe_interval), ++ chain)); ++ else ++ emit_insn (gen_movdi_di_update_stack (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (-probe_interval), ++ chain)); ++ emit_stack_clash_protection_probe_loop_end (loop_lab, end_loop, ++ last_addr, rotated); ++ } ++ ++ /* Now handle residuals. We just have to set operands[1] correctly ++ and let the rest of the expander run. */ ++ operands[1] = residual; ++ if (!CONST_INT_P (residual)) ++ operands[1] = force_reg (Pmode, operands[1]); ++ } ++ + if (GET_CODE (operands[1]) != CONST_INT + || INTVAL (operands[1]) < -32767 + || INTVAL (operands[1]) > 32768) +@@ -12994,12 +13058,13 @@ + (set_attr "length" "4")]) + + (define_insn "probe_stack_range" +- [(set (match_operand:P 0 "register_operand" "=r") ++ [(set (match_operand:P 0 "register_operand" "=&r") + (unspec_volatile:P [(match_operand:P 1 "register_operand" "0") +- (match_operand:P 2 "register_operand" "r")] ++ (match_operand:P 2 "register_operand" "r") ++ (match_operand:P 3 "register_operand" "r")] + UNSPECV_PROBE_STACK_RANGE))] + "" +- "* return output_probe_stack_range (operands[0], operands[2]);" ++ "* return output_probe_stack_range (operands[0], operands[2], operands[3]);" + [(set_attr "type" "three")]) + + ;; Compare insns are next. 
Note that the RS/6000 has two types of compares, +diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp +index 7c126e4122b..aba99513ed0 100644 +--- a/gcc/testsuite/lib/target-supports.exp ++++ b/gcc/testsuite/lib/target-supports.exp +@@ -5421,12 +5421,12 @@ proc check_effective_target_autoincdec { } { + proc check_effective_target_supports_stack_clash_protection { } { + + # Temporary until the target bits are fully ACK'd. +-# if { [istarget aarch*-*-*] +-# || [istarget powerpc*-*-*] || [istarget rs6000*-*-*] } { ++# if { [istarget aarch*-*-*] } { + # return 1 + # } + + if { [istarget x86_64-*-*] || [istarget i?86-*-*] ++ || [istarget powerpc*-*-*] || [istarget rs6000*-*-*] + || [istarget s390*-*-*] } { + return 1 + } diff --git a/gcc48-rh1469697-12.patch b/gcc48-rh1469697-12.patch new file mode 100644 index 0000000..2a7dd8f --- /dev/null +++ b/gcc48-rh1469697-12.patch @@ -0,0 +1,47 @@ +commit 15d5202e75021f2c41b8a1cb344c04b8915e9d4e +Author: law +Date: Sun Oct 8 15:44:39 2017 +0000 + + * gcc.dg/stack-check-5.c: Skip with -fstack-protector. + * gcc.dg/stack-check-6.c: Likewise. + * gcc.dg/stack-check-6a.c: Likewise. + + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@253527 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/testsuite/gcc.dg/stack-check-5.c b/gcc/testsuite/gcc.dg/stack-check-5.c +index 3178f5d8ce5..850e023ea4e 100644 +--- a/gcc/testsuite/gcc.dg/stack-check-5.c ++++ b/gcc/testsuite/gcc.dg/stack-check-5.c +@@ -1,6 +1,7 @@ + /* { dg-do compile } */ + /* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */ + /* { dg-require-effective-target supports_stack_clash_protection } */ ++/* { dg-skip-if "" { *-*-* } { "-fstack-protector" } { "" } } */ + + + /* Otherwise the S/390 back-end might save the stack pointer in f2 () +diff --git a/gcc/testsuite/gcc.dg/stack-check-6.c b/gcc/testsuite/gcc.dg/stack-check-6.c +index ad2021c9037..ab4b0e8894c 100644 +--- a/gcc/testsuite/gcc.dg/stack-check-6.c ++++ b/gcc/testsuite/gcc.dg/stack-check-6.c +@@ -1,6 +1,7 @@ + /* { dg-do compile } */ + /* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */ + /* { dg-require-effective-target supports_stack_clash_protection } */ ++/* { dg-skip-if "" { *-*-* } { "-fstack-protector" } { "" } } */ + + + extern void foo (char *); +diff --git a/gcc/testsuite/gcc.dg/stack-check-6a.c b/gcc/testsuite/gcc.dg/stack-check-6a.c +index 6f8e7128921..468d649a4fa 100644 +--- a/gcc/testsuite/gcc.dg/stack-check-6a.c ++++ b/gcc/testsuite/gcc.dg/stack-check-6a.c +@@ -4,6 +4,8 @@ + /* { dg-do compile } */ + /* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=16" } */ + /* { dg-require-effective-target supports_stack_clash_protection } */ ++/* { dg-skip-if "" { *-*-* } { "-fstack-protector" } { "" } } */ ++ + + #include "stack-check-6.c" + diff --git a/gcc48-rh1469697-13.patch b/gcc48-rh1469697-13.patch new file mode 100644 index 0000000..64c98aa --- /dev/null +++ b/gcc48-rh1469697-13.patch @@ -0,0 +1,21 @@ + * config/i386/i386.c (ix86_expand_prologue): Tighten assert + for int_registers_saved. 
+ +diff -rup a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +--- a/gcc/config/i386/i386.c 2017-11-03 10:39:24.585633524 -0600 ++++ b/gcc/config/i386/i386.c 2017-11-03 10:41:10.654774032 -0600 +@@ -10686,8 +10686,12 @@ ix86_expand_prologue (void) + && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK + || flag_stack_clash_protection)) + { +- /* We expect the registers to be saved when probes are used. */ +- gcc_assert (int_registers_saved); ++ /* This assert wants to verify that integer registers were saved ++ prior to probing. This is necessary when probing may be implemented ++ as a function call (Windows). It is not necessary for stack clash ++ protection probing. */ ++ if (!flag_stack_clash_protection) ++ gcc_assert (int_registers_saved); + + if (flag_stack_clash_protection) + { diff --git a/gcc48-rh1469697-14.patch b/gcc48-rh1469697-14.patch new file mode 100644 index 0000000..bb060fd --- /dev/null +++ b/gcc48-rh1469697-14.patch @@ -0,0 +1,301 @@ +commit 21397732bbcef3347c0d5ff8a0ee5163e803e2fb +Author: Jeff Law +Date: Mon Oct 2 12:30:26 2017 -0600 + + Dependencies for aarch64 work + +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index 07ff7031b35..91dd5b7fc02 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -181,6 +181,7 @@ unsigned aarch64_dbx_register_number (unsigned); + unsigned aarch64_trampoline_size (void); + void aarch64_asm_output_labelref (FILE *, const char *); + void aarch64_elf_asm_named_section (const char *, unsigned, tree); ++const char * aarch64_output_probe_stack_range (rtx, rtx); + void aarch64_expand_epilogue (bool); + void aarch64_expand_mov_immediate (rtx, rtx); + void aarch64_expand_prologue (void); +diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c +index 5afc167d569..cadf193cfcf 100644 +--- a/gcc/config/aarch64/aarch64.c ++++ b/gcc/config/aarch64/aarch64.c +@@ -969,6 +969,199 @@ aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) + return true; + } + ++static int ++aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, ++ enum machine_mode mode) ++{ ++ int i; ++ unsigned HOST_WIDE_INT val, val2, mask; ++ int one_match, zero_match; ++ int num_insns; ++ ++ val = INTVAL (imm); ++ ++ if (aarch64_move_imm (val, mode)) ++ { ++ if (generate) ++ emit_insn (gen_rtx_SET (VOIDmode, dest, imm)); ++ return 1; ++ } ++ ++ /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff ++ (with XXXX non-zero). In that case check to see if the move can be done in ++ a smaller mode. */ ++ val2 = val & 0xffffffff; ++ if (mode == DImode ++ && aarch64_move_imm (val2, SImode) ++ && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0)) ++ { ++ if (generate) ++ emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val2))); ++ ++ /* Check if we have to emit a second instruction by checking to see ++ if any of the upper 32 bits of the original DI mode value is set. */ ++ if (val == val2) ++ return 1; ++ ++ i = (val >> 48) ? 
48 : 32; ++ ++ if (generate) ++ emit_insn (gen_insv_immdi (dest, GEN_INT (i), ++ GEN_INT ((val >> i) & 0xffff))); ++ ++ return 2; ++ } ++ ++ if ((val >> 32) == 0 || mode == SImode) ++ { ++ if (generate) ++ { ++ emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val & 0xffff))); ++ if (mode == SImode) ++ emit_insn (gen_insv_immsi (dest, GEN_INT (16), ++ GEN_INT ((val >> 16) & 0xffff))); ++ else ++ emit_insn (gen_insv_immdi (dest, GEN_INT (16), ++ GEN_INT ((val >> 16) & 0xffff))); ++ } ++ return 2; ++ } ++ ++ /* Remaining cases are all for DImode. */ ++ ++ mask = 0xffff; ++ zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) + ++ ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0); ++ one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) + ++ ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0); ++ ++ if (zero_match != 2 && one_match != 2) ++ { ++ /* Try emitting a bitmask immediate with a movk replacing 16 bits. ++ For a 64-bit bitmask try whether changing 16 bits to all ones or ++ zeroes creates a valid bitmask. To check any repeated bitmask, ++ try using 16 bits from the other 32-bit half of val. */ ++ ++ for (i = 0; i < 64; i += 16, mask <<= 16) ++ { ++ val2 = val & ~mask; ++ if (val2 != val && aarch64_bitmask_imm (val2, mode)) ++ break; ++ val2 = val | mask; ++ if (val2 != val && aarch64_bitmask_imm (val2, mode)) ++ break; ++ val2 = val2 & ~mask; ++ val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask); ++ if (val2 != val && aarch64_bitmask_imm (val2, mode)) ++ break; ++ } ++ if (i != 64) ++ { ++ if (generate) ++ { ++ emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val2))); ++ emit_insn (gen_insv_immdi (dest, GEN_INT (i), ++ GEN_INT ((val >> i) & 0xffff))); ++ } ++ return 2; ++ } ++ } ++ ++ /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which ++ are emitted by the initial mov. If one_match > zero_match, skip set bits, ++ otherwise skip zero bits. */ ++ ++ num_insns = 1; ++ mask = 0xffff; ++ val2 = one_match > zero_match ? ~val : val; ++ i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32; ++ ++ if (generate) ++ emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (one_match > zero_match ++ ? (val | ~(mask << i)) ++ : (val & (mask << i))))); ++ for (i += 16; i < 64; i += 16) ++ { ++ if ((val2 & (mask << i)) == 0) ++ continue; ++ if (generate) ++ emit_insn (gen_insv_immdi (dest, GEN_INT (i), ++ GEN_INT ((val >> i) & 0xffff))); ++ num_insns ++; ++ } ++ ++ return num_insns; ++} ++ ++/* Add DELTA to REGNUM in mode MODE. SCRATCHREG can be used to hold a ++ temporary value if necessary. FRAME_RELATED_P should be true if ++ the RTX_FRAME_RELATED flag should be set and CFA adjustments added ++ to the generated instructions. If SCRATCHREG is known to hold ++ abs (delta), EMIT_MOVE_IMM can be set to false to avoid emitting the ++ immediate again. ++ ++ Since this function may be used to adjust the stack pointer, we must ++ ensure that it cannot cause transient stack deallocation (for example ++ by first incrementing SP and then decrementing when adjusting by a ++ large immediate). */ ++ ++static void ++aarch64_add_constant_internal (enum machine_mode mode, int regnum, ++ int scratchreg, HOST_WIDE_INT delta, ++ bool frame_related_p, bool emit_move_imm) ++{ ++ HOST_WIDE_INT mdelta = abs_hwi (delta); ++ rtx this_rtx = gen_rtx_REG (mode, regnum); ++ rtx insn; ++ ++ if (!mdelta) ++ return; ++ ++ /* Single instruction adjustment. 
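++ (The delta fits the 12-bit, optionally shifted, add/sub immediate field.)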
*/ ++ if (aarch64_uimm12_shift (mdelta)) ++ { ++ insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta))); ++ RTX_FRAME_RELATED_P (insn) = frame_related_p; ++ return; ++ } ++ ++ /* Emit 2 additions/subtractions if the adjustment is less than 24 bits. ++ Only do this if mdelta is not a 16-bit move as adjusting using a move ++ is better. */ ++ if (mdelta < 0x1000000 && !aarch64_move_imm (mdelta, mode)) ++ { ++ HOST_WIDE_INT low_off = mdelta & 0xfff; ++ ++ low_off = delta < 0 ? -low_off : low_off; ++ insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (low_off))); ++ RTX_FRAME_RELATED_P (insn) = frame_related_p; ++ insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta - low_off))); ++ RTX_FRAME_RELATED_P (insn) = frame_related_p; ++ return; ++ } ++ ++ /* Emit a move immediate if required and an addition/subtraction. */ ++ rtx scratch_rtx = gen_rtx_REG (mode, scratchreg); ++ if (emit_move_imm) ++ aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (mdelta), true, mode); ++ insn = emit_insn (delta < 0 ? gen_sub2_insn (this_rtx, scratch_rtx) ++ : gen_add2_insn (this_rtx, scratch_rtx)); ++ if (frame_related_p) ++ { ++ RTX_FRAME_RELATED_P (insn) = frame_related_p; ++ rtx adj = plus_constant (mode, this_rtx, delta); ++ add_reg_note (insn , REG_CFA_ADJUST_CFA, ++ gen_rtx_SET (VOIDmode, this_rtx, adj)); ++ } ++} ++ ++static inline void ++aarch64_sub_sp (int scratchreg, HOST_WIDE_INT delta, bool frame_related_p) ++{ ++ aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, -delta, ++ frame_related_p, true); ++} ++ + /* Implement TARGET_PASS_BY_REFERENCE. */ + + static bool +@@ -1476,6 +1669,47 @@ aarch64_libgcc_cmp_return_mode (void) + return SImode; + } + ++#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP) ++ ++/* We use the 12-bit shifted immediate arithmetic instructions so values ++ must be multiple of (1 << 12), i.e. 4096. */ ++#define ARITH_FACTOR 4096 ++ ++/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are ++ absolute addresses. */ ++ ++const char * ++aarch64_output_probe_stack_range (rtx reg1, rtx reg2) ++{ ++ static int labelno = 0; ++ char loop_lab[32]; ++ rtx xops[2]; ++ ++ ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); ++ ++ /* Loop. */ ++ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); ++ ++ /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ ++ xops[0] = reg1; ++ xops[1] = GEN_INT (PROBE_INTERVAL); ++ output_asm_insn ("sub\t%0, %0, %1", xops); ++ ++ /* Probe at TEST_ADDR. */ ++ output_asm_insn ("str\txzr, [%0]", xops); ++ ++ /* Test if TEST_ADDR == LAST_ADDR. */ ++ xops[1] = reg2; ++ output_asm_insn ("cmp\t%0, %1", xops); ++ ++ /* Branch. */ ++ fputs ("\tb.ne\t", asm_out_file); ++ assemble_name_raw (asm_out_file, loop_lab); ++ fputc ('\n', asm_out_file); ++ ++ return ""; ++} ++ + static bool + aarch64_frame_pointer_required (void) + { +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index 91299901bbf..17082486ac8 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -88,6 +88,7 @@ + UNSPEC_ST4 + UNSPEC_TLS + UNSPEC_TLSDESC ++ UNSPECV_PROBE_STACK_RANGE ; Represent stack range probing. 
+ UNSPEC_VSTRUCTDUMMY + ]) + +@@ -3399,6 +3400,18 @@ + [(set_attr "length" "0")] + ) + ++(define_insn "probe_stack_range" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0") ++ (match_operand:DI 2 "register_operand" "r")] ++ UNSPECV_PROBE_STACK_RANGE))] ++ "" ++{ ++ return aarch64_output_probe_stack_range (operands[0], operands[2]); ++} ++ [(set_attr "length" "32")] ++) ++ + ;; Named pattern for expanding thread pointer reference. + (define_expand "get_thread_pointerdi" + [(match_operand:DI 0 "register_operand" "=r")] diff --git a/gcc48-rh1469697-15.patch b/gcc48-rh1469697-15.patch new file mode 100644 index 0000000..e2df531 --- /dev/null +++ b/gcc48-rh1469697-15.patch @@ -0,0 +1,147 @@ +commit 54a2f1efc188660df9da78523b6925aab4c3a668 +Author: rsandifo +Date: Tue Jan 13 14:11:15 2015 +0000 + + gcc/ + * config/aarch64/aarch64.md (subsi3, *subsi3_uxtw, subdi3) + (*sub__, *sub__si_uxtw) + (*sub__shft_) + (*sub__shft_si_uxtw, *sub__multp2) + (*sub_si_multp2_uxtw, *sub_uxt_multp2) + (*sub_uxtsi_multp2_uxtw): Add stack pointer sources. + + gcc/testsuite/ + * gcc.target/aarch64/subsp.c: New test. + + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@219533 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index 17082486ac8..a085c6acaf5 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -1610,8 +1610,8 @@ + + (define_insn "subsi3" + [(set (match_operand:SI 0 "register_operand" "=rk") +- (minus:SI (match_operand:SI 1 "register_operand" "r") +- (match_operand:SI 2 "register_operand" "r")))] ++ (minus:SI (match_operand:SI 1 "register_operand" "rk") ++ (match_operand:SI 2 "register_operand" "r")))] + "" + "sub\\t%w0, %w1, %w2" + [(set_attr "v8type" "alu") +@@ -1622,7 +1622,7 @@ + (define_insn "*subsi3_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI +- (minus:SI (match_operand:SI 1 "register_operand" "r") ++ (minus:SI (match_operand:SI 1 "register_operand" "rk") + (match_operand:SI 2 "register_operand" "r"))))] + "" + "sub\\t%w0, %w1, %w2" +@@ -1632,7 +1632,7 @@ + + (define_insn "subdi3" + [(set (match_operand:DI 0 "register_operand" "=rk,!w") +- (minus:DI (match_operand:DI 1 "register_operand" "r,!w") ++ (minus:DI (match_operand:DI 1 "register_operand" "rk,!w") + (match_operand:DI 2 "register_operand" "r,!w")))] + "" + "@ +@@ -1725,7 +1725,7 @@ + + (define_insn "*sub__" + [(set (match_operand:GPI 0 "register_operand" "=rk") +- (minus:GPI (match_operand:GPI 1 "register_operand" "r") ++ (minus:GPI (match_operand:GPI 1 "register_operand" "rk") + (ANY_EXTEND:GPI + (match_operand:ALLX 2 "register_operand" "r"))))] + "" +@@ -1738,7 +1738,7 @@ + (define_insn "*sub__si_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI +- (minus:SI (match_operand:SI 1 "register_operand" "r") ++ (minus:SI (match_operand:SI 1 "register_operand" "rk") + (ANY_EXTEND:SI + (match_operand:SHORT 2 "register_operand" "r")))))] + "" +@@ -1749,7 +1749,7 @@ + + (define_insn "*sub__shft_" + [(set (match_operand:GPI 0 "register_operand" "=rk") +- (minus:GPI (match_operand:GPI 1 "register_operand" "r") ++ (minus:GPI (match_operand:GPI 1 "register_operand" "rk") + (ashift:GPI (ANY_EXTEND:GPI + (match_operand:ALLX 2 "register_operand" "r")) + (match_operand 3 "aarch64_imm3" "Ui3"))))] +@@ -1763,7 +1763,7 @@ + (define_insn "*sub__shft_si_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI +- 
(minus:SI (match_operand:SI 1 "register_operand" "r") ++ (minus:SI (match_operand:SI 1 "register_operand" "rk") + (ashift:SI (ANY_EXTEND:SI + (match_operand:SHORT 2 "register_operand" "r")) + (match_operand 3 "aarch64_imm3" "Ui3")))))] +@@ -1775,7 +1775,7 @@ + + (define_insn "*sub__multp2" + [(set (match_operand:GPI 0 "register_operand" "=rk") +- (minus:GPI (match_operand:GPI 4 "register_operand" "r") ++ (minus:GPI (match_operand:GPI 4 "register_operand" "rk") + (ANY_EXTRACT:GPI + (mult:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) +@@ -1791,7 +1791,7 @@ + (define_insn "*sub_si_multp2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI +- (minus:SI (match_operand:SI 4 "register_operand" "r") ++ (minus:SI (match_operand:SI 4 "register_operand" "rk") + (ANY_EXTRACT:SI + (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) +@@ -1805,7 +1805,7 @@ + + (define_insn "*sub_uxt_multp2" + [(set (match_operand:GPI 0 "register_operand" "=rk") +- (minus:GPI (match_operand:GPI 4 "register_operand" "r") ++ (minus:GPI (match_operand:GPI 4 "register_operand" "rk") + (and:GPI + (mult:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) +@@ -1823,7 +1823,7 @@ + (define_insn "*sub_uxtsi_multp2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI +- (minus:SI (match_operand:SI 4 "register_operand" "r") ++ (minus:SI (match_operand:SI 4 "register_operand" "rk") + (and:SI + (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) +diff --git a/gcc/testsuite/gcc.target/aarch64/subsp.c b/gcc/testsuite/gcc.target/aarch64/subsp.c +new file mode 100644 +index 00000000000..70d848c59d1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/subsp.c +@@ -0,0 +1,19 @@ ++/* { dg-options "-O" } */ ++ ++int foo (void *); ++ ++int ++f1 (int *x, long y) ++{ ++ return foo (__builtin_alloca (y)); ++} ++ ++int ++f2 (int *x, int y) ++{ ++ char a[y + 1][16]; ++ return foo (a); ++} ++ ++/* { dg-final { scan-assembler "sub\tsp, sp, x\[0-9\]*\n" } } */ ++/* { dg-final { scan-assembler "sub\tsp, sp, x\[0-9\]*, sxtw 4\n" } } */ diff --git a/gcc48-rh1469697-16.patch b/gcc48-rh1469697-16.patch new file mode 100644 index 0000000..423f448 --- /dev/null +++ b/gcc48-rh1469697-16.patch @@ -0,0 +1,462 @@ +commit 5d7a77ede3e91948ee125bd82533d7e692543fff +Author: Jeff Law +Date: Mon Oct 2 13:43:01 2017 -0600 + + aarch64 support + +diff --git a/gcc/config/aarch64/.aarch64.c.rej.swp b/gcc/config/aarch64/.aarch64.c.rej.swp +new file mode 100644 +index 00000000000..b899e21b855 +Binary files /dev/null and b/gcc/config/aarch64/.aarch64.c.rej.swp differ +diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c +index cadf193cfcf..e08632ffa88 100644 +--- a/gcc/config/aarch64/aarch64.c ++++ b/gcc/config/aarch64/aarch64.c +@@ -45,6 +45,8 @@ + #include "gimple.h" + #include "optabs.h" + #include "dwarf2.h" ++#include "params.h" ++#include "dumpfile.h" + + /* Classifies an address. + +@@ -1696,7 +1698,14 @@ aarch64_output_probe_stack_range (rtx reg1, rtx reg2) + output_asm_insn ("sub\t%0, %0, %1", xops); + + /* Probe at TEST_ADDR. 
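+ Under -fstack-clash-protection the store below lands PROBE_INTERVAL - 8 + bytes above the new SP, i.e. within the interval just allocated.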
*/ +- output_asm_insn ("str\txzr, [%0]", xops); ++ if (flag_stack_clash_protection) ++ { ++ gcc_assert (xops[0] == stack_pointer_rtx); ++ xops[1] = GEN_INT (PROBE_INTERVAL - 8); ++ output_asm_insn ("str\txzr, [%0, %1]", xops); ++ } ++ else ++ output_asm_insn ("str\txzr, [%0]", xops); + + /* Test if TEST_ADDR == LAST_ADDR. */ + xops[1] = reg2; +@@ -2001,6 +2010,123 @@ aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset, + base_rtx, cfi_ops); + } + ++/* Allocate SIZE bytes of stack space using SCRATCH_REG as a scratch ++ register. */ ++ ++static void ++aarch64_allocate_and_probe_stack_space (int scratchreg, HOST_WIDE_INT size) ++{ ++ HOST_WIDE_INT probe_interval ++ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL); ++ HOST_WIDE_INT guard_size ++ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE); ++ HOST_WIDE_INT guard_used_by_caller = 1024; ++ ++ /* SIZE should be large enough to require probing here. ie, it ++ must be larger than GUARD_SIZE - GUARD_USED_BY_CALLER. ++ ++ We can allocate GUARD_SIZE - GUARD_USED_BY_CALLER as a single chunk ++ without any probing. */ ++ gcc_assert (size >= guard_size - guard_used_by_caller); ++ aarch64_sub_sp (scratchreg, guard_size - guard_used_by_caller, true); ++ HOST_WIDE_INT orig_size = size; ++ size -= (guard_size - guard_used_by_caller); ++ ++ HOST_WIDE_INT rounded_size = size & -probe_interval; ++ HOST_WIDE_INT residual = size - rounded_size; ++ ++ /* We can handle a small number of allocations/probes inline. Otherwise ++ punt to a loop. */ ++ if (rounded_size && rounded_size <= 4 * probe_interval) ++ { ++ /* We don't use aarch64_sub_sp here because we don't want to ++ repeatedly load SCRATCHREG. */ ++ rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg); ++ if (probe_interval > ARITH_FACTOR) ++ emit_move_insn (scratch_rtx, GEN_INT (-probe_interval)); ++ else ++ scratch_rtx = GEN_INT (-probe_interval); ++ ++ for (HOST_WIDE_INT i = 0; i < rounded_size; i += probe_interval) ++ { ++ rtx insn = emit_insn (gen_add2_insn (stack_pointer_rtx, scratch_rtx)); ++ add_reg_note (insn, REG_STACK_CHECK, const0_rtx); ++ ++ if (probe_interval > ARITH_FACTOR) ++ { ++ RTX_FRAME_RELATED_P (insn) = 1; ++ rtx adj = plus_constant (Pmode, stack_pointer_rtx, -probe_interval); ++ add_reg_note (insn, REG_CFA_ADJUST_CFA, ++ gen_rtx_SET (VOIDmode, stack_pointer_rtx, adj)); ++ } ++ ++ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, ++ (probe_interval ++ - GET_MODE_SIZE (word_mode)))); ++ emit_insn (gen_blockage ()); ++ } ++ dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size); ++ } ++ else if (rounded_size) ++ { ++ /* Compute the ending address. */ ++ rtx temp = gen_rtx_REG (word_mode, scratchreg); ++ emit_move_insn (temp, GEN_INT (-rounded_size)); ++ rtx insn = emit_insn (gen_add3_insn (temp, stack_pointer_rtx, temp)); ++ ++ /* For the initial allocation, we don't have a frame pointer ++ set up, so we always need CFI notes. If we're doing the ++ final allocation, then we may have a frame pointer, in which ++ case it is the CFA, otherwise we need CFI notes. ++ ++ We can determine which allocation we are doing by looking at ++ the temporary register. IP0 is the initial allocation, IP1 ++ is the final allocation. */ ++ if (scratchreg == IP0_REGNUM || !frame_pointer_needed) ++ { ++ /* We want the CFA independent of the stack pointer for the ++ duration of the loop. 
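++ Defining the CFA in terms of TEMP keeps the unwind info valid while ++ SP steps downward through the probe loop.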
*/ ++ add_reg_note (insn, REG_CFA_DEF_CFA, ++ plus_constant (Pmode, temp, ++ (rounded_size + (orig_size - size)))); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } ++ ++ /* This allocates and probes the stack. ++ ++ It also probes at a 4k interval regardless of the value of ++ PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL. */ ++ insn = emit_insn (gen_probe_stack_range (stack_pointer_rtx, ++ stack_pointer_rtx, temp)); ++ ++ /* Now reset the CFA register if needed. */ ++ if (scratchreg == IP0_REGNUM || !frame_pointer_needed) ++ { ++ add_reg_note (insn, REG_CFA_DEF_CFA, ++ plus_constant (Pmode, stack_pointer_rtx, ++ (rounded_size + (orig_size - size)))); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } ++ ++ emit_insn (gen_blockage ()); ++ dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size); ++ } ++ else ++ dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size); ++ ++ /* Handle any residuals. ++ Note that any residual must be probed. */ ++ if (residual) ++ { ++ aarch64_sub_sp (scratchreg, residual, true); ++ add_reg_note (get_last_insn (), REG_STACK_CHECK, const0_rtx); ++ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, ++ (residual - GET_MODE_SIZE (word_mode)))); ++ emit_insn (gen_blockage ()); ++ } ++ return; ++} ++ + /* AArch64 stack frames generated by this compiler look like: + + +-------------------------------+ +@@ -2073,6 +2199,44 @@ aarch64_expand_prologue (void) + - original_frame_size + - cfun->machine->frame.saved_regs_size); + ++ /* We do not fully protect aarch64 against stack clash style attacks ++ as doing so would be prohibitively expensive with less utility over ++ time as newer compilers are deployed. ++ ++ We assume the guard is at least 64k. Furthermore, we assume that ++ the caller has not pushed the stack pointer more than 1k into ++ the guard. A caller that pushes the stack pointer than 1k into ++ the guard is considered invalid. ++ ++ Note that the caller's ability to push the stack pointer into the ++ guard is a function of the number and size of outgoing arguments and/or ++ dynamic stack allocations due to the mandatory save of the link register ++ in the caller's frame. ++ ++ With those assumptions the callee can allocate up to 63k of stack ++ space without probing. ++ ++ When probing is needed, we emit a probe at the start of the prologue ++ and every PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes thereafter. ++ ++ We have to track how much space has been allocated, but we do not ++ track stores into the stack as implicit probes except for the ++ fp/lr store. */ ++ HOST_WIDE_INT guard_size ++ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE); ++ HOST_WIDE_INT guard_used_by_caller = 1024; ++ HOST_WIDE_INT final_adjust = crtl->outgoing_args_size; ++ HOST_WIDE_INT initial_adjust = frame_size; ++ ++ if (flag_stack_clash_protection) ++ { ++ if (initial_adjust == 0) ++ dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false); ++ else if (offset < guard_size - guard_used_by_caller ++ && final_adjust < guard_size - guard_used_by_caller) ++ dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); ++ } ++ + /* Store pairs and load pairs have a range only -512 to 504. 
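+ (That is a signed 7-bit offset scaled by the 8-byte register size.)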
*/ + if (offset >= 512) + { +@@ -2089,7 +2253,10 @@ aarch64_expand_prologue (void) + frame_size -= (offset + crtl->outgoing_args_size); + fp_offset = 0; + +- if (frame_size >= 0x1000000) ++ if (flag_stack_clash_protection ++ && frame_size >= guard_size - guard_used_by_caller) ++ aarch64_allocate_and_probe_stack_space (IP0_REGNUM, frame_size); ++ else if (frame_size >= 0x1000000) + { + rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM); + emit_move_insn (op0, GEN_INT (-frame_size)); +@@ -2206,10 +2373,30 @@ aarch64_expand_prologue (void) + { + if (crtl->outgoing_args_size > 0) + { +- insn = emit_insn (gen_add2_insn +- (stack_pointer_rtx, +- GEN_INT (- crtl->outgoing_args_size))); +- RTX_FRAME_RELATED_P (insn) = 1; ++ if (flag_stack_clash_protection) ++ { ++ /* First probe if the final adjustment is larger than the ++ guard size less the amount of guard reserved for use by ++ the caller's outgoing args. */ ++ if (final_adjust >= guard_size - guard_used_by_caller) ++ aarch64_allocate_and_probe_stack_space (IP1_REGNUM, ++ final_adjust); ++ else ++ aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed); ++ ++ /* We must also probe if the final adjustment is larger than the ++ guard that is assumed used by the caller. This may be ++ sub-optimal. */ ++ if (final_adjust >= guard_used_by_caller) ++ { ++ if (dump_file) ++ fprintf (dump_file, ++ "Stack clash aarch64 large outgoing arg, probing\n"); ++ emit_stack_probe (stack_pointer_rtx); ++ } ++ } ++ else ++ aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed); + } + } + } +@@ -5088,6 +5275,12 @@ aarch64_override_options (void) + #endif + } + ++ /* We assume the guard page is 64k. */ ++ maybe_set_param_value (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE, ++ 16, ++ global_options.x_param_values, ++ global_options_set.x_param_values); ++ + aarch64_override_options_after_change (); + } + +@@ -8161,6 +8354,28 @@ aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode, + return ret; + } + ++/* It has been decided that to allow up to 1kb of outgoing argument ++ space to be allocated w/o probing. If more than 1kb of outgoing ++ argment space is allocated, then it must be probed and the last ++ probe must occur no more than 1kbyte away from the end of the ++ allocated space. ++ ++ This implies that the residual part of an alloca allocation may ++ need probing in cases where the generic code might not otherwise ++ think a probe is needed. ++ ++ This target hook returns TRUE when allocating RESIDUAL bytes of ++ alloca space requires an additional probe, otherwise FALSE is ++ returned. 
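++ Zero, non-constant and large (>= 1kB) residuals are all probed, ++ matching the conservative checks in the body below.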
*/ ++ ++static bool ++aarch64_stack_clash_protection_final_dynamic_probe (rtx residual) ++{ ++ return (residual == CONST0_RTX (Pmode) ++ || GET_CODE (residual) != CONST_INT ++ || INTVAL (residual) >= 1024); ++} ++ + #undef TARGET_ADDRESS_COST + #define TARGET_ADDRESS_COST aarch64_address_cost + +@@ -8378,6 +8593,10 @@ aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode, + #undef TARGET_FIXED_CONDITION_CODE_REGS + #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs + ++#undef TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE ++#define TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE \ ++ aarch64_stack_clash_protection_final_dynamic_probe ++ + struct gcc_target targetm = TARGET_INITIALIZER; + + #include "gt-aarch64.h" +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index a085c6acaf5..5485a5f70b1 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -3401,7 +3401,7 @@ + ) + + (define_insn "probe_stack_range" +- [(set (match_operand:DI 0 "register_operand" "=r") ++ [(set (match_operand:DI 0 "register_operand" "=rk") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "register_operand" "r")] + UNSPECV_PROBE_STACK_RANGE))] +diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-12.c b/gcc/testsuite/gcc.target/aarch64/stack-check-12.c +new file mode 100644 +index 00000000000..2ce38483b6b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-12.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++extern void arf (unsigned long int *, unsigned long int *); ++void ++frob () ++{ ++ unsigned long int num[1000]; ++ unsigned long int den[1000]; ++ arf (den, num); ++} ++ ++/* This verifies that the scheduler did not break the dependencies ++ by adjusting the offsets within the probe and that the scheduler ++ did not reorder around the stack probes. */ ++/* { dg-final { scan-assembler-times "sub\\tsp, sp, #4096\\n\\tstr\\txzr, .sp, 4088." 3 } } */ ++ ++ ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-13.c b/gcc/testsuite/gcc.target/aarch64/stack-check-13.c +new file mode 100644 +index 00000000000..d8886835989 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-13.c +@@ -0,0 +1,28 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++#define ARG32(X) X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X ++#define ARG192(X) ARG32(X),ARG32(X),ARG32(X),ARG32(X),ARG32(X),ARG32(X) ++void out1(ARG192(__int128)); ++int t1(int); ++ ++int t3(int x) ++{ ++ if (x < 1000) ++ return t1 (x) + 1; ++ ++ out1 (ARG192(1)); ++ return 0; ++} ++ ++ ++ ++/* This test creates a large (> 1k) outgoing argument area that needs ++ to be probed. We don't test the exact size of the space or the ++ exact offset to make the test a little less sensitive to trivial ++ output changes. 
*/ ++/* { dg-final { scan-assembler-times "sub\\tsp, sp, #....\\n\\tstr\\txzr, \\\[sp" 1 } } */ ++ ++ ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-14.c b/gcc/testsuite/gcc.target/aarch64/stack-check-14.c +new file mode 100644 +index 00000000000..59ffe01376d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-14.c +@@ -0,0 +1,25 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++int t1(int); ++ ++int t2(int x) ++{ ++ char *p = __builtin_alloca (4050); ++ x = t1 (x); ++ return p[x]; ++} ++ ++ ++/* This test has a constant sized alloca that is smaller than the ++ probe interval. But it actually requires two probes instead ++ of one because of the optimistic assumptions we made in the ++ aarch64 prologue code WRT probing state. ++ ++ The form can change quite a bit so we just check for two ++ probes without looking at the actual address. */ ++/* { dg-final { scan-assembler-times "str\\txzr," 2 } } */ ++ ++ ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-15.c b/gcc/testsuite/gcc.target/aarch64/stack-check-15.c +new file mode 100644 +index 00000000000..e06db6dc2f0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-15.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++int t1(int); ++ ++int t2(int x) ++{ ++ char *p = __builtin_alloca (x); ++ x = t1 (x); ++ return p[x]; ++} ++ ++ ++/* This test has a variable sized alloca. It requires 3 probes. ++ One in the loop, one for the residual and at the end of the ++ alloca area. ++ ++ The form can change quite a bit so we just check for two ++ probes without looking at the actual address. */ ++/* { dg-final { scan-assembler-times "str\\txzr," 3 } } */ ++ ++ ++ +diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp +index aba99513ed0..a8451c98b08 100644 +--- a/gcc/testsuite/lib/target-supports.exp ++++ b/gcc/testsuite/lib/target-supports.exp +@@ -5420,14 +5420,9 @@ proc check_effective_target_autoincdec { } { + # + proc check_effective_target_supports_stack_clash_protection { } { + +- # Temporary until the target bits are fully ACK'd. +-# if { [istarget aarch*-*-*] } { +-# return 1 +-# } +- + if { [istarget x86_64-*-*] || [istarget i?86-*-*] + || [istarget powerpc*-*-*] || [istarget rs6000*-*-*] +- || [istarget s390*-*-*] } { ++ || [istarget aarch64*-**] || [istarget s390*-*-*] } { + return 1 + } + return 0 diff --git a/gcc48-rh1469697-17.patch b/gcc48-rh1469697-17.patch new file mode 100644 index 0000000..614a8db --- /dev/null +++ b/gcc48-rh1469697-17.patch @@ -0,0 +1,58 @@ + + * config/i386/i386.c (ix86_emit_restore_reg_using_pop): Prototype. + (ix86_adjust_stack_and_probe_stack_clash): Use a push/pop sequence + to probe at the start of a noreturn function. + + * gcc.target/i386/stack-check-12.c: New test + +diff -Nrup a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +--- a/gcc/config/i386/i386.c 2017-11-03 13:35:17.641528205 -0600 ++++ b/gcc/config/i386/i386.c 2017-11-03 13:37:39.489361692 -0600 +@@ -64,6 +64,7 @@ along with GCC; see the file COPYING3. 
+ #include "tree-flow.h" + + static rtx legitimize_dllimport_symbol (rtx, bool); ++static void ix86_emit_restore_reg_using_pop (rtx); + + #ifndef CHECK_STACK_LIMIT + #define CHECK_STACK_LIMIT (-1) +@@ -9884,8 +9885,14 @@ ix86_adjust_stack_and_probe_stack_clash + we just probe when we cross PROBE_INTERVAL. */ + if (TREE_THIS_VOLATILE (cfun->decl)) + { +- emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, +- -GET_MODE_SIZE (word_mode))); ++ /* We can safely use any register here since we're just going to push ++ its value and immediately pop it back. But we do try and avoid ++ argument passing registers so as not to introduce dependencies in ++ the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */ ++ rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG); ++ rtx insn = emit_insn (gen_push (dummy_reg)); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ ix86_emit_restore_reg_using_pop (dummy_reg); + emit_insn (gen_blockage ()); + } + +diff -Nrup a/gcc/testsuite/gcc.target/i386/stack-check-12.c b/gcc/testsuite/gcc.target/i386/stack-check-12.c +--- a/gcc/testsuite/gcc.target/i386/stack-check-12.c 1969-12-31 17:00:00.000000000 -0700 ++++ b/gcc/testsuite/gcc.target/i386/stack-check-12.c 2017-11-03 13:36:15.104055651 -0600 +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection -mtune=generic -fomit-frame-pointer" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++__attribute__ ((noreturn)) void exit (int); ++ ++__attribute__ ((noreturn)) void ++f (void) ++{ ++ asm volatile ("nop" ::: "edi"); ++ exit (1); ++} ++ ++/* { dg-final { scan-assembler-not "or\[ql\]" } } */ ++/* { dg-final { scan-assembler "pushl %esi" { target ia32 } } } */ ++/* { dg-final { scan-assembler "popl %esi" { target ia32 } } }*/ ++/* { dg-final { scan-assembler "pushq %rax" { target { ! ia32 } } } } */ ++/* { dg-final { scan-assembler "popq %rax" { target { ! ia32 } } } }*/ ++ diff --git a/gcc48-rh1469697-18.patch b/gcc48-rh1469697-18.patch new file mode 100644 index 0000000..612fad4 --- /dev/null +++ b/gcc48-rh1469697-18.patch @@ -0,0 +1,191 @@ + + * config/i386/i386.c (PROBE_INTERVAL): Remove. + (get_probe_interval): New function. + (ix86_adjust_stack_and_probe_stack_clash): Use get_probe_interval. + (ix86_adjust_stack_and_probe): Likewise. + (output_adjust_stack_and_probe): Likewise. + (ix86_emit_probe_stack_range): Likewise. + (ix86_expand_prologue): Likewise. + + * gcc.dg/pr82788.c: New test. + +diff -Nrup a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +--- a/gcc/config/i386/i386.c 2017-11-06 09:54:43.814921056 -0700 ++++ b/gcc/config/i386/i386.c 2017-11-06 09:55:25.327589661 -0700 +@@ -9839,7 +9839,17 @@ release_scratch_register_on_entry (struc + } + } + +-#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP) ++/* Return the probing interval for -fstack-clash-protection. */ ++ ++static HOST_WIDE_INT ++get_probe_interval (void) ++{ ++ if (flag_stack_clash_protection) ++ return (HOST_WIDE_INT_1U ++ << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL)); ++ else ++ return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP); ++} + + /* Emit code to adjust the stack pointer by SIZE bytes while probing it. + +@@ -9911,8 +9921,7 @@ ix86_adjust_stack_and_probe_stack_clash + /* We're allocating a large enough stack frame that we need to + emit probes. Either emit them inline or in a loop depending + on the size. 
*/ +- HOST_WIDE_INT probe_interval +- = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL); ++ HOST_WIDE_INT probe_interval = get_probe_interval (); + if (size <= 4 * probe_interval) + { + HOST_WIDE_INT i; +@@ -9921,7 +9930,7 @@ ix86_adjust_stack_and_probe_stack_clash + /* Allocate PROBE_INTERVAL bytes. */ + rtx insn + = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (-PROBE_INTERVAL), -1, ++ GEN_INT (-probe_interval), -1, + m->fs.cfa_reg == stack_pointer_rtx); + add_reg_note (insn, REG_STACK_CHECK, const0_rtx); + +@@ -10014,7 +10023,7 @@ ix86_adjust_stack_and_probe (const HOST_ + that's the easy case. The run-time loop is made up of 11 insns in the + generic case while the compile-time loop is made up of 3+2*(n-1) insns + for n # of intervals. */ +- if (size <= 5 * PROBE_INTERVAL) ++ if (size <= 5 * get_probe_interval ()) + { + HOST_WIDE_INT i, adjust; + bool first_probe = true; +@@ -10023,15 +10032,15 @@ ix86_adjust_stack_and_probe (const HOST_ + values of N from 1 until it exceeds SIZE. If only one probe is + needed, this will not generate any code. Then adjust and probe + to PROBE_INTERVAL + SIZE. */ +- for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL) ++ for (i = get_probe_interval (); i < size; i += get_probe_interval ()) + { + if (first_probe) + { +- adjust = 2 * PROBE_INTERVAL + dope; ++ adjust = 2 * get_probe_interval () + dope; + first_probe = false; + } + else +- adjust = PROBE_INTERVAL; ++ adjust = get_probe_interval (); + + emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, +@@ -10040,9 +10049,9 @@ ix86_adjust_stack_and_probe (const HOST_ + } + + if (first_probe) +- adjust = size + PROBE_INTERVAL + dope; ++ adjust = size + get_probe_interval () + dope; + else +- adjust = size + PROBE_INTERVAL - i; ++ adjust = size + get_probe_interval () - i; + + emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, +@@ -10052,7 +10061,8 @@ ix86_adjust_stack_and_probe (const HOST_ + /* Adjust back to account for the additional first interval. */ + last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, +- PROBE_INTERVAL + dope))); ++ (get_probe_interval () ++ + dope)))); + } + + /* Otherwise, do the same as above, but in a loop. Note that we must be +@@ -10070,7 +10080,7 @@ ix86_adjust_stack_and_probe (const HOST_ + + /* Step 1: round SIZE to the previous multiple of the interval. */ + +- rounded_size = size & -PROBE_INTERVAL; ++ rounded_size = size & get_probe_interval (); + + + /* Step 2: compute initial and final value of the loop counter. */ +@@ -10078,7 +10088,7 @@ ix86_adjust_stack_and_probe (const HOST_ + /* SP = SP_0 + PROBE_INTERVAL. */ + emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, +- - (PROBE_INTERVAL + dope)))); ++ - (get_probe_interval () + dope)))); + + /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */ + emit_move_insn (sr.reg, GEN_INT (-rounded_size)); +@@ -10115,7 +10125,8 @@ ix86_adjust_stack_and_probe (const HOST_ + /* Adjust back to account for the additional first interval. 
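+ (It restores the probe interval plus dope subtracted before the loop.)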
*/ + last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, +- PROBE_INTERVAL + dope))); ++ (get_probe_interval () ++ + dope)))); + + release_scratch_register_on_entry (&sr); + } +@@ -10134,7 +10145,7 @@ ix86_adjust_stack_and_probe (const HOST_ + XVECEXP (expr, 0, 1) + = gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, +- PROBE_INTERVAL + dope + size)); ++ get_probe_interval () + dope + size)); + add_reg_note (last, REG_FRAME_RELATED_EXPR, expr); + RTX_FRAME_RELATED_P (last) = 1; + +@@ -10168,7 +10179,7 @@ output_adjust_stack_and_probe (rtx reg) + fputc ('\n', asm_out_file); + + /* SP = SP + PROBE_INTERVAL. */ +- xops[1] = GEN_INT (PROBE_INTERVAL); ++ xops[1] = GEN_INT (get_probe_interval ()); + output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops); + + /* Probe at SP. */ +@@ -10194,14 +10205,14 @@ ix86_emit_probe_stack_range (HOST_WIDE_I + that's the easy case. The run-time loop is made up of 7 insns in the + generic case while the compile-time loop is made up of n insns for n # + of intervals. */ +- if (size <= 7 * PROBE_INTERVAL) ++ if (size <= 7 * get_probe_interval ()) + { + HOST_WIDE_INT i; + + /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until + it exceeds SIZE. If only one probe is needed, this will not + generate any code. Then probe at FIRST + SIZE. */ +- for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL) ++ for (i = get_probe_interval (); i < size; i += get_probe_interval ()) + emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, + -(first + i))); + +@@ -10224,7 +10235,7 @@ ix86_emit_probe_stack_range (HOST_WIDE_I + + /* Step 1: round SIZE to the previous multiple of the interval. */ + +- rounded_size = size & -PROBE_INTERVAL; ++ rounded_size = size & -get_probe_interval (); + + + /* Step 2: compute initial and final value of the loop counter. */ +@@ -10291,7 +10302,7 @@ output_probe_stack_range (rtx reg, rtx e + fputc ('\n', asm_out_file); + + /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ +- xops[1] = GEN_INT (PROBE_INTERVAL); ++ xops[1] = GEN_INT (get_probe_interval ()); + output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops); + + /* Probe at TEST_ADDR. */ +diff -Nrup a/gcc/testsuite/gcc.dg/pr82788.c b/gcc/testsuite/gcc.dg/pr82788.c +--- a/gcc/testsuite/gcc.dg/pr82788.c 1969-12-31 17:00:00.000000000 -0700 ++++ b/gcc/testsuite/gcc.dg/pr82788.c 2017-11-06 09:55:10.680706587 -0700 +@@ -0,0 +1,4 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-probe-interval=10 --param stack-clash-protection-guard-size=12" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++int main() { int a[1442]; return 0;} diff --git a/gcc48-rh1469697-19.patch b/gcc48-rh1469697-19.patch new file mode 100644 index 0000000..655097e --- /dev/null +++ b/gcc48-rh1469697-19.patch @@ -0,0 +1,36 @@ +2017-10-31 Segher Boessenkool + + PR target/82674 + * config/rs6000/rs6000.md (allocate_stack): Force update interval + into a register if it does not fit into an immediate offset field. 
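+
+   An editorial sketch, not part of the upstream patch: the 32768 cutoff
+   in the hunk below reflects the 16-bit signed immediate displacement
+   used by the update-form stack adjustments, which can only encode
+   offsets in [-32768, 32767].  The helper name here is hypothetical and
+   only illustrates the arithmetic:
+
+     /* Illustration only: nonzero when OFFSET fits a 16-bit signed
+        displacement field.  */
+     static int
+     fits_16bit_signed (long offset)
+     {
+       return offset >= -32768 && offset <= 32767;
+     }
+
+   The probe loop subtracts the probe interval, so -probe_interval must
+   be encodable.  The interval can be as large as 2^16 (see the
+   stack-clash-protection-probe-interval param elsewhere in this patch
+   series); fits_16bit_signed (-65536) is false, hence the force_reg
+   call below, while fits_16bit_signed (-32768) still holds, matching
+   the "> 32768" test.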
+ + +diff -Nrup a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md +--- a/gcc/config/rs6000/rs6000.md 2017-11-06 09:12:21.128237467 -0700 ++++ b/gcc/config/rs6000/rs6000.md 2017-11-06 09:13:40.521601373 -0700 +@@ -11886,6 +11886,9 @@ + { + rtx loop_lab, end_loop; + bool rotated = CONST_INT_P (rounded_size); ++ rtx update = GEN_INT (-probe_interval); ++ if (probe_interval > 32768) ++ update = force_reg (Pmode, update); + + emit_stack_clash_protection_probe_loop_start (&loop_lab, &end_loop, + last_addr, rotated); +@@ -11893,13 +11896,11 @@ + if (Pmode == SImode) + emit_insn (gen_movsi_update_stack (stack_pointer_rtx, + stack_pointer_rtx, +- GEN_INT (-probe_interval), +- chain)); ++ update, chain)); + else + emit_insn (gen_movdi_di_update_stack (stack_pointer_rtx, + stack_pointer_rtx, +- GEN_INT (-probe_interval), +- chain)); ++ update, chain)); + emit_stack_clash_protection_probe_loop_end (loop_lab, end_loop, + last_addr, rotated); + } diff --git a/gcc48-rh1469697-2.patch b/gcc48-rh1469697-2.patch new file mode 100644 index 0000000..a4b1b9c --- /dev/null +++ b/gcc48-rh1469697-2.patch @@ -0,0 +1,360 @@ +commit 6427208ee82548346a2f42a8ac83fdd2f823fde2 +Author: law +Date: Wed Sep 20 04:56:54 2017 +0000 + + * common.opt (-fstack-clash-protection): New option. + * flag-types.h (enum stack_check_type): Note difference between + -fstack-check= and -fstack-clash-protection. + * params.def (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE): New PARAM. + (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL): Likewise. + * toplev.c (process_options): Issue warnings/errors for cases + not handled with -fstack-clash-protection. + * doc/invoke.texi (-fstack-clash-protection): Document new option. + (-fstack-check): Note additional problem with -fstack-check=generic. + Note that -fstack-check is primarily for Ada and refer users + to -fstack-clash-protection for stack-clash-protection. + Document new params for stack clash protection. + + * gcc.dg/stack-check-2.c: New test. + * lib/target-supports.exp + (check_effective_target_supports_stack_clash_protection): New function. + (check_effective_target_frame_pointer_for_non_leaf): Likewise. + (check_effective_target_caller_implicit_probes): Likewise. + + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@252994 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/common.opt b/gcc/common.opt +index 16846c13b62..0c335cb12cd 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1911,13 +1911,18 @@ Common Report Var(flag_variable_expansion_in_unroller) Optimization + Apply variable expansion when loops are unrolled + + fstack-check= +-Common Report RejectNegative Joined +--fstack-check=[no|generic|specific] Insert stack checking code into the program ++Common Report RejectNegative Joined Optimization ++-fstack-check=[no|generic|specific] Insert stack checking code into the program. + + fstack-check + Common Alias(fstack-check=, specific, no) + Insert stack checking code into the program. Same as -fstack-check=specific + ++fstack-clash-protection ++Common Report Var(flag_stack_clash_protection) Optimization ++Insert code to probe each page of stack space as it is allocated to protect ++from stack-clash style attacks. ++ + fstack-limit + Common Var(common_deferred_options) Defer + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index f7a15ca190e..313a6c5ff76 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -9406,6 +9406,21 @@ compilation for profile feedback and one for compilation without. 
The value + for compilation with profile feedback needs to be more conservative (higher) in + order to make tracer effective. + ++@item stack-clash-protection-guard-size ++Specify the size of the operating system provided stack guard as ++2 raised to @var{num} bytes. The default value is 12 (4096 bytes). ++Acceptable values are between 12 and 30. Higher values may reduce the ++number of explicit probes, but a value larger than the operating system ++provided guard will leave code vulnerable to stack clash style attacks. ++ ++@item stack-clash-protection-probe-interval ++Stack clash protection involves probing stack space as it is allocated. This ++param controls the maximum distance between probes into the stack as 2 raised ++to @var{num} bytes. Acceptable values are between 10 and 16; the default is ++12. Higher values may reduce the number of explicit probes, but a value ++larger than the operating system provided guard will leave code vulnerable to ++stack clash style attacks. ++ + @item max-cse-path-length + + The maximum number of basic blocks on path that CSE considers. +@@ -20949,7 +20964,8 @@ target support in the compiler but comes with the following drawbacks: + @enumerate + @item + Modified allocation strategy for large objects: they are always +-allocated dynamically if their size exceeds a fixed threshold. ++allocated dynamically if their size exceeds a fixed threshold. Note this ++may change the semantics of some code. + + @item + Fixed limit on the size of the static frame of functions: when it is +@@ -20964,6 +20980,27 @@ generic implementation, code performance is hampered. + Note that old-style stack checking is also the fallback method for + @code{specific} if no target support has been added in the compiler. + ++@samp{-fstack-check=} is designed for Ada's needs to detect infinite recursion ++and stack overflows. @samp{specific} is an excellent choice when compiling ++Ada code. It is not generally sufficient to protect against stack-clash ++attacks. To protect against those you want @samp{-fstack-clash-protection}. ++ ++@item -fstack-clash-protection ++@opindex fstack-clash-protection ++Generate code to prevent stack clash style attacks. When this option is ++enabled, the compiler will only allocate one page of stack space at a time ++and each page is accessed immediately after allocation. Thus, it prevents ++allocations from jumping over any stack guard page provided by the ++operating system. ++ ++Most targets do not fully support stack clash protection. However, on ++those targets @option{-fstack-clash-protection} will protect dynamic stack ++allocations. @option{-fstack-clash-protection} may also provide limited ++protection for static stack allocations if the target supports ++@option{-fstack-check=specific}. ++ ++ ++ + @item -fstack-limit-register=@var{reg} + @itemx -fstack-limit-symbol=@var{sym} + @itemx -fno-stack-limit +diff --git a/gcc/flag-types.h b/gcc/flag-types.h +index 4fc5d33348e..21e943d38fa 100644 +--- a/gcc/flag-types.h ++++ b/gcc/flag-types.h +@@ -139,7 +139,14 @@ enum excess_precision + EXCESS_PRECISION_STANDARD + }; + +-/* Type of stack check. */ ++/* Type of stack check. ++ ++ Stack checking is designed to detect infinite recursion and stack ++ overflows for Ada programs. Furthermore, in that scenario, stack ++ checking tries to ensure that enough stack space is left to run a ++ signal handler. ++ ++ -fstack-check= does not prevent stack-clash style attacks. For that ++ you want -fstack-clash-protection.
*/ + enum stack_check_type + { + /* Do not check the stack. */ +diff --git a/gcc/params.def b/gcc/params.def +index e51b847a7c4..e668624b0cb 100644 +--- a/gcc/params.def ++++ b/gcc/params.def +@@ -208,6 +208,16 @@ DEFPARAM(PARAM_STACK_FRAME_GROWTH, + "Maximal stack frame growth due to inlining (in percent)", + 1000, 0, 0) + ++DEFPARAM(PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE, ++ "stack-clash-protection-guard-size", ++ "Size of the stack guard expressed as a power of two.", ++ 12, 12, 30) ++ ++DEFPARAM(PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL, ++ "stack-clash-protection-probe-interval", ++ "Interval in which to probe the stack expressed as a power of two.", ++ 12, 10, 16) ++ + /* The GCSE optimization will be disabled if it would require + significantly more memory than this value. */ + DEFPARAM(PARAM_MAX_GCSE_MEMORY, +diff --git a/gcc/testsuite/gcc.dg/stack-check-2.c b/gcc/testsuite/gcc.dg/stack-check-2.c +new file mode 100644 +index 00000000000..196c4bbfbdd +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/stack-check-2.c +@@ -0,0 +1,66 @@ ++/* The goal here is to ensure that we never consider a call to a noreturn ++ function as a potential tail call. ++ ++ Right now GCC discovers potential tail calls by looking at the ++ predecessors of the exit block. A call to a non-return function ++ has no successors and thus can never match that first filter. ++ ++ But that could change one day and we want to catch it. The problem ++ is the compiler could potentially optimize a tail call to a nonreturn ++ function, even if the caller has a frame. That breaks the assumption ++ that calls probe *sp when saving the return address that some targets ++ depend on to elide stack probes. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection -fdump-tree-tailc -fdump-tree-optimized" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++extern void foo (void) __attribute__ ((__noreturn__)); ++ ++ ++void ++test_direct_1 (void) ++{ ++ foo (); ++} ++ ++void ++test_direct_2 (void) ++{ ++ return foo (); ++} ++ ++void (*indirect)(void)__attribute__ ((noreturn)); ++ ++ ++void ++test_indirect_1 () ++{ ++ (*indirect)(); ++} ++ ++void ++test_indirect_2 (void) ++{ ++ return (*indirect)();; ++} ++ ++ ++typedef void (*pvfn)() __attribute__ ((noreturn)); ++ ++void (*indirect_casted)(void); ++ ++void ++test_indirect_casted_1 () ++{ ++ (*(pvfn)indirect_casted)(); ++} ++ ++void ++test_indirect_casted_2 (void) ++{ ++ return (*(pvfn)indirect_casted)(); ++} ++/* { dg-final { scan-tree-dump-not "tail call" "tailc" } } */ ++/* { dg-final { scan-tree-dump-not "tail call" "optimized" } } */ ++ +diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp +index ef371ad7efd..821cea9cb33 100644 +--- a/gcc/testsuite/lib/target-supports.exp ++++ b/gcc/testsuite/lib/target-supports.exp +@@ -5392,3 +5392,95 @@ proc check_effective_target_fenv_exceptions {} { + } + } "-std=gnu99"] + } ++ ++# Return 1 if the target supports the auto_inc_dec optimization pass. ++proc check_effective_target_autoincdec { } { ++ if { ![check_no_compiler_messages auto_incdec assembly { void f () { } ++ } "-O2 -fdump-rtl-auto_inc_dec" ] } { ++ return 0 ++ } ++ ++ set dumpfile [glob -nocomplain "auto_incdec[pid].c.\[0-9\]\[0-9\]\[0-9\]r.auto_inc_dec"] ++ if { [file exists $dumpfile ] } { ++ file delete $dumpfile ++ return 1 ++ } ++ return 0 ++} ++ ++# Return 1 if the target has support for stack probing designed ++# to avoid stack-clash style attacks. 
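++#
++# For example, the new stack-check tests in this series are gated on
++# it with:
++#   /* { dg-require-effective-target supports_stack_clash_protection } */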
++# ++# This is used to restrict the stack-clash mitigation tests to ++# just those targets that have been explicitly supported. ++# ++# In addition to the prologue work on those targets, each target's ++# properties should be described in the functions below so that ++# tests do not become a mess of unreadable target conditions. ++# ++proc check_effective_target_supports_stack_clash_protection { } { ++ ++ # Temporary until the target bits are fully ACK'd. ++# if { [istarget aarch*-*-*] || [istarget x86_64-*-*] ++# || [istarget i?86-*-*] || [istarget s390*-*-*] ++# || [istarget powerpc*-*-*] || [istarget rs6000*-*-*] } { ++# return 1 ++# } ++ return 0 ++} ++ ++# Return 1 if the target creates a frame pointer for non-leaf functions. ++# Note we ignore cases where we apply tail call optimization here. ++proc check_effective_target_frame_pointer_for_non_leaf { } { ++ if { [istarget aarch*-*-*] } { ++ return 1 ++ } ++ return 0 ++} ++ ++# Return 1 if the target's calling sequence or its ABI ++# creates implicit stack probes at or prior to function entry. ++proc check_effective_target_caller_implicit_probes { } { ++ ++ # On x86/x86_64 the call instruction itself pushes the return ++ # address onto the stack. That is an implicit probe of *sp. ++ if { [istarget x86_64-*-*] || [istarget i?86-*-*] } { ++ return 1 ++ } ++ ++ # On PPC, the ABI mandates that the address of the outer ++ # frame be stored at *sp. Thus each allocation of stack ++ # space is itself an implicit probe of *sp. ++ if { [istarget powerpc*-*-*] || [istarget rs6000*-*-*] } { ++ return 1 ++ } ++ ++ # s390's ABI has a register save area allocated by the ++ # caller for use by the callee. Its mere existence does ++ # not constitute a probe by the caller, but when the callee ++ # stores into those slots, those stores are implicit probes. ++ if { [istarget s390*-*-*] } { ++ return 1 ++ } ++ ++ # Not strictly true on aarch64, but we have agreed that we will ++ # consider any function that pushes SP more than 3kbytes into ++ # the guard page as broken. This essentially means that we can ++ # consider the aarch64 as having a caller implicit probe at ++ # *(sp + 1k). ++ if { [istarget aarch64*-*-*] } { ++ return 1; ++ } ++ ++ return 0 ++} ++ ++# Targets that potentially realign the stack pointer often cause residual ++# stack allocations and make it difficult to eliminate loops or residual ++# allocations for dynamic stack allocations. ++proc check_effective_target_callee_realigns_stack { } { ++ if { [istarget x86_64-*-*] || [istarget i?86-*-*] } { ++ return 1 ++ } ++ return 0 ++} +diff --git a/gcc/toplev.c b/gcc/toplev.c +index 26f2ffb362c..1def163f8b9 100644 +--- a/gcc/toplev.c ++++ b/gcc/toplev.c +@@ -1520,6 +1520,28 @@ process_options (void) + flag_associative_math = 0; + } + ++#ifndef STACK_GROWS_DOWNWARD ++ /* -fstack-clash-protection is not currently supported on targets ++ where the stack grows up. */ ++ if (flag_stack_clash_protection) ++ { ++ warning_at (UNKNOWN_LOCATION, 0, ++ "%<-fstack-clash-protection%> is not supported on targets " ++ "where the stack grows from lower to higher addresses"); ++ flag_stack_clash_protection = 0; ++ } ++#endif ++ ++ /* We cannot support -fstack-check= and -fstack-clash-protection at ++ the same time. */ ++ if (flag_stack_check != NO_STACK_CHECK && flag_stack_clash_protection) ++ { ++ warning_at (UNKNOWN_LOCATION, 0, ++ "%<-fstack-check=%> and %<-fstack-clash-protection%> are " ++ "mutually exclusive.
Disabling %<-fstack-check=%>"); ++ flag_stack_check = NO_STACK_CHECK; ++ } ++ + /* With -fcx-limited-range, we do cheap and quick complex arithmetic. */ + if (flag_cx_limited_range) + flag_complex_method = 0; diff --git a/gcc48-rh1469697-20.patch b/gcc48-rh1469697-20.patch new file mode 100644 index 0000000..ad82c9b --- /dev/null +++ b/gcc48-rh1469697-20.patch @@ -0,0 +1,37 @@ + + * explow.c (anti_adjust_stack_and_probe_stack_clash): Avoid probing + the red zone for stack_clash_protection_final_dynamic_probe targets + when the total dynamic stack size is zero bytes. + +diff -Nrup a/gcc/explow.c b/gcc/explow.c +--- a/gcc/explow.c 2017-11-14 23:33:15.403557607 -0700 ++++ b/gcc/explow.c 2017-11-14 23:33:56.243239120 -0700 +@@ -1934,6 +1934,13 @@ anti_adjust_stack_and_probe_stack_clash + if (size != CONST0_RTX (Pmode) + && targetm.stack_clash_protection_final_dynamic_probe (residual)) + { ++ /* SIZE could be zero at runtime and in that case *sp could hold ++ live data. Furthermore, we don't want to probe into the red ++ zone. ++ ++ Go ahead and just guard a probe at *sp on SIZE != 0 at runtime ++ if SIZE is not a compile time constant. */ ++ + /* Ideally we would just probe at *sp. However, if SIZE is not + a compile-time constant, but is zero at runtime, then *sp + might hold live data. So probe at *sp if we know that +@@ -1946,9 +1953,12 @@ anti_adjust_stack_and_probe_stack_clash + } + else + { +- emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, +- -GET_MODE_SIZE (word_mode))); ++ rtx label = gen_label_rtx (); ++ emit_cmp_and_jump_insns (size, CONST0_RTX (GET_MODE (size)), ++ EQ, NULL_RTX, Pmode, 1, label); ++ emit_stack_probe (stack_pointer_rtx); + emit_insn (gen_blockage ()); ++ emit_label (label); + } + } + } diff --git a/gcc48-rh1469697-21.patch b/gcc48-rh1469697-21.patch new file mode 100644 index 0000000..0c694cc --- /dev/null +++ b/gcc48-rh1469697-21.patch @@ -0,0 +1,144 @@ + PR middle-end/83654 + * explow.c (anti_adjust_stack_and_probe_stack_clash): Test a + non-constant residual for zero at runtime and avoid probing in + that case. Reorganize code for trailing problem to mirror handling + of the residual. + + PR middle-end/83654 + * gcc.target/i386/stack-check-18.c: New test. + * gcc.target/i386/stack-check-19.c: New test. + +diff --git a/gcc/explow.c b/gcc/explow.c +index b6c56602152..042e71904ec 100644 +--- a/gcc/explow.c ++++ b/gcc/explow.c +@@ -1997,11 +1997,27 @@ anti_adjust_stack_and_probe_stack_clash (rtx size) + + if (residual != CONST0_RTX (Pmode)) + { ++ rtx label = NULL_RTX; ++ /* RESIDUAL could be zero at runtime and in that case *sp could ++ hold live data. Furthermore, we do not want to probe into the ++ red zone. ++ ++ Go ahead and just guard the probe at *sp on RESIDUAL != 0 at ++ runtime if RESIDUAL is not a compile time constant. */ ++ if (!CONST_INT_P (residual)) ++ { ++ label = gen_label_rtx (); ++ emit_cmp_and_jump_insns (residual, CONST0_RTX (GET_MODE (residual)), ++ EQ, NULL_RTX, Pmode, 1, label); ++ } ++ + rtx x = force_reg (Pmode, plus_constant (Pmode, residual, + -GET_MODE_SIZE (word_mode))); + anti_adjust_stack (residual); + emit_stack_probe (gen_rtx_PLUS (Pmode, stack_pointer_rtx, x)); + emit_insn (gen_blockage ()); ++ if (!CONST_INT_P (residual)) ++ emit_label (label); + } + + /* Some targets make optimistic assumptions in their prologues about +@@ -2014,28 +2030,20 @@ anti_adjust_stack_and_probe_stack_clash (rtx size) + live data. Furthermore, we don't want to probe into the red + zone. 
+ +- Go ahead and just guard a probe at *sp on SIZE != 0 at runtime ++ Go ahead and just guard the probe at *sp on SIZE != 0 at runtime + if SIZE is not a compile time constant. */ +- +- /* Ideally we would just probe at *sp. However, if SIZE is not +- a compile-time constant, but is zero at runtime, then *sp +- might hold live data. So probe at *sp if we know that +- an allocation was made, otherwise probe into the red zone +- which is obviously undesirable. */ +- if (CONST_INT_P (size)) +- { +- emit_stack_probe (stack_pointer_rtx); +- emit_insn (gen_blockage ()); +- } +- else ++ rtx label = NULL_RTX; ++ if (!CONST_INT_P (size)) + { +- rtx label = gen_label_rtx (); ++ label = gen_label_rtx (); + emit_cmp_and_jump_insns (size, CONST0_RTX (GET_MODE (size)), + EQ, NULL_RTX, Pmode, 1, label); +- emit_stack_probe (stack_pointer_rtx); +- emit_insn (gen_blockage ()); +- emit_label (label); + } ++ ++ emit_stack_probe (stack_pointer_rtx); ++ emit_insn (gen_blockage ()); ++ if (!CONST_INT_P (size)) ++ emit_label (label); + } + } + +diff --git a/gcc/testsuite/gcc.target/i386/stack-check-18.c b/gcc/testsuite/gcc.target/i386/stack-check-18.c +new file mode 100644 +index 00000000000..6dbff4402da +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/stack-check-18.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection -mtune=generic -fdump-rtl-expand" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++int f1 (char *); ++ ++int ++f2 (void) ++{ ++ const int size = 4096; ++ char buffer[size]; ++ return f1 (buffer); ++} ++ ++/* So we want to verify that at expand time that we probed the main ++ VLA allocation as well as the residuals. Then we want to verify ++ there was only one probe in the final assembly (implying the ++ residual probe was optimized away). */ ++/* { dg-final { scan-rtl-dump-times "allocation and probing in loop" 1 "expand" } } */ ++/* { dg-final { scan-rtl-dump-times "allocation and probing residuals" 1 "expand" } } */ ++ ++/* { dg-final { scan-assembler-times "or\[ql\]" 1 } } */ ++ +diff --git a/gcc/testsuite/gcc.target/i386/stack-check-19.c b/gcc/testsuite/gcc.target/i386/stack-check-19.c +new file mode 100644 +index 00000000000..b92c126d57f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/stack-check-19.c +@@ -0,0 +1,29 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection -mtune=generic -fdump-rtl-expand" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++int f1 (char *); ++ ++int ++f2 (const int size) ++{ ++ char buffer[size]; ++ return f1 (buffer); ++} ++ ++/* So we want to verify that at expand time that we probed the main ++ VLA allocation as well as the residuals. Then we want to verify ++ there are two probes in the final assembly code. */ ++/* { dg-final { scan-rtl-dump-times "allocation and probing in loop" 1 "expand" } } */ ++/* { dg-final { scan-rtl-dump-times "allocation and probing residuals" 1 "expand" } } */ ++/* { dg-final { scan-assembler-times "or\[ql\]" 2 } } */ ++ ++/* We also want to verify (indirectly) that the residual probe is ++ guarded. We do that by checking the number of conditional ++ branches. There should be 3. One that bypasses the probe loop, one ++ in the probe loop and one that bypasses the residual probe. ++ ++ These will all be equality tests. 
*/ ++/* { dg-final { scan-assembler-times "(\?:je|jne)" 3 } } */ ++ ++ diff --git a/gcc48-rh1469697-22.patch b/gcc48-rh1469697-22.patch new file mode 100644 index 0000000..ee75edb --- /dev/null +++ b/gcc48-rh1469697-22.patch @@ -0,0 +1,118 @@ +diff -Nrup a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +--- a/gcc/config/i386/i386.c 2018-01-03 16:10:46.278171086 -0700 ++++ b/gcc/config/i386/i386.c 2018-01-03 16:12:32.022220166 -0700 +@@ -9862,14 +9862,13 @@ static void + ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size) + { + struct machine_function *m = cfun->machine; ++ struct ix86_frame frame; ++ ix86_compute_frame_layout (&frame); + + /* If this function does not statically allocate stack space, then + no probes are needed. */ + if (!size) + { +- struct ix86_frame frame; +- ix86_compute_frame_layout (&frame); +- + /* However, the allocation of space via pushes for register + saves could be viewed as allocating space, but without the + need to probe. */ +@@ -9888,21 +9887,40 @@ ix86_adjust_stack_and_probe_stack_clash + pointer could be anywhere in the guard page. The safe thing + to do is emit a probe now. + ++ The probe can be avoided if we have already emitted any callee ++ register saves into the stack or have a frame pointer (which will ++ have been saved as well). Those saves will function as implicit ++ probes. ++ + ?!? This should be revamped to work like aarch64 and s390 where + we track the offset from the most recent probe. Normally that + offset would be zero. For a non-return function we would reset + it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then + we just probe when we cross PROBE_INTERVAL. */ +- if (TREE_THIS_VOLATILE (cfun->decl)) ++ if (TREE_THIS_VOLATILE (cfun->decl) ++ && !(frame.nregs || frame.nsseregs || frame_pointer_needed)) ++ + { + /* We can safely use any register here since we're just going to push + its value and immediately pop it back. But we do try and avoid + argument passing registers so as not to introduce dependencies in + the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */ + rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? 
AX_REG : SI_REG); +- rtx insn = emit_insn (gen_push (dummy_reg)); +- RTX_FRAME_RELATED_P (insn) = 1; +- ix86_emit_restore_reg_using_pop (dummy_reg); ++ rtx insn_push = emit_insn (gen_push (dummy_reg)); ++ rtx insn_pop = emit_insn (gen_pop (dummy_reg)); ++ m->fs.sp_offset -= UNITS_PER_WORD; ++ if (m->fs.cfa_reg == stack_pointer_rtx) ++ { ++ m->fs.cfa_offset -= UNITS_PER_WORD; ++ rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD); ++ x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x); ++ add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x); ++ RTX_FRAME_RELATED_P (insn_push) = 1; ++ x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); ++ x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x); ++ add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x); ++ RTX_FRAME_RELATED_P (insn_pop) = 1; ++ } + emit_insn (gen_blockage ()); + } + + +diff -Nrup a/gcc-4.8.5-20150702/gcc/testsuite/gcc.target/i386/stack-check-12.c b/gcc-4.8.5-20150702/gcc/testsuite/gcc.target/i386/stack-check-12.c +--- gcc-4.8.5-20150702/gcc/testsuite/gcc.target/i386/stack-check-12.c 2018-01-03 15:42:40.849530670 -0700 ++++ gcc-4.8.5-20150702/gcc/testsuite/gcc.target/i386/stack-check-12.c 2018-01-03 15:36:12.528488596 -0700 +@@ -7,7 +7,6 @@ __attribute__ ((noreturn)) void exit (in + __attribute__ ((noreturn)) void + f (void) + { +- asm volatile ("nop" ::: "edi"); + exit (1); + } + +diff -Nrup a/gcc-4.8.5-20150702/gcc/testsuite/gcc.target/i386/stack-check-17.c b/gcc-4.8.5-20150702/gcc/testsuite/gcc.target/i386/stack-check-17.c +--- gcc-4.8.5-20150702/gcc/testsuite/gcc.target/i386/stack-check-17.c 1969-12-31 17:00:00.000000000 -0700 ++++ gcc-4.8.5-20150702/gcc/testsuite/gcc.target/i386/stack-check-17.c 2018-01-03 15:36:12.528488596 -0700 +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection -mtune=generic -fomit-frame-pointer" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++ ++int x0, x1; ++void f1 (void); ++void f2 (int, int); ++ ++__attribute__ ((noreturn)) ++void ++f3 (void) ++{ ++ int y0 = x0; ++ int y1 = x1; ++ f1 (); ++ f2 (y0, y1); ++ while (1); ++} ++ ++/* Verify no explicit probes. */ ++/* { dg-final { scan-assembler-not "or\[ql\]" } } */ ++ ++/* We also want to verify we did not use a push/pop sequence ++ to probe *sp as the callee register saves are sufficient ++ to probe *sp. ++ ++ y0/y1 are live across the call and thus must be allocated ++ into either a stack slot or callee saved register. The former ++ would be rather dumb. So assume it does not happen. ++ ++ So search for two/four pushes for the callee register saves/argument ++ pushes and no pops (since the function has no reachable epilogue). */ ++/* { dg-final { scan-assembler-times "push\[ql\]" 2 { target { ! ia32 } } } } */ ++/* { dg-final { scan-assembler-times "push\[ql\]" 4 { target { ia32 } } } } */ ++/* { dg-final { scan-assembler-not "pop" } } */ ++ diff --git a/gcc48-rh1469697-23.patch b/gcc48-rh1469697-23.patch new file mode 100644 index 0000000..3b69367 --- /dev/null +++ b/gcc48-rh1469697-23.patch @@ -0,0 +1,52 @@ +diff --git a/gcc/testsuite/gcc.target/i386/stack-check-17.c b/gcc/testsuite/gcc.target/i386/stack-check-17.c +index d2ef83b..dcd2930 100644 +--- a/gcc/testsuite/gcc.target/i386/stack-check-17.c ++++ b/gcc/testsuite/gcc.target/i386/stack-check-17.c +@@ -29,9 +29,11 @@ f3 (void) + into either a stack slot or callee saved register. The former + would be rather dumb. So assume it does not happen. 
+ +- So search for two/four pushes for the callee register saves/argument +- pushes and no pops (since the function has no reachable epilogue). */ +-/* { dg-final { scan-assembler-times "push\[ql\]" 2 { target { ! ia32 } } } } */ +-/* { dg-final { scan-assembler-times "push\[ql\]" 4 { target { ia32 } } } } */ ++ So search for two pushes for the callee register saves pushes ++ and no pops (since the function has no reachable epilogue). ++ ++ This is slightly different than upstream because the code we ++ generate for argument setup is slightly different. */ ++/* { dg-final { scan-assembler-times "push\[ql\]" 2 } } */ + /* { dg-final { scan-assembler-not "pop" } } */ + +diff --git a/gcc/testsuite/gcc.target/i386/stack-check-18.c b/gcc/testsuite/gcc.target/i386/stack-check-18.c +index 6dbff44..1638f77 100644 +--- a/gcc/testsuite/gcc.target/i386/stack-check-18.c ++++ b/gcc/testsuite/gcc.target/i386/stack-check-18.c +@@ -1,6 +1,7 @@ + /* { dg-do compile } */ + /* { dg-options "-O2 -fstack-clash-protection -mtune=generic -fdump-rtl-expand" } */ + /* { dg-require-effective-target supports_stack_clash_protection } */ ++/* { dg-skip-if "" { *-*-* } { "-fstack-protector" } { "" } } */ + + int f1 (char *); + +diff --git a/gcc/testsuite/gcc.target/i386/stack-check-19.c b/gcc/testsuite/gcc.target/i386/stack-check-19.c +index b92c126..c341801 100644 +--- a/gcc/testsuite/gcc.target/i386/stack-check-19.c ++++ b/gcc/testsuite/gcc.target/i386/stack-check-19.c +@@ -1,6 +1,7 @@ + /* { dg-do compile } */ + /* { dg-options "-O2 -fstack-clash-protection -mtune=generic -fdump-rtl-expand" } */ + /* { dg-require-effective-target supports_stack_clash_protection } */ ++/* { dg-skip-if "" { *-*-* } { "-fstack-protector" } { "" } } */ + + int f1 (char *); + +@@ -24,6 +25,6 @@ f2 (const int size) + in the probe loop and one that bypasses the residual probe. + + These will all be equality tests. */ +-/* { dg-final { scan-assembler-times "(\?:je|jne)" 3 } } */ ++/* { dg-final { scan-assembler-times "(\?:jmp|je|jne)" 3 } } */ + + diff --git a/gcc48-rh1469697-3.patch b/gcc48-rh1469697-3.patch new file mode 100644 index 0000000..6dbf24b --- /dev/null +++ b/gcc48-rh1469697-3.patch @@ -0,0 +1,600 @@ +commit a3e2ba88eb09c1eed2f7ed6e17660b345464bb90 +Author: law +Date: Wed Sep 20 05:05:12 2017 +0000 + + 2017-09-18 Jeff Law + + * explow.c: Include "params.h" and "dumpfile.h". + (anti_adjust_stack_and_probe_stack_clash): New function. + (get_stack_check_protect): Likewise. + (compute_stack_clash_protection_loop_data): Likewise. + (emit_stack_clash_protection_loop_start): Likewise. + (emit_stack_clash_protection_loop_end): Likewise. + (allocate_dynamic_stack_space): Use get_stack_check_protect. + Use anti_adjust_stack_and_probe_stack_clash. + * explow.h (compute_stack_clash_protection_loop_data): Prototype. + (emit_stack_clash_protection_loop_start): Likewise. + (emit_stack_clash_protection_loop_end): Likewise. + * rtl.h (get_stack_check_protect): Prototype. + * target.def (stack_clash_protection_final_dynamic_probe): New hook. + * targhooks.c (default_stack_clash_protection_final_dynamic_probe): New. + * targhooks.h (default_stack_clash_protection_final_dynamic_probe): + Prototype. + * doc/tm.texi.in (TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE): + Add @hook. + * doc/tm.texi: Rebuilt. + * config/alpha/alpha.c (alpha_expand_prologue): Likewise. + * config/i386/i386.c (ix86_expand_prologue): Likewise. + * config/ia64/ia64.c (ia64_expand_prologue): Likewise. + * config/mips/mips.c (mips_expand_prologue): Likewise. 
+ * config/rs6000/rs6000.c (rs6000_emit_prologue): Likewise. + * config/sparc/sparc.c (sparc_expand_prologue): Likewise. + (sparc_flat_expand_prologue): Likewise. + + * gcc.dg/stack-check-3.c: New test. + + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@252995 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c +index 2874b8454a9..5402f5213d6 100644 +--- a/gcc/config/alpha/alpha.c ++++ b/gcc/config/alpha/alpha.c +@@ -7625,7 +7625,7 @@ alpha_expand_prologue (void) + + probed_size = frame_size; + if (flag_stack_check) +- probed_size += STACK_CHECK_PROTECT; ++ probed_size += get_stack_check_protect (); + + if (probed_size <= 32768) + { +diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +index e36726ba722..d996fd160e8 100644 +--- a/gcc/config/i386/i386.c ++++ b/gcc/config/i386/i386.c +@@ -10544,12 +10544,12 @@ ix86_expand_prologue (void) + HOST_WIDE_INT size = allocate; + + if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000) +- size = 0x80000000 - STACK_CHECK_PROTECT - 1; ++ size = 0x80000000 - get_stack_check_protect () - 1; + + if (TARGET_STACK_PROBE) +- ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT); ++ ix86_emit_probe_stack_range (0, size + get_stack_check_protect ()); + else +- ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size); ++ ix86_emit_probe_stack_range (get_stack_check_protect (), size); + } + } + +diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c +index 50bbad6661c..390983936e8 100644 +--- a/gcc/config/ia64/ia64.c ++++ b/gcc/config/ia64/ia64.c +@@ -3435,7 +3435,7 @@ ia64_expand_prologue (void) + current_function_static_stack_size = current_frame_info.total_size; + + if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) +- ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, ++ ia64_emit_probe_stack_range (get_stack_check_protect (), + current_frame_info.total_size, + current_frame_info.n_input_regs + + current_frame_info.n_local_regs); +diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c +index 41c5d6b6b1f..9b7eb678f19 100644 +--- a/gcc/config/mips/mips.c ++++ b/gcc/config/mips/mips.c +@@ -10746,7 +10746,7 @@ mips_expand_prologue (void) + current_function_static_stack_size = size; + + if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size) +- mips_emit_probe_stack_range (STACK_CHECK_PROTECT, size); ++ mips_emit_probe_stack_range (get_stack_check_protect (), size); + + /* Save the registers. 
Allocate up to MIPS_MAX_FIRST_STACK_STEP + bytes beforehand; this is enough to cover the register save area +diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c +index 15583055895..a9052c6becf 100644 +--- a/gcc/config/rs6000/rs6000.c ++++ b/gcc/config/rs6000/rs6000.c +@@ -23214,7 +23214,8 @@ rs6000_emit_prologue (void) + current_function_static_stack_size = info->total_size; + + if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && info->total_size) +- rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, info->total_size); ++ rs6000_emit_probe_stack_range (get_stack_check_protect (), ++ info->total_size); + + if (TARGET_FIX_AND_CONTINUE) + { +diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c +index e5d326cdf23..e5e93c80261 100644 +--- a/gcc/config/sparc/sparc.c ++++ b/gcc/config/sparc/sparc.c +@@ -5431,7 +5431,7 @@ sparc_expand_prologue (void) + current_function_static_stack_size = size; + + if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size) +- sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size); ++ sparc_emit_probe_stack_range (get_stack_check_protect (), size); + + if (size == 0) + ; /* do nothing. */ +@@ -5533,7 +5533,7 @@ sparc_flat_expand_prologue (void) + current_function_static_stack_size = size; + + if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size) +- sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size); ++ sparc_emit_probe_stack_range (get_stack_check_protect (), size); + + if (sparc_save_local_in_regs_p) + emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS, +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index 6b18a2724bc..eeef757bf5b 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -3571,6 +3571,10 @@ GCC computed the default from the values of the above macros and you will + normally not need to override that default. + @end defmac + ++@deftypefn {Target Hook} bool TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE (rtx @var{residual}) ++Some targets make optimistic assumptions about the state of stack probing when they emit their prologues. On such targets a probe into the end of any dynamically allocated space is likely required for safety against stack clash style attacks. Define this variable to return nonzero if such a probe is required or zero otherwise. You need not define this macro if it would always have the value zero. ++@end deftypefn ++ + @need 2000 + @node Frame Registers + @subsection Registers That Address the Stack Frame +diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in +index 7d0b3c73b2f..6707ca87236 100644 +--- a/gcc/doc/tm.texi.in ++++ b/gcc/doc/tm.texi.in +@@ -3539,6 +3539,8 @@ GCC computed the default from the values of the above macros and you will + normally not need to override that default. + @end defmac + ++@hook TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE ++ + @need 2000 + @node Frame Registers + @subsection Registers That Address the Stack Frame +diff --git a/gcc/explow.c b/gcc/explow.c +index 7da8bc75f19..2526e8513b7 100644 +--- a/gcc/explow.c ++++ b/gcc/explow.c +@@ -40,8 +40,11 @@ along with GCC; see the file COPYING3. If not see + #include "target.h" + #include "common/common-target.h" + #include "output.h" ++#include "params.h" ++#include "dumpfile.h" + + static rtx break_out_memory_refs (rtx); ++static void anti_adjust_stack_and_probe_stack_clash (rtx); + + + /* Truncate and perhaps sign-extend C as appropriate for MODE. 
*/ +@@ -1140,6 +1143,29 @@ update_nonlocal_goto_save_area (void) + emit_stack_save (SAVE_NONLOCAL, &r_save); + } + ++/* Return the number of bytes to "protect" on the stack for -fstack-check. ++ ++ "protect" in the context of -fstack-check means how many bytes we ++ should always ensure are available on the stack. More importantly ++ this is how many bytes are skipped when probing the stack. ++ ++ On some targets we want to reuse the -fstack-check prologue support ++ to give a degree of protection against stack clashing style attacks. ++ ++ In that scenario we do not want to skip bytes before probing as that ++ would render the stack clash protections useless. ++ ++ So we never use STACK_CHECK_PROTECT directly. Instead we indirect though ++ this helper which allows us to provide different values for ++ -fstack-check and -fstack-clash-protection. */ ++HOST_WIDE_INT ++get_stack_check_protect (void) ++{ ++ if (flag_stack_clash_protection) ++ return 0; ++ return STACK_CHECK_PROTECT; ++} ++ + /* Return an rtx representing the address of an area of memory dynamically + pushed on the stack. + +@@ -1393,7 +1419,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align, + probe_stack_range (STACK_OLD_CHECK_PROTECT + STACK_CHECK_MAX_FRAME_SIZE, + size); + else if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) +- probe_stack_range (STACK_CHECK_PROTECT, size); ++ probe_stack_range (get_stack_check_protect (), size); + + /* Don't let anti_adjust_stack emit notes. */ + suppress_reg_args_size = true; +@@ -1451,6 +1477,8 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align, + + if (flag_stack_check && STACK_CHECK_MOVING_SP) + anti_adjust_stack_and_probe (size, false); ++ else if (flag_stack_clash_protection) ++ anti_adjust_stack_and_probe_stack_clash (size); + else + anti_adjust_stack (size); + +@@ -1712,6 +1740,219 @@ probe_stack_range (HOST_WIDE_INT first, rtx size) + } + } + ++/* Compute parameters for stack clash probing a dynamic stack ++ allocation of SIZE bytes. ++ ++ We compute ROUNDED_SIZE, LAST_ADDR, RESIDUAL and PROBE_INTERVAL. ++ ++ Additionally we conditionally dump the type of probing that will ++ be needed given the values computed. */ ++ ++void ++compute_stack_clash_protection_loop_data (rtx *rounded_size, rtx *last_addr, ++ rtx *residual, ++ HOST_WIDE_INT *probe_interval, ++ rtx size) ++{ ++ /* Round SIZE down to STACK_CLASH_PROTECTION_PROBE_INTERVAL */ ++ *probe_interval ++ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL); ++ *rounded_size = simplify_gen_binary (AND, Pmode, size, ++ GEN_INT (-*probe_interval)); ++ ++ /* Compute the value of the stack pointer for the last iteration. ++ It's just SP + ROUNDED_SIZE. */ ++ rtx rounded_size_op = force_operand (*rounded_size, NULL_RTX); ++ *last_addr = force_operand (gen_rtx_fmt_ee (STACK_GROW_OP, Pmode, ++ stack_pointer_rtx, ++ rounded_size_op), ++ NULL_RTX); ++ ++ /* Compute any residuals not allocated by the loop above. Residuals ++ are just the ROUNDED_SIZE - SIZE. */ ++ *residual = simplify_gen_binary (MINUS, Pmode, size, *rounded_size); ++ ++ /* Dump key information to make writing tests easy. 
*/ ++ if (dump_file) ++ { ++ if (*rounded_size == CONST0_RTX (Pmode)) ++ fprintf (dump_file, ++ "Stack clash skipped dynamic allocation and probing loop.\n"); ++ else if (GET_CODE (*rounded_size) == CONST_INT ++ && INTVAL (*rounded_size) <= 4 * *probe_interval) ++ fprintf (dump_file, ++ "Stack clash dynamic allocation and probing inline.\n"); ++ else if (GET_CODE (*rounded_size) == CONST_INT) ++ fprintf (dump_file, ++ "Stack clash dynamic allocation and probing in " ++ "rotated loop.\n"); ++ else ++ fprintf (dump_file, ++ "Stack clash dynamic allocation and probing in loop.\n"); ++ ++ if (*residual != CONST0_RTX (Pmode)) ++ fprintf (dump_file, ++ "Stack clash dynamic allocation and probing residuals.\n"); ++ else ++ fprintf (dump_file, ++ "Stack clash skipped dynamic allocation and " ++ "probing residuals.\n"); ++ } ++} ++ ++/* Emit the start of an allocate/probe loop for stack ++ clash protection. ++ ++ LOOP_LAB and END_LAB are returned for use when we emit the ++ end of the loop. ++ ++ LAST addr is the value for SP which stops the loop. */ ++void ++emit_stack_clash_protection_probe_loop_start (rtx *loop_lab, ++ rtx *end_lab, ++ rtx last_addr, ++ bool rotated) ++{ ++ /* Essentially we want to emit any setup code, the top of loop ++ label and the comparison at the top of the loop. */ ++ *loop_lab = gen_label_rtx (); ++ *end_lab = gen_label_rtx (); ++ ++ emit_label (*loop_lab); ++ if (!rotated) ++ emit_cmp_and_jump_insns (stack_pointer_rtx, last_addr, EQ, NULL_RTX, ++ Pmode, 1, *end_lab); ++} ++ ++/* Emit the end of a stack clash probing loop. ++ ++ This consists of just the jump back to LOOP_LAB and ++ emitting END_LOOP after the loop. */ ++ ++void ++emit_stack_clash_protection_probe_loop_end (rtx loop_lab, rtx end_loop, ++ rtx last_addr, bool rotated) ++{ ++ if (rotated) ++ emit_cmp_and_jump_insns (stack_pointer_rtx, last_addr, NE, NULL_RTX, ++ Pmode, 1, loop_lab); ++ else ++ emit_jump (loop_lab); ++ ++ emit_label (end_loop); ++ ++} ++ ++/* Adjust the stack pointer by minus SIZE (an rtx for a number of bytes) ++ while probing it. This pushes when SIZE is positive. SIZE need not ++ be constant. ++ ++ This is subtly different than anti_adjust_stack_and_probe to try and ++ prevent stack-clash attacks ++ ++ 1. It must assume no knowledge of the probing state, any allocation ++ must probe. ++ ++ Consider the case of a 1 byte alloca in a loop. If the sum of the ++ allocations is large, then this could be used to jump the guard if ++ probes were not emitted. ++ ++ 2. It never skips probes, whereas anti_adjust_stack_and_probe will ++ skip probes on the first couple PROBE_INTERVALs on the assumption ++ they're done elsewhere. ++ ++ 3. It only allocates and probes SIZE bytes, it does not need to ++ allocate/probe beyond that because this probing style does not ++ guarantee signal handling capability if the guard is hit. */ ++ ++static void ++anti_adjust_stack_and_probe_stack_clash (rtx size) ++{ ++ /* First ensure SIZE is Pmode. */ ++ if (GET_MODE (size) != VOIDmode && GET_MODE (size) != Pmode) ++ size = convert_to_mode (Pmode, size, 1); ++ ++ /* We can get here with a constant size on some targets. 
*/ ++ rtx rounded_size, last_addr, residual; ++ HOST_WIDE_INT probe_interval; ++ compute_stack_clash_protection_loop_data (&rounded_size, &last_addr, ++ &residual, &probe_interval, size); ++ ++ if (rounded_size != CONST0_RTX (Pmode)) ++ { ++ if (INTVAL (rounded_size) <= 4 * probe_interval) ++ { ++ for (HOST_WIDE_INT i = 0; ++ i < INTVAL (rounded_size); ++ i += probe_interval) ++ { ++ anti_adjust_stack (GEN_INT (probe_interval)); ++ ++ /* The prologue does not probe residuals. Thus the offset ++ here is to probe just beyond what the prologue had already ++ allocated. */ ++ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, ++ (probe_interval ++ - GET_MODE_SIZE (word_mode)))); ++ emit_insn (gen_blockage ()); ++ } ++ } ++ else ++ { ++ rtx loop_lab, end_loop; ++ bool rotate_loop = GET_CODE (rounded_size) == CONST_INT; ++ emit_stack_clash_protection_probe_loop_start (&loop_lab, &end_loop, ++ last_addr, rotate_loop); ++ ++ anti_adjust_stack (GEN_INT (probe_interval)); ++ ++ /* The prologue does not probe residuals. Thus the offset here ++ is to probe just beyond what the prologue had already allocated. */ ++ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, ++ (probe_interval ++ - GET_MODE_SIZE (word_mode)))); ++ ++ emit_stack_clash_protection_probe_loop_end (loop_lab, end_loop, ++ last_addr, rotate_loop); ++ emit_insn (gen_blockage ()); ++ } ++ } ++ ++ if (residual != CONST0_RTX (Pmode)) ++ { ++ rtx x = force_reg (Pmode, plus_constant (Pmode, residual, ++ -GET_MODE_SIZE (word_mode))); ++ anti_adjust_stack (residual); ++ emit_stack_probe (gen_rtx_PLUS (Pmode, stack_pointer_rtx, x)); ++ emit_insn (gen_blockage ()); ++ } ++ ++ /* Some targets make optimistic assumptions in their prologues about ++ how the caller may have probed the stack. Make sure we honor ++ those assumptions when needed. */ ++ if (size != CONST0_RTX (Pmode) ++ && targetm.stack_clash_protection_final_dynamic_probe (residual)) ++ { ++ /* Ideally we would just probe at *sp. However, if SIZE is not ++ a compile-time constant, but is zero at runtime, then *sp ++ might hold live data. So probe at *sp if we know that ++ an allocation was made, otherwise probe into the red zone ++ which is obviously undesirable. */ ++ if (GET_CODE (size) == CONST_INT) ++ { ++ emit_stack_probe (stack_pointer_rtx); ++ emit_insn (gen_blockage ()); ++ } ++ else ++ { ++ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, ++ -GET_MODE_SIZE (word_mode))); ++ emit_insn (gen_blockage ()); ++ } ++ } ++} ++ ++ + /* Adjust the stack pointer by minus SIZE (an rtx for a number of bytes) + while probing it. This pushes when SIZE is positive. SIZE need not + be constant. If ADJUST_BACK is true, adjust back the stack pointer +diff --git a/gcc/rtl.h b/gcc/rtl.h +index 91f3387c701..ab8ec27418d 100644 +--- a/gcc/rtl.h ++++ b/gcc/rtl.h +@@ -1756,6 +1756,17 @@ extern int currently_expanding_to_rtl; + /* In explow.c */ + extern HOST_WIDE_INT trunc_int_for_mode (HOST_WIDE_INT, enum machine_mode); + extern rtx plus_constant (enum machine_mode, rtx, HOST_WIDE_INT); ++extern HOST_WIDE_INT get_stack_check_protect (void); ++ ++/* Support for building allocation/probing loops for stack-clash ++ protection of dynamically allocated stack space.
*/ ++extern void compute_stack_clash_protection_loop_data (rtx *, rtx *, rtx *, ++ HOST_WIDE_INT *, rtx); ++extern void emit_stack_clash_protection_probe_loop_start (rtx *, rtx *, ++ rtx, bool); ++extern void emit_stack_clash_protection_probe_loop_end (rtx, rtx, ++ rtx, bool); ++ + + /* In rtl.c */ + extern rtx rtx_alloc_stat (RTX_CODE MEM_STAT_DECL); +diff --git a/gcc/target.def b/gcc/target.def +index 4d6081c3121..eb2bd46f7a1 100644 +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -2580,6 +2580,13 @@ DEFHOOK + void, (void), + hook_void_void) + ++DEFHOOK ++(stack_clash_protection_final_dynamic_probe, ++ "Some targets make optimistic assumptions about the state of stack probing when they emit their prologues. On such targets a probe into the end of any dynamically allocated space is likely required for safety against stack clash style attacks. Define this variable to return nonzero if such a probe is required or zero otherwise. You need not define this macro if it would always have the value zero.", ++ bool, (rtx residual), ++ default_stack_clash_protection_final_dynamic_probe) ++ ++ + /* Functions specific to the C family of frontends. */ + #undef HOOK_PREFIX + #define HOOK_PREFIX "TARGET_C_" +diff --git a/gcc/targhooks.c b/gcc/targhooks.c +index f6aa9907225..be23875538d 100644 +--- a/gcc/targhooks.c ++++ b/gcc/targhooks.c +@@ -1557,4 +1557,10 @@ default_canonicalize_comparison (int *, rtx *, rtx *, bool) + { + } + ++bool ++default_stack_clash_protection_final_dynamic_probe (rtx residual ATTRIBUTE_UNUSED) ++{ ++ return 0; ++} ++ + #include "gt-targhooks.h" +diff --git a/gcc/targhooks.h b/gcc/targhooks.h +index b64274d3ff9..4acf33fae08 100644 +--- a/gcc/targhooks.h ++++ b/gcc/targhooks.h +@@ -195,3 +195,4 @@ extern const char *default_pch_valid_p (const void *, size_t); + extern void default_asm_output_ident_directive (const char*); + + extern bool default_member_type_forces_blk (const_tree, enum machine_mode); ++extern bool default_stack_clash_protection_final_dynamic_probe (rtx); +diff --git a/gcc/testsuite/gcc.dg/stack-check-3.c b/gcc/testsuite/gcc.dg/stack-check-3.c +new file mode 100644 +index 00000000000..58fb65649ee +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/stack-check-3.c +@@ -0,0 +1,86 @@ ++/* The goal here is to ensure that dynamic allocations via vlas or ++ alloca calls receive probing. ++ ++ Scanning the RTL or assembly code seems like insanity here as does ++ checking for particular allocation sizes and probe offsets. For ++ now we just verify that there's an allocation + probe loop and ++ residual allocation + probe for f?. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-expand -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=4096 --param stack-clash-protection-guard-size=4096" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++__attribute__((noinline, noclone)) void ++foo (char *p) ++{ ++ asm volatile ("" : : "r" (p) : "memory"); ++} ++ ++/* Simple VLA, no other locals. */ ++__attribute__((noinline, noclone)) void ++f0 (int x) ++{ ++ char vla[x]; ++ foo (vla); ++} ++ ++/* Simple VLA, small local frame. */ ++__attribute__((noinline, noclone)) void ++f1 (int x) ++{ ++ char locals[128]; ++ char vla[x]; ++ foo (vla); ++} ++ ++/* Small constant alloca, no other locals. */ ++__attribute__((noinline, noclone)) void ++f2 (int x) ++{ ++ char *vla = __builtin_alloca (128); ++ foo (vla); ++} ++ ++/* Big constant alloca, small local frame. 
*/ ++__attribute__((noinline, noclone)) void ++f3 (int x) ++{ ++ char locals[128]; ++ char *vla = __builtin_alloca (16384); ++ foo (vla); ++} ++ ++/* Big constant alloca, small local frame. */ ++__attribute__((noinline, noclone)) void ++f3a (int x) ++{ ++ char locals[128]; ++ char *vla = __builtin_alloca (32768); ++ foo (vla); ++} ++ ++/* Nonconstant alloca, no other locals. */ ++__attribute__((noinline, noclone)) void ++f4 (int x) ++{ ++ char *vla = __builtin_alloca (x); ++ foo (vla); ++} ++ ++/* Nonconstant alloca, small local frame. */ ++__attribute__((noinline, noclone)) void ++f5 (int x) ++{ ++ char locals[128]; ++ char *vla = __builtin_alloca (x); ++ foo (vla); ++} ++ ++/* { dg-final { scan-rtl-dump-times "allocation and probing residuals" 7 "expand" } } */ ++ ++ ++/* { dg-final { scan-rtl-dump-times "allocation and probing in loop" 7 "expand" { target callee_realigns_stack } } } */ ++/* { dg-final { scan-rtl-dump-times "allocation and probing in loop" 4 "expand" { target { ! callee_realigns_stack } } } } */ ++/* { dg-final { scan-rtl-dump-times "allocation and probing in rotated loop" 1 "expand" { target { ! callee_realigns_stack } } } } */ ++/* { dg-final { scan-rtl-dump-times "allocation and probing inline" 1 "expand" { target { ! callee_realigns_stack } } } } */ ++/* { dg-final { scan-rtl-dump-times "skipped dynamic allocation and probing loop" 1 "expand" { target { ! callee_realigns_stack } } } } */ diff --git a/gcc48-rh1469697-4.patch b/gcc48-rh1469697-4.patch new file mode 100644 index 0000000..003d79f --- /dev/null +++ b/gcc48-rh1469697-4.patch @@ -0,0 +1,143 @@ +commit 8a1b46d59d6c3e1e5eb606cd44689c8557612257 +Author: law +Date: Wed Sep 20 05:21:09 2017 +0000 + + * config/alpha/alpha.c (alpha_expand_prologue): Also check + flag_stack_clash_protection. + * config/ia64/ia64.c (ia64_compute_frame_size): Likewise. + (ia64_expand_prologue): Likewise. + * config/mips/mips.c (mips_expand_prologue): Likewise. + * config/sparc/sparc.c (sparc_expand_prologue): Likewise. + (sparc_flat_expand_prologue): Likewise. + * config/spu/spu.c (spu_expand_prologue): Likewise. + + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@252996 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c +index 5402f5213d6..c46c843e462 100644 +--- a/gcc/config/alpha/alpha.c ++++ b/gcc/config/alpha/alpha.c +@@ -7624,7 +7624,7 @@ alpha_expand_prologue (void) + Note that we are only allowed to adjust sp once in the prologue. */ + + probed_size = frame_size; +- if (flag_stack_check) ++ if (flag_stack_check || flag_stack_clash_protection) + probed_size += get_stack_check_protect (); + + if (probed_size <= 32768) +@@ -7639,7 +7639,7 @@ alpha_expand_prologue (void) + /* We only have to do this probe if we aren't saving registers or + if we are probing beyond the frame because of -fstack-check. */ + if ((sa_size == 0 && probed_size > probed - 4096) +- || flag_stack_check) ++ || flag_stack_check || flag_stack_clash_protection) + emit_insn (gen_probe_stack (GEN_INT (-probed_size))); + } + +@@ -7669,7 +7669,8 @@ alpha_expand_prologue (void) + late in the compilation, generate the loop as a single insn. 
*/ + emit_insn (gen_prologue_stack_probe_loop (count, ptr)); + +- if ((leftover > 4096 && sa_size == 0) || flag_stack_check) ++ if ((leftover > 4096 && sa_size == 0) ++ || flag_stack_check || flag_stack_clash_protection) + { + rtx last = gen_rtx_MEM (DImode, + plus_constant (Pmode, ptr, -leftover)); +@@ -7677,7 +7678,7 @@ alpha_expand_prologue (void) + emit_move_insn (last, const0_rtx); + } + +- if (flag_stack_check) ++ if (flag_stack_check || flag_stack_clash_protection) + { + /* If -fstack-check is specified we have to load the entire + constant into a register and subtract from the sp in one go, +diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c +index 390983936e8..5bf7046cf15 100644 +--- a/gcc/config/ia64/ia64.c ++++ b/gcc/config/ia64/ia64.c +@@ -2638,7 +2638,8 @@ ia64_compute_frame_size (HOST_WIDE_INT size) + mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL); + + /* Static stack checking uses r2 and r3. */ +- if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) ++ if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK ++ || flag_stack_clash_protection) + current_frame_info.gr_used_mask |= 0xc; + + /* Find the size of the register stack frame. We have only 80 local +@@ -3434,7 +3435,8 @@ ia64_expand_prologue (void) + if (flag_stack_usage_info) + current_function_static_stack_size = current_frame_info.total_size; + +- if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) ++ if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK ++ || flag_stack_clash_protection) + ia64_emit_probe_stack_range (get_stack_check_protect (), + current_frame_info.total_size, + current_frame_info.n_input_regs +diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c +index 9b7eb678f19..da17f94b4f9 100644 +--- a/gcc/config/mips/mips.c ++++ b/gcc/config/mips/mips.c +@@ -10745,7 +10745,9 @@ mips_expand_prologue (void) + if (flag_stack_usage_info) + current_function_static_stack_size = size; + +- if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size) ++ if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK ++ || flag_stack_clash_protection) ++ && size) + mips_emit_probe_stack_range (get_stack_check_protect (), size); + + /* Save the registers. 
Allocate up to MIPS_MAX_FIRST_STACK_STEP +diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c +index e5e93c80261..617aa617208 100644 +--- a/gcc/config/sparc/sparc.c ++++ b/gcc/config/sparc/sparc.c +@@ -5430,7 +5430,9 @@ sparc_expand_prologue (void) + if (flag_stack_usage_info) + current_function_static_stack_size = size; + +- if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size) ++ if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK ++ || flag_stack_clash_protection) ++ && size) + sparc_emit_probe_stack_range (get_stack_check_protect (), size); + + if (size == 0) +@@ -5532,7 +5534,9 @@ sparc_flat_expand_prologue (void) + if (flag_stack_usage_info) + current_function_static_stack_size = size; + +- if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size) ++ if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK ++ || flag_stack_clash_protection) ++ && size) + sparc_emit_probe_stack_range (get_stack_check_protect (), size); + + if (sparc_save_local_in_regs_p) +diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c +index 328bd5bd2ae..5541a3cd243 100644 +--- a/gcc/config/spu/spu.c ++++ b/gcc/config/spu/spu.c +@@ -1761,7 +1761,7 @@ spu_expand_prologue (void) + + if (total_size > 0) + { +- if (flag_stack_check) ++ if (flag_stack_check || flag_stack_clash_protection) + { + /* We compare against total_size-1 because + ($sp >= total_size) <=> ($sp > total_size-1) */ +@@ -5366,7 +5366,7 @@ spu_allocate_stack (rtx op0, rtx op1) + emit_insn (gen_spu_convert (sp, stack_pointer_rtx)); + emit_insn (gen_subv4si3 (sp, sp, splatted)); + +- if (flag_stack_check) ++ if (flag_stack_check || flag_stack_clash_protection) + { + rtx avail = gen_reg_rtx(SImode); + rtx result = gen_reg_rtx(SImode); diff --git a/gcc48-rh1469697-5.patch b/gcc48-rh1469697-5.patch new file mode 100644 index 0000000..e56e7b1 --- /dev/null +++ b/gcc48-rh1469697-5.patch @@ -0,0 +1,94 @@ +commit ea2b372d666ec1105abf4ef5418d92d612283e88 +Author: law +Date: Wed Sep 20 05:23:51 2017 +0000 + + * function.c (dump_stack_clash_frame_info): New function. + * function.h (dump_stack_clash_frame_info): Prototype. + (enum stack_clash_probes): New enum. + + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@252997 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/function.c b/gcc/function.c +index 76baf307984..9b395aebcb3 100644 +--- a/gcc/function.c ++++ b/gcc/function.c +@@ -5263,6 +5263,58 @@ get_arg_pointer_save_area (void) + return ret; + } + ++ ++/* If debugging dumps are requested, dump information about how the ++ target handled -fstack-check=clash for the prologue. ++ ++ PROBES describes what if any probes were emitted. ++ ++ RESIDUALS indicates if the prologue had any residual allocation ++ (i.e. total allocation was not a multiple of PROBE_INTERVAL). 
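An editorial usage sketch, not patch text: each target's prologue expander is expected to call this function once, reporting whatever probing decision it made, so the dejagnu directives in the new tests have stable strings to scan for. The i386 change later in this series does essentially the following (guard_size here stands in for the decoded stack-clash-protection-guard-size param):

  if (size == 0)
    dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
  else if (size < guard_size)
    dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);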
*/ ++ ++void ++dump_stack_clash_frame_info (enum stack_clash_probes probes, bool residuals) ++{ ++ if (!dump_file) ++ return; ++ ++ switch (probes) ++ { ++ case NO_PROBE_NO_FRAME: ++ fprintf (dump_file, ++ "Stack clash no probe no stack adjustment in prologue.\n"); ++ break; ++ case NO_PROBE_SMALL_FRAME: ++ fprintf (dump_file, ++ "Stack clash no probe small stack adjustment in prologue.\n"); ++ break; ++ case PROBE_INLINE: ++ fprintf (dump_file, "Stack clash inline probes in prologue.\n"); ++ break; ++ case PROBE_LOOP: ++ fprintf (dump_file, "Stack clash probe loop in prologue.\n"); ++ break; ++ } ++ ++ if (residuals) ++ fprintf (dump_file, "Stack clash residual allocation in prologue.\n"); ++ else ++ fprintf (dump_file, "Stack clash no residual allocation in prologue.\n"); ++ ++ if (frame_pointer_needed) ++ fprintf (dump_file, "Stack clash frame pointer needed.\n"); ++ else ++ fprintf (dump_file, "Stack clash no frame pointer needed.\n"); ++ ++ if (TREE_THIS_VOLATILE (cfun->decl)) ++ fprintf (dump_file, ++ "Stack clash noreturn prologue, assuming no implicit" ++ " probes in caller.\n"); ++ else ++ fprintf (dump_file, ++ "Stack clash not noreturn prologue.\n"); ++} ++ + /* Add a list of INSNS to the hash HASHP, possibly allocating HASHP + for the first time. */ + +diff --git a/gcc/function.h b/gcc/function.h +index 89d71e592dd..ffea1e564ba 100644 +--- a/gcc/function.h ++++ b/gcc/function.h +@@ -712,6 +712,16 @@ extern void instantiate_decl_rtl (rtx x); + and create duplicate blocks. */ + extern void reorder_blocks (void); + ++enum stack_clash_probes { ++ NO_PROBE_NO_FRAME, ++ NO_PROBE_SMALL_FRAME, ++ PROBE_INLINE, ++ PROBE_LOOP ++}; ++ ++extern void dump_stack_clash_frame_info (enum stack_clash_probes, bool); ++ ++ + /* Set BLOCK_NUMBER for all the blocks in FN. */ + extern void number_blocks (tree); + diff --git a/gcc48-rh1469697-6.patch b/gcc48-rh1469697-6.patch new file mode 100644 index 0000000..b08bb30 --- /dev/null +++ b/gcc48-rh1469697-6.patch @@ -0,0 +1,2705 @@ +commit dd9992d1b96b5811873c98a208c029bebb0c3577 +Author: law +Date: Wed Sep 20 05:35:07 2017 +0000 + + * config/i386/i386.c (ix86_adjust_stack_and_probe_stack_clash): New. + (ix86_expand_prologue): Dump stack clash info as needed. + Call ix86_adjust_stack_and_probe_stack_clash as needed. + + * gcc.dg/stack-check-4.c: New test. + * gcc.dg/stack-check-5.c: New test. + * gcc.dg/stack-check-6.c: New test. + * gcc.dg/stack-check-6a.c: New test. + * gcc.dg/stack-check-7.c: New test. + * gcc.dg/stack-check-8.c: New test. + * gcc.dg/stack-check-9.c: New test. + * gcc.dg/stack-check-10.c: New test. + * lib/target-supports.exp + (check_effective_target_supports_stack_clash_protection): Enable for + x86 and x86_64 targets. + + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@252998 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +index d996fd160e8..a555b0774c0 100644 +--- a/gcc/config/i386/i386.c ++++ b/gcc/config/i386/i386.c +@@ -9839,6 +9839,147 @@ release_scratch_register_on_entry (struct scratch_reg *sr) + + #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP) + ++/* Emit code to adjust the stack pointer by SIZE bytes while probing it. ++ ++ This differs from the next routine in that it tries hard to prevent ++ attacks that jump the stack guard. Thus it is never allowed to allocate ++ more than PROBE_INTERVAL bytes of stack space without a suitable ++ probe. 
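An editorial illustration of the attack being prevented, not patch text (jump_the_guard is a made-up example): "jumping the stack guard" means a single large, unprobed stack adjustment can move the stack pointer past the guard page in one step, so the first real access lands in unrelated memory instead of faulting:

  void
  jump_the_guard (void)
  {
    char big[1024 * 1024];   /* sp -= 1MB with no intervening access */
    big[0] = 0;              /* first touch may land well past the guard page */
  }

Touching the stack at least once every PROBE_INTERVAL bytes guarantees the guard page itself is hit before the pointer moves beyond it.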
*/ ++ ++static void ++ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size) ++{ ++ struct machine_function *m = cfun->machine; ++ ++ /* If this function does not statically allocate stack space, then ++ no probes are needed. */ ++ if (!size) ++ { ++ dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false); ++ return; ++ } ++ ++ /* If we are a noreturn function, then we have to consider the ++ possibility that we're called via a jump rather than a call. ++ ++ Thus we don't have the implicit probe generated by saving the ++ return address into the stack at the call. Thus, the stack ++ pointer could be anywhere in the guard page. The safe thing ++ to do is emit a probe now. ++ ++ ?!? This should be revamped to work like aarch64 and s390 where ++ we track the offset from the most recent probe. Normally that ++ offset would be zero. For a non-return function we would reset ++ it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then ++ we just probe when we cross PROBE_INTERVAL. */ ++ if (TREE_THIS_VOLATILE (cfun->decl)) ++ { ++ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, ++ -GET_MODE_SIZE (word_mode))); ++ emit_insn (gen_blockage ()); ++ } ++ ++ /* If we allocate less than the size of the guard statically, ++ then no probing is necessary, but we do need to allocate ++ the stack. */ ++ if (size < (1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE))) ++ { ++ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (-size), -1, ++ m->fs.cfa_reg == stack_pointer_rtx); ++ dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); ++ return; ++ } ++ ++ /* We're allocating a large enough stack frame that we need to ++ emit probes. Either emit them inline or in a loop depending ++ on the size. */ ++ HOST_WIDE_INT probe_interval ++ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL); ++ if (size <= 4 * probe_interval) ++ { ++ HOST_WIDE_INT i; ++ for (i = probe_interval; i <= size; i += probe_interval) ++ { ++ /* Allocate PROBE_INTERVAL bytes. */ ++ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (-probe_interval), -1, ++ m->fs.cfa_reg == stack_pointer_rtx); ++ ++ /* And probe at *sp. */ ++ emit_stack_probe (stack_pointer_rtx); ++ emit_insn (gen_blockage ()); ++ } ++ ++ /* We need to allocate space for the residual, but we do not need ++ to probe the residual. */ ++ HOST_WIDE_INT residual = (i - probe_interval - size); ++ if (residual) ++ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (residual), -1, ++ m->fs.cfa_reg == stack_pointer_rtx); ++ dump_stack_clash_frame_info (PROBE_INLINE, residual != 0); ++ } ++ else ++ { ++ struct scratch_reg sr; ++ get_scratch_register_on_entry (&sr); ++ ++ /* Step 1: round SIZE down to a multiple of the interval. */ ++ HOST_WIDE_INT rounded_size = size & -probe_interval; ++ ++ /* Step 2: compute final value of the loop counter. Use lea if ++ possible. */ ++ rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size); ++ rtx insn; ++ if (address_operand (addr, Pmode)) ++ insn = emit_insn (gen_rtx_SET (VOIDmode, sr.reg, addr)); ++ else ++ { ++ emit_move_insn (sr.reg, GEN_INT (-rounded_size)); ++ insn = emit_insn (gen_rtx_SET (VOIDmode, sr.reg, ++ gen_rtx_PLUS (Pmode, sr.reg, ++ stack_pointer_rtx))); ++ } ++ if (m->fs.cfa_reg == stack_pointer_rtx) ++ { ++ add_reg_note (insn, REG_CFA_DEF_CFA, ++ plus_constant (Pmode, sr.reg, ++ m->fs.cfa_offset + rounded_size)); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } ++ ++ /* Step 3: the loop. 
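An editorial gloss on step 3, assuming (not verifying) the semantics of the ix86_gen_adjust_stack_and_probe expander used just below: with sr.reg already holding sp - rounded_size, the emitted loop behaves roughly like

  while (sp != sr.reg)
    {
      sp -= probe_interval;        /* allocate one interval...       */
      *(volatile char *) sp = 0;   /* ...and probe it immediately    */
    }

so at no point is more than probe_interval bytes of new stack outstanding without a probe.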
*/ ++ rtx size_rtx = GEN_INT (rounded_size); ++ insn = emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, ++ size_rtx)); ++ if (m->fs.cfa_reg == stack_pointer_rtx) ++ { ++ m->fs.cfa_offset += rounded_size; ++ add_reg_note (insn, REG_CFA_DEF_CFA, ++ plus_constant (Pmode, stack_pointer_rtx, ++ m->fs.cfa_offset)); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } ++ m->fs.sp_offset += rounded_size; ++ emit_insn (gen_blockage ()); ++ ++ /* Step 4: adjust SP if we cannot assert at compile-time that SIZE ++ is equal to ROUNDED_SIZE. */ ++ ++ if (size != rounded_size) ++ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (rounded_size - size), -1, ++ m->fs.cfa_reg == stack_pointer_rtx); ++ dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size); ++ ++ release_scratch_register_on_entry (&sr); ++ } ++ ++ /* Make sure nothing is scheduled before we are done. */ ++ emit_insn (gen_blockage ()); ++} ++ + /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */ + + static void +@@ -10529,12 +10670,19 @@ ix86_expand_prologue (void) + + /* The stack has already been decremented by the instruction calling us + so probe if the size is non-negative to preserve the protection area. */ +- if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK) ++ if (allocate >= 0 ++ && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK ++ || flag_stack_clash_protection)) + { + /* We expect the registers to be saved when probes are used. */ + gcc_assert (int_registers_saved); + +- if (STACK_CHECK_MOVING_SP) ++ if (flag_stack_clash_protection) ++ { ++ ix86_adjust_stack_and_probe_stack_clash (allocate); ++ allocate = 0; ++ } ++ else if (STACK_CHECK_MOVING_SP) + { + ix86_adjust_stack_and_probe (allocate); + allocate = 0; +diff --git a/gcc/testsuite/gcc.dg/stack-check-10.c b/gcc/testsuite/gcc.dg/stack-check-10.c +new file mode 100644 +index 00000000000..a86956ad692 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/stack-check-10.c +@@ -0,0 +1,41 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++int f (int *); ++ ++int ++g (int a) ++{ ++ return f (&a); ++} ++ ++int f1 (void); ++int f2 (int); ++ ++int ++f3 (void) ++{ ++ return f2 (f1 ()); ++} ++ ++ ++/* If we have caller implicit probes, then we should not need probes in either callee. ++ Else callees may need probes, particularly if non-leaf functions require a ++ frame/frame pointer. */ ++/* { dg-final { scan-rtl-dump-times "Stack clash no probe" 2 "pro_and_epilogue" { target caller_implicit_probes } } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash inline probe" 1 "pro_and_epilogue" { target { ! caller_implicit_probes } } } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash no probe" 1 "pro_and_epilogue" { target { ! caller_implicit_probes } } } } */ ++ ++/* Neither of these functions is a noreturn function. */ ++/* { dg-final { scan-rtl-dump-times "Stack clash not noreturn" 2 "pro_and_epilogue" } } */ ++ ++/* If the callee realigns the stack or has a mandatory frame, then both functions ++ have a residual allocation. Else just g() has a residual allocation. 
*/ ++/* { dg-final { scan-rtl-dump-times "Stack clash residual allocation in prologue" 2 "pro_and_epilogue" } } */ ++ ++ ++/* If the target has frame pointers for non-leafs, then both functions will ++ need a frame pointer. Otherwise neither should. */ ++/* { dg-final { scan-rtl-dump-times "Stack clash no frame pointer needed" 2 "pro_and_epilogue" { target { ! frame_pointer_for_non_leaf } } } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash frame pointer needed" 2 "pro_and_epilogue" { target { frame_pointer_for_non_leaf } } } } */ +diff --git a/gcc/testsuite/gcc.dg/stack-check-3.c b/gcc/testsuite/gcc.dg/stack-check-3.c +index 58fb65649ee..f0bf7c71a5b 100644 +--- a/gcc/testsuite/gcc.dg/stack-check-3.c ++++ b/gcc/testsuite/gcc.dg/stack-check-3.c +@@ -7,7 +7,7 @@ + residual allocation + probe for f?. */ + + /* { dg-do compile } */ +-/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-expand -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=4096 --param stack-clash-protection-guard-size=4096" } */ ++/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-expand -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */ + /* { dg-require-effective-target supports_stack_clash_protection } */ + + __attribute__((noinline, noclone)) void +diff --git a/gcc/testsuite/gcc.dg/stack-check-4.c b/gcc/testsuite/gcc.dg/stack-check-4.c +new file mode 100644 +index 00000000000..b0c5c61972f +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/stack-check-4.c +@@ -0,0 +1,42 @@ ++/* On targets where the call instruction is an implicit probe of *sp, we ++ elide stack probes as long as the size of the local stack is less than ++ PROBE_INTERVAL. ++ ++ But if the caller were to transform a tail call into a direct jump ++ we do not have that implicit probe. This normally isn't a problem as ++ the caller must not have a local frame for that optimization to apply. ++ ++ However, a sufficiently smart compiler could realize that the caller's ++ local stack need not be torn down and thus could transform a call into ++ a jump if the target is a noreturn function, even if the caller has ++ a local frame. ++ ++ To guard against that, targets that depend on *sp being probed by the ++ call itself must emit a probe if the target function is a noreturn ++ function, even if they just allocate a small amount of stack space. ++ ++ Rather than try to parse RTL or assembly code, we instead require the ++ prologue code to emit information into the dump file that we can ++ scan for. We scan for both the positive and negative cases. 
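An editorial note, restating strings visible in gcc48-rh1469697-5.patch rather than adding new facts: the text scanned for below is produced by dump_stack_clash_frame_info, which for the two functions in this test is expected to emit, respectively,

  Stack clash noreturn prologue, assuming no implicit probes in caller.
  Stack clash not noreturn prologue.

one line each, which is exactly what the two scan-rtl-dump-times directives at the end of the file count.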
*/ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++extern void arf (char *); ++ ++__attribute__ ((noreturn)) void foo1 () ++{ ++ char x[10]; ++ while (1) ++ arf (x); ++} ++ ++void foo2 () ++{ ++ char x[10]; ++ arf (x); ++} ++/* { dg-final { scan-rtl-dump-times "Stack clash noreturn" 1 "pro_and_epilogue" } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash not noreturn" 1 "pro_and_epilogue" } } */ ++ +diff --git a/gcc/testsuite/gcc.dg/stack-check-5.c b/gcc/testsuite/gcc.dg/stack-check-5.c +new file mode 100644 +index 00000000000..2171d9b6c23 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/stack-check-5.c +@@ -0,0 +1,74 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++ ++extern void foo (char *); ++extern void bar (void); ++ ++/* This function allocates no local stack and is a leaf. It should have no ++ probes on any target and should not require a frame pointer. */ ++int ++f0 (int x, int y) ++{ ++ asm volatile ("" : : : "memory"); ++ return x + y; ++} ++ ++/* This function allocates no local stack, but is not a leaf. Ideally it ++ should not need probing and no frame pointer. */ ++int ++f1 (int x, int y) ++{ ++ asm volatile ("" : : : "memory"); ++ bar (); ++} ++ ++/* This is a leaf with a small frame. On targets with implicit probes in ++ the caller, this should not need probing. On targets with no implicit ++ probes in the caller, it may require probes. Ideally it should need no ++ frame pointer. */ ++void ++f2 (void) ++{ ++ char buf[512]; ++ asm volatile ("" : : "g" (&buf) : "memory"); ++} ++ ++/* This is a non-leaf with a small frame. On targets with implicit probes in ++ the caller, this should not need probing. On targets with no implicit ++ probes in the caller, it may require probes. It should need no frame ++ pointer. */ ++void ++f3 (void) ++{ ++ char buf[512]; ++ foo (buf); ++} ++ ++/* If we have caller implicit probes, then we should not need probes. ++ Else callees may need probes, particularly if non-leaf functions require a ++ frame/frame pointer. */ ++/* { dg-final { scan-rtl-dump-times "Stack clash no probe" 4 "pro_and_epilogue" { target caller_implicit_probes } } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash no probe" 2 "pro_and_epilogue" { target { ! caller_implicit_probes } } } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash inline probes " 2 "pro_and_epilogue" { target { ! caller_implicit_probes } } } } */ ++ ++/* None of these functions are marked with the noreturn attribute. */ ++/* { dg-final { scan-rtl-dump-times "Stack clash not noreturn" 4 "pro_and_epilogue" } } */ ++ ++/* Two functions are leafs, two are not. Verify the target identified them ++ appropriately. */ ++/* { dg-final { scan-rtl-dump-times "Stack clash no frame pointer needed" 4 "pro_and_epilogue" { target { ! 
frame_pointer_for_non_leaf } } } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash no frame pointer needed" 2 "pro_and_epilogue" { target { frame_pointer_for_non_leaf } } } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash frame pointer needed" 2 "pro_and_epilogue" { target { frame_pointer_for_non_leaf } } } } */ ++ ++ ++/* We have selected the size of the array in f2/f3 to be large enough ++ to not live in the red zone on targets that support it. ++ ++ That allows simplification of this test considerably. ++ f1() should not require any allocations, thus no residuals. ++ All the rest of the functions require some kind of allocation, ++ either for the saved fp/rp or the array. */ ++/* { dg-final { scan-rtl-dump-times "Stack clash no residual allocation in prologue" 1 "pro_and_epilogue" } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash residual allocation in prologue" 3 "pro_and_epilogue" } } */ +diff --git a/gcc/testsuite/gcc.dg/stack-check-6.c b/gcc/testsuite/gcc.dg/stack-check-6.c +new file mode 100644 +index 00000000000..ad2021c9037 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/stack-check-6.c +@@ -0,0 +1,55 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++ ++extern void foo (char *); ++extern void bar (void); ++ ++ ++/* This is a leaf with a frame that is large enough to require probing with ++ a residual allocation, but small enough to probe inline. */ ++void ++f4 (void) ++{ ++ char buf[4096 + 512]; ++ asm volatile ("" : : "g" (&buf) : "memory"); ++} ++ ++ ++/* This is a non-leaf with a frame large enough to require probing and ++ a residual allocation, but small enough to probe inline. */ ++void ++f5 (void) ++{ ++ char buf[4096 + 512]; ++ foo (buf); ++} ++ ++/* This is a leaf with a frame that is large enough to require probing with ++ a loop plus a residual allocation. */ ++void ++f6 (void) ++{ ++ char buf[4096 * 10 + 512]; ++ asm volatile ("" : : "g" (&buf) : "memory"); ++} ++ ++ ++/* This is a non-leaf with a frame large enough to require probing with ++ a loop plus a residual allocation. */ ++void ++f7 (void) ++{ ++ char buf[4096 * 10 + 512]; ++ foo (buf); ++} ++ ++/* { dg-final { scan-rtl-dump-times "Stack clash inline probes" 2 "pro_and_epilogue" } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash probe loop" 2 "pro_and_epilogue" } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash residual allocation in prologue" 4 "pro_and_epilogue" } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash not noreturn" 4 "pro_and_epilogue" } } */ ++ ++/* { dg-final { scan-rtl-dump-times "Stack clash no frame pointer needed" 4 "pro_and_epilogue" { target { ! 
frame_pointer_for_non_leaf } } } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash no frame pointer needed" 2 "pro_and_epilogue" { target { frame_pointer_for_non_leaf } } } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash frame pointer needed" 2 "pro_and_epilogue" { target { frame_pointer_for_non_leaf } } } } */ +diff --git a/gcc/testsuite/gcc.dg/stack-check-6a.c b/gcc/testsuite/gcc.dg/stack-check-6a.c +new file mode 100644 +index 00000000000..6f8e7128921 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/stack-check-6a.c +@@ -0,0 +1,17 @@ ++/* The goal here is to verify that increasing the size of the guard allows ++ elimination of all probing on the relevant targets. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=16" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++#include "stack-check-6.c" ++ ++/* { dg-final { scan-rtl-dump-times "Stack clash inline probes" 0 "pro_and_epilogue" } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash probe loop" 0 "pro_and_epilogue" } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash residual allocation in prologue" 4 "pro_and_epilogue" } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash not noreturn" 4 "pro_and_epilogue" } } */ ++ ++/* { dg-final { scan-rtl-dump-times "Stack clash no frame pointer needed" 4 "pro_and_epilogue" { target { ! frame_pointer_for_non_leaf } } } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash no frame pointer needed" 2 "pro_and_epilogue" { target { frame_pointer_for_non_leaf } } } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash frame pointer needed" 2 "pro_and_epilogue" { target { frame_pointer_for_non_leaf } } } } */ +diff --git a/gcc/testsuite/gcc.dg/stack-check-7.c b/gcc/testsuite/gcc.dg/stack-check-7.c +new file mode 100644 +index 00000000000..b963a2881dc +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/stack-check-7.c +@@ -0,0 +1,36 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -fstack-clash-protection -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++/* For further testing, this can be run under valgrind where it's crashed ++ on aarch64 and ppc64le with -fstack-check=specific. 
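A stand-alone editorial model, not GCC code: the sizes in this test line up with the x86 thresholds added in the i386 hunk above. The sketch below (with the probe interval and guard both assumed to be 2^12 = 4096 bytes, matching the test's --param values) classifies the frames of bar() and baz() below the way ix86_adjust_stack_and_probe_stack_clash would:

  #include <stdio.h>

  /* Hypothetical model of the stack-clash size thresholds.  */
  static void
  classify (long size)
  {
    const long probe_interval = 4096;  /* 1 << 12 */
    const long guard_size = 4096;      /* 1 << 12 */

    if (size == 0)
      printf ("%6ld: no probe, no frame\n", size);
    else if (size < guard_size)
      printf ("%6ld: no probe, small frame\n", size);
    else if (size <= 4 * probe_interval)
      printf ("%6ld: inline probes, residual %ld\n",
              size, size % probe_interval);
    else
      printf ("%6ld: probe loop over %ld, residual %ld\n",
              size, size & -probe_interval, size - (size & -probe_interval));
  }

  int
  main (void)
  {
    classify (131072);  /* bar: exactly 32 intervals -> probe loop, no residual */
    classify (12000);   /* baz: <= 4 * 4096 -> inline probes, residual 3808 */
    return 0;
  }

The real frames also hold the return address and any register saves, so the classifications above are approximate rather than exact.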
*/ ++ ++ ++__attribute__((noinline, noclone)) void ++foo (char *p) ++{ ++ asm volatile ("" : : "r" (p) : "memory"); ++} ++ ++__attribute__((noinline, noclone)) void ++bar (void) ++{ ++ char buf[131072]; ++ foo (buf); ++} ++ ++__attribute__((noinline, noclone)) void ++baz (void) ++{ ++ char buf[12000]; ++ foo (buf); ++} ++ ++int ++main () ++{ ++ bar (); ++ baz (); ++ return 0; ++} ++ +diff --git a/gcc/testsuite/gcc.dg/stack-check-8.c b/gcc/testsuite/gcc.dg/stack-check-8.c +new file mode 100644 +index 00000000000..0ccec8b532a +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/stack-check-8.c +@@ -0,0 +1,139 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -w -fstack-clash-protection -Wno-psabi -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++ ++typedef float V __attribute__((vector_size (32))); ++ ++__attribute__((noinline, noclone)) void ++foo (char *p) ++{ ++ asm volatile ("" : : "r" (p) : "memory"); ++} ++ ++__attribute__((noinline, noclone)) int ++f0 (int x, int y) ++{ ++ asm volatile ("" : : : "memory"); ++ return x + y; ++} ++ ++__attribute__((noinline, noclone)) void ++f1 (void) ++{ ++ char buf[64]; ++ foo (buf); ++} ++ ++__attribute__((noinline, noclone)) void ++f2 (void) ++{ ++ char buf[12000]; ++ foo (buf); ++} ++ ++__attribute__((noinline, noclone)) void ++f3 (void) ++{ ++ char buf[131072]; ++ foo (buf); ++} ++ ++__attribute__((noinline, noclone)) void ++f4 (int x) ++{ ++ char vla[x]; ++ foo (vla); ++} ++ ++__attribute__((noinline, noclone)) void ++f5 (int x) ++{ ++ char buf[12000]; ++ foo (buf); ++ { ++ char vla[x]; ++ foo (vla); ++ } ++ { ++ char vla[x]; ++ foo (vla); ++ } ++} ++ ++V v; ++ ++__attribute__((noinline, noclone)) int ++f6 (int x, int y, V a, V b, V c) ++{ ++ asm volatile ("" : : : "memory"); ++ v = a + b + c; ++ return x + y; ++} ++ ++__attribute__((noinline, noclone)) void ++f7 (V a, V b, V c) ++{ ++ char buf[64]; ++ foo (buf); ++ v = a + b + c; ++} ++ ++__attribute__((noinline, noclone)) void ++f8 (V a, V b, V c) ++{ ++ char buf[12000]; ++ foo (buf); ++ v = a + b + c; ++} ++ ++__attribute__((noinline, noclone)) void ++f9 (V a, V b, V c) ++{ ++ char buf[131072]; ++ foo (buf); ++ v = a + b + c; ++} ++ ++__attribute__((noinline, noclone)) void ++f10 (int x, V a, V b, V c) ++{ ++ char vla[x]; ++ foo (vla); ++ v = a + b + c; ++} ++ ++__attribute__((noinline, noclone)) void ++f11 (int x, V a, V b, V c) ++{ ++ char buf[12000]; ++ foo (buf); ++ v = a + b + c; ++ { ++ char vla[x]; ++ foo (vla); ++ } ++ { ++ char vla[x]; ++ foo (vla); ++ } ++} ++ ++int ++main () ++{ ++ f0 (2, 3); ++ f1 (); ++ f2 (); ++ f3 (); ++ f4 (12000); ++ f5 (12000); ++ f6 (2, 3, v, v, v); ++ f7 (v, v, v); ++ f8 (v, v, v); ++ f9 (v, v, v); ++ f10 (12000, v, v, v); ++ f11 (12000, v, v, v); ++ return 0; ++} ++ +diff --git a/gcc/testsuite/gcc.dg/stack-check-9.c b/gcc/testsuite/gcc.dg/stack-check-9.c +new file mode 100644 +index 00000000000..b84075b9b43 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/stack-check-9.c +@@ -0,0 +1,2022 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++double f1 (void); ++double f2 (double, double); ++ ++double ++f3 (void) ++{ ++ double d000 = f1 (); ++ double d001 = f1 (); ++ 
double d002 = f1 (); ++ double d003 = f1 (); ++ double d004 = f1 (); ++ double d005 = f1 (); ++ double d006 = f1 (); ++ double d007 = f1 (); ++ double d008 = f1 (); ++ double d009 = f1 (); ++ double d010 = f1 (); ++ double d011 = f1 (); ++ double d012 = f1 (); ++ double d013 = f1 (); ++ double d014 = f1 (); ++ double d015 = f1 (); ++ double d016 = f1 (); ++ double d017 = f1 (); ++ double d018 = f1 (); ++ double d019 = f1 (); ++ double d020 = f1 (); ++ double d021 = f1 (); ++ double d022 = f1 (); ++ double d023 = f1 (); ++ double d024 = f1 (); ++ double d025 = f1 (); ++ double d026 = f1 (); ++ double d027 = f1 (); ++ double d028 = f1 (); ++ double d029 = f1 (); ++ double d030 = f1 (); ++ double d031 = f1 (); ++ double d032 = f1 (); ++ double d033 = f1 (); ++ double d034 = f1 (); ++ double d035 = f1 (); ++ double d036 = f1 (); ++ double d037 = f1 (); ++ double d038 = f1 (); ++ double d039 = f1 (); ++ double d040 = f1 (); ++ double d041 = f1 (); ++ double d042 = f1 (); ++ double d043 = f1 (); ++ double d044 = f1 (); ++ double d045 = f1 (); ++ double d046 = f1 (); ++ double d047 = f1 (); ++ double d048 = f1 (); ++ double d049 = f1 (); ++ double d050 = f1 (); ++ double d051 = f1 (); ++ double d052 = f1 (); ++ double d053 = f1 (); ++ double d054 = f1 (); ++ double d055 = f1 (); ++ double d056 = f1 (); ++ double d057 = f1 (); ++ double d058 = f1 (); ++ double d059 = f1 (); ++ double d060 = f1 (); ++ double d061 = f1 (); ++ double d062 = f1 (); ++ double d063 = f1 (); ++ double d064 = f1 (); ++ double d065 = f1 (); ++ double d066 = f1 (); ++ double d067 = f1 (); ++ double d068 = f1 (); ++ double d069 = f1 (); ++ double d070 = f1 (); ++ double d071 = f1 (); ++ double d072 = f1 (); ++ double d073 = f1 (); ++ double d074 = f1 (); ++ double d075 = f1 (); ++ double d076 = f1 (); ++ double d077 = f1 (); ++ double d078 = f1 (); ++ double d079 = f1 (); ++ double d080 = f1 (); ++ double d081 = f1 (); ++ double d082 = f1 (); ++ double d083 = f1 (); ++ double d084 = f1 (); ++ double d085 = f1 (); ++ double d086 = f1 (); ++ double d087 = f1 (); ++ double d088 = f1 (); ++ double d089 = f1 (); ++ double d090 = f1 (); ++ double d091 = f1 (); ++ double d092 = f1 (); ++ double d093 = f1 (); ++ double d094 = f1 (); ++ double d095 = f1 (); ++ double d096 = f1 (); ++ double d097 = f1 (); ++ double d098 = f1 (); ++ double d099 = f1 (); ++ double d100 = f1 (); ++ double d101 = f1 (); ++ double d102 = f1 (); ++ double d103 = f1 (); ++ double d104 = f1 (); ++ double d105 = f1 (); ++ double d106 = f1 (); ++ double d107 = f1 (); ++ double d108 = f1 (); ++ double d109 = f1 (); ++ double d110 = f1 (); ++ double d111 = f1 (); ++ double d112 = f1 (); ++ double d113 = f1 (); ++ double d114 = f1 (); ++ double d115 = f1 (); ++ double d116 = f1 (); ++ double d117 = f1 (); ++ double d118 = f1 (); ++ double d119 = f1 (); ++ double d120 = f1 (); ++ double d121 = f1 (); ++ double d122 = f1 (); ++ double d123 = f1 (); ++ double d124 = f1 (); ++ double d125 = f1 (); ++ double d126 = f1 (); ++ double d127 = f1 (); ++ double d128 = f1 (); ++ double d129 = f1 (); ++ double d130 = f1 (); ++ double d131 = f1 (); ++ double d132 = f1 (); ++ double d133 = f1 (); ++ double d134 = f1 (); ++ double d135 = f1 (); ++ double d136 = f1 (); ++ double d137 = f1 (); ++ double d138 = f1 (); ++ double d139 = f1 (); ++ double d140 = f1 (); ++ double d141 = f1 (); ++ double d142 = f1 (); ++ double d143 = f1 (); ++ double d144 = f1 (); ++ double d145 = f1 (); ++ double d146 = f1 (); ++ double d147 = f1 (); ++ double d148 = f1 (); ++ double d149 = f1 (); ++ 
double d150 = f1 (); ++ double d151 = f1 (); ++ double d152 = f1 (); ++ double d153 = f1 (); ++ double d154 = f1 (); ++ double d155 = f1 (); ++ double d156 = f1 (); ++ double d157 = f1 (); ++ double d158 = f1 (); ++ double d159 = f1 (); ++ double d160 = f1 (); ++ double d161 = f1 (); ++ double d162 = f1 (); ++ double d163 = f1 (); ++ double d164 = f1 (); ++ double d165 = f1 (); ++ double d166 = f1 (); ++ double d167 = f1 (); ++ double d168 = f1 (); ++ double d169 = f1 (); ++ double d170 = f1 (); ++ double d171 = f1 (); ++ double d172 = f1 (); ++ double d173 = f1 (); ++ double d174 = f1 (); ++ double d175 = f1 (); ++ double d176 = f1 (); ++ double d177 = f1 (); ++ double d178 = f1 (); ++ double d179 = f1 (); ++ double d180 = f1 (); ++ double d181 = f1 (); ++ double d182 = f1 (); ++ double d183 = f1 (); ++ double d184 = f1 (); ++ double d185 = f1 (); ++ double d186 = f1 (); ++ double d187 = f1 (); ++ double d188 = f1 (); ++ double d189 = f1 (); ++ double d190 = f1 (); ++ double d191 = f1 (); ++ double d192 = f1 (); ++ double d193 = f1 (); ++ double d194 = f1 (); ++ double d195 = f1 (); ++ double d196 = f1 (); ++ double d197 = f1 (); ++ double d198 = f1 (); ++ double d199 = f1 (); ++ double d200 = f1 (); ++ double d201 = f1 (); ++ double d202 = f1 (); ++ double d203 = f1 (); ++ double d204 = f1 (); ++ double d205 = f1 (); ++ double d206 = f1 (); ++ double d207 = f1 (); ++ double d208 = f1 (); ++ double d209 = f1 (); ++ double d210 = f1 (); ++ double d211 = f1 (); ++ double d212 = f1 (); ++ double d213 = f1 (); ++ double d214 = f1 (); ++ double d215 = f1 (); ++ double d216 = f1 (); ++ double d217 = f1 (); ++ double d218 = f1 (); ++ double d219 = f1 (); ++ double d220 = f1 (); ++ double d221 = f1 (); ++ double d222 = f1 (); ++ double d223 = f1 (); ++ double d224 = f1 (); ++ double d225 = f1 (); ++ double d226 = f1 (); ++ double d227 = f1 (); ++ double d228 = f1 (); ++ double d229 = f1 (); ++ double d230 = f1 (); ++ double d231 = f1 (); ++ double d232 = f1 (); ++ double d233 = f1 (); ++ double d234 = f1 (); ++ double d235 = f1 (); ++ double d236 = f1 (); ++ double d237 = f1 (); ++ double d238 = f1 (); ++ double d239 = f1 (); ++ double d240 = f1 (); ++ double d241 = f1 (); ++ double d242 = f1 (); ++ double d243 = f1 (); ++ double d244 = f1 (); ++ double d245 = f1 (); ++ double d246 = f1 (); ++ double d247 = f1 (); ++ double d248 = f1 (); ++ double d249 = f1 (); ++ double d250 = f1 (); ++ double d251 = f1 (); ++ double d252 = f1 (); ++ double d253 = f1 (); ++ double d254 = f1 (); ++ double d255 = f1 (); ++ double d256 = f1 (); ++ double d257 = f1 (); ++ double d258 = f1 (); ++ double d259 = f1 (); ++ double d260 = f1 (); ++ double d261 = f1 (); ++ double d262 = f1 (); ++ double d263 = f1 (); ++ double d264 = f1 (); ++ double d265 = f1 (); ++ double d266 = f1 (); ++ double d267 = f1 (); ++ double d268 = f1 (); ++ double d269 = f1 (); ++ double d270 = f1 (); ++ double d271 = f1 (); ++ double d272 = f1 (); ++ double d273 = f1 (); ++ double d274 = f1 (); ++ double d275 = f1 (); ++ double d276 = f1 (); ++ double d277 = f1 (); ++ double d278 = f1 (); ++ double d279 = f1 (); ++ double d280 = f1 (); ++ double d281 = f1 (); ++ double d282 = f1 (); ++ double d283 = f1 (); ++ double d284 = f1 (); ++ double d285 = f1 (); ++ double d286 = f1 (); ++ double d287 = f1 (); ++ double d288 = f1 (); ++ double d289 = f1 (); ++ double d290 = f1 (); ++ double d291 = f1 (); ++ double d292 = f1 (); ++ double d293 = f1 (); ++ double d294 = f1 (); ++ double d295 = f1 (); ++ double d296 = f1 (); ++ double d297 = f1 (); ++ 
double d298 = f1 (); ++ double d299 = f1 (); ++ double d300 = f1 (); ++ double d301 = f1 (); ++ double d302 = f1 (); ++ double d303 = f1 (); ++ double d304 = f1 (); ++ double d305 = f1 (); ++ double d306 = f1 (); ++ double d307 = f1 (); ++ double d308 = f1 (); ++ double d309 = f1 (); ++ double d310 = f1 (); ++ double d311 = f1 (); ++ double d312 = f1 (); ++ double d313 = f1 (); ++ double d314 = f1 (); ++ double d315 = f1 (); ++ double d316 = f1 (); ++ double d317 = f1 (); ++ double d318 = f1 (); ++ double d319 = f1 (); ++ double d320 = f1 (); ++ double d321 = f1 (); ++ double d322 = f1 (); ++ double d323 = f1 (); ++ double d324 = f1 (); ++ double d325 = f1 (); ++ double d326 = f1 (); ++ double d327 = f1 (); ++ double d328 = f1 (); ++ double d329 = f1 (); ++ double d330 = f1 (); ++ double d331 = f1 (); ++ double d332 = f1 (); ++ double d333 = f1 (); ++ double d334 = f1 (); ++ double d335 = f1 (); ++ double d336 = f1 (); ++ double d337 = f1 (); ++ double d338 = f1 (); ++ double d339 = f1 (); ++ double d340 = f1 (); ++ double d341 = f1 (); ++ double d342 = f1 (); ++ double d343 = f1 (); ++ double d344 = f1 (); ++ double d345 = f1 (); ++ double d346 = f1 (); ++ double d347 = f1 (); ++ double d348 = f1 (); ++ double d349 = f1 (); ++ double d350 = f1 (); ++ double d351 = f1 (); ++ double d352 = f1 (); ++ double d353 = f1 (); ++ double d354 = f1 (); ++ double d355 = f1 (); ++ double d356 = f1 (); ++ double d357 = f1 (); ++ double d358 = f1 (); ++ double d359 = f1 (); ++ double d360 = f1 (); ++ double d361 = f1 (); ++ double d362 = f1 (); ++ double d363 = f1 (); ++ double d364 = f1 (); ++ double d365 = f1 (); ++ double d366 = f1 (); ++ double d367 = f1 (); ++ double d368 = f1 (); ++ double d369 = f1 (); ++ double d370 = f1 (); ++ double d371 = f1 (); ++ double d372 = f1 (); ++ double d373 = f1 (); ++ double d374 = f1 (); ++ double d375 = f1 (); ++ double d376 = f1 (); ++ double d377 = f1 (); ++ double d378 = f1 (); ++ double d379 = f1 (); ++ double d380 = f1 (); ++ double d381 = f1 (); ++ double d382 = f1 (); ++ double d383 = f1 (); ++ double d384 = f1 (); ++ double d385 = f1 (); ++ double d386 = f1 (); ++ double d387 = f1 (); ++ double d388 = f1 (); ++ double d389 = f1 (); ++ double d390 = f1 (); ++ double d391 = f1 (); ++ double d392 = f1 (); ++ double d393 = f1 (); ++ double d394 = f1 (); ++ double d395 = f1 (); ++ double d396 = f1 (); ++ double d397 = f1 (); ++ double d398 = f1 (); ++ double d399 = f1 (); ++ double d400 = f1 (); ++ double d401 = f1 (); ++ double d402 = f1 (); ++ double d403 = f1 (); ++ double d404 = f1 (); ++ double d405 = f1 (); ++ double d406 = f1 (); ++ double d407 = f1 (); ++ double d408 = f1 (); ++ double d409 = f1 (); ++ double d410 = f1 (); ++ double d411 = f1 (); ++ double d412 = f1 (); ++ double d413 = f1 (); ++ double d414 = f1 (); ++ double d415 = f1 (); ++ double d416 = f1 (); ++ double d417 = f1 (); ++ double d418 = f1 (); ++ double d419 = f1 (); ++ double d420 = f1 (); ++ double d421 = f1 (); ++ double d422 = f1 (); ++ double d423 = f1 (); ++ double d424 = f1 (); ++ double d425 = f1 (); ++ double d426 = f1 (); ++ double d427 = f1 (); ++ double d428 = f1 (); ++ double d429 = f1 (); ++ double d430 = f1 (); ++ double d431 = f1 (); ++ double d432 = f1 (); ++ double d433 = f1 (); ++ double d434 = f1 (); ++ double d435 = f1 (); ++ double d436 = f1 (); ++ double d437 = f1 (); ++ double d438 = f1 (); ++ double d439 = f1 (); ++ double d440 = f1 (); ++ double d441 = f1 (); ++ double d442 = f1 (); ++ double d443 = f1 (); ++ double d444 = f1 (); ++ double d445 = f1 (); ++ 
double d446 = f1 (); ++ double d447 = f1 (); ++ double d448 = f1 (); ++ double d449 = f1 (); ++ double d450 = f1 (); ++ double d451 = f1 (); ++ double d452 = f1 (); ++ double d453 = f1 (); ++ double d454 = f1 (); ++ double d455 = f1 (); ++ double d456 = f1 (); ++ double d457 = f1 (); ++ double d458 = f1 (); ++ double d459 = f1 (); ++ double d460 = f1 (); ++ double d461 = f1 (); ++ double d462 = f1 (); ++ double d463 = f1 (); ++ double d464 = f1 (); ++ double d465 = f1 (); ++ double d466 = f1 (); ++ double d467 = f1 (); ++ double d468 = f1 (); ++ double d469 = f1 (); ++ double d470 = f1 (); ++ double d471 = f1 (); ++ double d472 = f1 (); ++ double d473 = f1 (); ++ double d474 = f1 (); ++ double d475 = f1 (); ++ double d476 = f1 (); ++ double d477 = f1 (); ++ double d478 = f1 (); ++ double d479 = f1 (); ++ double d480 = f1 (); ++ double d481 = f1 (); ++ double d482 = f1 (); ++ double d483 = f1 (); ++ double d484 = f1 (); ++ double d485 = f1 (); ++ double d486 = f1 (); ++ double d487 = f1 (); ++ double d488 = f1 (); ++ double d489 = f1 (); ++ double d490 = f1 (); ++ double d491 = f1 (); ++ double d492 = f1 (); ++ double d493 = f1 (); ++ double d494 = f1 (); ++ double d495 = f1 (); ++ double d496 = f1 (); ++ double d497 = f1 (); ++ double d498 = f1 (); ++ double d499 = f1 (); ++ double d500 = f1 (); ++ double d501 = f1 (); ++ double d502 = f1 (); ++ double d503 = f1 (); ++ double d504 = f1 (); ++ double d505 = f1 (); ++ double d506 = f1 (); ++ double d507 = f1 (); ++ double d508 = f1 (); ++ double d509 = f1 (); ++ double d510 = f1 (); ++ double d511 = f1 (); ++ double d512 = f1 (); ++ double d513 = f1 (); ++ double d514 = f1 (); ++ double d515 = f1 (); ++ double d516 = f1 (); ++ double d517 = f1 (); ++ double d518 = f1 (); ++ double d519 = f1 (); ++ double d520 = f1 (); ++ double d521 = f1 (); ++ double d522 = f1 (); ++ double d523 = f1 (); ++ double d524 = f1 (); ++ double d525 = f1 (); ++ double d526 = f1 (); ++ double d527 = f1 (); ++ double d528 = f1 (); ++ double d529 = f1 (); ++ double d530 = f1 (); ++ double d531 = f1 (); ++ double d532 = f1 (); ++ double d533 = f1 (); ++ double d534 = f1 (); ++ double d535 = f1 (); ++ double d536 = f1 (); ++ double d537 = f1 (); ++ double d538 = f1 (); ++ double d539 = f1 (); ++ double d540 = f1 (); ++ double d541 = f1 (); ++ double d542 = f1 (); ++ double d543 = f1 (); ++ double d544 = f1 (); ++ double d545 = f1 (); ++ double d546 = f1 (); ++ double d547 = f1 (); ++ double d548 = f1 (); ++ double d549 = f1 (); ++ double d550 = f1 (); ++ double d551 = f1 (); ++ double d552 = f1 (); ++ double d553 = f1 (); ++ double d554 = f1 (); ++ double d555 = f1 (); ++ double d556 = f1 (); ++ double d557 = f1 (); ++ double d558 = f1 (); ++ double d559 = f1 (); ++ double d560 = f1 (); ++ double d561 = f1 (); ++ double d562 = f1 (); ++ double d563 = f1 (); ++ double d564 = f1 (); ++ double d565 = f1 (); ++ double d566 = f1 (); ++ double d567 = f1 (); ++ double d568 = f1 (); ++ double d569 = f1 (); ++ double d570 = f1 (); ++ double d571 = f1 (); ++ double d572 = f1 (); ++ double d573 = f1 (); ++ double d574 = f1 (); ++ double d575 = f1 (); ++ double d576 = f1 (); ++ double d577 = f1 (); ++ double d578 = f1 (); ++ double d579 = f1 (); ++ double d580 = f1 (); ++ double d581 = f1 (); ++ double d582 = f1 (); ++ double d583 = f1 (); ++ double d584 = f1 (); ++ double d585 = f1 (); ++ double d586 = f1 (); ++ double d587 = f1 (); ++ double d588 = f1 (); ++ double d589 = f1 (); ++ double d590 = f1 (); ++ double d591 = f1 (); ++ double d592 = f1 (); ++ double d593 = f1 (); ++ 
double d594 = f1 (); ++ double d595 = f1 (); ++ double d596 = f1 (); ++ double d597 = f1 (); ++ double d598 = f1 (); ++ double d599 = f1 (); ++ double d600 = f1 (); ++ double d601 = f1 (); ++ double d602 = f1 (); ++ double d603 = f1 (); ++ double d604 = f1 (); ++ double d605 = f1 (); ++ double d606 = f1 (); ++ double d607 = f1 (); ++ double d608 = f1 (); ++ double d609 = f1 (); ++ double d610 = f1 (); ++ double d611 = f1 (); ++ double d612 = f1 (); ++ double d613 = f1 (); ++ double d614 = f1 (); ++ double d615 = f1 (); ++ double d616 = f1 (); ++ double d617 = f1 (); ++ double d618 = f1 (); ++ double d619 = f1 (); ++ double d620 = f1 (); ++ double d621 = f1 (); ++ double d622 = f1 (); ++ double d623 = f1 (); ++ double d624 = f1 (); ++ double d625 = f1 (); ++ double d626 = f1 (); ++ double d627 = f1 (); ++ double d628 = f1 (); ++ double d629 = f1 (); ++ double d630 = f1 (); ++ double d631 = f1 (); ++ double d632 = f1 (); ++ double d633 = f1 (); ++ double d634 = f1 (); ++ double d635 = f1 (); ++ double d636 = f1 (); ++ double d637 = f1 (); ++ double d638 = f1 (); ++ double d639 = f1 (); ++ double d640 = f1 (); ++ double d641 = f1 (); ++ double d642 = f1 (); ++ double d643 = f1 (); ++ double d644 = f1 (); ++ double d645 = f1 (); ++ double d646 = f1 (); ++ double d647 = f1 (); ++ double d648 = f1 (); ++ double d649 = f1 (); ++ double d650 = f1 (); ++ double d651 = f1 (); ++ double d652 = f1 (); ++ double d653 = f1 (); ++ double d654 = f1 (); ++ double d655 = f1 (); ++ double d656 = f1 (); ++ double d657 = f1 (); ++ double d658 = f1 (); ++ double d659 = f1 (); ++ double d660 = f1 (); ++ double d661 = f1 (); ++ double d662 = f1 (); ++ double d663 = f1 (); ++ double d664 = f1 (); ++ double d665 = f1 (); ++ double d666 = f1 (); ++ double d667 = f1 (); ++ double d668 = f1 (); ++ double d669 = f1 (); ++ double d670 = f1 (); ++ double d671 = f1 (); ++ double d672 = f1 (); ++ double d673 = f1 (); ++ double d674 = f1 (); ++ double d675 = f1 (); ++ double d676 = f1 (); ++ double d677 = f1 (); ++ double d678 = f1 (); ++ double d679 = f1 (); ++ double d680 = f1 (); ++ double d681 = f1 (); ++ double d682 = f1 (); ++ double d683 = f1 (); ++ double d684 = f1 (); ++ double d685 = f1 (); ++ double d686 = f1 (); ++ double d687 = f1 (); ++ double d688 = f1 (); ++ double d689 = f1 (); ++ double d690 = f1 (); ++ double d691 = f1 (); ++ double d692 = f1 (); ++ double d693 = f1 (); ++ double d694 = f1 (); ++ double d695 = f1 (); ++ double d696 = f1 (); ++ double d697 = f1 (); ++ double d698 = f1 (); ++ double d699 = f1 (); ++ double d700 = f1 (); ++ double d701 = f1 (); ++ double d702 = f1 (); ++ double d703 = f1 (); ++ double d704 = f1 (); ++ double d705 = f1 (); ++ double d706 = f1 (); ++ double d707 = f1 (); ++ double d708 = f1 (); ++ double d709 = f1 (); ++ double d710 = f1 (); ++ double d711 = f1 (); ++ double d712 = f1 (); ++ double d713 = f1 (); ++ double d714 = f1 (); ++ double d715 = f1 (); ++ double d716 = f1 (); ++ double d717 = f1 (); ++ double d718 = f1 (); ++ double d719 = f1 (); ++ double d720 = f1 (); ++ double d721 = f1 (); ++ double d722 = f1 (); ++ double d723 = f1 (); ++ double d724 = f1 (); ++ double d725 = f1 (); ++ double d726 = f1 (); ++ double d727 = f1 (); ++ double d728 = f1 (); ++ double d729 = f1 (); ++ double d730 = f1 (); ++ double d731 = f1 (); ++ double d732 = f1 (); ++ double d733 = f1 (); ++ double d734 = f1 (); ++ double d735 = f1 (); ++ double d736 = f1 (); ++ double d737 = f1 (); ++ double d738 = f1 (); ++ double d739 = f1 (); ++ double d740 = f1 (); ++ double d741 = f1 (); ++ 
double d742 = f1 (); ++ double d743 = f1 (); ++ double d744 = f1 (); ++ double d745 = f1 (); ++ double d746 = f1 (); ++ double d747 = f1 (); ++ double d748 = f1 (); ++ double d749 = f1 (); ++ double d750 = f1 (); ++ double d751 = f1 (); ++ double d752 = f1 (); ++ double d753 = f1 (); ++ double d754 = f1 (); ++ double d755 = f1 (); ++ double d756 = f1 (); ++ double d757 = f1 (); ++ double d758 = f1 (); ++ double d759 = f1 (); ++ double d760 = f1 (); ++ double d761 = f1 (); ++ double d762 = f1 (); ++ double d763 = f1 (); ++ double d764 = f1 (); ++ double d765 = f1 (); ++ double d766 = f1 (); ++ double d767 = f1 (); ++ double d768 = f1 (); ++ double d769 = f1 (); ++ double d770 = f1 (); ++ double d771 = f1 (); ++ double d772 = f1 (); ++ double d773 = f1 (); ++ double d774 = f1 (); ++ double d775 = f1 (); ++ double d776 = f1 (); ++ double d777 = f1 (); ++ double d778 = f1 (); ++ double d779 = f1 (); ++ double d780 = f1 (); ++ double d781 = f1 (); ++ double d782 = f1 (); ++ double d783 = f1 (); ++ double d784 = f1 (); ++ double d785 = f1 (); ++ double d786 = f1 (); ++ double d787 = f1 (); ++ double d788 = f1 (); ++ double d789 = f1 (); ++ double d790 = f1 (); ++ double d791 = f1 (); ++ double d792 = f1 (); ++ double d793 = f1 (); ++ double d794 = f1 (); ++ double d795 = f1 (); ++ double d796 = f1 (); ++ double d797 = f1 (); ++ double d798 = f1 (); ++ double d799 = f1 (); ++ double d800 = f1 (); ++ double d801 = f1 (); ++ double d802 = f1 (); ++ double d803 = f1 (); ++ double d804 = f1 (); ++ double d805 = f1 (); ++ double d806 = f1 (); ++ double d807 = f1 (); ++ double d808 = f1 (); ++ double d809 = f1 (); ++ double d810 = f1 (); ++ double d811 = f1 (); ++ double d812 = f1 (); ++ double d813 = f1 (); ++ double d814 = f1 (); ++ double d815 = f1 (); ++ double d816 = f1 (); ++ double d817 = f1 (); ++ double d818 = f1 (); ++ double d819 = f1 (); ++ double d820 = f1 (); ++ double d821 = f1 (); ++ double d822 = f1 (); ++ double d823 = f1 (); ++ double d824 = f1 (); ++ double d825 = f1 (); ++ double d826 = f1 (); ++ double d827 = f1 (); ++ double d828 = f1 (); ++ double d829 = f1 (); ++ double d830 = f1 (); ++ double d831 = f1 (); ++ double d832 = f1 (); ++ double d833 = f1 (); ++ double d834 = f1 (); ++ double d835 = f1 (); ++ double d836 = f1 (); ++ double d837 = f1 (); ++ double d838 = f1 (); ++ double d839 = f1 (); ++ double d840 = f1 (); ++ double d841 = f1 (); ++ double d842 = f1 (); ++ double d843 = f1 (); ++ double d844 = f1 (); ++ double d845 = f1 (); ++ double d846 = f1 (); ++ double d847 = f1 (); ++ double d848 = f1 (); ++ double d849 = f1 (); ++ double d850 = f1 (); ++ double d851 = f1 (); ++ double d852 = f1 (); ++ double d853 = f1 (); ++ double d854 = f1 (); ++ double d855 = f1 (); ++ double d856 = f1 (); ++ double d857 = f1 (); ++ double d858 = f1 (); ++ double d859 = f1 (); ++ double d860 = f1 (); ++ double d861 = f1 (); ++ double d862 = f1 (); ++ double d863 = f1 (); ++ double d864 = f1 (); ++ double d865 = f1 (); ++ double d866 = f1 (); ++ double d867 = f1 (); ++ double d868 = f1 (); ++ double d869 = f1 (); ++ double d870 = f1 (); ++ double d871 = f1 (); ++ double d872 = f1 (); ++ double d873 = f1 (); ++ double d874 = f1 (); ++ double d875 = f1 (); ++ double d876 = f1 (); ++ double d877 = f1 (); ++ double d878 = f1 (); ++ double d879 = f1 (); ++ double d880 = f1 (); ++ double d881 = f1 (); ++ double d882 = f1 (); ++ double d883 = f1 (); ++ double d884 = f1 (); ++ double d885 = f1 (); ++ double d886 = f1 (); ++ double d887 = f1 (); ++ double d888 = f1 (); ++ double d889 = f1 (); ++ 
double d890 = f1 (); ++ double d891 = f1 (); ++ double d892 = f1 (); ++ double d893 = f1 (); ++ double d894 = f1 (); ++ double d895 = f1 (); ++ double d896 = f1 (); ++ double d897 = f1 (); ++ double d898 = f1 (); ++ double d899 = f1 (); ++ double d900 = f1 (); ++ double d901 = f1 (); ++ double d902 = f1 (); ++ double d903 = f1 (); ++ double d904 = f1 (); ++ double d905 = f1 (); ++ double d906 = f1 (); ++ double d907 = f1 (); ++ double d908 = f1 (); ++ double d909 = f1 (); ++ double d910 = f1 (); ++ double d911 = f1 (); ++ double d912 = f1 (); ++ double d913 = f1 (); ++ double d914 = f1 (); ++ double d915 = f1 (); ++ double d916 = f1 (); ++ double d917 = f1 (); ++ double d918 = f1 (); ++ double d919 = f1 (); ++ double d920 = f1 (); ++ double d921 = f1 (); ++ double d922 = f1 (); ++ double d923 = f1 (); ++ double d924 = f1 (); ++ double d925 = f1 (); ++ double d926 = f1 (); ++ double d927 = f1 (); ++ double d928 = f1 (); ++ double d929 = f1 (); ++ double d930 = f1 (); ++ double d931 = f1 (); ++ double d932 = f1 (); ++ double d933 = f1 (); ++ double d934 = f1 (); ++ double d935 = f1 (); ++ double d936 = f1 (); ++ double d937 = f1 (); ++ double d938 = f1 (); ++ double d939 = f1 (); ++ double d940 = f1 (); ++ double d941 = f1 (); ++ double d942 = f1 (); ++ double d943 = f1 (); ++ double d944 = f1 (); ++ double d945 = f1 (); ++ double d946 = f1 (); ++ double d947 = f1 (); ++ double d948 = f1 (); ++ double d949 = f1 (); ++ double d950 = f1 (); ++ double d951 = f1 (); ++ double d952 = f1 (); ++ double d953 = f1 (); ++ double d954 = f1 (); ++ double d955 = f1 (); ++ double d956 = f1 (); ++ double d957 = f1 (); ++ double d958 = f1 (); ++ double d959 = f1 (); ++ double d960 = f1 (); ++ double d961 = f1 (); ++ double d962 = f1 (); ++ double d963 = f1 (); ++ double d964 = f1 (); ++ double d965 = f1 (); ++ double d966 = f1 (); ++ double d967 = f1 (); ++ double d968 = f1 (); ++ double d969 = f1 (); ++ double d970 = f1 (); ++ double d971 = f1 (); ++ double d972 = f1 (); ++ double d973 = f1 (); ++ double d974 = f1 (); ++ double d975 = f1 (); ++ double d976 = f1 (); ++ double d977 = f1 (); ++ double d978 = f1 (); ++ double d979 = f1 (); ++ double d980 = f1 (); ++ double d981 = f1 (); ++ double d982 = f1 (); ++ double d983 = f1 (); ++ double d984 = f1 (); ++ double d985 = f1 (); ++ double d986 = f1 (); ++ double d987 = f1 (); ++ double d988 = f1 (); ++ double d989 = f1 (); ++ double d990 = f1 (); ++ double d991 = f1 (); ++ double d992 = f1 (); ++ double d993 = f1 (); ++ double d994 = f1 (); ++ double d995 = f1 (); ++ double d996 = f1 (); ++ double d997 = f1 (); ++ double d998 = f1 (); ++ double d999 = f1 (); ++ ++ double x = 0; ++ x = f2 (x, d000); ++ x = f2 (x, d001); ++ x = f2 (x, d002); ++ x = f2 (x, d003); ++ x = f2 (x, d004); ++ x = f2 (x, d005); ++ x = f2 (x, d006); ++ x = f2 (x, d007); ++ x = f2 (x, d008); ++ x = f2 (x, d009); ++ x = f2 (x, d010); ++ x = f2 (x, d011); ++ x = f2 (x, d012); ++ x = f2 (x, d013); ++ x = f2 (x, d014); ++ x = f2 (x, d015); ++ x = f2 (x, d016); ++ x = f2 (x, d017); ++ x = f2 (x, d018); ++ x = f2 (x, d019); ++ x = f2 (x, d020); ++ x = f2 (x, d021); ++ x = f2 (x, d022); ++ x = f2 (x, d023); ++ x = f2 (x, d024); ++ x = f2 (x, d025); ++ x = f2 (x, d026); ++ x = f2 (x, d027); ++ x = f2 (x, d028); ++ x = f2 (x, d029); ++ x = f2 (x, d030); ++ x = f2 (x, d031); ++ x = f2 (x, d032); ++ x = f2 (x, d033); ++ x = f2 (x, d034); ++ x = f2 (x, d035); ++ x = f2 (x, d036); ++ x = f2 (x, d037); ++ x = f2 (x, d038); ++ x = f2 (x, d039); ++ x = f2 (x, d040); ++ x = f2 (x, d041); ++ x = f2 (x, 
d042); ++ x = f2 (x, d043); ++ x = f2 (x, d044); ++ x = f2 (x, d045); ++ x = f2 (x, d046); ++ x = f2 (x, d047); ++ x = f2 (x, d048); ++ x = f2 (x, d049); ++ x = f2 (x, d050); ++ x = f2 (x, d051); ++ x = f2 (x, d052); ++ x = f2 (x, d053); ++ x = f2 (x, d054); ++ x = f2 (x, d055); ++ x = f2 (x, d056); ++ x = f2 (x, d057); ++ x = f2 (x, d058); ++ x = f2 (x, d059); ++ x = f2 (x, d060); ++ x = f2 (x, d061); ++ x = f2 (x, d062); ++ x = f2 (x, d063); ++ x = f2 (x, d064); ++ x = f2 (x, d065); ++ x = f2 (x, d066); ++ x = f2 (x, d067); ++ x = f2 (x, d068); ++ x = f2 (x, d069); ++ x = f2 (x, d070); ++ x = f2 (x, d071); ++ x = f2 (x, d072); ++ x = f2 (x, d073); ++ x = f2 (x, d074); ++ x = f2 (x, d075); ++ x = f2 (x, d076); ++ x = f2 (x, d077); ++ x = f2 (x, d078); ++ x = f2 (x, d079); ++ x = f2 (x, d080); ++ x = f2 (x, d081); ++ x = f2 (x, d082); ++ x = f2 (x, d083); ++ x = f2 (x, d084); ++ x = f2 (x, d085); ++ x = f2 (x, d086); ++ x = f2 (x, d087); ++ x = f2 (x, d088); ++ x = f2 (x, d089); ++ x = f2 (x, d090); ++ x = f2 (x, d091); ++ x = f2 (x, d092); ++ x = f2 (x, d093); ++ x = f2 (x, d094); ++ x = f2 (x, d095); ++ x = f2 (x, d096); ++ x = f2 (x, d097); ++ x = f2 (x, d098); ++ x = f2 (x, d099); ++ x = f2 (x, d100); ++ x = f2 (x, d101); ++ x = f2 (x, d102); ++ x = f2 (x, d103); ++ x = f2 (x, d104); ++ x = f2 (x, d105); ++ x = f2 (x, d106); ++ x = f2 (x, d107); ++ x = f2 (x, d108); ++ x = f2 (x, d109); ++ x = f2 (x, d110); ++ x = f2 (x, d111); ++ x = f2 (x, d112); ++ x = f2 (x, d113); ++ x = f2 (x, d114); ++ x = f2 (x, d115); ++ x = f2 (x, d116); ++ x = f2 (x, d117); ++ x = f2 (x, d118); ++ x = f2 (x, d119); ++ x = f2 (x, d120); ++ x = f2 (x, d121); ++ x = f2 (x, d122); ++ x = f2 (x, d123); ++ x = f2 (x, d124); ++ x = f2 (x, d125); ++ x = f2 (x, d126); ++ x = f2 (x, d127); ++ x = f2 (x, d128); ++ x = f2 (x, d129); ++ x = f2 (x, d130); ++ x = f2 (x, d131); ++ x = f2 (x, d132); ++ x = f2 (x, d133); ++ x = f2 (x, d134); ++ x = f2 (x, d135); ++ x = f2 (x, d136); ++ x = f2 (x, d137); ++ x = f2 (x, d138); ++ x = f2 (x, d139); ++ x = f2 (x, d140); ++ x = f2 (x, d141); ++ x = f2 (x, d142); ++ x = f2 (x, d143); ++ x = f2 (x, d144); ++ x = f2 (x, d145); ++ x = f2 (x, d146); ++ x = f2 (x, d147); ++ x = f2 (x, d148); ++ x = f2 (x, d149); ++ x = f2 (x, d150); ++ x = f2 (x, d151); ++ x = f2 (x, d152); ++ x = f2 (x, d153); ++ x = f2 (x, d154); ++ x = f2 (x, d155); ++ x = f2 (x, d156); ++ x = f2 (x, d157); ++ x = f2 (x, d158); ++ x = f2 (x, d159); ++ x = f2 (x, d160); ++ x = f2 (x, d161); ++ x = f2 (x, d162); ++ x = f2 (x, d163); ++ x = f2 (x, d164); ++ x = f2 (x, d165); ++ x = f2 (x, d166); ++ x = f2 (x, d167); ++ x = f2 (x, d168); ++ x = f2 (x, d169); ++ x = f2 (x, d170); ++ x = f2 (x, d171); ++ x = f2 (x, d172); ++ x = f2 (x, d173); ++ x = f2 (x, d174); ++ x = f2 (x, d175); ++ x = f2 (x, d176); ++ x = f2 (x, d177); ++ x = f2 (x, d178); ++ x = f2 (x, d179); ++ x = f2 (x, d180); ++ x = f2 (x, d181); ++ x = f2 (x, d182); ++ x = f2 (x, d183); ++ x = f2 (x, d184); ++ x = f2 (x, d185); ++ x = f2 (x, d186); ++ x = f2 (x, d187); ++ x = f2 (x, d188); ++ x = f2 (x, d189); ++ x = f2 (x, d190); ++ x = f2 (x, d191); ++ x = f2 (x, d192); ++ x = f2 (x, d193); ++ x = f2 (x, d194); ++ x = f2 (x, d195); ++ x = f2 (x, d196); ++ x = f2 (x, d197); ++ x = f2 (x, d198); ++ x = f2 (x, d199); ++ x = f2 (x, d200); ++ x = f2 (x, d201); ++ x = f2 (x, d202); ++ x = f2 (x, d203); ++ x = f2 (x, d204); ++ x = f2 (x, d205); ++ x = f2 (x, d206); ++ x = f2 (x, d207); ++ x = f2 (x, d208); ++ x = f2 (x, d209); ++ x = f2 (x, d210); ++ x = f2 (x, 
d211); ++ x = f2 (x, d212); ++ x = f2 (x, d213); ++ x = f2 (x, d214); ++ x = f2 (x, d215); ++ x = f2 (x, d216); ++ x = f2 (x, d217); ++ x = f2 (x, d218); ++ x = f2 (x, d219); ++ x = f2 (x, d220); ++ x = f2 (x, d221); ++ x = f2 (x, d222); ++ x = f2 (x, d223); ++ x = f2 (x, d224); ++ x = f2 (x, d225); ++ x = f2 (x, d226); ++ x = f2 (x, d227); ++ x = f2 (x, d228); ++ x = f2 (x, d229); ++ x = f2 (x, d230); ++ x = f2 (x, d231); ++ x = f2 (x, d232); ++ x = f2 (x, d233); ++ x = f2 (x, d234); ++ x = f2 (x, d235); ++ x = f2 (x, d236); ++ x = f2 (x, d237); ++ x = f2 (x, d238); ++ x = f2 (x, d239); ++ x = f2 (x, d240); ++ x = f2 (x, d241); ++ x = f2 (x, d242); ++ x = f2 (x, d243); ++ x = f2 (x, d244); ++ x = f2 (x, d245); ++ x = f2 (x, d246); ++ x = f2 (x, d247); ++ x = f2 (x, d248); ++ x = f2 (x, d249); ++ x = f2 (x, d250); ++ x = f2 (x, d251); ++ x = f2 (x, d252); ++ x = f2 (x, d253); ++ x = f2 (x, d254); ++ x = f2 (x, d255); ++ x = f2 (x, d256); ++ x = f2 (x, d257); ++ x = f2 (x, d258); ++ x = f2 (x, d259); ++ x = f2 (x, d260); ++ x = f2 (x, d261); ++ x = f2 (x, d262); ++ x = f2 (x, d263); ++ x = f2 (x, d264); ++ x = f2 (x, d265); ++ x = f2 (x, d266); ++ x = f2 (x, d267); ++ x = f2 (x, d268); ++ x = f2 (x, d269); ++ x = f2 (x, d270); ++ x = f2 (x, d271); ++ x = f2 (x, d272); ++ x = f2 (x, d273); ++ x = f2 (x, d274); ++ x = f2 (x, d275); ++ x = f2 (x, d276); ++ x = f2 (x, d277); ++ x = f2 (x, d278); ++ x = f2 (x, d279); ++ x = f2 (x, d280); ++ x = f2 (x, d281); ++ x = f2 (x, d282); ++ x = f2 (x, d283); ++ x = f2 (x, d284); ++ x = f2 (x, d285); ++ x = f2 (x, d286); ++ x = f2 (x, d287); ++ x = f2 (x, d288); ++ x = f2 (x, d289); ++ x = f2 (x, d290); ++ x = f2 (x, d291); ++ x = f2 (x, d292); ++ x = f2 (x, d293); ++ x = f2 (x, d294); ++ x = f2 (x, d295); ++ x = f2 (x, d296); ++ x = f2 (x, d297); ++ x = f2 (x, d298); ++ x = f2 (x, d299); ++ x = f2 (x, d300); ++ x = f2 (x, d301); ++ x = f2 (x, d302); ++ x = f2 (x, d303); ++ x = f2 (x, d304); ++ x = f2 (x, d305); ++ x = f2 (x, d306); ++ x = f2 (x, d307); ++ x = f2 (x, d308); ++ x = f2 (x, d309); ++ x = f2 (x, d310); ++ x = f2 (x, d311); ++ x = f2 (x, d312); ++ x = f2 (x, d313); ++ x = f2 (x, d314); ++ x = f2 (x, d315); ++ x = f2 (x, d316); ++ x = f2 (x, d317); ++ x = f2 (x, d318); ++ x = f2 (x, d319); ++ x = f2 (x, d320); ++ x = f2 (x, d321); ++ x = f2 (x, d322); ++ x = f2 (x, d323); ++ x = f2 (x, d324); ++ x = f2 (x, d325); ++ x = f2 (x, d326); ++ x = f2 (x, d327); ++ x = f2 (x, d328); ++ x = f2 (x, d329); ++ x = f2 (x, d330); ++ x = f2 (x, d331); ++ x = f2 (x, d332); ++ x = f2 (x, d333); ++ x = f2 (x, d334); ++ x = f2 (x, d335); ++ x = f2 (x, d336); ++ x = f2 (x, d337); ++ x = f2 (x, d338); ++ x = f2 (x, d339); ++ x = f2 (x, d340); ++ x = f2 (x, d341); ++ x = f2 (x, d342); ++ x = f2 (x, d343); ++ x = f2 (x, d344); ++ x = f2 (x, d345); ++ x = f2 (x, d346); ++ x = f2 (x, d347); ++ x = f2 (x, d348); ++ x = f2 (x, d349); ++ x = f2 (x, d350); ++ x = f2 (x, d351); ++ x = f2 (x, d352); ++ x = f2 (x, d353); ++ x = f2 (x, d354); ++ x = f2 (x, d355); ++ x = f2 (x, d356); ++ x = f2 (x, d357); ++ x = f2 (x, d358); ++ x = f2 (x, d359); ++ x = f2 (x, d360); ++ x = f2 (x, d361); ++ x = f2 (x, d362); ++ x = f2 (x, d363); ++ x = f2 (x, d364); ++ x = f2 (x, d365); ++ x = f2 (x, d366); ++ x = f2 (x, d367); ++ x = f2 (x, d368); ++ x = f2 (x, d369); ++ x = f2 (x, d370); ++ x = f2 (x, d371); ++ x = f2 (x, d372); ++ x = f2 (x, d373); ++ x = f2 (x, d374); ++ x = f2 (x, d375); ++ x = f2 (x, d376); ++ x = f2 (x, d377); ++ x = f2 (x, d378); ++ x = f2 (x, d379); ++ x = f2 (x, 
d380); ++ x = f2 (x, d381); ++ x = f2 (x, d382); ++ x = f2 (x, d383); ++ x = f2 (x, d384); ++ x = f2 (x, d385); ++ x = f2 (x, d386); ++ x = f2 (x, d387); ++ x = f2 (x, d388); ++ x = f2 (x, d389); ++ x = f2 (x, d390); ++ x = f2 (x, d391); ++ x = f2 (x, d392); ++ x = f2 (x, d393); ++ x = f2 (x, d394); ++ x = f2 (x, d395); ++ x = f2 (x, d396); ++ x = f2 (x, d397); ++ x = f2 (x, d398); ++ x = f2 (x, d399); ++ x = f2 (x, d400); ++ x = f2 (x, d401); ++ x = f2 (x, d402); ++ x = f2 (x, d403); ++ x = f2 (x, d404); ++ x = f2 (x, d405); ++ x = f2 (x, d406); ++ x = f2 (x, d407); ++ x = f2 (x, d408); ++ x = f2 (x, d409); ++ x = f2 (x, d410); ++ x = f2 (x, d411); ++ x = f2 (x, d412); ++ x = f2 (x, d413); ++ x = f2 (x, d414); ++ x = f2 (x, d415); ++ x = f2 (x, d416); ++ x = f2 (x, d417); ++ x = f2 (x, d418); ++ x = f2 (x, d419); ++ x = f2 (x, d420); ++ x = f2 (x, d421); ++ x = f2 (x, d422); ++ x = f2 (x, d423); ++ x = f2 (x, d424); ++ x = f2 (x, d425); ++ x = f2 (x, d426); ++ x = f2 (x, d427); ++ x = f2 (x, d428); ++ x = f2 (x, d429); ++ x = f2 (x, d430); ++ x = f2 (x, d431); ++ x = f2 (x, d432); ++ x = f2 (x, d433); ++ x = f2 (x, d434); ++ x = f2 (x, d435); ++ x = f2 (x, d436); ++ x = f2 (x, d437); ++ x = f2 (x, d438); ++ x = f2 (x, d439); ++ x = f2 (x, d440); ++ x = f2 (x, d441); ++ x = f2 (x, d442); ++ x = f2 (x, d443); ++ x = f2 (x, d444); ++ x = f2 (x, d445); ++ x = f2 (x, d446); ++ x = f2 (x, d447); ++ x = f2 (x, d448); ++ x = f2 (x, d449); ++ x = f2 (x, d450); ++ x = f2 (x, d451); ++ x = f2 (x, d452); ++ x = f2 (x, d453); ++ x = f2 (x, d454); ++ x = f2 (x, d455); ++ x = f2 (x, d456); ++ x = f2 (x, d457); ++ x = f2 (x, d458); ++ x = f2 (x, d459); ++ x = f2 (x, d460); ++ x = f2 (x, d461); ++ x = f2 (x, d462); ++ x = f2 (x, d463); ++ x = f2 (x, d464); ++ x = f2 (x, d465); ++ x = f2 (x, d466); ++ x = f2 (x, d467); ++ x = f2 (x, d468); ++ x = f2 (x, d469); ++ x = f2 (x, d470); ++ x = f2 (x, d471); ++ x = f2 (x, d472); ++ x = f2 (x, d473); ++ x = f2 (x, d474); ++ x = f2 (x, d475); ++ x = f2 (x, d476); ++ x = f2 (x, d477); ++ x = f2 (x, d478); ++ x = f2 (x, d479); ++ x = f2 (x, d480); ++ x = f2 (x, d481); ++ x = f2 (x, d482); ++ x = f2 (x, d483); ++ x = f2 (x, d484); ++ x = f2 (x, d485); ++ x = f2 (x, d486); ++ x = f2 (x, d487); ++ x = f2 (x, d488); ++ x = f2 (x, d489); ++ x = f2 (x, d490); ++ x = f2 (x, d491); ++ x = f2 (x, d492); ++ x = f2 (x, d493); ++ x = f2 (x, d494); ++ x = f2 (x, d495); ++ x = f2 (x, d496); ++ x = f2 (x, d497); ++ x = f2 (x, d498); ++ x = f2 (x, d499); ++ x = f2 (x, d500); ++ x = f2 (x, d501); ++ x = f2 (x, d502); ++ x = f2 (x, d503); ++ x = f2 (x, d504); ++ x = f2 (x, d505); ++ x = f2 (x, d506); ++ x = f2 (x, d507); ++ x = f2 (x, d508); ++ x = f2 (x, d509); ++ x = f2 (x, d510); ++ x = f2 (x, d511); ++ x = f2 (x, d512); ++ x = f2 (x, d513); ++ x = f2 (x, d514); ++ x = f2 (x, d515); ++ x = f2 (x, d516); ++ x = f2 (x, d517); ++ x = f2 (x, d518); ++ x = f2 (x, d519); ++ x = f2 (x, d520); ++ x = f2 (x, d521); ++ x = f2 (x, d522); ++ x = f2 (x, d523); ++ x = f2 (x, d524); ++ x = f2 (x, d525); ++ x = f2 (x, d526); ++ x = f2 (x, d527); ++ x = f2 (x, d528); ++ x = f2 (x, d529); ++ x = f2 (x, d530); ++ x = f2 (x, d531); ++ x = f2 (x, d532); ++ x = f2 (x, d533); ++ x = f2 (x, d534); ++ x = f2 (x, d535); ++ x = f2 (x, d536); ++ x = f2 (x, d537); ++ x = f2 (x, d538); ++ x = f2 (x, d539); ++ x = f2 (x, d540); ++ x = f2 (x, d541); ++ x = f2 (x, d542); ++ x = f2 (x, d543); ++ x = f2 (x, d544); ++ x = f2 (x, d545); ++ x = f2 (x, d546); ++ x = f2 (x, d547); ++ x = f2 (x, d548); ++ x = f2 (x, 
d549); ++ x = f2 (x, d550); ++ x = f2 (x, d551); ++ x = f2 (x, d552); ++ x = f2 (x, d553); ++ x = f2 (x, d554); ++ x = f2 (x, d555); ++ x = f2 (x, d556); ++ x = f2 (x, d557); ++ x = f2 (x, d558); ++ x = f2 (x, d559); ++ x = f2 (x, d560); ++ x = f2 (x, d561); ++ x = f2 (x, d562); ++ x = f2 (x, d563); ++ x = f2 (x, d564); ++ x = f2 (x, d565); ++ x = f2 (x, d566); ++ x = f2 (x, d567); ++ x = f2 (x, d568); ++ x = f2 (x, d569); ++ x = f2 (x, d570); ++ x = f2 (x, d571); ++ x = f2 (x, d572); ++ x = f2 (x, d573); ++ x = f2 (x, d574); ++ x = f2 (x, d575); ++ x = f2 (x, d576); ++ x = f2 (x, d577); ++ x = f2 (x, d578); ++ x = f2 (x, d579); ++ x = f2 (x, d580); ++ x = f2 (x, d581); ++ x = f2 (x, d582); ++ x = f2 (x, d583); ++ x = f2 (x, d584); ++ x = f2 (x, d585); ++ x = f2 (x, d586); ++ x = f2 (x, d587); ++ x = f2 (x, d588); ++ x = f2 (x, d589); ++ x = f2 (x, d590); ++ x = f2 (x, d591); ++ x = f2 (x, d592); ++ x = f2 (x, d593); ++ x = f2 (x, d594); ++ x = f2 (x, d595); ++ x = f2 (x, d596); ++ x = f2 (x, d597); ++ x = f2 (x, d598); ++ x = f2 (x, d599); ++ x = f2 (x, d600); ++ x = f2 (x, d601); ++ x = f2 (x, d602); ++ x = f2 (x, d603); ++ x = f2 (x, d604); ++ x = f2 (x, d605); ++ x = f2 (x, d606); ++ x = f2 (x, d607); ++ x = f2 (x, d608); ++ x = f2 (x, d609); ++ x = f2 (x, d610); ++ x = f2 (x, d611); ++ x = f2 (x, d612); ++ x = f2 (x, d613); ++ x = f2 (x, d614); ++ x = f2 (x, d615); ++ x = f2 (x, d616); ++ x = f2 (x, d617); ++ x = f2 (x, d618); ++ x = f2 (x, d619); ++ x = f2 (x, d620); ++ x = f2 (x, d621); ++ x = f2 (x, d622); ++ x = f2 (x, d623); ++ x = f2 (x, d624); ++ x = f2 (x, d625); ++ x = f2 (x, d626); ++ x = f2 (x, d627); ++ x = f2 (x, d628); ++ x = f2 (x, d629); ++ x = f2 (x, d630); ++ x = f2 (x, d631); ++ x = f2 (x, d632); ++ x = f2 (x, d633); ++ x = f2 (x, d634); ++ x = f2 (x, d635); ++ x = f2 (x, d636); ++ x = f2 (x, d637); ++ x = f2 (x, d638); ++ x = f2 (x, d639); ++ x = f2 (x, d640); ++ x = f2 (x, d641); ++ x = f2 (x, d642); ++ x = f2 (x, d643); ++ x = f2 (x, d644); ++ x = f2 (x, d645); ++ x = f2 (x, d646); ++ x = f2 (x, d647); ++ x = f2 (x, d648); ++ x = f2 (x, d649); ++ x = f2 (x, d650); ++ x = f2 (x, d651); ++ x = f2 (x, d652); ++ x = f2 (x, d653); ++ x = f2 (x, d654); ++ x = f2 (x, d655); ++ x = f2 (x, d656); ++ x = f2 (x, d657); ++ x = f2 (x, d658); ++ x = f2 (x, d659); ++ x = f2 (x, d660); ++ x = f2 (x, d661); ++ x = f2 (x, d662); ++ x = f2 (x, d663); ++ x = f2 (x, d664); ++ x = f2 (x, d665); ++ x = f2 (x, d666); ++ x = f2 (x, d667); ++ x = f2 (x, d668); ++ x = f2 (x, d669); ++ x = f2 (x, d670); ++ x = f2 (x, d671); ++ x = f2 (x, d672); ++ x = f2 (x, d673); ++ x = f2 (x, d674); ++ x = f2 (x, d675); ++ x = f2 (x, d676); ++ x = f2 (x, d677); ++ x = f2 (x, d678); ++ x = f2 (x, d679); ++ x = f2 (x, d680); ++ x = f2 (x, d681); ++ x = f2 (x, d682); ++ x = f2 (x, d683); ++ x = f2 (x, d684); ++ x = f2 (x, d685); ++ x = f2 (x, d686); ++ x = f2 (x, d687); ++ x = f2 (x, d688); ++ x = f2 (x, d689); ++ x = f2 (x, d690); ++ x = f2 (x, d691); ++ x = f2 (x, d692); ++ x = f2 (x, d693); ++ x = f2 (x, d694); ++ x = f2 (x, d695); ++ x = f2 (x, d696); ++ x = f2 (x, d697); ++ x = f2 (x, d698); ++ x = f2 (x, d699); ++ x = f2 (x, d700); ++ x = f2 (x, d701); ++ x = f2 (x, d702); ++ x = f2 (x, d703); ++ x = f2 (x, d704); ++ x = f2 (x, d705); ++ x = f2 (x, d706); ++ x = f2 (x, d707); ++ x = f2 (x, d708); ++ x = f2 (x, d709); ++ x = f2 (x, d710); ++ x = f2 (x, d711); ++ x = f2 (x, d712); ++ x = f2 (x, d713); ++ x = f2 (x, d714); ++ x = f2 (x, d715); ++ x = f2 (x, d716); ++ x = f2 (x, d717); ++ x = f2 (x, 
d718); ++ x = f2 (x, d719); ++ x = f2 (x, d720); ++ x = f2 (x, d721); ++ x = f2 (x, d722); ++ x = f2 (x, d723); ++ x = f2 (x, d724); ++ x = f2 (x, d725); ++ x = f2 (x, d726); ++ x = f2 (x, d727); ++ x = f2 (x, d728); ++ x = f2 (x, d729); ++ x = f2 (x, d730); ++ x = f2 (x, d731); ++ x = f2 (x, d732); ++ x = f2 (x, d733); ++ x = f2 (x, d734); ++ x = f2 (x, d735); ++ x = f2 (x, d736); ++ x = f2 (x, d737); ++ x = f2 (x, d738); ++ x = f2 (x, d739); ++ x = f2 (x, d740); ++ x = f2 (x, d741); ++ x = f2 (x, d742); ++ x = f2 (x, d743); ++ x = f2 (x, d744); ++ x = f2 (x, d745); ++ x = f2 (x, d746); ++ x = f2 (x, d747); ++ x = f2 (x, d748); ++ x = f2 (x, d749); ++ x = f2 (x, d750); ++ x = f2 (x, d751); ++ x = f2 (x, d752); ++ x = f2 (x, d753); ++ x = f2 (x, d754); ++ x = f2 (x, d755); ++ x = f2 (x, d756); ++ x = f2 (x, d757); ++ x = f2 (x, d758); ++ x = f2 (x, d759); ++ x = f2 (x, d760); ++ x = f2 (x, d761); ++ x = f2 (x, d762); ++ x = f2 (x, d763); ++ x = f2 (x, d764); ++ x = f2 (x, d765); ++ x = f2 (x, d766); ++ x = f2 (x, d767); ++ x = f2 (x, d768); ++ x = f2 (x, d769); ++ x = f2 (x, d770); ++ x = f2 (x, d771); ++ x = f2 (x, d772); ++ x = f2 (x, d773); ++ x = f2 (x, d774); ++ x = f2 (x, d775); ++ x = f2 (x, d776); ++ x = f2 (x, d777); ++ x = f2 (x, d778); ++ x = f2 (x, d779); ++ x = f2 (x, d780); ++ x = f2 (x, d781); ++ x = f2 (x, d782); ++ x = f2 (x, d783); ++ x = f2 (x, d784); ++ x = f2 (x, d785); ++ x = f2 (x, d786); ++ x = f2 (x, d787); ++ x = f2 (x, d788); ++ x = f2 (x, d789); ++ x = f2 (x, d790); ++ x = f2 (x, d791); ++ x = f2 (x, d792); ++ x = f2 (x, d793); ++ x = f2 (x, d794); ++ x = f2 (x, d795); ++ x = f2 (x, d796); ++ x = f2 (x, d797); ++ x = f2 (x, d798); ++ x = f2 (x, d799); ++ x = f2 (x, d800); ++ x = f2 (x, d801); ++ x = f2 (x, d802); ++ x = f2 (x, d803); ++ x = f2 (x, d804); ++ x = f2 (x, d805); ++ x = f2 (x, d806); ++ x = f2 (x, d807); ++ x = f2 (x, d808); ++ x = f2 (x, d809); ++ x = f2 (x, d810); ++ x = f2 (x, d811); ++ x = f2 (x, d812); ++ x = f2 (x, d813); ++ x = f2 (x, d814); ++ x = f2 (x, d815); ++ x = f2 (x, d816); ++ x = f2 (x, d817); ++ x = f2 (x, d818); ++ x = f2 (x, d819); ++ x = f2 (x, d820); ++ x = f2 (x, d821); ++ x = f2 (x, d822); ++ x = f2 (x, d823); ++ x = f2 (x, d824); ++ x = f2 (x, d825); ++ x = f2 (x, d826); ++ x = f2 (x, d827); ++ x = f2 (x, d828); ++ x = f2 (x, d829); ++ x = f2 (x, d830); ++ x = f2 (x, d831); ++ x = f2 (x, d832); ++ x = f2 (x, d833); ++ x = f2 (x, d834); ++ x = f2 (x, d835); ++ x = f2 (x, d836); ++ x = f2 (x, d837); ++ x = f2 (x, d838); ++ x = f2 (x, d839); ++ x = f2 (x, d840); ++ x = f2 (x, d841); ++ x = f2 (x, d842); ++ x = f2 (x, d843); ++ x = f2 (x, d844); ++ x = f2 (x, d845); ++ x = f2 (x, d846); ++ x = f2 (x, d847); ++ x = f2 (x, d848); ++ x = f2 (x, d849); ++ x = f2 (x, d850); ++ x = f2 (x, d851); ++ x = f2 (x, d852); ++ x = f2 (x, d853); ++ x = f2 (x, d854); ++ x = f2 (x, d855); ++ x = f2 (x, d856); ++ x = f2 (x, d857); ++ x = f2 (x, d858); ++ x = f2 (x, d859); ++ x = f2 (x, d860); ++ x = f2 (x, d861); ++ x = f2 (x, d862); ++ x = f2 (x, d863); ++ x = f2 (x, d864); ++ x = f2 (x, d865); ++ x = f2 (x, d866); ++ x = f2 (x, d867); ++ x = f2 (x, d868); ++ x = f2 (x, d869); ++ x = f2 (x, d870); ++ x = f2 (x, d871); ++ x = f2 (x, d872); ++ x = f2 (x, d873); ++ x = f2 (x, d874); ++ x = f2 (x, d875); ++ x = f2 (x, d876); ++ x = f2 (x, d877); ++ x = f2 (x, d878); ++ x = f2 (x, d879); ++ x = f2 (x, d880); ++ x = f2 (x, d881); ++ x = f2 (x, d882); ++ x = f2 (x, d883); ++ x = f2 (x, d884); ++ x = f2 (x, d885); ++ x = f2 (x, d886); ++ x = f2 (x, 
d887); ++ x = f2 (x, d888); ++ x = f2 (x, d889); ++ x = f2 (x, d890); ++ x = f2 (x, d891); ++ x = f2 (x, d892); ++ x = f2 (x, d893); ++ x = f2 (x, d894); ++ x = f2 (x, d895); ++ x = f2 (x, d896); ++ x = f2 (x, d897); ++ x = f2 (x, d898); ++ x = f2 (x, d899); ++ x = f2 (x, d900); ++ x = f2 (x, d901); ++ x = f2 (x, d902); ++ x = f2 (x, d903); ++ x = f2 (x, d904); ++ x = f2 (x, d905); ++ x = f2 (x, d906); ++ x = f2 (x, d907); ++ x = f2 (x, d908); ++ x = f2 (x, d909); ++ x = f2 (x, d910); ++ x = f2 (x, d911); ++ x = f2 (x, d912); ++ x = f2 (x, d913); ++ x = f2 (x, d914); ++ x = f2 (x, d915); ++ x = f2 (x, d916); ++ x = f2 (x, d917); ++ x = f2 (x, d918); ++ x = f2 (x, d919); ++ x = f2 (x, d920); ++ x = f2 (x, d921); ++ x = f2 (x, d922); ++ x = f2 (x, d923); ++ x = f2 (x, d924); ++ x = f2 (x, d925); ++ x = f2 (x, d926); ++ x = f2 (x, d927); ++ x = f2 (x, d928); ++ x = f2 (x, d929); ++ x = f2 (x, d930); ++ x = f2 (x, d931); ++ x = f2 (x, d932); ++ x = f2 (x, d933); ++ x = f2 (x, d934); ++ x = f2 (x, d935); ++ x = f2 (x, d936); ++ x = f2 (x, d937); ++ x = f2 (x, d938); ++ x = f2 (x, d939); ++ x = f2 (x, d940); ++ x = f2 (x, d941); ++ x = f2 (x, d942); ++ x = f2 (x, d943); ++ x = f2 (x, d944); ++ x = f2 (x, d945); ++ x = f2 (x, d946); ++ x = f2 (x, d947); ++ x = f2 (x, d948); ++ x = f2 (x, d949); ++ x = f2 (x, d950); ++ x = f2 (x, d951); ++ x = f2 (x, d952); ++ x = f2 (x, d953); ++ x = f2 (x, d954); ++ x = f2 (x, d955); ++ x = f2 (x, d956); ++ x = f2 (x, d957); ++ x = f2 (x, d958); ++ x = f2 (x, d959); ++ x = f2 (x, d960); ++ x = f2 (x, d961); ++ x = f2 (x, d962); ++ x = f2 (x, d963); ++ x = f2 (x, d964); ++ x = f2 (x, d965); ++ x = f2 (x, d966); ++ x = f2 (x, d967); ++ x = f2 (x, d968); ++ x = f2 (x, d969); ++ x = f2 (x, d970); ++ x = f2 (x, d971); ++ x = f2 (x, d972); ++ x = f2 (x, d973); ++ x = f2 (x, d974); ++ x = f2 (x, d975); ++ x = f2 (x, d976); ++ x = f2 (x, d977); ++ x = f2 (x, d978); ++ x = f2 (x, d979); ++ x = f2 (x, d980); ++ x = f2 (x, d981); ++ x = f2 (x, d982); ++ x = f2 (x, d983); ++ x = f2 (x, d984); ++ x = f2 (x, d985); ++ x = f2 (x, d986); ++ x = f2 (x, d987); ++ x = f2 (x, d988); ++ x = f2 (x, d989); ++ x = f2 (x, d990); ++ x = f2 (x, d991); ++ x = f2 (x, d992); ++ x = f2 (x, d993); ++ x = f2 (x, d994); ++ x = f2 (x, d995); ++ x = f2 (x, d996); ++ x = f2 (x, d997); ++ x = f2 (x, d998); ++ x = f2 (x, d999); ++ return x; ++} ++ ++/* { dg-final { scan-rtl-dump-times "Stack clash inline probes" 1 "pro_and_epilogue" } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash residual allocation in prologue" 1 "pro_and_epilogue" } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash not noreturn" 1 "pro_and_epilogue" } } */ ++ ++/* f3 is not a leaf.  */ ++/* { dg-final { scan-rtl-dump-times "Stack clash no frame pointer needed" 1 "pro_and_epilogue" { target { ! frame_pointer_for_non_leaf } } } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash frame pointer needed" 1 "pro_and_epilogue" { target { frame_pointer_for_non_leaf } } } } */ +diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp +index 821cea9cb33..2c669a9822f 100644 +--- a/gcc/testsuite/lib/target-supports.exp ++++ b/gcc/testsuite/lib/target-supports.exp +@@ -5421,11 +5421,15 @@ proc check_effective_target_autoincdec { } { + proc check_effective_target_supports_stack_clash_protection { } { + + # Temporary until the target bits are fully ACK'd. 
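The scan-rtl-dump directives above count diagnostic lines that the stack-clash prologue code writes into the pro_and_epilogue RTL dump, so the generated test needs a frame large enough to trigger both the unrolled inline probes and a residual allocation. A minimal hand-written sketch of the same shape (the file name and the f1/f2 prototypes are illustrative, not part of the patch):

    /* stack-clash-sketch.c -- many locals that stay live across calls
       force a large frame, so -fstack-clash-protection must emit probes.  */
    extern double f1 (void);
    extern double f2 (double, double);

    double
    f3_like (void)
    {
      double d0 = f1 (), d1 = f1 (), d2 = f1 (), d3 = f1 ();
      /* The generated test above declares 1000 such temporaries
         (d000 ... d999) before combining them.  */
      double x = f2 (d0, d1);
      x = f2 (x, d2);
      x = f2 (x, d3);
      return x;
    }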
+-# if { [istarget aarch*-*-*] || [istarget x86_64-*-*] +-# || [istarget i?86-*-*] || [istarget s390*-*-*] ++# if { [istarget aarch*-*-*] ++# || [istarget s390*-*-*] + # || [istarget powerpc*-*-*] || [istarget rs6000*-*-*] } { + # return 1 + # } ++ ++ if { [istarget x86_64-*-*] || [istarget i?86-*-*] } { ++ return 1 ++ } + return 0 + } + diff --git a/gcc48-rh1469697-7.patch b/gcc48-rh1469697-7.patch new file mode 100644 index 0000000..0a86f92 --- /dev/null +++ b/gcc48-rh1469697-7.patch @@ -0,0 +1,115 @@ +commit 2bb044f9734259945e2b5048d92bc8d0af707d27 +Author: law +Date: Wed Sep 20 05:43:28 2017 +0000 + + * combine-stack-adj.c (combine_stack_adjustments_for_block): Do + nothing for stack adjustments with REG_STACK_CHECK. + * sched-deps.c (parse_add_or_inc): Reject insns with + REG_STACK_CHECK from dependency breaking. + * config/i386/i386.c (pro_epilogue_adjust_stack): Return insn. + (ix86_adjust_stack_and_probe_stack_clash): Add REG_STACK_CHECK notes. + * reg-notes.def (STACK_CHECK): New note. + + * gcc.target/i386/stack-check-11.c: New test. + + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@252999 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/combine-stack-adj.c b/gcc/combine-stack-adj.c +index 0a4d8a51d1d..ee66c28ca35 100644 +--- a/gcc/combine-stack-adj.c ++++ b/gcc/combine-stack-adj.c +@@ -441,6 +441,8 @@ combine_stack_adjustments_for_block (basic_block bb) + continue; + + set = single_set_for_csa (insn); ++ if (set && find_reg_note (insn, REG_STACK_CHECK, NULL_RTX)) ++ set = NULL_RTX; + if (set) + { + rtx dest = SET_DEST (set); +diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +index a07104d304d..a9072f58f50 100644 +--- a/gcc/config/i386/i386.c ++++ b/gcc/config/i386/i386.c +@@ -9502,7 +9502,7 @@ ix86_add_queued_cfa_restore_notes (rtx insn) + zero if %r11 register is live and cannot be freely used and positive + otherwise. */ + +-static void ++static rtx + pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, + int style, bool set_cfa) + { +@@ -9589,6 +9589,7 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, + m->fs.sp_offset = ooffset - INTVAL (offset); + m->fs.sp_valid = valid; + } ++ return insn; + } + + /* Find an available register to be used as dynamic realign argument +@@ -9902,9 +9903,11 @@ ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size) + for (i = probe_interval; i <= size; i += probe_interval) + { + /* Allocate PROBE_INTERVAL bytes. */ +- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (-probe_interval), -1, +- m->fs.cfa_reg == stack_pointer_rtx); ++ rtx insn ++ = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (-probe_interval), -1, ++ m->fs.cfa_reg == stack_pointer_rtx); ++ add_reg_note (insn, REG_STACK_CHECK, const0_rtx); + + /* And probe at *sp. */ + emit_stack_probe (stack_pointer_rtx); +diff --git a/gcc/reg-notes.def b/gcc/reg-notes.def +index db61c092aab..1d7a4356a85 100644 +--- a/gcc/reg-notes.def ++++ b/gcc/reg-notes.def +@@ -216,3 +216,7 @@ REG_NOTE (ARGS_SIZE) + that the return value of a call can be used to reinitialize a + pseudo reg. */ + REG_NOTE (RETURNED) ++ ++/* Indicates the instruction is a stack check probe that should not ++ be combined with other stack adjustments. 
*/ ++REG_NOTE (STACK_CHECK) +diff --git a/gcc/sched-deps.c b/gcc/sched-deps.c +index 4ac2542a3af..75780150e34 100644 +--- a/gcc/sched-deps.c ++++ b/gcc/sched-deps.c +@@ -4607,6 +4607,11 @@ parse_add_or_inc (struct mem_inc_info *mii, rtx insn, bool before_mem) + if (RTX_FRAME_RELATED_P (insn) || !pat) + return false; + ++ /* Do not allow breaking data dependencies for insns that are marked ++ with REG_STACK_CHECK. */ ++ if (find_reg_note (insn, REG_STACK_CHECK, NULL)) ++ return false; ++ + /* Result must be single reg. */ + if (!REG_P (SET_DEST (pat))) + return false; +diff --git a/gcc/testsuite/gcc.target/i386/stack-check-11.c b/gcc/testsuite/gcc.target/i386/stack-check-11.c +new file mode 100644 +index 00000000000..183103f01e5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/stack-check-11.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++extern void arf (unsigned long int *, unsigned long int *); ++void ++frob () ++{ ++ unsigned long int num[859]; ++ unsigned long int den[859]; ++ arf (den, num); ++} ++ ++/* { dg-final { scan-assembler-times "subq" 4 } } */ ++/* { dg-final { scan-assembler-times "orq" 3 } } */ ++ diff --git a/gcc48-rh1469697-8.patch b/gcc48-rh1469697-8.patch new file mode 100644 index 0000000..1c3cdd6 --- /dev/null +++ b/gcc48-rh1469697-8.patch @@ -0,0 +1,88 @@ +commit 93ed472702aad6d9b8998592775a0ab4120b6242 +Author: law +Date: Wed Sep 20 21:59:50 2017 +0000 + + * explow.c (compute_stack_clash_protection_loop_data): Use + CONST_INT_P instead of explicit test. Verify object is a + CONST_INT_P before looking at INTVAL. + (anti_adjust_stack_and_probe_stack_clash): Use CONST_INT_P + instead of explicit test. + + * gcc.target/i386/stack-check-11.c: Update test and regexp + so that it works for both i?86 and x86_64. + + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@253034 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/explow.c b/gcc/explow.c +index 2526e8513b7..d118e0d7782 100644 +--- a/gcc/explow.c ++++ b/gcc/explow.c +@@ -1778,11 +1778,11 @@ compute_stack_clash_protection_loop_data (rtx *rounded_size, rtx *last_addr, + if (*rounded_size == CONST0_RTX (Pmode)) + fprintf (dump_file, + "Stack clash skipped dynamic allocation and probing loop.\n"); +- else if (GET_CODE (*rounded_size) == CONST_INT ++ else if (CONST_INT_P (*rounded_size) + && INTVAL (*rounded_size) <= 4 * *probe_interval) + fprintf (dump_file, + "Stack clash dynamic allocation and probing inline.\n"); +- else if (GET_CODE (*rounded_size) == CONST_INT) ++ else if (CONST_INT_P (*rounded_size)) + fprintf (dump_file, + "Stack clash dynamic allocation and probing in " + "rotated loop.\n"); +@@ -1880,7 +1880,8 @@ anti_adjust_stack_and_probe_stack_clash (rtx size) + + if (rounded_size != CONST0_RTX (Pmode)) + { +- if (INTVAL (rounded_size) <= 4 * probe_interval) ++ if (CONST_INT_P (rounded_size) ++ && INTVAL (rounded_size) <= 4 * probe_interval) + { + for (HOST_WIDE_INT i = 0; + i < INTVAL (rounded_size); +@@ -1900,7 +1901,7 @@ anti_adjust_stack_and_probe_stack_clash (rtx size) + else + { + rtx loop_lab, end_loop; +- bool rotate_loop = GET_CODE (rounded_size) == CONST_INT; ++ bool rotate_loop = CONST_INT_P (rounded_size); + emit_stack_clash_protection_probe_loop_start (&loop_lab, &end_loop, + last_addr, rotate_loop); + +@@ -1938,7 +1939,7 @@ anti_adjust_stack_and_probe_stack_clash (rtx size) + might hold live data. 
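The REG_STACK_CHECK note introduced by this patch exists so that later passes never fold two probed allocations into one. A small standalone model of the offsets involved, assuming the 4096-byte probe interval used throughout these patches:

    #include <stdio.h>

    int
    main (void)
    {
      long interval = 4096;       /* assumed probe interval */
      long sp = 0;

      /* As emitted: each allocation is followed by a probe at *sp.  */
      sp -= interval;
      long first_probe = sp;      /* touches one interval below entry */
      sp -= interval;
      long second_probe = sp;

      /* If combine-stack-adjustments merged the two adjustments, the
         first touch would land a full two intervals below entry,
         skipping the guard page in between.  */
      long merged_first_touch = -2 * interval;

      printf ("%ld %ld %ld\n", first_probe, second_probe,
              merged_first_touch);
      return 0;
    }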
So probe at *sp if we know that + an allocation was made, otherwise probe into the red zone + which is obviously undesirable. */ +- if (GET_CODE (size) == CONST_INT) ++ if (CONST_INT_P (size)) + { + emit_stack_probe (stack_pointer_rtx); + emit_insn (gen_blockage ()); +diff --git a/gcc/testsuite/gcc.target/i386/stack-check-11.c b/gcc/testsuite/gcc.target/i386/stack-check-11.c +index 183103f01e5..fe5b2c2b844 100644 +--- a/gcc/testsuite/gcc.target/i386/stack-check-11.c ++++ b/gcc/testsuite/gcc.target/i386/stack-check-11.c +@@ -2,15 +2,17 @@ + /* { dg-options "-O2 -fstack-clash-protection" } */ + /* { dg-require-effective-target supports_stack_clash_protection } */ + +-extern void arf (unsigned long int *, unsigned long int *); ++#include <stdint.h> ++ ++extern void arf (uint64_t *, uint64_t *); + void + frob () + { +- unsigned long int num[859]; +- unsigned long int den[859]; ++ uint64_t num[859]; ++ uint64_t den[859]; + arf (den, num); + } + +-/* { dg-final { scan-assembler-times "subq" 4 } } */ +-/* { dg-final { scan-assembler-times "orq" 3 } } */ ++/* { dg-final { scan-assembler-times "sub\[ql\]" 4 } } */ ++/* { dg-final { scan-assembler-times "or\[ql\]" 3 } } */ + diff --git a/gcc48-rh1469697-9.patch b/gcc48-rh1469697-9.patch new file mode 100644 index 0000000..fb2903b --- /dev/null +++ b/gcc48-rh1469697-9.patch @@ -0,0 +1,389 @@ +commit b49f8fb8a97e9af8e6ba2b65d18195099cd1bb79 +Author: law +Date: Thu Sep 21 04:30:16 2017 +0000 + + * config/s390/s390.c (MIN_UNROLL_PROBES): Define. + (allocate_stack_space): New function, partially extracted from + s390_emit_prologue. + (s390_emit_prologue): Track offset to most recent stack probe. + Code to allocate space moved into allocate_stack_space. + Dump actions when no stack is allocated. + (s390_prologue_plus_offset): New function. + (s390_emit_stack_probe): Likewise. + + * gcc.dg/stack-check-5.c: Add argument for s390. + * lib/target-supports.exp: + (check_effective_target_supports_stack_clash_protection): Enable for + s390/s390x targets. + + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@253049 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c +index 3c04781f947..45998bc7516 100644 +--- a/gcc/config/s390/s390.c ++++ b/gcc/config/s390/s390.c +@@ -10350,6 +10350,184 @@ s390_emit_stack_tie (void) + emit_insn (gen_stack_tie (mem)); + } + ++/* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it. ++ - push too big immediates to the literal pool and annotate the refs ++ - emit frame related notes for stack pointer changes. */ ++ ++static rtx ++s390_prologue_plus_offset (rtx target, rtx reg, rtx offset, bool frame_related_p) ++{ ++ rtx insn; ++ rtx orig_offset = offset; ++ ++ gcc_assert (REG_P (target)); ++ gcc_assert (REG_P (reg)); ++ gcc_assert (CONST_INT_P (offset)); ++ ++ if (offset == const0_rtx) /* lr/lgr */ ++ { ++ insn = emit_move_insn (target, reg); ++ } ++ else if (DISP_IN_RANGE (INTVAL (offset))) /* la */ ++ { ++ insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg, ++ offset)); ++ } ++ else ++ { ++ if (!satisfies_constraint_K (offset) /* ahi/aghi */ ++ && (!TARGET_EXTIMM ++ || (!satisfies_constraint_Op (offset) /* alfi/algfi */ ++ && !satisfies_constraint_On (offset)))) /* slfi/slgfi */ ++ offset = force_const_mem (Pmode, offset); ++ ++ if (target != reg) ++ { ++ insn = emit_move_insn (target, reg); ++ RTX_FRAME_RELATED_P (insn) = frame_related_p ? 
1 : 0; ++ } ++ ++ insn = emit_insn (gen_add2_insn (target, offset)); ++ ++ if (!CONST_INT_P (offset)) ++ { ++ annotate_constant_pool_refs (&PATTERN (insn)); ++ ++ if (frame_related_p) ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, ++ gen_rtx_SET (VOIDmode, target, ++ gen_rtx_PLUS (Pmode, target, ++ orig_offset))); ++ } ++ } ++ ++ RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0; ++ ++ /* If this is a stack adjustment and we are generating a stack clash ++ prologue, then add a REG_STACK_CHECK note to signal that this insn ++ should be left alone. */ ++ if (flag_stack_clash_protection && target == stack_pointer_rtx) ++ add_reg_note (insn, REG_STACK_CHECK, const0_rtx); ++ ++ return insn; ++} ++ ++/* Emit a compare instruction with a volatile memory access as stack ++ probe. It does not waste store tags and does not clobber any ++ registers apart from the condition code. */ ++static void ++s390_emit_stack_probe (rtx addr) ++{ ++ rtx tmp = gen_rtx_MEM (Pmode, addr); ++ MEM_VOLATILE_P (tmp) = 1; ++ s390_emit_compare (EQ, gen_rtx_REG (Pmode, 0), tmp); ++ emit_insn (gen_blockage ()); ++} ++ ++/* Use a runtime loop if we have to emit more probes than this. */ ++#define MIN_UNROLL_PROBES 3 ++ ++/* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary ++ if necessary. LAST_PROBE_OFFSET contains the offset of the closest ++ probe relative to the stack pointer. ++ ++ Note that SIZE is negative. ++ ++ The return value is true if TEMP_REG has been clobbered. */ ++static bool ++allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset, ++ rtx temp_reg) ++{ ++ bool temp_reg_clobbered_p = false; ++ HOST_WIDE_INT probe_interval ++ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL); ++ HOST_WIDE_INT guard_size ++ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE); ++ ++ if (flag_stack_clash_protection) ++ { ++ if (last_probe_offset + -INTVAL (size) < guard_size) ++ dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); ++ else ++ { ++ rtx offset = GEN_INT (probe_interval - UNITS_PER_LONG); ++ HOST_WIDE_INT rounded_size = -INTVAL (size) & -probe_interval; ++ HOST_WIDE_INT num_probes = rounded_size / probe_interval; ++ HOST_WIDE_INT residual = -INTVAL (size) - rounded_size; ++ ++ if (num_probes < MIN_UNROLL_PROBES) ++ { ++ /* Emit unrolled probe statements. */ ++ ++ for (unsigned int i = 0; i < num_probes; i++) ++ { ++ s390_prologue_plus_offset (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (-probe_interval), true); ++ s390_emit_stack_probe (gen_rtx_PLUS (Pmode, ++ stack_pointer_rtx, ++ offset)); ++ } ++ dump_stack_clash_frame_info (PROBE_INLINE, residual != 0); ++ } ++ else ++ { ++ /* Emit a loop probing the pages. */ ++ ++ rtx loop_start_label = gen_label_rtx (); ++ ++ /* From now on temp_reg will be the CFA register. */ ++ s390_prologue_plus_offset (temp_reg, stack_pointer_rtx, ++ GEN_INT (-rounded_size), true); ++ emit_label (loop_start_label); ++ ++ s390_prologue_plus_offset (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (-probe_interval), false); ++ s390_emit_stack_probe (gen_rtx_PLUS (Pmode, ++ stack_pointer_rtx, ++ offset)); ++ emit_cmp_and_jump_insns (stack_pointer_rtx, temp_reg, ++ GT, NULL_RTX, ++ Pmode, 1, loop_start_label); ++ ++ /* Without this make_edges ICEes. */ ++ JUMP_LABEL (get_last_insn ()) = loop_start_label; ++ LABEL_NUSES (loop_start_label) = 1; ++ ++ /* That's going to be a NOP since stack pointer and ++ temp_reg are supposed to be the same here. We just ++ emit it to set the CFA reg back to r15. 
*/ ++ s390_prologue_plus_offset (stack_pointer_rtx, temp_reg, ++ const0_rtx, true); ++ temp_reg_clobbered_p = true; ++ dump_stack_clash_frame_info (PROBE_LOOP, residual != 0); ++ } ++ ++ /* Handle any residual allocation request. */ ++ s390_prologue_plus_offset (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (-residual), true); ++ last_probe_offset += residual; ++ if (last_probe_offset >= probe_interval) ++ s390_emit_stack_probe (gen_rtx_PLUS (Pmode, ++ stack_pointer_rtx, ++ GEN_INT (residual ++ - UNITS_PER_LONG))); ++ ++ return temp_reg_clobbered_p; ++ } ++ } ++ ++ /* Subtract frame size from stack pointer. */ ++ s390_prologue_plus_offset (stack_pointer_rtx, ++ stack_pointer_rtx, ++ size, true); ++ ++ return temp_reg_clobbered_p; ++} ++ ++ + /* Expand the prologue into a bunch of separate insns. */ + + void +@@ -10391,6 +10569,19 @@ s390_emit_prologue (void) + else + temp_reg = gen_rtx_REG (Pmode, 1); + ++ /* When probing for stack-clash mitigation, we have to track the distance ++ between the stack pointer and closest known reference. ++ ++ Most of the time we have to make a worst case assumption. The ++ only exception is when TARGET_BACKCHAIN is active, in which case ++ we know *sp (offset 0) was written. */ ++ HOST_WIDE_INT probe_interval ++ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL); ++ HOST_WIDE_INT last_probe_offset ++ = (TARGET_BACKCHAIN ++ ? (TARGET_PACKED_STACK ? STACK_POINTER_OFFSET - UNITS_PER_LONG : 0) ++ : probe_interval - (STACK_BOUNDARY / UNITS_PER_WORD)); ++ + /* Save call saved gprs. */ + if (cfun_frame_layout.first_save_gpr != -1) + { +@@ -10400,6 +10591,14 @@ s390_emit_prologue (void) + - cfun_frame_layout.first_save_gpr_slot), + cfun_frame_layout.first_save_gpr, + cfun_frame_layout.last_save_gpr); ++ ++ /* This is not 100% correct. If we have more than one register saved, ++ then LAST_PROBE_OFFSET can move even closer to sp. */ ++ last_probe_offset ++ = (cfun_frame_layout.gprs_offset + ++ UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr ++ - cfun_frame_layout.first_save_gpr_slot)); ++ + emit_insn (insn); + } + +@@ -10416,6 +10615,8 @@ s390_emit_prologue (void) + if (cfun_fpr_bit_p (i)) + { + save_fpr (stack_pointer_rtx, offset, i + 16); ++ if (offset < last_probe_offset) ++ last_probe_offset = offset; + offset += 8; + } + else if (!TARGET_PACKED_STACK) +@@ -10429,6 +10630,8 @@ s390_emit_prologue (void) + if (cfun_fpr_bit_p (i)) + { + insn = save_fpr (stack_pointer_rtx, offset, i + 16); ++ if (offset < last_probe_offset) ++ last_probe_offset = offset; + offset += 8; + + /* If f4 and f6 are call clobbered they are saved due to stdargs and +@@ -10451,6 +10654,8 @@ s390_emit_prologue (void) + if (cfun_fpr_bit_p (i)) + { + insn = save_fpr (stack_pointer_rtx, offset, i + 16); ++ if (offset < last_probe_offset) ++ last_probe_offset = offset; + + RTX_FRAME_RELATED_P (insn) = 1; + offset -= 8; +@@ -10470,10 +10675,11 @@ s390_emit_prologue (void) + if (cfun_frame_layout.frame_size > 0) + { + rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size); +- rtx real_frame_off; ++ rtx stack_pointer_backup_loc; ++ bool temp_reg_clobbered_p; + + if (s390_stack_size) +- { ++ { + HOST_WIDE_INT stack_guard; + + if (s390_stack_guard) +@@ -10538,35 +10744,36 @@ s390_emit_prologue (void) + if (s390_warn_dynamicstack_p && cfun->calls_alloca) + warning (0, "%qs uses dynamic stack allocation", current_function_name ()); + +- /* Save incoming stack pointer into temp reg. */
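allocate_stack_space above decides between unrolled probes and a probing loop from three derived values. A standalone model of that arithmetic (MIN_UNROLL_PROBES and the rounding come from the patch; the driver values and a positive SIZE are illustrative, since the real code works with a negated size):

    #include <stdio.h>

    #define MIN_UNROLL_PROBES 3

    static void
    plan (long size, long probe_interval)
    {
      long rounded_size = size & -probe_interval;
      long num_probes = rounded_size / probe_interval;
      long residual = size - rounded_size;

      printf ("size %ld: %s, %ld probes, residual %ld\n", size,
              num_probes < MIN_UNROLL_PROBES ? "unrolled" : "loop",
              num_probes, residual);
    }

    int
    main (void)
    {
      plan (8192, 4096);    /* 2 probes -> unrolled */
      plan (65536, 4096);   /* 16 probes -> runtime loop */
      plan (5000, 4096);    /* 1 probe plus a 904-byte residual */
      return 0;
    }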
+- if (TARGET_BACKCHAIN || next_fpr) +- insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx)); ++ /* Save the location where we could back up the incoming stack ++ pointer. */ ++ stack_pointer_backup_loc = get_last_insn (); + +- /* Subtract frame size from stack pointer. */ ++ temp_reg_clobbered_p = allocate_stack_space (frame_off, last_probe_offset, ++ temp_reg); + +- if (DISP_IN_RANGE (INTVAL (frame_off))) +- { +- insn = gen_rtx_SET (VOIDmode, stack_pointer_rtx, +- gen_rtx_PLUS (Pmode, stack_pointer_rtx, +- frame_off)); +- insn = emit_insn (insn); +- } +- else ++ if (TARGET_BACKCHAIN || next_fpr) + { +- if (!CONST_OK_FOR_K (INTVAL (frame_off))) +- frame_off = force_const_mem (Pmode, frame_off); +- +- insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off)); +- annotate_constant_pool_refs (&PATTERN (insn)); ++ if (temp_reg_clobbered_p) ++ { ++ /* allocate_stack_space had to make use of temp_reg and ++ we need it to hold a backup of the incoming stack ++ pointer. Calculate back that value from the current ++ stack pointer. */ ++ s390_prologue_plus_offset (temp_reg, stack_pointer_rtx, ++ GEN_INT (cfun_frame_layout.frame_size), ++ false); ++ } ++ else ++ { ++ /* allocate_stack_space didn't actually require ++ temp_reg. Insert the stack pointer backup insn ++ before the stack pointer decrement code - knowing now ++ that the value will survive. */ ++ emit_insn_after (gen_move_insn (temp_reg, stack_pointer_rtx), ++ stack_pointer_backup_loc); ++ } ++ } + +- RTX_FRAME_RELATED_P (insn) = 1; +- real_frame_off = GEN_INT (-cfun_frame_layout.frame_size); +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, +- gen_rtx_SET (VOIDmode, stack_pointer_rtx, +- gen_rtx_PLUS (Pmode, stack_pointer_rtx, +- real_frame_off))); +- + /* Set backchain. */ + + if (TARGET_BACKCHAIN) +@@ -10590,6 +10797,8 @@ s390_emit_prologue (void) + emit_clobber (addr); + } + } ++ else if (flag_stack_clash_protection) ++ dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false); + + /* Save fprs 8 - 15 (64 bit ABI). */ + +diff --git a/gcc/testsuite/gcc.dg/stack-check-5.c b/gcc/testsuite/gcc.dg/stack-check-5.c +index 2171d9b6c23..3178f5d8ce5 100644 +--- a/gcc/testsuite/gcc.dg/stack-check-5.c ++++ b/gcc/testsuite/gcc.dg/stack-check-5.c +@@ -3,6 +3,10 @@ + /* { dg-require-effective-target supports_stack_clash_protection } */ + + ++/* Otherwise the S/390 back-end might save the stack pointer in f2 () ++ into an FPR. */ ++/* { dg-additional-options "-msoft-float" { target { s390x-*-* } } } */ ++ + extern void foo (char *); + extern void bar (void); + +diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp +index 2c669a9822f..f24c5c6e0ac 100644 +--- a/gcc/testsuite/lib/target-supports.exp ++++ b/gcc/testsuite/lib/target-supports.exp +@@ -5422,12 +5422,12 @@ proc check_effective_target_supports_stack_clash_protection { } { + + # Temporary until the target bits are fully ACK'd. 
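With the effective target enabled for s390 below, a test opts in the same way the files touched above do. A hypothetical skeleton combining those directives (the function body is illustrative):

    /* { dg-do compile } */
    /* { dg-options "-O2 -fstack-clash-protection" } */
    /* { dg-require-effective-target supports_stack_clash_protection } */
    /* { dg-additional-options "-msoft-float" { target { s390x-*-* } } } */

    extern void foo (char *);

    void
    t (void)
    {
      char buf[8192];
      foo (buf);
    }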
+ # if { [istarget aarch*-*-*] +-# || [istarget s390*-*-*] + # || [istarget powerpc*-*-*] || [istarget rs6000*-*-*] } { + # return 1 + # } + +- if { [istarget x86_64-*-*] || [istarget i?86-*-*] } { ++ if { [istarget x86_64-*-*] || [istarget i?86-*-*] ++ || [istarget s390*-*-*] } { + return 1 + } + return 0 diff --git a/gcc48-rh1482762.patch b/gcc48-rh1482762.patch new file mode 100644 index 0000000..bb570d4 --- /dev/null +++ b/gcc48-rh1482762.patch @@ -0,0 +1,53 @@ +2017-03-25 Uros Bizjak <ubizjak@gmail.com> + + PR target/80180 + * config/i386/i386.c (ix86_expand_builtin) + <case IX86_BUILTIN_RDRAND{16,32,64}_STEP>: Do not expand arg0 between + flags reg setting and flags reg using instructions. + <case IX86_BUILTIN_RDSEED{16,32,64}_STEP>: Ditto. Use non-flags reg + clobbering instructions to zero extend op2. + +--- gcc/config/i386/i386.c (revision 246478) ++++ gcc/config/i386/i386.c (revision 246479) +@@ -39533,9 +39533,6 @@ + mode0 = DImode; + + rdrand_step: +- op0 = gen_reg_rtx (mode0); +- emit_insn (GEN_FCN (icode) (op0)); +- + arg0 = CALL_EXPR_ARG (exp, 0); + op1 = expand_normal (arg0); + if (!address_operand (op1, VOIDmode)) +@@ -39543,6 +39540,10 @@ + op1 = convert_memory_address (Pmode, op1); + op1 = copy_addr_to_reg (op1); + } ++ ++ op0 = gen_reg_rtx (mode0); ++ emit_insn (GEN_FCN (icode) (op0)); ++ + emit_move_insn (gen_rtx_MEM (mode0, op1), op0); + + op1 = gen_reg_rtx (SImode); +@@ -39584,9 +39597,6 @@ + mode0 = DImode; + + rdseed_step: +- op0 = gen_reg_rtx (mode0); +- emit_insn (GEN_FCN (icode) (op0)); +- + arg0 = CALL_EXPR_ARG (exp, 0); + op1 = expand_normal (arg0); + if (!address_operand (op1, VOIDmode)) +@@ -39594,6 +39604,10 @@ + op1 = convert_memory_address (Pmode, op1); + op1 = copy_addr_to_reg (op1); + } ++ ++ op0 = gen_reg_rtx (mode0); ++ emit_insn (GEN_FCN (icode) (op0)); ++ + emit_move_insn (gen_rtx_MEM (mode0, op1), op0); + + op2 = gen_reg_rtx (QImode); diff --git a/gcc48-rh1487434.patch b/gcc48-rh1487434.patch new file mode 100644 index 0000000..5295ce6 --- /dev/null +++ b/gcc48-rh1487434.patch @@ -0,0 +1,49 @@ +2016-05-04 Alan Modra <amodra@gmail.com> + + * config/rs6000/rs6000.c (rs6000_elf_output_toc_section_asm_op): + Align .toc. + +--- gcc/config/rs6000/rs6000.c ++++ gcc/config/rs6000/rs6000.c +@@ -31339,8 +31339,8 @@ rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED) + { + if (!toc_initialized) + { +- toc_initialized = 1; + fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP); ++ ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2); + (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0); + fprintf (asm_out_file, "\t.tc "); + ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],"); +@@ -31348,20 +31348,30 @@ rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED) + fprintf (asm_out_file, "\n"); + + fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP); ++ ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2); + ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1"); + fprintf (asm_out_file, " = .+32768\n"); ++ toc_initialized = 1; + } + else + fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP); + } + else if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + && !TARGET_RELOCATABLE) +- fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP); ++ { ++ fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP); ++ if (!toc_initialized) ++ { ++ ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2); ++ toc_initialized = 1; ++ } ++ } + else + { + fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP); + if (!toc_initialized) + { ++ ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 
3 : 2); + ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1"); + fprintf (asm_out_file, " = .+32768\n"); + toc_initialized = 1; diff --git a/gcc48-rh1491395.patch b/gcc48-rh1491395.patch new file mode 100644 index 0000000..f0a8bb4 --- /dev/null +++ b/gcc48-rh1491395.patch @@ -0,0 +1,177 @@ +2016-01-16 Torvald Riegel <triegel@redhat.com> + + * method-gl.cc (gl_wt_dispatch::trycommit): Ensure proxy privatization + safety. + * method-ml.cc (ml_wt_dispatch::trycommit): Likewise. + * libitm/testsuite/libitm.c/priv-1.c: New. + +--- libitm/method-gl.cc ++++ libitm/method-gl.cc +@@ -291,12 +291,18 @@ public: + // See begin_or_restart() for why we need release memory order here. + v = gl_mg::clear_locked(v) + 1; + o_gl_mg.orec.store(v, memory_order_release); +- +- // Need to ensure privatization safety. Every other transaction must +- // have a snapshot time that is at least as high as our commit time +- // (i.e., our commit must be visible to them). +- priv_time = v; + } ++ ++ // Need to ensure privatization safety. Every other transaction must have ++ // a snapshot time that is at least as high as our commit time (i.e., our ++ // commit must be visible to them). Because of proxy privatization, we ++ // must ensure that even if we are a read-only transaction. See ++ // ml_wt_dispatch::trycommit() for details: We can't get quite the same ++ // set of problems because we just use one orec and thus, for example, ++ // there cannot be concurrent writers -- but we can still get pending ++ // loads to privatized data when not ensuring privatization safety, which ++ // is problematic if the program unmaps the privatized memory. ++ priv_time = v; + return true; + } + +--- libitm/method-ml.cc ++++ libitm/method-ml.cc +@@ -513,6 +513,21 @@ public: + if (!tx->writelog.size()) + { + tx->readlog.clear(); ++ // We still need to ensure privatization safety, unfortunately. While ++ // we cannot have privatized anything by ourselves (because we are not ++ // an update transaction), we can have observed the commits of ++ // another update transaction that privatized something. Because any ++ // commit happens before ensuring privatization, our snapshot and ++ // commit can thus have happened before ensuring privatization safety ++ // for this commit/snapshot time. Therefore, before we can return to ++ // nontransactional code that might use the privatized data, we must ++ // ensure privatization safety for our snapshot time. ++ // This still seems to be better than not allowing use of the ++ // snapshot time before privatization safety has been ensured because ++ // we at least can run transactions such as this one, and in the ++ // meantime the transaction producing this commit time might have ++ // finished ensuring privatization safety for it. ++ priv_time = tx->shared_state.load(memory_order_relaxed); + return true; + } + +--- /dev/null ++++ libitm/testsuite/libitm.c/priv-1.c +@@ -0,0 +1,117 @@ ++/* Quick stress test for proxy privatization. */ ++ ++/* We need to use a TM method that has to enforce privatization safety ++ explicitly. */ ++/* { dg-set-target-env-var ITM_DEFAULT_METHOD "ml_wt" } */ ++/* { dg-options "-std=gnu11" } */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++#include <pthread.h> ++ ++/* Make them likely to be mapped to different orecs. */ ++#define ALIGN __attribute__((aligned (256))) ++/* Don't make these static to work around PR 68591. */
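The test being added here drives three threads through a barrier to provoke the hazard the comments above describe. A compressed two-thread sketch of the same proxy-privatization problem (all names here are illustrative; assumes -fgnu-tm and libitm):

    #include <pthread.h>
    #include <stdlib.h>

    static int data;
    static int *shared_ptr = &data;   /* accessed transactionally */
    static int *private_ptr;          /* handed to nontransactional code */

    static void *
    proxy (void *arg)
    {
      /* Privatize *shared_ptr by publishing it through private_ptr.  */
      __transaction_atomic { private_ptr = shared_ptr; shared_ptr = 0; }
      return 0;
    }

    static void *
    user (void *arg)
    {
      int *p = 0;
      while (!p)
        __transaction_atomic { p = private_ptr; }  /* read-only commit */
      /* Without privatization safety on read-only commits, a concurrent
         writer transaction could still have a pending store to *p here.  */
      *p += 1;
      return 0;
    }

    int
    main (void)
    {
      pthread_t t[2];
      pthread_create (&t[0], 0, proxy, 0);
      pthread_create (&t[1], 0, user, 0);
      pthread_join (t[0], 0);
      pthread_join (t[1], 0);
      return data == 1 ? EXIT_SUCCESS : EXIT_FAILURE;
    }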
++int x ALIGN; ++int *ptr ALIGN; ++int *priv_ptr ALIGN; ++int priv_value ALIGN; ++int barrier ALIGN = 0; ++const int iters = 100; ++ ++static void arrive_and_wait (int expected_value) ++{ ++ int now = __atomic_add_fetch (&barrier, 1, __ATOMIC_ACQ_REL); ++ while (now < expected_value) ++ __atomic_load (&barrier, &now, __ATOMIC_ACQUIRE); ++} ++ ++static void __attribute__((transaction_pure,noinline)) delay (int i) ++{ ++ for (volatile int v = 0; v < i; v++); ++} ++ ++/* This tries to catch a case in which proxy privatization safety is not ++ ensured by privatization_user. Specifically, its access to the value ++ of its transactional snapshot of ptr must read from an uncommitted write ++ by writer; thus, writer must still be active but must have read ptr before ++ proxy can privatize *ptr by assigning to ptr. ++ We try to make this interleaving more likely by delaying the commit of ++ writer and the start of proxy. */ ++static void *writer (void *dummy __attribute__((unused))) ++{ ++ for (int i = 0; i < iters; i++) ++ { ++ /* Initialize state in each round. */ ++ x = 0; ++ ptr = &x; ++ priv_ptr = NULL; ++ int wrote = 1; ++ arrive_and_wait (i * 6 + 3); ++ /* Interference by another writer. Has a conflict with the proxy ++ privatizer. */ ++ __transaction_atomic ++ { ++ if (ptr != NULL) ++ *ptr = 1; ++ else ++ wrote = 0; ++ delay (2000000); ++ } ++ arrive_and_wait (i * 6 + 6); ++ /* If the previous transaction committed first, wrote == 1 and x == 1; ++ otherwise, if the proxy came first, wrote == 0 and priv_value == 0. ++ */ ++ if (wrote != priv_value) ++ abort (); ++ } ++ return NULL; ++} ++ ++static void *proxy (void *dummy __attribute__((unused))) ++{ ++ for (int i = 0; i < iters; i++) ++ { ++ arrive_and_wait (i * 6 + 3); ++ delay(1000000); ++ __transaction_atomic ++ { ++ /* Hand-off to privatization-user and its read-only transaction and ++ subsequent use of privatization. */ ++ priv_ptr = ptr; ++ ptr = NULL; ++ } ++ arrive_and_wait (i * 6 + 6); ++ } ++ return NULL; ++} ++ ++static void *privatization_user (void *dummy __attribute__((unused))) ++{ ++ for (int i = 0; i < iters; i++) ++ { ++ arrive_and_wait (i * 6 + 3); ++ /* Spin until we have gotten a pointer from the proxy. Then access ++ the value pointed to nontransactionally. */ ++ int *p = NULL; ++ while (p == NULL) ++ __transaction_atomic { p = priv_ptr; } ++ priv_value = *p; ++ arrive_and_wait (i * 6 + 6); ++ } ++ return NULL; ++} ++ ++int main() ++{ ++ pthread_t p[3]; ++ ++ pthread_create (p+0, NULL, writer, NULL); ++ pthread_create (p+1, NULL, proxy, NULL); ++ pthread_create (p+2, NULL, privatization_user, NULL); ++ ++ for (int i = 0; i < 3; ++i) ++ pthread_join (p[i], NULL); ++ ++ return 0; ++} diff --git a/gcc48-rh1535655-1.patch b/gcc48-rh1535655-1.patch new file mode 100644 index 0000000..2aa5bc1 --- /dev/null +++ b/gcc48-rh1535655-1.patch @@ -0,0 +1,30 @@ +commit 30562e52396c7fbe2a404acda2b1b77f871005ea +Author: root +Date: Thu Jan 18 00:12:41 2018 -0500 + + Add FIRST_INT_REG, LAST_INT_REG, LEGACY_INT_REG_P, and LEGACY_INT_REGNO_P + +diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h +index e31c8d0..87fd381 100644 +--- a/gcc/config/i386/i386.h ++++ b/gcc/config/i386/i386.h +@@ -1115,6 +1115,9 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); + /* Base register for access to local variables of the function. 
*/ + #define FRAME_POINTER_REGNUM 20 + ++#define FIRST_INT_REG AX_REG ++#define LAST_INT_REG SP_REG ++ + /* First floating point reg */ + #define FIRST_FLOAT_REG 8 + +@@ -1317,6 +1320,9 @@ enum reg_class + #define QI_REG_P(X) (REG_P (X) && QI_REGNO_P (REGNO (X))) + #define QI_REGNO_P(N) IN_RANGE ((N), AX_REG, BX_REG) + ++#define LEGACY_INT_REG_P(X) (REG_P (X) && LEGACY_INT_REGNO_P (REGNO (X))) ++#define LEGACY_INT_REGNO_P(N) (IN_RANGE ((N), FIRST_INT_REG, LAST_INT_REG)) ++ + #define GENERAL_REG_P(X) \ + (REG_P (X) && GENERAL_REGNO_P (REGNO (X))) + #define GENERAL_REGNO_P(N) \ diff --git a/gcc48-rh1535655-2.patch b/gcc48-rh1535655-2.patch new file mode 100644 index 0000000..339098b --- /dev/null +++ b/gcc48-rh1535655-2.patch @@ -0,0 +1,1595 @@ +commit 60281b40f9b28b1b1b912f3157547d6b4f50669c +Author: root +Date: Thu Jan 18 17:50:46 2018 -0500 + + HJ patch #1 + +diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h +index 11c0845..139d23c 100644 +--- a/gcc/config/i386/i386-opts.h ++++ b/gcc/config/i386/i386-opts.h +@@ -85,4 +85,16 @@ enum ix86_veclibabi { + ix86_veclibabi_type_acml + }; + ++/* This is used to mitigate variant #2 of the speculative execution ++ vulnerabilities on x86 processors identified by CVE-2017-5715, aka ++ Spectre. They convert indirect branches and function returns to ++ call and return thunks to avoid speculative execution via indirect ++ call, jmp and ret. */ ++enum indirect_branch { ++ indirect_branch_unset = 0, ++ indirect_branch_keep, ++ indirect_branch_thunk, ++ indirect_branch_thunk_inline, ++ indirect_branch_thunk_extern ++}; + #endif +diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h +index 96e7c5c..ecdf108 100644 +--- a/gcc/config/i386/i386-protos.h ++++ b/gcc/config/i386/i386-protos.h +@@ -306,6 +306,7 @@ extern enum attr_cpu ix86_schedule; + #endif + + extern const char * ix86_output_call_insn (rtx insn, rtx call_op); ++extern const char * ix86_output_indirect_jmp (rtx call_op, bool ret_p); + + #ifdef RTX_CODE + /* Target data for multipass lookahead scheduling. +diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +index b91a456..ebc9a90 100644 +--- a/gcc/config/i386/i386.c ++++ b/gcc/config/i386/i386.c +@@ -2572,12 +2572,23 @@ struct rtl_opt_pass pass_insert_vzeroupper = + } + }; + +-/* Return true if a red-zone is in use. */ ++/* Return true if a red-zone is in use. We can't use red-zone when ++ there are local indirect jumps, like "indirect_jump" or "tablejump", ++ which jump to another place in the function, since "call" in the ++ indirect thunk pushes the return address onto stack, destroying ++ red-zone. ++ ++ TODO: If we can reserve the first 2 WORDs, one for PUSH and another ++ for CALL, in the red-zone, we can allow local indirect jumps with ++ an indirect thunk. */ + + static inline bool + ix86_using_red_zone (void) + { +- return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI; ++ return (TARGET_RED_ZONE ++ && !TARGET_64BIT_MS_ABI ++ && (!cfun->machine->has_local_indirect_jump ++ || cfun->machine->indirect_branch_type == indirect_branch_keep)); + } + + /* Return a string that documents the current -m options. The caller is +@@ -4595,6 +4606,37 @@ ix86_can_inline_p (tree caller, tree callee) + } + + ++/* Set the indirect_branch_type field from the function FNDECL. */
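A hypothetical use of the attribute this machinery implements; "thunk" routes the indirect call through a __x86_indirect_thunk_* helper, while "keep" preserves the plain call (the function names here are illustrative):

    /* Retpoline-protected dispatch.  */
    __attribute__ ((indirect_branch ("thunk")))
    void
    dispatch (void (*op) (void))
    {
      op ();   /* emitted as a call through the thunk */
    }

    /* Opt a trusted hot path back out.  */
    __attribute__ ((indirect_branch ("keep")))
    void
    dispatch_fast (void (*op) (void))
    {
      op ();   /* emitted as a plain indirect call */
    }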
++ ++static void ++ix86_set_indirect_branch_type (tree fndecl) ++{ ++ if (cfun->machine->indirect_branch_type == indirect_branch_unset) ++ { ++ tree attr = lookup_attribute ("indirect_branch", ++ DECL_ATTRIBUTES (fndecl)); ++ if (attr != NULL) ++ { ++ tree args = TREE_VALUE (attr); ++ if (args == NULL) ++ gcc_unreachable (); ++ tree cst = TREE_VALUE (args); ++ if (strcmp (TREE_STRING_POINTER (cst), "keep") == 0) ++ cfun->machine->indirect_branch_type = indirect_branch_keep; ++ else if (strcmp (TREE_STRING_POINTER (cst), "thunk") == 0) ++ cfun->machine->indirect_branch_type = indirect_branch_thunk; ++ else if (strcmp (TREE_STRING_POINTER (cst), "thunk-inline") == 0) ++ cfun->machine->indirect_branch_type = indirect_branch_thunk_inline; ++ else if (strcmp (TREE_STRING_POINTER (cst), "thunk-extern") == 0) ++ cfun->machine->indirect_branch_type = indirect_branch_thunk_extern; ++ else ++ gcc_unreachable (); ++ } ++ else ++ cfun->machine->indirect_branch_type = ix86_indirect_branch; ++ } ++} ++ + /* Remember the last target of ix86_set_current_function. */ + static GTY(()) tree ix86_previous_fndecl; + +@@ -4609,6 +4651,9 @@ ix86_set_current_function (tree fndecl) + slow things down too much or call target_reinit when it isn't safe. */ + if (fndecl && fndecl != ix86_previous_fndecl) + { ++ if (cfun && cfun->machine && fndecl) ++ ix86_set_indirect_branch_type (fndecl); ++ + tree old_tree = (ix86_previous_fndecl + ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl) + : NULL_TREE); +@@ -4637,6 +4682,8 @@ ix86_set_current_function (tree fndecl) + target_reinit (); + } + } ++ if (cfun && cfun->machine && fndecl) ++ ix86_set_indirect_branch_type (fndecl); + } + + +@@ -8668,6 +8715,196 @@ ix86_setup_frame_addresses (void) + # endif + #endif + ++/* Label count for call and return thunks. It is used to make unique ++ labels in call and return thunks. */ ++static int indirectlabelno; ++ ++/* True if call and return thunk functions are needed. */ ++static bool indirect_thunk_needed = false; ++ ++/* Bit masks of integer registers, which contain branch target, used ++ by call and return thunk functions. */ ++static int indirect_thunks_used; ++ ++#ifndef INDIRECT_LABEL ++# define INDIRECT_LABEL "LIND" ++#endif ++ ++/* Fills in the label name that should be used for the indirect thunk. */ ++ ++static void ++indirect_thunk_name (char name[32], int regno) ++{ ++ if (USE_HIDDEN_LINKONCE) ++ { ++ if (regno >= 0) ++ { ++ const char *reg_prefix; ++ if (LEGACY_INT_REGNO_P (regno)) ++ reg_prefix = TARGET_64BIT ? "r" : "e"; ++ else ++ reg_prefix = ""; ++ sprintf (name, "__x86_indirect_thunk_%s%s", ++ reg_prefix, reg_names[regno]); ++ } ++ else ++ sprintf (name, "__x86_indirect_thunk"); ++ } ++ else ++ { ++ if (regno >= 0) ++ ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno); ++ else ++ ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0); ++ } ++} ++ ++/* Output a call and return thunk for indirect branch. 
If REGNO != -1, ++ the function address is in REGNO and the call and return thunk looks like: ++ ++ call L2 ++ L1: ++ pause ++ jmp L1 ++ L2: ++ mov %REG, (%sp) ++ ret ++ ++ Otherwise, the function address is on the top of stack and the ++ call and return thunk looks like: ++ ++ call L2 ++ L1: ++ pause ++ jmp L1 ++ L2: ++ lea WORD_SIZE(%sp), %sp ++ ret ++ */ ++ ++static void ++output_indirect_thunk (int regno) ++{ ++ char indirectlabel1[32]; ++ char indirectlabel2[32]; ++ ++ ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL, ++ indirectlabelno++); ++ ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL, ++ indirectlabelno++); ++ ++ /* Call */ ++ fputs ("\tcall\t", asm_out_file); ++ assemble_name_raw (asm_out_file, indirectlabel2); ++ fputc ('\n', asm_out_file); ++ ++ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1); ++ ++ /* Pause + lfence. */ ++ fprintf (asm_out_file, "\tpause\n\tlfence\n"); ++ ++ /* Jump. */ ++ fputs ("\tjmp\t", asm_out_file); ++ assemble_name_raw (asm_out_file, indirectlabel1); ++ fputc ('\n', asm_out_file); ++ ++ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2); ++ ++ if (regno >= 0) ++ { ++ /* MOV. */ ++ rtx xops[2]; ++ xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx); ++ xops[1] = gen_rtx_REG (word_mode, regno); ++ output_asm_insn ("mov\t{%1, %0|%0, %1}", xops); ++ } ++ else ++ { ++ /* LEA. */ ++ rtx xops[2]; ++ xops[0] = stack_pointer_rtx; ++ xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); ++ output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops); ++ } ++ ++ fputs ("\tret\n", asm_out_file); ++} ++ ++/* Output a function with a call and return thunk for indirect branch. ++ If REGNO != -1, the function address is in REGNO. Otherwise, the ++ function address is on the top of stack. */ ++ ++static void ++output_indirect_thunk_function (int regno) ++{ ++ char name[32]; ++ tree decl; ++ ++ /* Create __x86_indirect_thunk. */ ++ indirect_thunk_name (name, regno); ++ decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, ++ get_identifier (name), ++ build_function_type_list (void_type_node, NULL_TREE)); ++ DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, ++ NULL_TREE, void_type_node); ++ TREE_PUBLIC (decl) = 1; ++ TREE_STATIC (decl) = 1; ++ DECL_IGNORED_P (decl) = 1; ++ ++#if TARGET_MACHO ++ if (TARGET_MACHO) ++ { ++ switch_to_section (darwin_sections[picbase_thunk_section]); ++ fputs ("\t.weak_definition\t", asm_out_file); ++ assemble_name (asm_out_file, name); ++ fputs ("\n\t.private_extern\t", asm_out_file); ++ assemble_name (asm_out_file, name); ++ putc ('\n', asm_out_file); ++ ASM_OUTPUT_LABEL (asm_out_file, name); ++ DECL_WEAK (decl) = 1; ++ } ++ else ++#endif ++ if (USE_HIDDEN_LINKONCE) ++ { ++ DECL_COMDAT (decl) = 1; ++ make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl)); ++ ++ targetm.asm_out.unique_section (decl, 0); ++ switch_to_section (get_named_section (decl, NULL, 0)); ++ ++ targetm.asm_out.globalize_label (asm_out_file, name); ++ fputs ("\t.hidden\t", asm_out_file); ++ assemble_name (asm_out_file, name); ++ putc ('\n', asm_out_file); ++ ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); ++ } ++ else ++ { ++ switch_to_section (text_section); ++ ASM_OUTPUT_LABEL (asm_out_file, name); ++ } ++ ++ DECL_INITIAL (decl) = make_node (BLOCK); ++ current_function_decl = decl; ++ allocate_struct_function (decl, false); ++ init_function_start (decl); ++ /* We're about to hide the function body from callees of final_* by ++ emitting it directly; tell them we're a thunk, if they care. */
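A hand-written equivalent of the sequence output_indirect_thunk emits when the branch target sits in %rax, using a hypothetical symbol name (the real thunks are named by indirect_thunk_name above); compile as x86-64 C:

    /* The pause/lfence loop captures any speculative execution of the
       return, which is the point of the retpoline construction.  */
    __asm__ (
        ".text\n"
        ".globl my_indirect_thunk_rax\n"
        "my_indirect_thunk_rax:\n"
        "\tcall 2f\n"             /* pushes the address of 1: ... */
        "1:\tpause\n"             /* ...so speculation spins here */
        "\tlfence\n"
        "\tjmp 1b\n"
        "2:\tmov %rax, (%rsp)\n"  /* replace return address with target */
        "\tret\n");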
++ cfun->is_thunk = true; ++ first_function_block_is_cold = false; ++ /* Make sure unwind info is emitted for the thunk if needed. */ ++ final_start_function (emit_barrier (), asm_out_file, 1); ++ ++ output_indirect_thunk (regno); ++ ++ final_end_function (); ++ init_insn_lengths (); ++ free_after_compilation (cfun); ++ set_cfun (NULL); ++ current_function_decl = NULL; ++} ++ + static int pic_labels_used; + + /* Fills in the label name that should be used for a pc thunk for +@@ -8694,11 +8931,24 @@ ix86_code_end (void) + rtx xops[2]; + int regno; + ++ if (indirect_thunk_needed) ++ output_indirect_thunk_function (-1); ++ ++ for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++) ++ { ++ int i = regno - FIRST_REX_INT_REG + LAST_INT_REG + 1; ++ if ((indirect_thunks_used & (1 << i))) ++ output_indirect_thunk_function (regno); ++ } ++ + for (regno = AX_REG; regno <= SP_REG; regno++) + { + char name[32]; + tree decl; + ++ if ((indirect_thunks_used & (1 << regno))) ++ output_indirect_thunk_function (regno); ++ + if (!(pic_labels_used & (1 << regno))) + continue; + +@@ -24074,12 +24324,250 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, + return call; + } + ++/* Output indirect branch via a call and return thunk. CALL_OP is a ++ register which contains the branch target. Branch is a tail call ++ if SIBCALL_P is true. A normal call is converted to: ++ ++ call __x86_indirect_thunk_reg ++ ++ and a tail call is converted to: ++ ++ jmp __x86_indirect_thunk_reg ++ */ ++ ++static void ++ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p) ++{ ++ char thunk_name_buf[32]; ++ char *thunk_name; ++ int regno = REGNO (call_op); ++ ++ if (cfun->machine->indirect_branch_type ++ != indirect_branch_thunk_inline) ++ { ++ if (cfun->machine->indirect_branch_type == indirect_branch_thunk) ++ { ++ int i = regno; ++ if (i >= FIRST_REX_INT_REG) ++ i -= (FIRST_REX_INT_REG - LAST_INT_REG - 1); ++ indirect_thunks_used |= 1 << i; ++ } ++ indirect_thunk_name (thunk_name_buf, regno); ++ thunk_name = thunk_name_buf; ++ } ++ else ++ thunk_name = NULL; ++ ++ if (sibcall_p) ++ { ++ if (thunk_name != NULL) ++ fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name); ++ else ++ output_indirect_thunk (regno); ++ } ++ else ++ { ++ if (thunk_name != NULL) ++ { ++ fprintf (asm_out_file, "\tcall\t%s\n", thunk_name); ++ return; ++ } ++ ++ char indirectlabel1[32]; ++ char indirectlabel2[32]; ++ ++ ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, ++ INDIRECT_LABEL, ++ indirectlabelno++); ++ ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, ++ INDIRECT_LABEL, ++ indirectlabelno++); ++ ++ /* Jump. */ ++ fputs ("\tjmp\t", asm_out_file); ++ assemble_name_raw (asm_out_file, indirectlabel2); ++ fputc ('\n', asm_out_file); ++ ++ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1); ++ ++ if (thunk_name != NULL) ++ fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name); ++ else ++ output_indirect_thunk (regno); ++ ++ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2); ++ ++ /* Call. */ ++ fputs ("\tcall\t", asm_out_file); ++ assemble_name_raw (asm_out_file, indirectlabel1); ++ fputc ('\n', asm_out_file); ++ } ++} ++ ++/* Output indirect branch via a call and return thunk. CALL_OP is ++ the branch target. XASM is the assembly template for CALL_OP. ++ Branch is a tail call if SIBCALL_P is true. 
A normal call is ++ converted to: ++ ++ jmp L2 ++ L1: ++ push CALL_OP ++ jmp __x86_indirect_thunk ++ L2: ++ call L1 ++ ++ and a tail call is converted to: ++ ++ push CALL_OP ++ jmp __x86_indirect_thunk ++ */ ++ ++static void ++ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm, ++ bool sibcall_p) ++{ ++ char thunk_name_buf[32]; ++ char *thunk_name; ++ char push_buf[64]; ++ int regno = -1; ++ ++ if (cfun->machine->indirect_branch_type ++ != indirect_branch_thunk_inline) ++ { ++ if (cfun->machine->indirect_branch_type == indirect_branch_thunk) ++ indirect_thunk_needed = true; ++ indirect_thunk_name (thunk_name_buf, regno); ++ thunk_name = thunk_name_buf; ++ } ++ else ++ thunk_name = NULL; ++ ++ snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s", ++ TARGET_64BIT ? 'q' : 'l', xasm); ++ ++ if (sibcall_p) ++ { ++ output_asm_insn (push_buf, &call_op); ++ if (thunk_name != NULL) ++ fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name); ++ else ++ output_indirect_thunk (regno); ++ } ++ else ++ { ++ char indirectlabel1[32]; ++ char indirectlabel2[32]; ++ ++ ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, ++ INDIRECT_LABEL, ++ indirectlabelno++); ++ ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, ++ INDIRECT_LABEL, ++ indirectlabelno++); ++ ++ /* Jump. */ ++ fputs ("\tjmp\t", asm_out_file); ++ assemble_name_raw (asm_out_file, indirectlabel2); ++ fputc ('\n', asm_out_file); ++ ++ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1); ++ ++ /* An external function may be called via GOT, instead of PLT. */ ++ if (MEM_P (call_op)) ++ { ++ struct ix86_address parts; ++ rtx addr = XEXP (call_op, 0); ++ if (ix86_decompose_address (addr, &parts) ++ && parts.base == stack_pointer_rtx) ++ { ++ /* Since call will adjust stack by -UNITS_PER_WORD, ++ we must convert "disp(stack, index, scale)" to ++ "disp+UNITS_PER_WORD(stack, index, scale)". */ ++ if (parts.index) ++ { ++ addr = gen_rtx_MULT (Pmode, parts.index, ++ GEN_INT (parts.scale)); ++ addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx, ++ addr); ++ } ++ else ++ addr = stack_pointer_rtx; ++ ++ rtx disp; ++ if (parts.disp != NULL_RTX) ++ disp = plus_constant (Pmode, parts.disp, ++ UNITS_PER_WORD); ++ else ++ disp = GEN_INT (UNITS_PER_WORD); ++ ++ addr = gen_rtx_PLUS (Pmode, addr, disp); ++ call_op = gen_rtx_MEM (GET_MODE (call_op), addr); ++ } ++ } ++ ++ output_asm_insn (push_buf, &call_op); ++ ++ if (thunk_name != NULL) ++ fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name); ++ else ++ output_indirect_thunk (regno); ++ ++ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2); ++ ++ /* Call. */ ++ fputs ("\tcall\t", asm_out_file); ++ assemble_name_raw (asm_out_file, indirectlabel1); ++ fputc ('\n', asm_out_file); ++ } ++} ++ ++/* Output indirect branch via a call and return thunk. CALL_OP is ++ the branch target. XASM is the assembly template for CALL_OP. ++ Branch is a tail call if SIBCALL_P is true. */ ++ ++static void ++ix86_output_indirect_branch (rtx call_op, const char *xasm, ++ bool sibcall_p) ++{ ++ if (REG_P (call_op)) ++ ix86_output_indirect_branch_via_reg (call_op, sibcall_p); ++ else ++ ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p); ++} ++/* Output indirect jump. CALL_OP is the jump target. Jump is a ++ function return if RET_P is true. 
*/ ++ ++const char * ++ix86_output_indirect_jmp (rtx call_op, bool ret_p) ++{ ++ if (cfun->machine->indirect_branch_type != indirect_branch_keep) ++ { ++ struct ix86_frame frame; ++ ix86_compute_frame_layout (&frame); ++ ++ /* We can't have red-zone if this isn't a function return since ++ "call" in the indirect thunk pushes the return address onto ++ stack, destroying red-zone. */ ++ if (!ret_p && frame.red_zone_size != 0) ++ gcc_unreachable (); ++ ++ ix86_output_indirect_branch (call_op, "%0", true); ++ return ""; ++ } ++ else ++ return "jmp\t%A0"; ++} ++ + /* Output the assembly for a call instruction. */ + + const char * + ix86_output_call_insn (rtx insn, rtx call_op) + { + bool direct_p = constant_call_address_operand (call_op, VOIDmode); ++ bool output_indirect_p ++ = (!TARGET_SEH ++ && cfun->machine->indirect_branch_type != indirect_branch_keep); + bool seh_nop_p = false; + const char *xasm; + +@@ -24092,9 +24580,17 @@ ix86_output_call_insn (rtx insn, rtx call_op) + else if (TARGET_SEH) + xasm = "rex.W jmp %A0"; + else +- xasm = "jmp\t%A0"; ++ { ++ if (output_indirect_p) ++ xasm = "%0"; ++ else ++ xasm = "jmp\t%A0"; ++ } + +- output_asm_insn (xasm, &call_op); ++ if (output_indirect_p && !direct_p) ++ ix86_output_indirect_branch (call_op, xasm, true); ++ else ++ output_asm_insn (xasm, &call_op); + return ""; + } + +@@ -24131,9 +24627,17 @@ ix86_output_call_insn (rtx insn, rtx call_op) + if (direct_p) + xasm = "call\t%P0"; + else +- xasm = "call\t%A0"; ++ { ++ if (output_indirect_p) ++ xasm = "%0"; ++ else ++ xasm = "call\t%A0"; ++ } + +- output_asm_insn (xasm, &call_op); ++ if (output_indirect_p && !direct_p) ++ ix86_output_indirect_branch (call_op, xasm, false); ++ else ++ output_asm_insn (xasm, &call_op); + + if (seh_nop_p) + return "nop"; +@@ -35436,7 +35940,7 @@ ix86_handle_struct_attribute (tree *node, tree name, + + static tree + ix86_handle_fndecl_attribute (tree *node, tree name, +- tree args ATTRIBUTE_UNUSED, ++ tree args, + int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) + { + if (TREE_CODE (*node) != FUNCTION_DECL) +@@ -35445,6 +35949,29 @@ ix86_handle_fndecl_attribute (tree *node, tree name, + name); + *no_add_attrs = true; + } ++ ++ if (is_attribute_p ("indirect_branch", name)) ++ { ++ tree cst = TREE_VALUE (args); ++ if (TREE_CODE (cst) != STRING_CST) ++ { ++ warning (OPT_Wattributes, ++ "%qE attribute requires a string constant argument", ++ name); ++ *no_add_attrs = true; ++ } ++ else if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0 ++ && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0 ++ && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0 ++ && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0) ++ { ++ warning (OPT_Wattributes, ++ "argument to %qE attribute is not " ++ "(keep|thunk|thunk-inline|thunk-extern)", name); ++ *no_add_attrs = true; ++ } ++ } ++ + return NULL_TREE; + } + +@@ -38963,6 +39490,8 @@ static const struct attribute_spec ix86_attribute_table[] = + false }, + { "callee_pop_aggregate_return", 1, 1, false, true, true, + ix86_handle_callee_pop_aggregate_return, true }, ++ { "indirect_branch", 1, 1, true, false, false, ++ ix86_handle_fndecl_attribute, false }, + /* End element. */ + { NULL, 0, 0, false, false, false, NULL, false } + }; +diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h +index 87fd381..8183cee7 100644 +--- a/gcc/config/i386/i386.h ++++ b/gcc/config/i386/i386.h +@@ -2322,6 +2322,13 @@ struct GTY(()) machine_function { + stack below the return address. 
 */
+   BOOL_BITFIELD static_chain_on_stack : 1;
++
++  /* How to generate indirect branch.  */
++  ENUM_BITFIELD(indirect_branch) indirect_branch_type : 3;
++
++  /* If true, the current function has local indirect jumps, like
++     "indirect_jump" or "tablejump".  */
++  BOOL_BITFIELD has_local_indirect_jump : 1;
++
+   /* During prologue/epilogue generation, the current frame state.
+      Otherwise, the frame state at the end of the prologue.  */
+   struct machine_frame_state fs;
+diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
+index e09e961..b943849 100644
+--- a/gcc/config/i386/i386.md
++++ b/gcc/config/i386/i386.md
+@@ -11276,13 +11276,18 @@
+ {
+   if (TARGET_X32)
+     operands[0] = convert_memory_address (word_mode, operands[0]);
++  cfun->machine->has_local_indirect_jump = true;
+ })
+ 
+ (define_insn "*indirect_jump"
+   [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rw"))]
+   ""
+-  "jmp\t%A0"
+-  [(set_attr "type" "ibr")
++  "* return ix86_output_indirect_jmp (operands[0], false);"
++  [(set (attr "type")
++	(if_then_else (match_test "(cfun->machine->indirect_branch_type
++				    != indirect_branch_keep)")
++	    (const_string "multi")
++	    (const_string "ibr")))
+    (set_attr "length_immediate" "0")])
+ 
+ (define_expand "tablejump"
+@@ -11324,14 +11329,19 @@
+ 
+   if (TARGET_X32)
+     operands[0] = convert_memory_address (word_mode, operands[0]);
++  cfun->machine->has_local_indirect_jump = true;
+ })
+ 
+ (define_insn "*tablejump_1"
+   [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rw"))
+    (use (label_ref (match_operand 1)))]
+   ""
+-  "jmp\t%A0"
+-  [(set_attr "type" "ibr")
++  "* return ix86_output_indirect_jmp (operands[0], false);"
++  [(set (attr "type")
++	(if_then_else (match_test "(cfun->machine->indirect_branch_type
++				    != indirect_branch_keep)")
++	    (const_string "multi")
++	    (const_string "ibr")))
+    (set_attr "length_immediate" "0")])
+ 
+ ;; Convert setcc + movzbl to xor + setcc if operands don't overlap.
+@@ -11773,8 +11783,12 @@
+   [(simple_return)
+    (use (match_operand:SI 0 "register_operand" "r"))]
+   "reload_completed"
+-  "jmp\t%A0"
+-  [(set_attr "type" "ibr")
++  "* return ix86_output_indirect_jmp (operands[0], true);"
++  [(set (attr "type")
++	(if_then_else (match_test "(cfun->machine->indirect_branch_type
++				    != indirect_branch_keep)")
++	    (const_string "multi")
++	    (const_string "ibr")))
+    (set_attr "length_immediate" "0")])
+ 
+ (define_insn "nop"
+diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
+index e93aa5a..0f6965a 100644
+--- a/gcc/config/i386/i386.opt
++++ b/gcc/config/i386/i386.opt
+@@ -630,3 +630,23 @@ Support RTM built-in functions and code generation
+ mpku
+ Target Report Mask(ISA_PKU) Var(ix86_isa_flags) Save
+ Support PKU built-in functions and code generation
++
++mindirect-branch=
++Target Report RejectNegative Joined Enum(indirect_branch) Var(ix86_indirect_branch) Init(indirect_branch_keep)
++Convert indirect call and jump to call and return thunks.
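[Editorial aside, not part of the patch series: the i386.md hunks above reroute local indirect jumps, such as switch jump tables, through ix86_output_indirect_jmp. A minimal C sketch of code that exercises this path, modeled on the switch-statement tests added later in this patch; the function names here are hypothetical, while the option and the __x86_indirect_thunk symbol come from the patch itself:

/* Compile with: gcc -S -O2 -mindirect-branch=thunk -fno-pic jump.c
   With this many cases the switch is lowered to a jump table, and on
   x86-64/ia32 Linux the resulting indirect jump is emitted as a push of
   the table entry followed by "jmp __x86_indirect_thunk" instead of a
   bare "jmp *%reg" -- the pattern the indirect-thunk-7.c test scans for.  */
extern void f0 (void), f1 (void), f2 (void), f3 (void), f4 (void), f5 (void);

void
select_one (int i)
{
  switch (i)
    {
    case 0: f0 (); break;
    case 1: f1 (); break;
    case 2: f2 (); break;
    case 3: f3 (); break;
    case 4: f4 (); break;
    default: f5 (); break;
    }
}
]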
++ ++Enum ++Name(indirect_branch) Type(enum indirect_branch) ++Known indirect branch choices (for use with the -mindirect-branch= option): ++ ++EnumValue ++Enum(indirect_branch) String(keep) Value(indirect_branch_keep) ++ ++EnumValue ++Enum(indirect_branch) String(thunk) Value(indirect_branch_thunk) ++ ++EnumValue ++Enum(indirect_branch) String(thunk-inline) Value(indirect_branch_thunk_inline) ++ ++EnumValue ++Enum(indirect_branch) String(thunk-extern) Value(indirect_branch_thunk_extern) +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index e495af5..847991c 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -3811,6 +3811,16 @@ Specify which floating-point unit to use. The + @code{target("fpmath=sse,387")} option must be specified as + @code{target("fpmath=sse+387")} because the comma would separate + different options. ++ ++@item indirect_branch("@var{choice}") ++@cindex @code{indirect_branch} function attribute, x86 ++On x86 targets, the @code{indirect_branch} attribute causes the compiler ++to convert indirect call and jump with @var{choice}. @samp{keep} ++keeps indirect call and jump unmodified. @samp{thunk} converts indirect ++call and jump to call and return thunk. @samp{thunk-inline} converts ++indirect call and jump to inlined call and return thunk. ++@samp{thunk-extern} converts indirect call and jump to external call ++and return thunk provided in a separate object file. + @end table + + On the PowerPC, the following options are allowed: +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 313a6c5..b299fbf 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -657,7 +657,8 @@ Objective-C and Objective-C++ Dialects}. + -mcmodel=@var{code-model} -mabi=@var{name} -maddress-mode=@var{mode} @gol + -m32 -m64 -mx32 -mlarge-data-threshold=@var{num} @gol + -msse2avx -mfentry -m8bit-idiv @gol +--mavx256-split-unaligned-load -mavx256-split-unaligned-store} ++-mavx256-split-unaligned-load -mavx256-split-unaligned-store @gol ++-mindirect-branch=@var{choice}} + + @emph{i386 and x86-64 Windows Options} + @gccoptlist{-mconsole -mcygwin -mno-cygwin -mdll @gol +@@ -14646,6 +14647,17 @@ to 255, 8-bit unsigned integer divide is used instead of + @opindex avx256-split-unaligned-store + Split 32-byte AVX unaligned load and store. + ++@item -mindirect-branch=@var{choice} ++@opindex -mindirect-branch ++Convert indirect call and jump with @var{choice}. The default is ++@samp{keep}, which keeps indirect call and jump unmodified. ++@samp{thunk} converts indirect call and jump to call and return thunk. ++@samp{thunk-inline} converts indirect call and jump to inlined call ++and return thunk. @samp{thunk-extern} converts indirect call and jump ++to external call and return thunk provided in a separate object file. ++You can control this behavior for a specific function by using the ++function attribute @code{indirect_branch}. @xref{Function Attributes}. 
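[A short usage sketch (editorial addition, not part of the patch): how the command-line option and the per-function attribute documented above combine. dispatch_t/dispatch mirror the testsuite files in this series; the two function names are hypothetical, and the expected code generation is what the indirect-thunk tests check for on x86-64:

/* Compile with: gcc -S -O2 -mindirect-branch=thunk -fno-pic use.c  */
typedef void (*dispatch_t) (long);
extern dispatch_t dispatch;

void
converted (long offset)
{
  /* The target is loaded into a register and the branch goes through
     the shared thunk, e.g. "jmp __x86_indirect_thunk_rax".  */
  dispatch (offset);
}

__attribute__ ((indirect_branch ("keep")))
void
opted_out (long offset)
{
  /* The attribute overrides -mindirect-branch=thunk for this one
     function, so the ordinary indirect jmp/call is kept.  */
  dispatch (offset);
}
]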
++ + @end table + + These @samp{-m} switches are supported in addition to the above +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-1.c +new file mode 100644 +index 0000000..87f6dae +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-1.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mindirect-branch=thunk -fno-pic" } */ ++ ++typedef void (*dispatch_t)(long offset); ++ ++dispatch_t dispatch; ++ ++void ++male_indirect_jump (long offset) ++{ ++ dispatch(offset); ++} ++ ++/* Our gcc-4.8 based compiler is not as aggressive at sibcalls ++ where the target is in a MEM. Thus we have to scan for different ++ patterns here than in newer compilers. */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler {\tpause} } } */ ++/* { dg-final { scan-assembler {\tlfence} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-2.c +new file mode 100644 +index 0000000..6bc4f0a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-2.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mindirect-branch=thunk -fno-pic" } */ ++ ++typedef void (*dispatch_t)(long offset); ++ ++dispatch_t dispatch[256]; ++ ++void ++male_indirect_jump (long offset) ++{ ++ dispatch[offset](offset); ++} ++ ++/* Our gcc-4.8 based compiler is not as aggressive at sibcalls ++ where the target is in a MEM. Thus we have to scan for different ++ patterns here than in newer compilers. */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler {\tpause} } } */ ++/* { dg-final { scan-assembler {\tlfence} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-3.c +new file mode 100644 +index 0000000..f20d35c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-3.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mindirect-branch=thunk -fno-pic" } */ ++ ++typedef void (*dispatch_t)(long offset); ++ ++dispatch_t dispatch; ++ ++int ++male_indirect_jump (long offset) ++{ ++ dispatch(offset); ++ return 0; ++} ++ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! 
x32 } } } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler {\tpause} } } */ ++/* { dg-final { scan-assembler {\tlfence} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-4.c +new file mode 100644 +index 0000000..0eff8fb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-4.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mindirect-branch=thunk -fno-pic" } */ ++ ++typedef void (*dispatch_t)(long offset); ++ ++dispatch_t dispatch[256]; ++ ++int ++male_indirect_jump (long offset) ++{ ++ dispatch[offset](offset); ++ return 0; ++} ++ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler {\tpause} } } */ ++/* { dg-final { scan-assembler {\tlfence} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c +new file mode 100644 +index 0000000..afdb600 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c +@@ -0,0 +1,44 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mindirect-branch=thunk -fno-pic" } */ ++ ++void func0 (void); ++void func1 (void); ++void func2 (void); ++void func3 (void); ++void func4 (void); ++void func4 (void); ++void func5 (void); ++ ++void ++bar (int i) ++{ ++ switch (i) ++ { ++ default: ++ func0 (); ++ break; ++ case 1: ++ func1 (); ++ break; ++ case 2: ++ func2 (); ++ break; ++ case 3: ++ func3 (); ++ break; ++ case 4: ++ func4 (); ++ break; ++ case 5: ++ func5 (); ++ break; ++ } ++} ++ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*\.L\[0-9\]+\\(,%" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler {\tpause} } } */ ++/* { dg-final { scan-assembler {\tlfence} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-1.c +new file mode 100644 +index 0000000..efccdec +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-1.c +@@ -0,0 +1,25 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-pic" } */ ++ ++typedef void (*dispatch_t)(long offset); ++ ++dispatch_t dispatch; ++ ++extern void male_indirect_jump (long) ++ __attribute__ ((indirect_branch("thunk"))); ++ ++void ++male_indirect_jump (long offset) ++{ ++ dispatch(offset); ++} ++ ++/* Our gcc-4.8 based compiler is not as aggressive at sibcalls ++ where the target is in a MEM. Thus we have to scan for different ++ patterns here than in newer compilers. 
*/ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler {\tpause} } } */ ++/* { dg-final { scan-assembler {\tlfence} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-2.c +new file mode 100644 +index 0000000..ca3814e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-2.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-pic" } */ ++ ++typedef void (*dispatch_t)(long offset); ++ ++dispatch_t dispatch[256]; ++ ++__attribute__ ((indirect_branch("thunk"))) ++void ++male_indirect_jump (long offset) ++{ ++ dispatch[offset](offset); ++} ++ ++/* Our gcc-4.8 based compiler is not as aggressive at sibcalls ++ where the target is in a MEM. Thus we have to scan for different ++ patterns here than in newer compilers. */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler {\tpause} } } */ ++/* { dg-final { scan-assembler {\tlfence} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-3.c +new file mode 100644 +index 0000000..97744d6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-3.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-pic" } */ ++ ++typedef void (*dispatch_t)(long offset); ++ ++dispatch_t dispatch; ++extern int male_indirect_jump (long) ++ __attribute__ ((indirect_branch("thunk-inline"))); ++ ++int ++male_indirect_jump (long offset) ++{ ++ dispatch(offset); ++ return 0; ++} ++ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 2 } } */ ++/* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 2 } } */ ++/* { dg-final { scan-assembler {\tpause} } } */ ++/* { dg-final { scan-assembler {\tlfence} } } */ ++/* { dg-final { scan-assembler-not "__x86_indirect_thunk" } } */ ++/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-4.c +new file mode 100644 +index 0000000..bfce3ea +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-4.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-pic" } */ ++ ++typedef void (*dispatch_t)(long offset); ++ ++dispatch_t dispatch[256]; ++ ++__attribute__ ((indirect_branch("thunk-inline"))) ++int ++male_indirect_jump (long offset) ++{ ++ dispatch[offset](offset); ++ return 0; ++} ++ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! 
x32 } } } } */ ++/* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 2 } } */ ++/* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 2 } } */ ++/* { dg-final { scan-assembler {\tpause} } } */ ++/* { dg-final { scan-assembler {\tlfence} } } */ ++/* { dg-final { scan-assembler-not "__x86_indirect_thunk" } } */ ++/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-5.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-5.c +new file mode 100644 +index 0000000..0833606 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-5.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-pic" } */ ++ ++typedef void (*dispatch_t)(long offset); ++ ++dispatch_t dispatch; ++extern int male_indirect_jump (long) ++ __attribute__ ((indirect_branch("thunk-extern"))); ++ ++int ++male_indirect_jump (long offset) ++{ ++ dispatch(offset); ++ return 0; ++} ++ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */ ++/* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-6.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-6.c +new file mode 100644 +index 0000000..2eba0fb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-6.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-pic" } */ ++ ++typedef void (*dispatch_t)(long offset); ++ ++dispatch_t dispatch[256]; ++ ++__attribute__ ((indirect_branch("thunk-extern"))) ++int ++male_indirect_jump (long offset) ++{ ++ dispatch[offset](offset); ++ return 0; ++} ++ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */ ++/* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-7.c +new file mode 100644 +index 0000000..f58427e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-7.c +@@ -0,0 +1,44 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-pic" } */ ++ ++void func0 (void); ++void func1 (void); ++void func2 (void); ++void func3 (void); ++void func4 (void); ++void func4 (void); ++void func5 (void); ++ ++__attribute__ ((indirect_branch("thunk-extern"))) ++void ++bar (int i) ++{ ++ switch (i) ++ { ++ default: ++ func0 (); ++ break; ++ case 1: ++ func1 (); ++ break; ++ case 2: ++ func2 (); ++ break; ++ case 3: ++ func3 (); ++ break; ++ case 4: ++ func4 (); ++ break; ++ case 5: ++ func5 (); ++ break; ++ } ++} ++ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*\.L\[0-9\]+\\(,%" { target { ! 
x32 } } } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" } } */ ++/* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */ ++/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-8.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-8.c +new file mode 100644 +index 0000000..564ed39 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-8.c +@@ -0,0 +1,42 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mindirect-branch=thunk -fno-pic" } */ ++ ++void func0 (void); ++void func1 (void); ++void func2 (void); ++void func3 (void); ++void func4 (void); ++void func4 (void); ++void func5 (void); ++ ++__attribute__ ((indirect_branch("keep"))) ++void ++bar (int i) ++{ ++ switch (i) ++ { ++ default: ++ func0 (); ++ break; ++ case 1: ++ func1 (); ++ break; ++ case 2: ++ func2 (); ++ break; ++ case 3: ++ func3 (); ++ break; ++ case 4: ++ func4 (); ++ break; ++ case 5: ++ func5 (); ++ break; ++ } ++} ++ ++/* { dg-final { scan-assembler-not "__x86_indirect_thunk" } } */ ++/* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */ ++/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-1.c +new file mode 100644 +index 0000000..7fd01d6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-1.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mindirect-branch=thunk-extern -fno-pic" } */ ++ ++typedef void (*dispatch_t)(long offset); ++ ++dispatch_t dispatch; ++ ++void ++male_indirect_jump (long offset) ++{ ++ dispatch(offset); ++} ++ ++/* Our gcc-4.8 based compiler is not as aggressive at sibcalls ++ where the target is in a MEM. Thus we have to scan for different ++ patterns here than in newer compilers. */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ ++/* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */ ++/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-2.c +new file mode 100644 +index 0000000..825f6b2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-2.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mindirect-branch=thunk-extern -fno-pic" } */ ++ ++typedef void (*dispatch_t)(long offset); ++ ++dispatch_t dispatch[256]; ++ ++void ++male_indirect_jump (long offset) ++{ ++ dispatch[offset](offset); ++} ++ ++/* Our gcc-4.8 based compiler is not as aggressive at sibcalls ++ where the target is in a MEM. Thus we have to scan for different ++ patterns here than in newer compilers. 
*/ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ ++/* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */ ++/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-3.c +new file mode 100644 +index 0000000..395634e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-3.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mindirect-branch=thunk-extern -fno-pic" } */ ++ ++typedef void (*dispatch_t)(long offset); ++ ++dispatch_t dispatch; ++ ++int ++male_indirect_jump (long offset) ++{ ++ dispatch(offset); ++ return 0; ++} ++ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */ ++/* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-4.c +new file mode 100644 +index 0000000..fd3f633 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-4.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mindirect-branch=thunk-extern -fno-pic" } */ ++ ++typedef void (*dispatch_t)(long offset); ++ ++dispatch_t dispatch[256]; ++ ++int ++male_indirect_jump (long offset) ++{ ++ dispatch[offset](offset); ++ return 0; ++} ++ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */ ++/* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c +new file mode 100644 +index 0000000..6652523 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c +@@ -0,0 +1,43 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mindirect-branch=thunk-extern -fno-pic" } */ ++ ++void func0 (void); ++void func1 (void); ++void func2 (void); ++void func3 (void); ++void func4 (void); ++void func4 (void); ++void func5 (void); ++ ++void ++bar (int i) ++{ ++ switch (i) ++ { ++ default: ++ func0 (); ++ break; ++ case 1: ++ func1 (); ++ break; ++ case 2: ++ func2 (); ++ break; ++ case 3: ++ func3 (); ++ break; ++ case 4: ++ func4 (); ++ break; ++ case 5: ++ func5 (); ++ break; ++ } ++} ++ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*\.L\[0-9\]+\\(,%" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! 
x32 } } } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */ ++/* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */ ++/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-1.c +new file mode 100644 +index 0000000..48c4dd4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-1.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mindirect-branch=thunk-inline -fno-pic" } */ ++ ++typedef void (*dispatch_t)(long offset); ++ ++dispatch_t dispatch; ++ ++void ++male_indirect_jump (long offset) ++{ ++ dispatch(offset); ++} ++ ++/* Our gcc-4.8 based compiler is not as aggressive at sibcalls ++ where the target is in a MEM. Thus we have to scan for different ++ patterns here than in newer compilers. */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler {\tpause} } } */ ++/* { dg-final { scan-assembler {\tlfence} } } */ ++/* { dg-final { scan-assembler-not "__x86_indirect_thunk" } } */ ++/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-2.c +new file mode 100644 +index 0000000..355dad5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-2.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mindirect-branch=thunk-inline -fno-pic" } */ ++ ++typedef void (*dispatch_t)(long offset); ++ ++dispatch_t dispatch[256]; ++ ++void ++male_indirect_jump (long offset) ++{ ++ dispatch[offset](offset); ++} ++ ++/* Our gcc-4.8 based compiler is not as aggressive at sibcalls ++ where the target is in a MEM. Thus we have to scan for different ++ patterns here than in newer compilers. */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler {\tpause} } } */ ++/* { dg-final { scan-assembler {\tlfence} } } */ ++/* { dg-final { scan-assembler-not "__x86_indirect_thunk" } } */ ++/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-3.c +new file mode 100644 +index 0000000..244fec7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-3.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mindirect-branch=thunk-inline -fno-pic" } */ ++ ++typedef void (*dispatch_t)(long offset); ++ ++dispatch_t dispatch; ++ ++int ++male_indirect_jump (long offset) ++{ ++ dispatch(offset); ++ return 0; ++} ++ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! 
x32 } } } } */ ++/* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 2 } } */ ++/* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 2 } } */ ++/* { dg-final { scan-assembler-times {\tpause} 1 } } */ ++/* { dg-final { scan-assembler-times {\tlfence} 1 } } */ ++/* { dg-final { scan-assembler-not "__x86_indirect_thunk" } } */ ++/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-4.c +new file mode 100644 +index 0000000..107ebe3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-4.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mindirect-branch=thunk-inline -fno-pic" } */ ++ ++typedef void (*dispatch_t)(long offset); ++ ++dispatch_t dispatch[256]; ++ ++int ++male_indirect_jump (long offset) ++{ ++ dispatch[offset](offset); ++ return 0; ++} ++ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 2 } } */ ++/* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 2 } } */ ++/* { dg-final { scan-assembler-times {\tpause} 1 } } */ ++/* { dg-final { scan-assembler-times {\tlfence} 1 } } */ ++/* { dg-final { scan-assembler-not "__x86_indirect_thunk" } } */ ++/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c +new file mode 100644 +index 0000000..d02b1dc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c +@@ -0,0 +1,44 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mindirect-branch=thunk-inline -fno-pic" } */ ++ ++void func0 (void); ++void func1 (void); ++void func2 (void); ++void func3 (void); ++void func4 (void); ++void func4 (void); ++void func5 (void); ++ ++void ++bar (int i) ++{ ++ switch (i) ++ { ++ default: ++ func0 (); ++ break; ++ case 1: ++ func1 (); ++ break; ++ case 2: ++ func2 (); ++ break; ++ case 3: ++ func3 (); ++ break; ++ case 4: ++ func4 (); ++ break; ++ case 5: ++ func5 (); ++ break; ++ } ++} ++ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*\.L\[0-9\]+\\(,%" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler {\tpause} } } */ ++/* { dg-final { scan-assembler {\tlfence} } } */ ++/* { dg-final { scan-assembler-not "__x86_indirect_thunk" } } */ diff --git a/gcc48-rh1535655-3.patch b/gcc48-rh1535655-3.patch new file mode 100644 index 0000000..903a76b --- /dev/null +++ b/gcc48-rh1535655-3.patch @@ -0,0 +1,1096 @@ +commit 2ad2b4c4d8f0776012d36f1f3ae17c5fef55c7f9 +Author: root +Date: Thu Jan 18 17:43:15 2018 -0500 + + HJ's patch #2 + +diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h +index ecdf108..4e4b210 100644 +--- a/gcc/config/i386/i386-protos.h ++++ b/gcc/config/i386/i386-protos.h +@@ -307,6 +307,7 @@ extern enum attr_cpu ix86_schedule; + + extern const char * ix86_output_call_insn (rtx insn, rtx call_op); + extern const char * ix86_output_indirect_jmp (rtx call_op, bool ret_p); ++extern const char * ix86_output_function_return (bool long_p); + + #ifdef RTX_CODE + /* Target data for multipass lookahead scheduling. 
+diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +index ebc9a90..9dffd02f 100644 +--- a/gcc/config/i386/i386.c ++++ b/gcc/config/i386/i386.c +@@ -4635,6 +4635,31 @@ ix86_set_indirect_branch_type (tree fndecl) + else + cfun->machine->indirect_branch_type = ix86_indirect_branch; + } ++ ++ if (cfun->machine->function_return_type == indirect_branch_unset) ++ { ++ tree attr = lookup_attribute ("function_return", ++ DECL_ATTRIBUTES (fndecl)); ++ if (attr != NULL) ++ { ++ tree args = TREE_VALUE (attr); ++ if (args == NULL) ++ gcc_unreachable (); ++ tree cst = TREE_VALUE (args); ++ if (strcmp (TREE_STRING_POINTER (cst), "keep") == 0) ++ cfun->machine->function_return_type = indirect_branch_keep; ++ else if (strcmp (TREE_STRING_POINTER (cst), "thunk") == 0) ++ cfun->machine->function_return_type = indirect_branch_thunk; ++ else if (strcmp (TREE_STRING_POINTER (cst), "thunk-inline") == 0) ++ cfun->machine->function_return_type = indirect_branch_thunk_inline; ++ else if (strcmp (TREE_STRING_POINTER (cst), "thunk-extern") == 0) ++ cfun->machine->function_return_type = indirect_branch_thunk_extern; ++ else ++ gcc_unreachable (); ++ } ++ else ++ cfun->machine->function_return_type = ix86_function_return; ++ } + } + + /* Remember the last target of ix86_set_current_function. */ +@@ -8733,8 +8758,11 @@ static int indirect_thunks_used; + /* Fills in the label name that should be used for the indirect thunk. */ + + static void +-indirect_thunk_name (char name[32], int regno) ++indirect_thunk_name (char name[32], int regno, bool ret_p) + { ++ if (regno >= 0 && ret_p) ++ gcc_unreachable (); ++ + if (USE_HIDDEN_LINKONCE) + { + if (regno >= 0) +@@ -8748,14 +8776,22 @@ indirect_thunk_name (char name[32], int regno) + reg_prefix, reg_names[regno]); + } + else +- sprintf (name, "__x86_indirect_thunk"); ++ { ++ const char *ret = ret_p ? "return" : "indirect"; ++ sprintf (name, "__x86_%s_thunk", ret); ++ } + } + else + { + if (regno >= 0) + ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno); + else +- ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0); ++ { ++ if (ret_p) ++ ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0); ++ else ++ ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0); ++ } + } + } + +@@ -8841,7 +8877,7 @@ output_indirect_thunk_function (int regno) + tree decl; + + /* Create __x86_indirect_thunk. */ +- indirect_thunk_name (name, regno); ++ indirect_thunk_name (name, regno, false); + decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, + get_identifier (name), + build_function_type_list (void_type_node, NULL_TREE)); +@@ -8885,6 +8921,36 @@ output_indirect_thunk_function (int regno) + ASM_OUTPUT_LABEL (asm_out_file, name); + } + ++ if (regno < 0) ++ { ++ /* Create alias for __x86.return_thunk/__x86.return_thunk_bnd. 
*/ ++ char alias[32]; ++ ++ indirect_thunk_name (alias, regno, true); ++#if TARGET_MACHO ++ if (TARGET_MACHO) ++ { ++ fputs ("\t.weak_definition\t", asm_out_file); ++ assemble_name (asm_out_file, alias); ++ fputs ("\n\t.private_extern\t", asm_out_file); ++ assemble_name (asm_out_file, alias); ++ putc ('\n', asm_out_file); ++ ASM_OUTPUT_LABEL (asm_out_file, alias); ++ } ++#else ++ ASM_OUTPUT_DEF (asm_out_file, alias, name); ++ if (USE_HIDDEN_LINKONCE) ++ { ++ fputs ("\t.globl\t", asm_out_file); ++ assemble_name (asm_out_file, alias); ++ putc ('\n', asm_out_file); ++ fputs ("\t.hidden\t", asm_out_file); ++ assemble_name (asm_out_file, alias); ++ putc ('\n', asm_out_file); ++ } ++#endif ++ } ++ + DECL_INITIAL (decl) = make_node (BLOCK); + current_function_decl = decl; + allocate_struct_function (decl, false); +@@ -24353,7 +24419,7 @@ ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p) + i -= (FIRST_REX_INT_REG - LAST_INT_REG - 1); + indirect_thunks_used |= 1 << i; + } +- indirect_thunk_name (thunk_name_buf, regno); ++ indirect_thunk_name (thunk_name_buf, regno, false); + thunk_name = thunk_name_buf; + } + else +@@ -24437,7 +24503,7 @@ ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm, + { + if (cfun->machine->indirect_branch_type == indirect_branch_thunk) + indirect_thunk_needed = true; +- indirect_thunk_name (thunk_name_buf, regno); ++ indirect_thunk_name (thunk_name_buf, regno, false); + thunk_name = thunk_name_buf; + } + else +@@ -24559,6 +24625,37 @@ ix86_output_indirect_jmp (rtx call_op, bool ret_p) + return "jmp\t%A0"; + } + ++/* Output function return. CALL_OP is the jump target. Add a REP ++ prefix to RET if LONG_P is true and function return is kept. */ ++ ++const char * ++ix86_output_function_return (bool long_p) ++{ ++ if (cfun->machine->function_return_type != indirect_branch_keep) ++ { ++ char thunk_name[32]; ++ ++ if (cfun->machine->function_return_type ++ != indirect_branch_thunk_inline) ++ { ++ bool need_thunk = (cfun->machine->function_return_type ++ == indirect_branch_thunk); ++ indirect_thunk_name (thunk_name, -1, true); ++ indirect_thunk_needed |= need_thunk; ++ fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name); ++ } ++ else ++ output_indirect_thunk (-1); ++ ++ return ""; ++ } ++ ++ if (!long_p) ++ return "ret"; ++ ++ return "rep%; ret"; ++} ++ + /* Output the assembly for a call instruction. */ + + const char * +@@ -35972,6 +36069,28 @@ ix86_handle_fndecl_attribute (tree *node, tree name, + } + } + ++ if (is_attribute_p ("function_return", name)) ++ { ++ tree cst = TREE_VALUE (args); ++ if (TREE_CODE (cst) != STRING_CST) ++ { ++ warning (OPT_Wattributes, ++ "%qE attribute requires a string constant argument", ++ name); ++ *no_add_attrs = true; ++ } ++ else if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0 ++ && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0 ++ && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0 ++ && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0) ++ { ++ warning (OPT_Wattributes, ++ "argument to %qE attribute is not " ++ "(keep|thunk|thunk-inline|thunk-extern)", name); ++ *no_add_attrs = true; ++ } ++ } ++ + return NULL_TREE; + } + +@@ -39492,6 +39611,9 @@ static const struct attribute_spec ix86_attribute_table[] = + ix86_handle_callee_pop_aggregate_return, true }, + { "indirect_branch", 1, 1, true, false, false, + ix86_handle_fndecl_attribute, false }, ++ { "function_return", 1, 1, true, false, false, ++ ix86_handle_fndecl_attribute, false }, ++ + /* End element. 
*/ + { NULL, 0, 0, false, false, false, NULL, false } + }; +diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h +index 8183cee7..8ff7026 100644 +--- a/gcc/config/i386/i386.h ++++ b/gcc/config/i386/i386.h +@@ -2329,6 +2329,9 @@ struct GTY(()) machine_function { + "indirect_jump" or "tablejump". */ + BOOL_BITFIELD has_local_indirect_jump : 1; + ++ /* How to generate function return. */ ++ ENUM_BITFIELD(indirect_branch) function_return_type : 3; ++ + /* During prologue/epilogue generation, the current frame state. + Otherwise, the frame state at the end of the prologue. */ + struct machine_frame_state fs; +diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md +index b943849..ef16cf5 100644 +--- a/gcc/config/i386/i386.md ++++ b/gcc/config/i386/i386.md +@@ -11749,7 +11749,7 @@ + (define_insn "simple_return_internal" + [(simple_return)] + "reload_completed" +- "ret" ++ "* return ix86_output_function_return (false);" + [(set_attr "length" "1") + (set_attr "atom_unit" "jeu") + (set_attr "length_immediate" "0") +@@ -11762,7 +11762,7 @@ + [(simple_return) + (unspec [(const_int 0)] UNSPEC_REP)] + "reload_completed" +- "rep%; ret" ++ "* return ix86_output_function_return (true);" + [(set_attr "length" "2") + (set_attr "atom_unit" "jeu") + (set_attr "length_immediate" "0") +diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt +index 0f6965a..9dfa2cb 100644 +--- a/gcc/config/i386/i386.opt ++++ b/gcc/config/i386/i386.opt +@@ -635,9 +635,13 @@ mindirect-branch= + Target Report RejectNegative Joined Enum(indirect_branch) Var(ix86_indirect_branch) Init(indirect_branch_keep) + Convert indirect call and jump to call and return thunks. + ++mfunction-return= ++Target Report RejectNegative Joined Enum(indirect_branch) Var(ix86_function_return) Init(indirect_branch_keep) ++Convert function return to call and return thunk. ++ + Enum + Name(indirect_branch) Type(enum indirect_branch) +-Known indirect branch choices (for use with the -mindirect-branch= option): ++Known indirect branch choices (for use with the -mindirect-branch=/-mfunction-return= options): + + EnumValue + Enum(indirect_branch) String(keep) Value(indirect_branch_keep) +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index 847991c..add4f18 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -3821,6 +3821,15 @@ call and jump to call and return thunk. @samp{thunk-inline} converts + indirect call and jump to inlined call and return thunk. + @samp{thunk-extern} converts indirect call and jump to external call + and return thunk provided in a separate object file. ++ ++@item function_return("@var{choice}") ++@cindex @code{function_return} function attribute, x86 ++On x86 targets, the @code{function_return} attribute causes the compiler ++to convert function return with @var{choice}. @samp{keep} keeps function ++return unmodified. @samp{thunk} converts function return to call and ++return thunk. @samp{thunk-inline} converts function return to inlined ++call and return thunk. @samp{thunk-extern} converts function return to ++external call and return thunk provided in a separate object file. + @end table + + On the PowerPC, the following options are allowed: +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index b299fbf..5acd23a 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -658,7 +658,7 @@ Objective-C and Objective-C++ Dialects}. 
+ -m32 -m64 -mx32 -mlarge-data-threshold=@var{num} @gol
+ -msse2avx -mfentry -m8bit-idiv @gol
+ -mavx256-split-unaligned-load -mavx256-split-unaligned-store @gol
+--mindirect-branch=@var{choice}}
++-mindirect-branch=@var{choice} -mfunction-return=@var{choice}}
+ 
+ @emph{i386 and x86-64 Windows Options}
+ @gccoptlist{-mconsole -mcygwin -mno-cygwin -mdll @gol
+@@ -14658,6 +14658,17 @@ to external call and return thunk provided in a separate object file.
+ You can control this behavior for a specific function by using the
+ function attribute @code{indirect_branch}.  @xref{Function Attributes}.
+ 
++@item -mfunction-return=@var{choice}
++@opindex -mfunction-return
++Convert function return with @var{choice}.  The default is @samp{keep},
++which keeps function return unmodified.  @samp{thunk} converts function
++return to call and return thunk.  @samp{thunk-inline} converts function
++return to inlined call and return thunk.  @samp{thunk-extern} converts
++function return to external call and return thunk provided in a separate
++object file.  You can control this behavior for a specific function by
++using the function attribute @code{function_return}.
++@xref{Function Attributes}.
++
+ @end table
+ 
+ These @samp{-m} switches are supported in addition to the above
+diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-1.c
+index 87f6dae..034b4cc 100644
+--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-1.c
++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-1.c
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -mindirect-branch=thunk -fno-pic" } */
++/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */
+ 
+ typedef void (*dispatch_t)(long offset);
+ 
+diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-2.c
+index 6bc4f0a..e0c57cb 100644
+--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-2.c
++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-2.c
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -mindirect-branch=thunk -fno-pic" } */
++/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */
+ 
+ typedef void (*dispatch_t)(long offset);
+ 
+diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-3.c
+index f20d35c..3c0d4c3 100644
+--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-3.c
++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-3.c
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -mindirect-branch=thunk -fno-pic" } */
++/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */
+ 
+ typedef void (*dispatch_t)(long offset);
+ 
+@@ -12,7 +12,7 @@ male_indirect_jump (long offset)
+   return 0;
+ }
+ 
+-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */
++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { { ! x32 } && *-*-linux* } } } } */
+ /* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! 
x32 } } } } */ + /* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-4.c +index 0eff8fb..14d4ef6 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-4.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-4.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mindirect-branch=thunk -fno-pic" } */ ++/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +@@ -12,7 +12,7 @@ male_indirect_jump (long offset) + return 0; + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { { ! x32 } && *-*-linux* } } } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ + /* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c +index afdb600..bc6b47a 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mindirect-branch=thunk -fno-pic" } */ ++/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */ + + void func0 (void); + void func1 (void); +@@ -35,7 +35,7 @@ bar (int i) + } + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*\.L\[0-9\]+\\(,%" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*\.L\[0-9\]+\\(,%" { target { { ! x32 } && *-*-linux* } } } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! 
x32 } } } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-1.c +index efccdec..7c45142 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-1.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-1.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fno-pic" } */ ++/* { dg-options "-O2 -mfunction-return=keep -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-2.c +index ca3814e..9eebc84 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-2.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-2.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fno-pic" } */ ++/* { dg-options "-O2 -mfunction-return=keep -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-3.c +index 97744d6..f938db0 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-3.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-3.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fno-pic" } */ ++/* { dg-options "-O2 -mfunction-return=keep -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +@@ -14,7 +14,7 @@ male_indirect_jump (long offset) + return 0; + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { { ! x32 } && *-*-linux* } } } } */ + /* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 2 } } */ + /* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 2 } } */ + /* { dg-final { scan-assembler {\tpause} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-4.c +index bfce3ea..4e58599 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-4.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-4.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fno-pic" } */ ++/* { dg-options "-O2 -mfunction-return=keep -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +@@ -13,7 +13,7 @@ male_indirect_jump (long offset) + return 0; + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { { ! 
x32 } && *-*-linux* } } } } */ + /* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 2 } } */ + /* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 2 } } */ + /* { dg-final { scan-assembler {\tpause} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-5.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-5.c +index 0833606..b8d5024 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-5.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-5.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fno-pic" } */ ++/* { dg-options "-O2 -mfunction-return=keep -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +@@ -14,7 +14,7 @@ male_indirect_jump (long offset) + return 0; + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { { ! x32 } && *-*-linux* } } } } */ + /* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */ + /* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-6.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-6.c +index 2eba0fb..455adab 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-6.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-6.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fno-pic" } */ ++/* { dg-options "-O2 -mfunction-return=keep -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +@@ -13,7 +13,7 @@ male_indirect_jump (long offset) + return 0; + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { { ! x32 } && *-*-linux* } } } } */ + /* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */ + /* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-7.c +index f58427e..4595b84 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-7.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-7.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fno-pic" } */ ++/* { dg-options "-O2 -mfunction-return=keep -fno-pic" } */ + + void func0 (void); + void func1 (void); +@@ -36,7 +36,7 @@ bar (int i) + } + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*\.L\[0-9\]+\\(,%" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*\.L\[0-9\]+\\(,%" { target { { ! 
x32 } && *-*-linux* } } } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" } } */ + /* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-8.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-8.c +index 564ed39..d730d31 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-8.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-8.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mindirect-branch=thunk -fno-pic" } */ ++/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */ + + void func0 (void); + void func1 (void); +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-1.c +index 7fd01d6..f424181 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-1.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-1.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mindirect-branch=thunk-extern -fno-pic" } */ ++/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-2.c +index 825f6b2..ac54868 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-2.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-2.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mindirect-branch=thunk-extern -fno-pic" } */ ++/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-3.c +index 395634e..06ebf1c 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-3.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-3.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mindirect-branch=thunk-extern -fno-pic" } */ ++/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +@@ -12,7 +12,7 @@ male_indirect_jump (long offset) + return 0; + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { { ! x32 } && *-*-linux* } } } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ + /* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */ + /* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 1 { target { ! 
x32 } } } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-4.c +index fd3f633..1c8f944 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-4.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-4.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mindirect-branch=thunk-extern -fno-pic" } */ ++/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +@@ -12,7 +12,7 @@ male_indirect_jump (long offset) + return 0; + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { { ! x32 } && *-*-linux* } } } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ + /* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */ + /* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c +index 6652523..86e9fd1 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mindirect-branch=thunk-extern -fno-pic" } */ ++/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */ + + void func0 (void); + void func1 (void); +@@ -35,7 +35,7 @@ bar (int i) + } + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*\.L\[0-9\]+\\(,%" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*\.L\[0-9\]+\\(,%" { target { { ! x32 } && *-*-linux* } } } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! 
x32 } } } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */ + /* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-1.c +index 48c4dd4..4117a35 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-1.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-1.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mindirect-branch=thunk-inline -fno-pic" } */ ++/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-2.c +index 355dad5..650d55c 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-2.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-2.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mindirect-branch=thunk-inline -fno-pic" } */ ++/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-3.c +index 244fec7..9540996 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-3.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-3.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mindirect-branch=thunk-inline -fno-pic" } */ ++/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +@@ -12,7 +12,7 @@ male_indirect_jump (long offset) + return 0; + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { { ! x32 } && *-*-linux* } } } } */ + /* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 2 } } */ + /* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 2 } } */ + /* { dg-final { scan-assembler-times {\tpause} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-4.c +index 107ebe3..f3db6e2 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-4.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-4.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mindirect-branch=thunk-inline -fno-pic" } */ ++/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +@@ -12,7 +12,7 @@ male_indirect_jump (long offset) + return 0; + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { { ! 
x32 } && *-*-linux* } } } } */ + /* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 2 } } */ + /* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 2 } } */ + /* { dg-final { scan-assembler-times {\tpause} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c +index d02b1dc..764a375 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mindirect-branch=thunk-inline -fno-pic" } */ ++/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */ + + void func0 (void); + void func1 (void); +@@ -35,7 +35,7 @@ bar (int i) + } + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*\.L\[0-9\]+\\(,%" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*\.L\[0-9\]+\\(,%" { target { { ! x32 } && *-*-linux* } } } } */ + /* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-1.c b/gcc/testsuite/gcc.target/i386/ret-thunk-1.c +new file mode 100644 +index 0000000..7223f67 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-1.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mfunction-return=thunk" } */ ++ ++void ++foo (void) ++{ ++} ++ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_return_thunk" } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler {\tpause} } } */ ++/* { dg-final { scan-assembler {\tlfence} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-10.c b/gcc/testsuite/gcc.target/i386/ret-thunk-10.c +new file mode 100644 +index 0000000..3a6727b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-10.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mfunction-return=thunk-inline -mindirect-branch=thunk -fno-pic" } */ ++ ++extern void (*bar) (void); ++ ++int ++foo (void) ++{ ++ bar (); ++ return 0; ++} ++ ++/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler-not "jmp\[ \t\]*__x86_return_thunk" } } */ ++/* { dg-final { scan-assembler-times {\tpause} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlfence} 2 } } */ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?bar" { target { { ! x32 } && *-*-linux* } } } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "__x86_indirect_thunk:" { target { ! 
x32 } } } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target { x32 } } } } */ ++/* { dg-final { scan-assembler "__x86_indirect_thunk_(r|e)ax:" { target { x32 } } } } */ ++/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-11.c b/gcc/testsuite/gcc.target/i386/ret-thunk-11.c +new file mode 100644 +index 0000000..b8f6818 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-11.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mfunction-return=thunk-extern -mindirect-branch=thunk -fno-pic" } */ ++ ++extern void (*bar) (void); ++ ++int ++foo (void) ++{ ++ bar (); ++ return 0; ++} ++ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_return_thunk" } } */ ++/* { dg-final { scan-assembler-times {\tpause} 1 } } */ ++/* { dg-final { scan-assembler-times {\tlfence} 1 } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?bar" { target { { ! x32 } && *-*-linux* } } } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "__x86_indirect_thunk:" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target { x32 } } } } */ ++/* { dg-final { scan-assembler "__x86_indirect_thunk_(r|e)ax:" { target { x32 } } } } */ ++/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-12.c b/gcc/testsuite/gcc.target/i386/ret-thunk-12.c +new file mode 100644 +index 0000000..01b0a02 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-12.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */ ++ ++extern void (*bar) (void); ++ ++int ++foo (void) ++{ ++ bar (); ++ return 0; ++} ++ ++/* { dg-final { scan-assembler-not "jmp\[ \t\]*__x86_return_thunk" } } */ ++/* { dg-final { scan-assembler-times {\tpause} 1 } } */ ++/* { dg-final { scan-assembler-times {\tlfence} 1 } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "__x86_indirect_thunk:" { target { ! 
x32 } } } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target { x32 } } } } */ ++/* { dg-final { scan-assembler "__x86_indirect_thunk_(r|e)ax:" { target { x32 } } } } */ ++/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-13.c b/gcc/testsuite/gcc.target/i386/ret-thunk-13.c +new file mode 100644 +index 0000000..4b497b5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-13.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */ ++ ++extern void (*bar) (void); ++extern int foo (void) __attribute__ ((function_return("thunk"))); ++ ++int ++foo (void) ++{ ++ bar (); ++ return 0; ++} ++ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_return_thunk" } } */ ++/* { dg-final { scan-assembler-times {\tpause} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlfence} 2 } } */ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?bar" { target { { ! x32 } && *-*-linux* } } } } */ ++/* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 3 } } */ ++/* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 3 } } */ ++/* { dg-final { scan-assembler-not "jmp\[ \t\]*__x86_indirect_thunk" } } */ ++/* { dg-final { scan-assembler-not "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target { x32 } } } } */ ++/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-14.c b/gcc/testsuite/gcc.target/i386/ret-thunk-14.c +new file mode 100644 +index 0000000..4ae4c44 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-14.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */ ++ ++extern void (*bar) (void); ++ ++__attribute__ ((function_return("thunk-inline"))) ++int ++foo (void) ++{ ++ bar (); ++ return 0; ++} ++ ++/* { dg-final { scan-assembler-times {\tpause} 1 } } */ ++/* { dg-final { scan-assembler-times {\tlfence} 1 } } */ ++/* { dg-final { scan-assembler-not "jmp\[ \t\]*__x86_return_thunk" } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?bar" { target { { ! x32 } && *-*-linux* } } } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! 
x32 } } } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target { x32 } } } } */ ++/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-15.c b/gcc/testsuite/gcc.target/i386/ret-thunk-15.c +new file mode 100644 +index 0000000..5b5bc76 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-15.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=keep -fno-pic" } */ ++ ++extern void (*bar) (void); ++ ++__attribute__ ((function_return("thunk-extern"), indirect_branch("thunk"))) ++int ++foo (void) ++{ ++ bar (); ++ return 0; ++} ++ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_return_thunk" } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler-times {\tpause} 1 } } */ ++/* { dg-final { scan-assembler-times {\tlfence} 1 } } */ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?bar" { target { { ! x32 } && *-*-linux* } } } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */ ++/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-16.c b/gcc/testsuite/gcc.target/i386/ret-thunk-16.c +new file mode 100644 +index 0000000..a16cad1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-16.c +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mfunction-return=thunk-inline -mindirect-branch=thunk-extern -fno-pic" } */ ++ ++extern void (*bar) (void); ++ ++__attribute__ ((function_return("keep"), indirect_branch("keep"))) ++int ++foo (void) ++{ ++ bar (); ++ return 0; ++} ++ ++/* { dg-final { scan-assembler-not "__x86_indirect_thunk" } } */ ++/* { dg-final { scan-assembler-not "__x86_return_thunk" } } */ ++/* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */ ++/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-2.c b/gcc/testsuite/gcc.target/i386/ret-thunk-2.c +new file mode 100644 +index 0000000..c6659e3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-2.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mfunction-return=thunk-inline" } */ ++ ++void ++foo (void) ++{ ++} ++ ++/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler {\tpause} } } */ ++/* { dg-final { scan-assembler {\tlfence} } } */ ++/* { dg-final { scan-assembler-not "jmp\[ \t\]*__x86_return_thunk" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-3.c b/gcc/testsuite/gcc.target/i386/ret-thunk-3.c +new file mode 100644 +index 0000000..0f7f388 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-3.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mfunction-return=thunk-extern" } */ ++ ++void ++foo (void) ++{ ++} ++ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_return_thunk" } } */ ++/* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */ ++/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */ +diff --git 
a/gcc/testsuite/gcc.target/i386/ret-thunk-4.c b/gcc/testsuite/gcc.target/i386/ret-thunk-4.c +new file mode 100644 +index 0000000..9ae37e8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-4.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mfunction-return=keep" } */ ++ ++void ++foo (void) ++{ ++} ++ ++/* { dg-final { scan-assembler-not "jmp\[ \t\]*__x86_return_thunk" } } */ ++/* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */ ++/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-5.c b/gcc/testsuite/gcc.target/i386/ret-thunk-5.c +new file mode 100644 +index 0000000..4bd0d2a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-5.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mfunction-return=keep" } */ ++ ++extern void foo (void) __attribute__ ((function_return("thunk"))); ++ ++void ++foo (void) ++{ ++} ++ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_return_thunk" } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler {\tpause} } } */ ++/* { dg-final { scan-assembler {\tlfence} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-6.c b/gcc/testsuite/gcc.target/i386/ret-thunk-6.c +new file mode 100644 +index 0000000..053841f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-6.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mfunction-return=keep" } */ ++ ++__attribute__ ((function_return("thunk-inline"))) ++void ++foo (void) ++{ ++} ++ ++/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler {\tpause} } } */ ++/* { dg-final { scan-assembler {\tlfence} } } */ ++/* { dg-final { scan-assembler-not "jmp\[ \t\]*__x86_return_thunk" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-7.c b/gcc/testsuite/gcc.target/i386/ret-thunk-7.c +new file mode 100644 +index 0000000..262e678 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-7.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mfunction-return=keep" } */ ++ ++__attribute__ ((function_return("thunk-extern"))) ++void ++foo (void) ++{ ++} ++ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_return_thunk" } } */ ++/* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */ ++/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-8.c b/gcc/testsuite/gcc.target/i386/ret-thunk-8.c +new file mode 100644 +index 0000000..c1658e9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-8.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mfunction-return=thunk-inline" } */ ++ ++extern void foo (void) __attribute__ ((function_return("keep"))); ++ ++void ++foo (void) ++{ ++} ++ ++/* { dg-final { scan-assembler-not "jmp\[ \t\]*__x86_return_thunk" } } */ ++/* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */ ++/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-9.c b/gcc/testsuite/gcc.target/i386/ret-thunk-9.c +new file mode 100644 +index 0000000..fa24a1f +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/i386/ret-thunk-9.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mfunction-return=thunk -mindirect-branch=thunk -fno-pic" } */ ++ ++extern void (*bar) (void); ++ ++int ++foo (void) ++{ ++ bar (); ++ return 0; ++} ++ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_return_thunk" } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "__x86_indirect_thunk:" } } */ ++/* { dg-final { scan-assembler-times {\tpause} 1 { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler-times {\tlfence} 1 { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?bar" { target { { ! x32 } && *-*-linux* } } } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ ++/* { dg-final { scan-assembler-times {\tpause} 2 { target { x32 } } } } */ ++/* { dg-final { scan-assembler-times {\tlfence} 2 { target { x32 } } } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target { x32 } } } } */ ++/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */ diff --git a/gcc48-rh1535655-4.patch b/gcc48-rh1535655-4.patch new file mode 100644 index 0000000..9856ee5 --- /dev/null +++ b/gcc48-rh1535655-4.patch @@ -0,0 +1,521 @@ +commit 94695137d1ea3c094dd37ab5b73d66b09639f3f4 +Author: hjl +Date: Tue Jan 16 11:17:49 2018 +0000 + + HJ patch #3 + +diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md +index d567fd7..43faabb 100644 +--- a/gcc/config/i386/constraints.md ++++ b/gcc/config/i386/constraints.md +@@ -135,7 +135,8 @@ + + (define_constraint "w" + "@internal Call memory operand." +- (and (not (match_test "TARGET_X32")) ++ (and (not (match_test "ix86_indirect_branch_register")) ++ (not (match_test "TARGET_X32")) + (match_operand 0 "memory_operand"))) + + ;; Integer constant constraints. 
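+
+Note on the constraint hunk above: once the new ix86_indirect_branch_register
+flag is set, the "w" (call memory operand) constraint -- together with the
+indirect_branch_operand and call_insn_operand predicates changed below --
+stops accepting memory operands, so every indirect jump or call target is
+first loaded into a register.  A minimal sketch of the intended effect
+(illustrative only; it mirrors the dispatch pattern that the new
+indirect-thunk-register-* tests below compile with -mindirect-branch=thunk
+-mindirect-branch-register):
+
+  typedef void (*dispatch_t) (long offset);
+  dispatch_t dispatch;
+
+  void
+  male_indirect_jump (long offset)
+  {
+    /* Without -mindirect-branch-register the compiler may emit the memory
+       form, e.g. "jmp *dispatch(%rip)", or push dispatch and jump to the
+       memory-operand thunk __x86_indirect_thunk.  With the option, dispatch
+       is loaded into %rax (%eax on ia32) first, so only the register thunk
+       "jmp __x86_indirect_thunk_rax" can appear -- exactly what the
+       scan-assembler directives in indirect-thunk-register-1.c check.  */
+    dispatch (offset);
+  }
+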
+diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md +index ef16cf5..228f8f6 100644 +--- a/gcc/config/i386/i386.md ++++ b/gcc/config/i386/i386.md +@@ -11274,7 +11274,7 @@ + [(set (pc) (match_operand 0 "indirect_branch_operand"))] + "" + { +- if (TARGET_X32) ++ if (TARGET_X32 || ix86_indirect_branch_register) + operands[0] = convert_memory_address (word_mode, operands[0]); + cfun->machine->has_local_indirect_jump = true; + }) +@@ -11327,7 +11327,7 @@ + OPTAB_DIRECT); + } + +- if (TARGET_X32) ++ if (TARGET_X32 || ix86_indirect_branch_register) + operands[0] = convert_memory_address (word_mode, operands[0]); + cfun->machine->has_local_indirect_jump = true; + }) +@@ -11514,7 +11514,7 @@ + }) + + (define_insn "*call_pop" +- [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lzm")) ++ [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lwz")) + (match_operand 1)) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) +@@ -11612,7 +11612,7 @@ + + (define_insn "*call_value_pop" + [(set (match_operand 0) +- (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lzm")) ++ (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lwz")) + (match_operand 2))) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) +diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt +index 9dfa2cb..0a8ae8f 100644 +--- a/gcc/config/i386/i386.opt ++++ b/gcc/config/i386/i386.opt +@@ -654,3 +654,7 @@ Enum(indirect_branch) String(thunk-inline) Value(indirect_branch_thunk_inline) + + EnumValue + Enum(indirect_branch) String(thunk-extern) Value(indirect_branch_thunk_extern) ++ ++mindirect-branch-register ++Target Report Var(ix86_indirect_branch_register) Init(0) ++Force indirect call and jump via register. +diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md +index 61614e1..6c7a593 100644 +--- a/gcc/config/i386/predicates.md ++++ b/gcc/config/i386/predicates.md +@@ -540,7 +540,8 @@ + ;; Test for a valid operand for indirect branch. + (define_predicate "indirect_branch_operand" + (ior (match_operand 0 "register_operand") +- (and (not (match_test "TARGET_X32")) ++ (and (not (match_test "ix86_indirect_branch_register")) ++ (not (match_test "TARGET_X32")) + (match_operand 0 "memory_operand")))) + + ;; Test for a valid operand for a call instruction. +@@ -549,8 +550,9 @@ + (ior (match_test "constant_call_address_operand + (op, mode == VOIDmode ? mode : Pmode)") + (match_operand 0 "call_register_no_elim_operand") +- (and (not (match_test "TARGET_X32")) +- (match_operand 0 "memory_operand")))) ++ (and (not (match_test "ix86_indirect_branch_register")) ++ (and (not (match_test "TARGET_X32")) ++ (match_operand 0 "memory_operand"))))) + + ;; Similarly, but for tail calls, in which we cannot allow memory references. + (define_special_predicate "sibcall_insn_operand" +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 5acd23a..4a365c7 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -658,7 +658,8 @@ Objective-C and Objective-C++ Dialects}. + -m32 -m64 -mx32 -mlarge-data-threshold=@var{num} @gol + -msse2avx -mfentry -m8bit-idiv @gol + -mavx256-split-unaligned-load -mavx256-split-unaligned-store @gol +--mindirect-branch=@var{choice} -mfunction-return==@var{choice}} ++-mindirect-branch=@var{choice} -mfunction-return==@var{choice} ++-mindirect-branch-register} + + @emph{i386 and x86-64 Windows Options} + @gccoptlist{-mconsole -mcygwin -mno-cygwin -mdll @gol +@@ -14669,6 +14670,10 @@ object file. 
You can control this behavior for a specific function by + using the function attribute @code{function_return}. + @xref{Function Attributes}. + ++@item -mindirect-branch-register ++@opindex -mindirect-branch-register ++Force indirect call and jump via register. ++ + @end table + + These @samp{-m} switches are supported in addition to the above +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-1.c +index 034b4cc..321db77 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-1.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-1.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-2.c +index e0c57cb..d584516 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-2.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-2.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-3.c +index 3c0d4c3..9e24a38 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-3.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-3.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mno-indirect-branch-register -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-4.c +index 14d4ef6..127b5d9 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-4.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-4.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mno-indirect-branch-register -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c +index bc6b47a..17c2d0f 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */ + + void func0 (void); + void func1 (void); +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-1.c +index 7c45142..cd7e8d7 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-1.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-1.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 
-mfunction-return=keep -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-2.c +index 9eebc84..4dbd7a5 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-2.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-2.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=keep -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-3.c +index f938db0..4aeec18 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-3.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-3.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=keep -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-4.c +index 4e58599..ac0e599 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-4.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-4.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=keep -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-5.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-5.c +index b8d5024..573cf1e 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-5.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-5.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=keep -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-6.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-6.c +index 455adab..b2b37fc 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-6.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-6.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=keep -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-7.c +index 4595b84..4a43e19 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-7.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-7.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=keep -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -fno-pic" } */ + + void func0 (void); + void func1 (void); +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-1.c +index f424181..72de88e 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-1.c ++++ 
b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-1.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-2.c +index ac54868..d4137b3 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-2.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-2.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-3.c +index 06ebf1c..d9964c2 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-3.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-3.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-4.c +index 1c8f944..d4dca4d 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-4.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-4.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c +index 86e9fd1..aece938 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */ + + void func0 (void); + void func1 (void); +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-1.c +index 4117a35..e3cea3f 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-1.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-1.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-2.c +index 650d55c..6222996 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-2.c ++++ 
b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-2.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-3.c +index 9540996..2eef6f3 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-3.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-3.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-4.c +index f3db6e2..e825a10 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-4.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-4.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */ + + typedef void (*dispatch_t)(long offset); + +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c +index 764a375..c67066c 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */ + + void func0 (void); + void func1 (void); +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-register-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-register-1.c +new file mode 100644 +index 0000000..7d396a3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-register-1.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mindirect-branch=thunk -mindirect-branch-register -fno-pic" } */ ++ ++typedef void (*dispatch_t)(long offset); ++ ++dispatch_t dispatch; ++ ++void ++male_indirect_jump (long offset) ++{ ++ dispatch(offset); ++} ++ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "mov\[ \t\](%eax|%rax), \\((%esp|%rsp)\\)" } } */ ++/* { dg-final { scan-assembler {\tpause} } } */ ++/* { dg-final { scan-assembler-not "push(?:l|q)\[ \t\]*_?dispatch" } } */ ++/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" } } */ ++/* { dg-final { scan-assembler-not "__x86_indirect_thunk\n" } } */ ++/* { dg-final { scan-assembler-not "__x86_indirect_thunk_bnd\n" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-register-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-register-2.c +new file mode 100644 +index 0000000..e7e616b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-register-2.c +@@ -0,0 
+1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mindirect-branch=thunk-inline -mindirect-branch-register -fno-pic" } */ ++ ++typedef void (*dispatch_t)(long offset); ++ ++dispatch_t dispatch; ++ ++void ++male_indirect_jump (long offset) ++{ ++ dispatch(offset); ++} ++ ++/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "mov\[ \t\](%eax|%rax), \\((%esp|%rsp)\\)" } } */ ++/* { dg-final { scan-assembler {\tpause} } } */ ++/* { dg-final { scan-assembler-not "push(?:l|q)\[ \t\]*_?dispatch" } } */ ++/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" } } */ ++/* { dg-final { scan-assembler-not "__x86_indirect_thunk" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-register-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-register-3.c +new file mode 100644 +index 0000000..5320e92 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-register-3.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mindirect-branch=thunk-extern -mindirect-branch-register -fno-pic" } */ ++ ++typedef void (*dispatch_t)(long offset); ++ ++dispatch_t dispatch; ++ ++void ++male_indirect_jump (long offset) ++{ ++ dispatch(offset); ++} ++ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ ++/* { dg-final { scan-assembler-not "push(?:l|q)\[ \t\]*_?dispatch" } } */ ++/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" } } */ ++/* { dg-final { scan-assembler-not {\t(pause|pause|nop)} } } */ ++/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-10.c b/gcc/testsuite/gcc.target/i386/ret-thunk-10.c +index 3a6727b..e6fea84 100644 +--- a/gcc/testsuite/gcc.target/i386/ret-thunk-10.c ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-10.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=thunk-inline -mindirect-branch=thunk -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mno-indirect-branch-register -mfunction-return=thunk-inline -mindirect-branch=thunk -fno-pic" } */ + + extern void (*bar) (void); + +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-11.c b/gcc/testsuite/gcc.target/i386/ret-thunk-11.c +index b8f6818..e239ec4 100644 +--- a/gcc/testsuite/gcc.target/i386/ret-thunk-11.c ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-11.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=thunk-extern -mindirect-branch=thunk -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mno-indirect-branch-register -mno-indirect-branch-register -mno-indirect-branch-register -mfunction-return=thunk-extern -mindirect-branch=thunk -fno-pic" } */ + + extern void (*bar) (void); + +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-12.c b/gcc/testsuite/gcc.target/i386/ret-thunk-12.c +index 01b0a02..fa31813 100644 +--- a/gcc/testsuite/gcc.target/i386/ret-thunk-12.c ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-12.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mno-indirect-branch-register -mno-indirect-branch-register -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */ + + extern void (*bar) (void); + +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-13.c 
b/gcc/testsuite/gcc.target/i386/ret-thunk-13.c +index 4b497b5..fd5b41f 100644 +--- a/gcc/testsuite/gcc.target/i386/ret-thunk-13.c ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-13.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */ + + extern void (*bar) (void); + extern int foo (void) __attribute__ ((function_return("thunk"))); +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-14.c b/gcc/testsuite/gcc.target/i386/ret-thunk-14.c +index 4ae4c44..d606373 100644 +--- a/gcc/testsuite/gcc.target/i386/ret-thunk-14.c ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-14.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */ + + extern void (*bar) (void); + +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-15.c b/gcc/testsuite/gcc.target/i386/ret-thunk-15.c +index 5b5bc76..75e45e2 100644 +--- a/gcc/testsuite/gcc.target/i386/ret-thunk-15.c ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-15.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=keep -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mno-indirect-branch-register -mno-indirect-branch-register -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=keep -fno-pic" } */ + + extern void (*bar) (void); + +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-9.c b/gcc/testsuite/gcc.target/i386/ret-thunk-9.c +index fa24a1f..d1db41c 100644 +--- a/gcc/testsuite/gcc.target/i386/ret-thunk-9.c ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-9.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mfunction-return=thunk -mindirect-branch=thunk -fno-pic" } */ ++/* { dg-options "-O2 -mno-indirect-branch-register -mno-indirect-branch-register -mfunction-return=thunk -mindirect-branch=thunk -fno-pic" } */ + + extern void (*bar) (void); + diff --git a/gcc48-rh1535655-5.patch b/gcc48-rh1535655-5.patch new file mode 100644 index 0000000..16539c9 --- /dev/null +++ b/gcc48-rh1535655-5.patch @@ -0,0 +1,77 @@ +commit 6effbc703b711779a196e5dbaf6335f39fab71c2 +Author: hjl +Date: Tue Jan 16 11:19:51 2018 +0000 + + HJ patch #4 + +diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +index 9dffd02f..e73389b 100644 +--- a/gcc/config/i386/i386.c ++++ b/gcc/config/i386/i386.c +@@ -14497,6 +14497,7 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse, + If CODE is 'h', pretend the reg is the 'high' byte register. + If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. + If CODE is 'd', duplicate the operand for AVX instruction. ++ If CODE is 'V', print naked full integer register name without %. 
+ */ + + void +@@ -14506,7 +14507,7 @@ print_reg (rtx x, int code, FILE *file) + unsigned int regno; + bool duplicated = code == 'd' && TARGET_AVX; + +- if (ASSEMBLER_DIALECT == ASM_ATT) ++ if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V') + putc ('%', file); + + if (x == pc_rtx) +@@ -14542,6 +14543,14 @@ print_reg (rtx x, int code, FILE *file) + else + code = GET_MODE_SIZE (GET_MODE (x)); + ++ if (code == 'V') ++ { ++ if (GENERAL_REGNO_P (regno)) ++ code = GET_MODE_SIZE (word_mode); ++ else ++ error ("'V' modifier on non-integer register"); ++ } ++ + /* Irritatingly, AMD extended registers use different naming convention + from the normal registers: "r%d[bwd]" */ + if (REX_INT_REGNO_P (regno)) +@@ -14695,6 +14704,7 @@ get_some_local_dynamic_name (void) + & -- print some in-use local-dynamic symbol name. + H -- print a memory address offset by 8; used for sse high-parts + Y -- print condition for XOP pcom* instruction. ++ V -- print naked full integer register name without %. + + -- print a branch hint as 'cs' or 'ds' prefix + ; -- print a semicolon (after prefixes due to bug in older gas). + ~ -- print "i" if TARGET_AVX2, "f" otherwise. +@@ -14919,6 +14929,7 @@ ix86_print_operand (FILE *file, rtx x, int code) + case 'X': + case 'P': + case 'p': ++ case 'V': + break; + + case 's': +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-register-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-register-4.c +new file mode 100644 +index 0000000..f0cd9b7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-register-4.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mindirect-branch=keep -fno-pic" } */ ++ ++extern void (*func_p) (void); ++ ++void ++foo (void) ++{ ++ asm("call __x86_indirect_thunk_%V0" : : "a" (func_p)); ++} ++ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_eax" { target ia32 } } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_rax" { target { ! ia32 } } } } */ diff --git a/gcc48-rh1535655-6.patch b/gcc48-rh1535655-6.patch new file mode 100644 index 0000000..815e0c6 --- /dev/null +++ b/gcc48-rh1535655-6.patch @@ -0,0 +1,233 @@ +commit 5315d05c7295fbb9345d85d6bf7cbe7c975a19c8 +Author: hjl +Date: Tue Jan 16 11:22:01 2018 +0000 + + HJ patch #5 + +diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +index e73389b..15cfe83 100644 +--- a/gcc/config/i386/i386.c ++++ b/gcc/config/i386/i386.c +@@ -4634,6 +4634,19 @@ ix86_set_indirect_branch_type (tree fndecl) + } + else + cfun->machine->indirect_branch_type = ix86_indirect_branch; ++ ++ /* -mcmodel=large is not compatible with -mindirect-branch=thunk ++ nor -mindirect-branch=thunk-extern. */ ++ if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC) ++ && ((cfun->machine->indirect_branch_type ++ == indirect_branch_thunk_extern) ++ || (cfun->machine->indirect_branch_type ++ == indirect_branch_thunk))) ++ error ("%<-mindirect-branch=%s%> and %<-mcmodel=large%> are not " ++ "compatible", ++ ((cfun->machine->indirect_branch_type ++ == indirect_branch_thunk_extern) ++ ? "thunk-extern" : "thunk")); + } + + if (cfun->machine->function_return_type == indirect_branch_unset) +@@ -4659,6 +4672,19 @@ ix86_set_indirect_branch_type (tree fndecl) + } + else + cfun->machine->function_return_type = ix86_function_return; ++ ++ /* -mcmodel=large is not compatible with -mfunction-return=thunk ++ nor -mfunction-return=thunk-extern. 
*/ ++ if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC) ++ && ((cfun->machine->function_return_type ++ == indirect_branch_thunk_extern) ++ || (cfun->machine->function_return_type ++ == indirect_branch_thunk))) ++ error ("%<-mfunction-return=%s%> and %<-mcmodel=large%> are not " ++ "compatible", ++ ((cfun->machine->function_return_type ++ == indirect_branch_thunk_extern) ++ ? "thunk-extern" : "thunk")); + } + } + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 4a365c7..7b33803 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -14659,6 +14659,11 @@ to external call and return thunk provided in a separate object file. + You can control this behavior for a specific function by using the + function attribute @code{indirect_branch}. @xref{Function Attributes}. + ++Note that @option{-mcmodel=large} is incompatible with ++@option{-mindirect-branch=thunk} nor ++@option{-mindirect-branch=thunk-extern} since the thunk function may ++not be reachable in large code model. ++ + @item -mfunction-return=@var{choice} + @opindex -mfunction-return + Convert function return with @var{choice}. The default is @samp{keep}, +@@ -14670,6 +14675,11 @@ object file. You can control this behavior for a specific function by + using the function attribute @code{function_return}. + @xref{Function Attributes}. + ++Note that @option{-mcmodel=large} is incompatible with ++@option{-mfunction-return=thunk} nor ++@option{-mfunction-return=thunk-extern} since the thunk function may ++not be reachable in large code model. ++ + @item -mindirect-branch-register + @opindex -mindirect-branch-register + Force indirect call and jump via register. +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-10.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-10.c +new file mode 100644 +index 0000000..a0674bd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-10.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile { target { lp64 } } } */ ++/* { dg-options "-O2 -mindirect-branch=thunk-inline -mfunction-return=keep -mcmodel=large" } */ ++ ++void ++bar (void) ++{ ++} +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-8.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-8.c +new file mode 100644 +index 0000000..7a80a89 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-8.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile { target { lp64 } } } */ ++/* { dg-options "-O2 -mindirect-branch=thunk -mfunction-return=keep -mcmodel=large" } */ ++ ++void ++bar (void) ++{ /* { dg-error "'-mindirect-branch=thunk' and '-mcmodel=large' are not compatible" } */ ++} +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-9.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-9.c +new file mode 100644 +index 0000000..d4d45c5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-9.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile { target { lp64 } } } */ ++/* { dg-options "-O2 -mindirect-branch=thunk-extern -mfunction-return=keep -mcmodel=large" } */ ++ ++void ++bar (void) ++{ /* { dg-error "'-mindirect-branch=thunk-extern' and '-mcmodel=large' are not compatible" } */ ++} +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-10.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-10.c +new file mode 100644 +index 0000000..3a2aead +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-10.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile { target { lp64 } } } */ ++/* { dg-options "-O2 -mindirect-branch=keep -mfunction-return=keep -mcmodel=large" } */ ++/* { dg-additional-options "-fPIC" 
{ target fpic } } */ ++ ++__attribute__ ((indirect_branch("thunk-extern"))) ++void ++bar (void) ++{ /* { dg-error "'-mindirect-branch=thunk-extern' and '-mcmodel=large' are not compatible" } */ ++} +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-11.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-11.c +new file mode 100644 +index 0000000..8e52f03 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-11.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile { target { lp64 } } } */ ++/* { dg-options "-O2 -mindirect-branch=keep -mfunction-return=keep -mcmodel=large" } */ ++/* { dg-additional-options "-fPIC" { target fpic } } */ ++ ++__attribute__ ((indirect_branch("thunk-inline"))) ++void ++bar (void) ++{ ++} +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-9.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-9.c +new file mode 100644 +index 0000000..bdaa4f6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-9.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile { target { lp64 } } } */ ++/* { dg-options "-O2 -mindirect-branch=keep -mfunction-return=keep -mcmodel=large" } */ ++/* { dg-additional-options "-fPIC" { target fpic } } */ ++ ++__attribute__ ((indirect_branch("thunk"))) ++void ++bar (void) ++{ /* { dg-error "'-mindirect-branch=thunk' and '-mcmodel=large' are not compatible" } */ ++} +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-17.c b/gcc/testsuite/gcc.target/i386/ret-thunk-17.c +new file mode 100644 +index 0000000..0605e2c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-17.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile { target { lp64 } } } */ ++/* { dg-options "-O2 -mfunction-return=thunk -mindirect-branch=keep -mcmodel=large" } */ ++ ++void ++bar (void) ++{ /* { dg-error "'-mfunction-return=thunk' and '-mcmodel=large' are not compatible" } */ ++} +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-18.c b/gcc/testsuite/gcc.target/i386/ret-thunk-18.c +new file mode 100644 +index 0000000..307019d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-18.c +@@ -0,0 +1,8 @@ ++/* { dg-do compile { target { lp64 } } } */ ++/* { dg-options "-O2 -mfunction-return=thunk-extern -mindirect-branch=keep -mcmodel=large" } */ ++/* { dg-additional-options "-fPIC" { target fpic } } */ ++ ++void ++bar (void) ++{ /* { dg-error "'-mfunction-return=thunk-extern' and '-mcmodel=large' are not compatible" } */ ++} +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-19.c b/gcc/testsuite/gcc.target/i386/ret-thunk-19.c +new file mode 100644 +index 0000000..772617f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-19.c +@@ -0,0 +1,8 @@ ++/* { dg-do compile { target { lp64 } } } */ ++/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=keep -mcmodel=large" } */ ++ ++__attribute__ ((function_return("thunk"))) ++void ++bar (void) ++{ /* { dg-error "'-mfunction-return=thunk' and '-mcmodel=large' are not compatible" } */ ++} +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-20.c b/gcc/testsuite/gcc.target/i386/ret-thunk-20.c +new file mode 100644 +index 0000000..1e9f9bd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-20.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile { target { lp64 } } } */ ++/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=keep -mcmodel=large" } */ ++/* { dg-additional-options "-fPIC" { target fpic } } */ ++ ++__attribute__ ((function_return("thunk-extern"))) ++void ++bar (void) ++{ /* { dg-error "'-mfunction-return=thunk-extern' and '-mcmodel=large' are not compatible" } */ 
++} +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-21.c b/gcc/testsuite/gcc.target/i386/ret-thunk-21.c +new file mode 100644 +index 0000000..eea07f7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-21.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile { target { lp64 } } } */ ++/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=keep -mcmodel=large" } */ ++/* { dg-additional-options "-fPIC" { target fpic } } */ ++ ++__attribute__ ((function_return("thunk-inline"))) ++void ++bar (void) ++{ ++} diff --git a/gcc48-rh1537828-1.patch b/gcc48-rh1537828-1.patch new file mode 100644 index 0000000..45ac7eb --- /dev/null +++ b/gcc48-rh1537828-1.patch @@ -0,0 +1,38 @@ +2018-04-10 Segher Boessenkool + + PR target/85287 + * gcc/config/rs6000/rs6000.md (allocate_stack): Put the residual size + for stack clash protection in a register whenever we need it to be in + a register. + + +--- a/gcc/config/rs6000/rs6000.md 2018/04/10 21:09:30 259298 ++++ b/gcc/config/rs6000/rs6000.md 2018/04/10 21:37:34 259299 +@@ -9783,14 +9783,12 @@ + /* Now handle residuals. We just have to set operands[1] correctly + and let the rest of the expander run. */ + operands[1] = residual; +- if (!CONST_INT_P (residual)) +- operands[1] = force_reg (Pmode, operands[1]); + } + +- if (GET_CODE (operands[1]) != CONST_INT +- || INTVAL (operands[1]) < -32767 +- || INTVAL (operands[1]) > 32768) ++ if (!(CONST_INT_P (operands[1]) ++ && IN_RANGE (INTVAL (operands[1]), -32767, 32768))) + { ++ operands[1] = force_reg (Pmode, operands[1]); + neg_op0 = gen_reg_rtx (Pmode); + if (TARGET_32BIT) + emit_insn (gen_negsi2 (neg_op0, operands[1])); +@@ -9798,7 +9796,7 @@ + emit_insn (gen_negdi2 (neg_op0, operands[1])); + } + else +- neg_op0 = GEN_INT (- INTVAL (operands[1])); ++ neg_op0 = GEN_INT (-INTVAL (operands[1])); + + insn = emit_insn ((* ((TARGET_32BIT) ? 
gen_movsi_update_stack + : gen_movdi_di_update_stack)) + diff --git a/gcc48-rh1537828-10.patch b/gcc48-rh1537828-10.patch new file mode 100644 index 0000000..5097874 --- /dev/null +++ b/gcc48-rh1537828-10.patch @@ -0,0 +1,91 @@ +diff --git a/gcc/testsuite/gcc.dg/stack-check-5.c b/gcc/testsuite/gcc.dg/stack-check-5.c +index 850e023ea4e..604fa3cf6c5 100644 +--- a/gcc/testsuite/gcc.dg/stack-check-5.c ++++ b/gcc/testsuite/gcc.dg/stack-check-5.c +@@ -1,7 +1,7 @@ + /* { dg-do compile } */ + /* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */ + /* { dg-require-effective-target supports_stack_clash_protection } */ +-/* { dg-skip-if "" { *-*-* } { "-fstack-protector" } { "" } } */ ++/* { dg-skip-if "" { *-*-* } { "-fstack-protector*" } { "" } } */ + + + /* Otherwise the S/390 back-end might save the stack pointer in f2 () +diff --git a/gcc/testsuite/gcc.dg/stack-check-6.c b/gcc/testsuite/gcc.dg/stack-check-6.c +index ab4b0e8894c..fe75612b737 100644 +--- a/gcc/testsuite/gcc.dg/stack-check-6.c ++++ b/gcc/testsuite/gcc.dg/stack-check-6.c +@@ -1,7 +1,7 @@ + /* { dg-do compile } */ + /* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */ + /* { dg-require-effective-target supports_stack_clash_protection } */ +-/* { dg-skip-if "" { *-*-* } { "-fstack-protector" } { "" } } */ ++/* { dg-skip-if "" { *-*-* } { "-fstack-protector*" } { "" } } */ + + + extern void foo (char *); +diff --git a/gcc/testsuite/gcc.dg/stack-check-6a.c b/gcc/testsuite/gcc.dg/stack-check-6a.c +index 468d649a4fa..8fb9c621585 100644 +--- a/gcc/testsuite/gcc.dg/stack-check-6a.c ++++ b/gcc/testsuite/gcc.dg/stack-check-6a.c +@@ -4,7 +4,7 @@ + /* { dg-do compile } */ + /* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=16" } */ + /* { dg-require-effective-target supports_stack_clash_protection } */ +-/* { dg-skip-if "" { *-*-* } { "-fstack-protector" } { "" } } */ ++/* { dg-skip-if "" { *-*-* } { "-fstack-protector*" } { "" } } */ + + + #include "stack-check-6.c" +diff --git a/gcc/testsuite/gcc.target/i386/stack-check-11.c b/gcc/testsuite/gcc.target/i386/stack-check-11.c +index fe5b2c2b844..43a291857b6 100644 +--- a/gcc/testsuite/gcc.target/i386/stack-check-11.c ++++ b/gcc/testsuite/gcc.target/i386/stack-check-11.c +@@ -1,6 +1,8 @@ + /* { dg-do compile } */ + /* { dg-options "-O2 -fstack-clash-protection" } */ + /* { dg-require-effective-target supports_stack_clash_protection } */ ++/* { dg-skip-if "" { *-*-* } { "-fstack-protector*" } { "" } } */ ++ + + #include + +diff --git a/gcc/testsuite/gcc.target/i386/stack-check-17.c b/gcc/testsuite/gcc.target/i386/stack-check-17.c +index dcd29305a2c..da6ea016815 100644 +--- a/gcc/testsuite/gcc.target/i386/stack-check-17.c ++++ b/gcc/testsuite/gcc.target/i386/stack-check-17.c +@@ -1,6 +1,8 @@ + /* { dg-do compile } */ + /* { dg-options "-O2 -fstack-clash-protection -mtune=generic -fomit-frame-pointer" } */ + /* { dg-require-effective-target supports_stack_clash_protection } */ ++/* { dg-skip-if "" { *-*-* } { "-fstack-protector*" } { "" } } */ ++ + + + int x0, x1; +diff --git a/gcc/testsuite/gcc.target/i386/stack-check-18.c 
b/gcc/testsuite/gcc.target/i386/stack-check-18.c +index 1638f776267..1cf4bbcfafb 100644 +--- a/gcc/testsuite/gcc.target/i386/stack-check-18.c ++++ b/gcc/testsuite/gcc.target/i386/stack-check-18.c +@@ -1,7 +1,7 @@ + /* { dg-do compile } */ + /* { dg-options "-O2 -fstack-clash-protection -mtune=generic -fdump-rtl-expand" } */ + /* { dg-require-effective-target supports_stack_clash_protection } */ +-/* { dg-skip-if "" { *-*-* } { "-fstack-protector" } { "" } } */ ++/* { dg-skip-if "" { *-*-* } { "-fstack-protector*" } { "" } } */ + + int f1 (char *); + +diff --git a/gcc/testsuite/gcc.target/i386/stack-check-19.c b/gcc/testsuite/gcc.target/i386/stack-check-19.c +index c341801189c..49f3a20af8b 100644 +--- a/gcc/testsuite/gcc.target/i386/stack-check-19.c ++++ b/gcc/testsuite/gcc.target/i386/stack-check-19.c +@@ -1,7 +1,7 @@ + /* { dg-do compile } */ + /* { dg-options "-O2 -fstack-clash-protection -mtune=generic -fdump-rtl-expand" } */ + /* { dg-require-effective-target supports_stack_clash_protection } */ +-/* { dg-skip-if "" { *-*-* } { "-fstack-protector" } { "" } } */ ++/* { dg-skip-if "" { *-*-* } { "-fstack-protector*" } { "" } } */ + + int f1 (char *); + diff --git a/gcc48-rh1537828-2.patch b/gcc48-rh1537828-2.patch new file mode 100644 index 0000000..7729d2c --- /dev/null +++ b/gcc48-rh1537828-2.patch @@ -0,0 +1,114 @@ +commit f7765f70e0e254fd9ce4469c7281c69cd06c9467 +Author: law +Date: Wed Jan 24 21:57:16 2018 +0000 + + PR target/83994 + * i386.c (get_probe_interval): Move to earlier point. + (ix86_compute_frame_layout): If -fstack-clash-protection and + the frame is larger than the probe interval, then use pushes + to save registers rather than reg->mem moves. + (ix86_expand_prologue): Remove conditional for int_registers_saved + assertion. + + PR target/83994 + * gcc.target/i386/pr83994.c: New test. + +diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +index 15cfe83..5230227 100644 +--- a/gcc/config/i386/i386.c ++++ b/gcc/config/i386/i386.c +@@ -9371,6 +9371,18 @@ ix86_builtin_setjmp_frame_value (void) + return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx; + } + ++/* Return the probing interval for -fstack-clash-protection. */ ++ ++static HOST_WIDE_INT ++get_probe_interval (void) ++{ ++ if (flag_stack_clash_protection) ++ return (HOST_WIDE_INT_1U ++ << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL)); ++ else ++ return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP); ++} ++ + /* When using -fsplit-stack, the allocation routines set a field in + the TCB to the bottom of the stack plus this much space, measured + in bytes. */ +@@ -9545,7 +9557,15 @@ ix86_compute_frame_layout (struct ix86_frame *frame) + to_allocate = offset - frame->sse_reg_save_offset; + + if ((!to_allocate && frame->nregs <= 1) +- || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000)) ++ || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000) ++ /* If stack clash probing needs a loop, then it needs a ++ scratch register. But the returned register is only guaranteed ++ to be safe to use after register saves are complete. So if ++ stack clash protections are enabled and the allocated frame is ++ larger than the probe interval, then use pushes to save ++ callee saved registers. 
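++ (With the default --param stack-clash-protection-probe-interval=12 the
++ interval is 4 KiB, so the push path is taken for frames larger than a
++ page.)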
*/ ++ || (flag_stack_clash_protection && to_allocate > get_probe_interval ())) ++ + frame->save_regs_using_mov = false; + + if (ix86_using_red_zone () +@@ -10181,18 +10201,6 @@ release_scratch_register_on_entry (struct scratch_reg *sr) + } + } + +-/* Return the probing interval for -fstack-clash-protection. */ +- +-static HOST_WIDE_INT +-get_probe_interval (void) +-{ +- if (flag_stack_clash_protection) +- return (HOST_WIDE_INT_1U +- << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL)); +- else +- return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP); +-} +- + /* Emit code to adjust the stack pointer by SIZE bytes while probing it. + + This differs from the next routine in that it tries hard to prevent +@@ -11064,12 +11072,11 @@ ix86_expand_prologue (void) + && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK + || flag_stack_clash_protection)) + { +- /* This assert wants to verify that integer registers were saved +- prior to probing. This is necessary when probing may be implemented +- as a function call (Windows). It is not necessary for stack clash +- protection probing. */ +- if (!flag_stack_clash_protection) +- gcc_assert (int_registers_saved); ++ /* We expect the GP registers to be saved when probes are used ++ as the probing sequences might need a scratch register and ++ the routine to allocate one assumes the integer registers ++ have already been saved. */ ++ gcc_assert (int_registers_saved); + + if (flag_stack_clash_protection) + { +diff --git a/gcc/testsuite/gcc.target/i386/pr83994.c b/gcc/testsuite/gcc.target/i386/pr83994.c +new file mode 100644 +index 0000000..dc0b7cb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr83994.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=i686 -fpic -fstack-clash-protection" } */ ++/* { dg-require-effective-target ia32 } */ ++ ++void f1 (char *); ++ ++__attribute__ ((regparm (3))) ++int ++f2 (int arg1, int arg2, int arg3) ++{ ++ char buf[16384]; ++ f1 (buf); ++ f1 (buf); ++ return 0; ++} ++ diff --git a/gcc48-rh1537828-3.patch b/gcc48-rh1537828-3.patch new file mode 100644 index 0000000..4493a80 --- /dev/null +++ b/gcc48-rh1537828-3.patch @@ -0,0 +1,163 @@ +commit 33839c8f8aa7857cc5f22ddb3f0960999cb0dfc7 +Author: law +Date: Wed Jan 31 05:02:30 2018 +0000 + + PR target/84064 + * i386.c (ix86_adjust_stack_and_probe_stack_clash): New argument + INT_REGISTERS_SAVED. Check it prior to calling + get_scratch_register_on_entry. + (ix86_adjust_stack_and_probe): Similarly. + (ix86_emit_probe_stack_range): Similarly. + (ix86_expand_prologue): Corresponding changes. + + PR target/84064 + * gcc.target/i386/pr84064: New test. + +diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +index 5230227..2fe2a0c 100644 +--- a/gcc/config/i386/i386.c ++++ b/gcc/config/i386/i386.c +@@ -10206,10 +10206,14 @@ release_scratch_register_on_entry (struct scratch_reg *sr) + This differs from the next routine in that it tries hard to prevent + attacks that jump the stack guard. Thus it is never allowed to allocate + more than PROBE_INTERVAL bytes of stack space without a suitable +- probe. */ ++ probe. ++ ++ INT_REGISTERS_SAVED is true if integer registers have already been ++ pushed on the stack. 
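++ It is computed in ix86_expand_prologue and passed down so this code
++ can tell when get_scratch_register_on_entry is safe to call.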
*/ + + static void +-ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size) ++ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size, ++ const bool int_registers_saved) + { + struct machine_function *m = cfun->machine; + struct ix86_frame frame; +@@ -10318,6 +10322,12 @@ ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size) + } + else + { ++ /* We expect the GP registers to be saved when probes are used ++ as the probing sequences might need a scratch register and ++ the routine to allocate one assumes the integer registers ++ have already been saved. */ ++ gcc_assert (int_registers_saved); ++ + struct scratch_reg sr; + get_scratch_register_on_entry (&sr); + +@@ -10376,10 +10386,14 @@ ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size) + emit_insn (gen_blockage ()); + } + +-/* Emit code to adjust the stack pointer by SIZE bytes while probing it. */ ++/* Emit code to adjust the stack pointer by SIZE bytes while probing it. ++ ++ INT_REGISTERS_SAVED is true if integer registers have already been ++ pushed on the stack. */ + + static void +-ix86_adjust_stack_and_probe (const HOST_WIDE_INT size) ++ix86_adjust_stack_and_probe (const HOST_WIDE_INT size, ++ const bool int_registers_saved) + { + /* We skip the probe for the first interval + a small dope of 4 words and + probe that many bytes past the specified size to maintain a protection +@@ -10440,6 +10454,12 @@ ix86_adjust_stack_and_probe (const HOST_WIDE_INT size) + equality test for the loop condition. */ + else + { ++ /* We expect the GP registers to be saved when probes are used ++ as the probing sequences might need a scratch register and ++ the routine to allocate one assumes the integer registers ++ have already been saved. */ ++ gcc_assert (int_registers_saved); ++ + HOST_WIDE_INT rounded_size; + struct scratch_reg sr; + +@@ -10564,10 +10584,14 @@ output_adjust_stack_and_probe (rtx reg) + } + + /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, +- inclusive. These are offsets from the current stack pointer. */ ++ inclusive. These are offsets from the current stack pointer. ++ ++ INT_REGISTERS_SAVED is true if integer registers have already been ++ pushed on the stack. */ + + static void +-ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) ++ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size, ++ const bool int_registers_saved) + { + /* See if we have a constant small number of probes to generate. If so, + that's the easy case. The run-time loop is made up of 7 insns in the +@@ -10595,6 +10619,12 @@ ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) + equality test for the loop condition. */ + else + { ++ /* We expect the GP registers to be saved when probes are used ++ as the probing sequences might need a scratch register and ++ the routine to allocate one assumes the integer registers ++ have already been saved. */ ++ gcc_assert (int_registers_saved); ++ + HOST_WIDE_INT rounded_size, last; + struct scratch_reg sr; + +@@ -11072,20 +11102,15 @@ ix86_expand_prologue (void) + && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK + || flag_stack_clash_protection)) + { +- /* We expect the GP registers to be saved when probes are used +- as the probing sequences might need a scratch register and +- the routine to allocate one assumes the integer registers +- have already been saved. 
*/ +- gcc_assert (int_registers_saved); +- + if (flag_stack_clash_protection) + { +- ix86_adjust_stack_and_probe_stack_clash (allocate); ++ ix86_adjust_stack_and_probe_stack_clash (allocate, ++ int_registers_saved); + allocate = 0; + } + else if (STACK_CHECK_MOVING_SP) + { +- ix86_adjust_stack_and_probe (allocate); ++ ix86_adjust_stack_and_probe (allocate, int_registers_saved); + allocate = 0; + } + else +@@ -11096,9 +11121,11 @@ ix86_expand_prologue (void) + size = 0x80000000 - get_stack_check_protect () - 1; + + if (TARGET_STACK_PROBE) +- ix86_emit_probe_stack_range (0, size + get_stack_check_protect ()); ++ ix86_emit_probe_stack_range (0, size + get_stack_check_protect (), ++ int_registers_saved); + else +- ix86_emit_probe_stack_range (get_stack_check_protect (), size); ++ ix86_emit_probe_stack_range (get_stack_check_protect (), size, ++ int_registers_saved); + } + } + +diff --git a/gcc/testsuite/gcc.target/i386/pr84064.c b/gcc/testsuite/gcc.target/i386/pr84064.c +new file mode 100644 +index 0000000..01f8d9e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr84064.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=i686 -fstack-clash-protection" } */ ++/* { dg-require-effective-target ia32 } */ ++ ++void ++f (void *p1, void *p2) ++{ ++ __builtin_memcpy (p1, p2, 1000); ++} ++ diff --git a/gcc48-rh1537828-4.patch b/gcc48-rh1537828-4.patch new file mode 100644 index 0000000..4750f19 --- /dev/null +++ b/gcc48-rh1537828-4.patch @@ -0,0 +1,182 @@ +commit 14041afe24556efd5845564aa183b6451fd9d6cc +Author: law +Date: Thu Feb 1 16:22:56 2018 +0000 + + PR target/84128 + * config/i386/i386.c (release_scratch_register_on_entry): Add new + OFFSET and RELEASE_VIA_POP arguments. Use SP+OFFSET to restore + the scratch if RELEASE_VIA_POP is false. + (ix86_adjust_stack_and_probe_stack_clash): Un-constify SIZE. + If we have to save a temporary register, decrement SIZE appropriately. + Pass new arguments to release_scratch_register_on_entry. + (ix86_adjust_stack_and_probe): Likewise. + (ix86_emit_probe_stack_range): Pass new arguments to + release_scratch_register_on_entry. + + PR target/84128 + * gcc.target/i386/pr84128.c: New test. + +diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +index 2fe2a0c..c25d26c 100644 +--- a/gcc/config/i386/i386.c ++++ b/gcc/config/i386/i386.c +@@ -10182,22 +10182,39 @@ get_scratch_register_on_entry (struct scratch_reg *sr) + } + } + +-/* Release a scratch register obtained from the preceding function. */ ++/* Release a scratch register obtained from the preceding function. ++ ++ If RELEASE_VIA_POP is true, we just pop the register off the stack ++ to release it. This is what non-Linux systems use with -fstack-check. ++ ++ Otherwise we use OFFSET to locate the saved register and the ++ allocated stack space becomes part of the local frame and is ++ deallcated by the epilogue. */ + + static void +-release_scratch_register_on_entry (struct scratch_reg *sr) ++release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset, ++ bool release_via_pop) + { + if (sr->saved) + { +- struct machine_function *m = cfun->machine; +- rtx x, insn = emit_insn (gen_pop (sr->reg)); ++ if (release_via_pop) ++ { ++ struct machine_function *m = cfun->machine; ++ rtx x, insn = emit_insn (gen_pop (sr->reg)); + +- /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. 
*/ +- RTX_FRAME_RELATED_P (insn) = 1; +- x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD)); +- x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x); +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, x); +- m->fs.sp_offset -= UNITS_PER_WORD; ++ /* The RTX FRAME_RELATED_P mechanism doesn't know about pop. */ ++ RTX_FRAME_RELATED_P (insn) = 1; ++ x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD)); ++ x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x); ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, x); ++ m->fs.sp_offset -= UNITS_PER_WORD; ++ } ++ else ++ { ++ rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); ++ x = gen_rtx_SET (VOIDmode, sr->reg, gen_rtx_MEM (word_mode, x)); ++ emit_insn (x); ++ } + } + } + +@@ -10212,7 +10229,7 @@ release_scratch_register_on_entry (struct scratch_reg *sr) + pushed on the stack. */ + + static void +-ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size, ++ix86_adjust_stack_and_probe_stack_clash (HOST_WIDE_INT size, + const bool int_registers_saved) + { + struct machine_function *m = cfun->machine; +@@ -10331,6 +10348,12 @@ ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size, + struct scratch_reg sr; + get_scratch_register_on_entry (&sr); + ++ /* If we needed to save a register, then account for any space ++ that was pushed (we are not going to pop the register when ++ we do the restore). */ ++ if (sr.saved) ++ size -= UNITS_PER_WORD; ++ + /* Step 1: round SIZE down to a multiple of the interval. */ + HOST_WIDE_INT rounded_size = size & -probe_interval; + +@@ -10379,7 +10402,9 @@ ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size, + m->fs.cfa_reg == stack_pointer_rtx); + dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size); + +- release_scratch_register_on_entry (&sr); ++ /* This does not deallocate the space reserved for the scratch ++ register. That will be deallocated in the epilogue. */ ++ release_scratch_register_on_entry (&sr, size, false); + } + + /* Make sure nothing is scheduled before we are done. */ +@@ -10392,7 +10417,7 @@ ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size, + pushed on the stack. */ + + static void +-ix86_adjust_stack_and_probe (const HOST_WIDE_INT size, ++ix86_adjust_stack_and_probe (HOST_WIDE_INT size, + const bool int_registers_saved) + { + /* We skip the probe for the first interval + a small dope of 4 words and +@@ -10465,6 +10490,11 @@ ix86_adjust_stack_and_probe (const HOST_WIDE_INT size, + + get_scratch_register_on_entry (&sr); + ++ /* If we needed to save a register, then account for any space ++ that was pushed (we are not going to pop the register when ++ we do the restore). */ ++ if (sr.saved) ++ size -= UNITS_PER_WORD; + + /* Step 1: round SIZE to the previous multiple of the interval. */ + +@@ -10516,7 +10546,9 @@ ix86_adjust_stack_and_probe (const HOST_WIDE_INT size, + (get_probe_interval () + + dope)))); + +- release_scratch_register_on_entry (&sr); ++ /* This does not deallocate the space reserved for the scratch ++ register. That will be deallocated in the epilogue. */ ++ release_scratch_register_on_entry (&sr, size, false); + } + + gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx); +@@ -10669,7 +10701,7 @@ ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size, + sr.reg), + rounded_size - size)); + +- release_scratch_register_on_entry (&sr); ++ release_scratch_register_on_entry (&sr, size, true); + } + + /* Make sure nothing is scheduled before we are done. 
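+ (The gen_blockage insn emitted just after this comment provides the
+ scheduling barrier.)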
*/ +diff --git a/gcc/testsuite/gcc.target/i386/pr84128.c b/gcc/testsuite/gcc.target/i386/pr84128.c +new file mode 100644 +index 0000000..a8323fd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr84128.c +@@ -0,0 +1,30 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -march=i686 -mtune=generic -fstack-clash-protection" } */ ++/* { dg-require-effective-target ia32 } */ ++ ++__attribute__ ((noinline, noclone, weak, regparm (3))) ++int ++f1 (long arg0, int (*pf) (long, void *)) ++{ ++ unsigned char buf[32768]; ++ return pf (arg0, buf); ++} ++ ++__attribute__ ((noinline, noclone, weak)) ++int ++f2 (long arg0, void *ignored) ++{ ++ if (arg0 != 17) ++ __builtin_abort (); ++ return 19; ++} ++ ++int ++main (void) ++{ ++ if (f1 (17, f2) != 19) ++ __builtin_abort (); ++ return 0; ++} ++ ++ diff --git a/gcc48-rh1537828-5.patch b/gcc48-rh1537828-5.patch new file mode 100644 index 0000000..83597ad --- /dev/null +++ b/gcc48-rh1537828-5.patch @@ -0,0 +1,80 @@ +commit 5fdcac79eb72406c59fa72073dfb3ba21380f56d +Author: ktkachov +Date: Tue Apr 10 09:58:57 2018 +0000 + + [explow] PR target/85173: validize memory before passing it on to target probe_stack + + In this PR the expansion code emits an invalid memory address for the stack probe, which the backend fails to recognise. + The address is created explicitly in anti_adjust_stack_and_probe_stack_clash in explow.c and passed down to gen_probe_stack + without any validation in emit_stack_probe. + + This patch fixes the ICE by calling validize_mem on the memory location before passing it down to the target. + Jakub pointed out that we also want to create valid addresses for the probe_stack_address case, so this patch + creates an expand operand and legitimizes it before passing it down to the probe_stack_address expander. + + This patch passes bootstrap and testing on arm-none-linux-gnueabihf and aarch64-none-linux-gnu + and ppc64le-redhat-linux on gcc112 in the compile farm. + + PR target/85173 + * explow.c (emit_stack_probe): Call validize_mem on memory location + before passing it to gen_probe_stack. Create address operand and + legitimize it for the probe_stack_address case. + + * gcc.target/arm/pr85173.c: New test. + + + + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@259266 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/explow.c b/gcc/explow.c +index 9386489..e2253ae 100644 +--- a/gcc/explow.c ++++ b/gcc/explow.c +@@ -1549,13 +1549,20 @@ emit_stack_probe (rtx address) + { + #ifdef HAVE_probe_stack_address + if (HAVE_probe_stack_address) +- emit_insn (gen_probe_stack_address (address)); ++ { ++ struct expand_operand ops[1]; ++ insn_code icode = targetm.code_for_probe_stack_address; ++ create_address_operand (ops, address); ++ maybe_legitimize_operands (icode, 0, 1, ops); ++ expand_insn (icode, 1, ops); ++ } + else + #endif + { + rtx memref = gen_rtx_MEM (word_mode, address); + + MEM_VOLATILE_P (memref) = 1; ++ memref = validize_mem (memref); + + /* See if we have an insn to probe the stack. */ + #ifdef HAVE_probe_stack +diff --git a/gcc/testsuite/gcc.target/arm/pr85173.c b/gcc/testsuite/gcc.target/arm/pr85173.c +new file mode 100644 +index 0000000..36105c9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/arm/pr85173.c +@@ -0,0 +1,20 @@ ++/* PR target/85173. 
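++   Expansion used to hand the backend an invalid memory address for the
++   stack probe; emit_stack_probe now legitimizes the address first.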
*/ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-probe-interval=14" } */ ++/* { dg-require-effective-target arm_thumb2_ok } */ ++ ++__attribute__((noinline, noclone)) void ++foo (char *p) ++{ ++ asm volatile ("" : : "r" (p) : "memory"); ++} ++ ++/* Nonconstant alloca, small local frame. */ ++__attribute__((noinline, noclone)) void ++f5 (int x) ++{ ++ char locals[128]; ++ char *vla = __builtin_alloca (x); ++ foo (vla); ++} diff --git a/gcc48-rh1537828-6.patch b/gcc48-rh1537828-6.patch new file mode 100644 index 0000000..0122698 --- /dev/null +++ b/gcc48-rh1537828-6.patch @@ -0,0 +1,64 @@ +commit 49033c9c57a415db02ac5d98badf5f53342bca83 +Author: krebbel +Date: Thu Apr 12 09:14:57 2018 +0000 + + IBM Z: Spectre: Prevent thunk cfi to be emitted with -fno-dwarf2-cfi-asm + + The CFI magic we emit as part of the indirect branch thunks in order to + have somewhat sane unwind information must not be emitted with + -fno-dwarf2-cfi-asm. + + gcc/ChangeLog: + + 2018-04-12 Andreas Krebbel + + * config/s390/s390.c (s390_output_indirect_thunk_function): Check + also for flag_dwarf2_cfi_asm. + + gcc/testsuite/ChangeLog: + + 2018-04-12 Andreas Krebbel + + * gcc.target/s390/nobp-no-dwarf2-cfi.c: New test. + + + + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@259340 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff -Nrup gcc/config/s390/s390.c gcc/config/s390/s390.c +--- gcc/config/s390/s390.c 2018-04-18 13:35:43.856279249 -0600 ++++ gcc/config/s390/s390.c 2018-04-18 13:36:21.037007877 -0600 +@@ -14091,7 +14091,7 @@ s390_output_indirect_thunk_function (uns + + calls: Instead of caller->thunk the backtrace will be + caller->callee->thunk */ +- if (flag_asynchronous_unwind_tables) ++ if (flag_asynchronous_unwind_tables && flag_dwarf2_cfi_asm) + { + fputs ("\t.cfi_signal_frame\n", asm_out_file); + fprintf (asm_out_file, "\t.cfi_return_column %d\n", regno); +diff --git gcc/testsuite/gcc.target/s390/nobp-no-dwarf2-cfi.c gcc/testsuite/gcc.target/s390/nobp-no-dwarf2-cfi.c +new file mode 100644 +index 0000000..75e32a1 +--- /dev/null ++++ gcc/testsuite/gcc.target/s390/nobp-no-dwarf2-cfi.c +@@ -0,0 +1,19 @@ ++/* { dg-do run } */ ++/* { dg-options "-O3 -march=z900 --save-temps -mfunction-return-reg=thunk -mindirect-branch-table -fno-dwarf2-cfi-asm" } */ ++ ++/* Make sure that we do not emit .cfi directives when -fno-dwarf2-cfi-asm is being used. */ ++ ++int ++main () ++{ ++ return 0; ++} ++ ++/* 1 x main ++/* { dg-final { scan-assembler-times "jg\t__s390_indirect_jump" 1 } } */ ++/* { dg-final { scan-assembler "ex\t" } } */ ++ ++/* { dg-final { scan-assembler-not "section\t.s390_indirect_jump" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_indirect_call" } } */ ++/* { dg-final { scan-assembler "section\t.s390_return_reg" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_return_mem" } } */ diff --git a/gcc48-rh1537828-7.patch b/gcc48-rh1537828-7.patch new file mode 100644 index 0000000..d8bd816 --- /dev/null +++ b/gcc48-rh1537828-7.patch @@ -0,0 +1,414 @@ +commit 4361c221ff4b53f585a2e8c0ba38956c8132609f +Author: hjl +Date: Mon Feb 26 15:29:30 2018 +0000 + + i386: Update -mfunction-return= for return with pop + + When -mfunction-return= is used, simple_return_pop_internal should pop + return address into ECX register, adjust stack by bytes to pop from stack + and jump to the return thunk via ECX register. + + Tested on i686 and x86-64. + + PR target/84530 + * config/i386/i386-protos.h (ix86_output_indirect_jmp): Remove + the bool argument. 
+ (ix86_output_indirect_function_return): New prototype. + (ix86_split_simple_return_pop_internal): Likewise. + * config/i386/i386.c (indirect_return_via_cx): New. + (indirect_return_via_cx_bnd): Likewise. + (indirect_thunk_name): Handle return va CX_REG. + (output_indirect_thunk_function): Create alias for + __x86_return_thunk_[re]cx and __x86_return_thunk_[re]cx_bnd. + (ix86_output_indirect_jmp): Remove the bool argument. + (ix86_output_indirect_function_return): New function. + (ix86_split_simple_return_pop_internal): Likewise. + * config/i386/i386.md (*indirect_jump): Don't pass false + to ix86_output_indirect_jmp. + (*tablejump_1): Likewise. + (simple_return_pop_internal): Change it to define_insn_and_split. + Call ix86_split_simple_return_pop_internal to split it for + -mfunction-return=. + (simple_return_indirect_internal): Call + ix86_output_indirect_function_return instead of + ix86_output_indirect_jmp. + + gcc/testsuite/ + + PR target/84530 + * gcc.target/i386/ret-thunk-22.c: New test. + * gcc.target/i386/ret-thunk-23.c: Likewise. + * gcc.target/i386/ret-thunk-24.c: Likewise. + * gcc.target/i386/ret-thunk-25.c: Likewise. + * gcc.target/i386/ret-thunk-26.c: Likewise. + + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@257992 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h +index 4e4b2100f79..394d4aebf96 100644 +--- a/gcc/config/i386/i386-protos.h ++++ b/gcc/config/i386/i386-protos.h +@@ -306,8 +306,10 @@ extern enum attr_cpu ix86_schedule; + #endif + + extern const char * ix86_output_call_insn (rtx insn, rtx call_op); +-extern const char * ix86_output_indirect_jmp (rtx call_op, bool ret_p); ++extern const char * ix86_output_indirect_jmp (rtx call_op); + extern const char * ix86_output_function_return (bool long_p); ++extern const char * ix86_output_indirect_function_return (rtx ret_op); ++extern void ix86_split_simple_return_pop_internal (rtx); + + #ifdef RTX_CODE + /* Target data for multipass lookahead scheduling. +diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +index c25d26ca826..a8238a001ee 100644 +--- a/gcc/config/i386/i386.c ++++ b/gcc/config/i386/i386.c +@@ -8777,6 +8777,9 @@ static bool indirect_thunk_needed = false; + by call and return thunks functions. */ + static int indirect_thunks_used; + ++/* True if return thunk function via CX is needed. */ ++static bool indirect_return_via_cx; ++ + #ifndef INDIRECT_LABEL + # define INDIRECT_LABEL "LIND" + #endif +@@ -8786,26 +8789,29 @@ static int indirect_thunks_used; + static void + indirect_thunk_name (char name[32], int regno, bool ret_p) + { +- if (regno >= 0 && ret_p) ++ if (regno != INVALID_REGNUM && regno != CX_REG && ret_p) + gcc_unreachable (); + + if (USE_HIDDEN_LINKONCE) + { +- if (regno >= 0) ++ const char *prefix; ++ ++ prefix = ""; ++ ++ const char *ret = ret_p ? "return" : "indirect"; ++ ++ if (regno != INVALID_REGNUM) + { + const char *reg_prefix; + if (LEGACY_INT_REGNO_P (regno)) + reg_prefix = TARGET_64BIT ? "r" : "e"; + else + reg_prefix = ""; +- sprintf (name, "__x86_indirect_thunk_%s%s", +- reg_prefix, reg_names[regno]); ++ sprintf (name, "__x86_%s_thunk%s_%s%s", ++ ret, prefix, reg_prefix, reg_names[regno]); + } + else +- { +- const char *ret = ret_p ? 
"return" : "indirect"; +- sprintf (name, "__x86_%s_thunk", ret); +- } ++ sprintf (name, "__x86_%s_thunk%s", ret, prefix); + } + else + { +@@ -8947,9 +8953,18 @@ output_indirect_thunk_function (int regno) + ASM_OUTPUT_LABEL (asm_out_file, name); + } + +- if (regno < 0) ++ /* Create alias for __x86_return_thunk or ++ __x86_return_thunk_ecx. */ ++ bool need_alias; ++ if (regno == INVALID_REGNUM) ++ need_alias = true; ++ else if (regno == CX_REG) ++ need_alias = indirect_return_via_cx; ++ else ++ need_alias = false; ++ ++ if (need_alias) + { +- /* Create alias for __x86.return_thunk/__x86.return_thunk_bnd. */ + char alias[32]; + + indirect_thunk_name (alias, regno, true); +@@ -24704,21 +24719,21 @@ ix86_output_indirect_branch (rtx call_op, const char *xasm, + else + ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p); + } ++ + /* Output indirect jump. CALL_OP is the jump target. Jump is a + function return if RET_P is true. */ + + const char * +-ix86_output_indirect_jmp (rtx call_op, bool ret_p) ++ix86_output_indirect_jmp (rtx call_op) + { + if (cfun->machine->indirect_branch_type != indirect_branch_keep) + { + struct ix86_frame frame; + ix86_compute_frame_layout (&frame); + +- /* We can't have red-zone if this isn't a function return since +- "call" in the indirect thunk pushes the return address onto +- stack, destroying red-zone. */ +- if (!ret_p && frame.red_zone_size != 0) ++ /* We can't have red-zone since "call" in the indirect thunk ++ pushes the return address onto the stack, destroying the red-zone. */ ++ if (frame.red_zone_size != 0) + gcc_unreachable (); + + ix86_output_indirect_branch (call_op, "%0", true); +@@ -24759,6 +24774,75 @@ ix86_output_function_return (bool long_p) + return "rep%; ret"; + } + ++/* Output indirect function return. RET_OP is the function return ++ target. */ ++ ++const char * ++ix86_output_indirect_function_return (rtx ret_op) ++{ ++ if (cfun->machine->function_return_type != indirect_branch_keep) ++ { ++ char thunk_name[32]; ++ enum indirect_thunk_prefix need_prefix ++ = indirect_thunk_need_prefix (current_output_insn); ++ unsigned int regno = REGNO (ret_op); ++ gcc_assert (regno == CX_REG); ++ ++ if (cfun->machine->function_return_type ++ != indirect_branch_thunk_inline) ++ { ++ bool need_thunk = (cfun->machine->function_return_type ++ == indirect_branch_thunk); ++ indirect_thunk_name (thunk_name, regno, need_prefix, true); ++ if (need_thunk) ++ { ++ indirect_return_via_cx = true; ++ indirect_thunks_used |= 1 << CX_REG; ++ } ++ fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name); ++ } ++ else ++ output_indirect_thunk (need_prefix, regno); ++ ++ return ""; ++ } ++ else ++ return "jmp\t%A0"; ++} ++ ++/* Split simple return with popping POPC bytes from stack to indirect ++ branch with stack adjustment . */ ++ ++void ++ix86_split_simple_return_pop_internal (rtx popc) ++{ ++ struct machine_function *m = cfun->machine; ++ rtx ecx = gen_rtx_REG (SImode, CX_REG); ++ rtx insn; ++ ++ /* There is no "pascal" calling convention in any 64bit ABI. 
*/ ++ gcc_assert (!TARGET_64BIT); ++ ++ insn = emit_insn (gen_pop (ecx)); ++ m->fs.cfa_offset -= UNITS_PER_WORD; ++ m->fs.sp_offset -= UNITS_PER_WORD; ++ ++ rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); ++ x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x); ++ add_reg_note (insn, REG_CFA_ADJUST_CFA, x); ++ add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (VOIDmode, ecx, pc_rtx)); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ ++ x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, popc); ++ x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x); ++ insn = emit_insn (x); ++ add_reg_note (insn, REG_CFA_ADJUST_CFA, x); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ ++ /* Now return address is in ECX. */ ++ emit_jump_insn (gen_simple_return_indirect_internal (ecx)); ++} ++ + /* Output the assembly for a call instruction. */ + + const char * +diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md +index 228f8f6d77a..3320ec233d2 100644 +--- a/gcc/config/i386/i386.md ++++ b/gcc/config/i386/i386.md +@@ -11282,7 +11282,7 @@ + (define_insn "*indirect_jump" + [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rw"))] + "" +- "* return ix86_output_indirect_jmp (operands[0], false);" ++ "* return ix86_output_indirect_jmp (operands[0]);" + [(set (attr "type") + (if_then_else (match_test "(cfun->machine->indirect_branch_type + != indirect_branch_keep)") +@@ -11336,7 +11336,7 @@ + [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rw")) + (use (label_ref (match_operand 1)))] + "" +- "* return ix86_output_indirect_jmp (operands[0], false);" ++ "* return ix86_output_indirect_jmp (operands[0]);" + [(set (attr "type") + (if_then_else (match_test "(cfun->machine->indirect_branch_type + != indirect_branch_keep)") +@@ -11769,11 +11769,14 @@ + (set_attr "prefix_rep" "1") + (set_attr "modrm" "0")]) + +-(define_insn "simple_return_pop_internal" ++(define_insn_and_split "simple_return_pop_internal" + [(simple_return) + (use (match_operand:SI 0 "const_int_operand"))] + "reload_completed" + "ret\t%0" ++ "&& cfun->machine->function_return_type != indirect_branch_keep" ++ [(const_int 0)] ++ "ix86_split_simple_return_pop_internal (operands[0]); DONE;" + [(set_attr "length" "3") + (set_attr "atom_unit" "jeu") + (set_attr "length_immediate" "2") +@@ -11783,7 +11786,7 @@ + [(simple_return) + (use (match_operand:SI 0 "register_operand" "r"))] + "reload_completed" +- "* return ix86_output_indirect_jmp (operands[0], true);" ++ "* return ix86_output_indirect_function_return (operands[0]);" + [(set (attr "type") + (if_then_else (match_test "(cfun->machine->indirect_branch_type + != indirect_branch_keep)") +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-22.c b/gcc/testsuite/gcc.target/i386/ret-thunk-22.c +new file mode 100644 +index 00000000000..89e086de97b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-22.c +@@ -0,0 +1,15 @@ ++/* PR target/r84530 */ ++/* { dg-do compile { target ia32 } } */ ++/* { dg-options "-O2 -mfunction-return=thunk" } */ ++ ++struct s { _Complex unsigned short x; }; ++struct s gs = { 100 + 200i }; ++struct s __attribute__((noinline)) foo (void) { return gs; } ++ ++/* { dg-final { scan-assembler-times "popl\[\\t \]*%ecx" 1 } } */ ++/* { dg-final { scan-assembler "lea\[l\]?\[\\t \]*4\\(%esp\\), %esp" } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_return_thunk_ecx" } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler {\tpause} } } */ ++/* { dg-final { scan-assembler 
{\tlfence} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-23.c b/gcc/testsuite/gcc.target/i386/ret-thunk-23.c +new file mode 100644 +index 00000000000..43f0ccaa854 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-23.c +@@ -0,0 +1,15 @@ ++/* PR target/r84530 */ ++/* { dg-do compile { target ia32 } } */ ++/* { dg-options "-O2 -mfunction-return=thunk-extern" } */ ++ ++struct s { _Complex unsigned short x; }; ++struct s gs = { 100 + 200i }; ++struct s __attribute__((noinline)) foo (void) { return gs; } ++ ++/* { dg-final { scan-assembler-times "popl\[\\t \]*%ecx" 1 } } */ ++/* { dg-final { scan-assembler "lea\[l\]?\[\\t \]*4\\(%esp\\), %esp" } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_return_thunk_ecx" } } */ ++/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler-not {\tpause} } } */ ++/* { dg-final { scan-assembler-not {\tlfence} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-24.c b/gcc/testsuite/gcc.target/i386/ret-thunk-24.c +new file mode 100644 +index 00000000000..8729e35147e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-24.c +@@ -0,0 +1,15 @@ ++/* PR target/r84530 */ ++/* { dg-do compile { target ia32 } } */ ++/* { dg-options "-O2 -mfunction-return=thunk-inline" } */ ++ ++struct s { _Complex unsigned short x; }; ++struct s gs = { 100 + 200i }; ++struct s __attribute__((noinline)) foo (void) { return gs; } ++ ++/* { dg-final { scan-assembler-times "popl\[\\t \]*%ecx" 1 } } */ ++/* { dg-final { scan-assembler "lea\[l\]?\[\\t \]*4\\(%esp\\), %esp" } } */ ++/* { dg-final { scan-assembler-not "jmp\[ \t\]*__x86_return_thunk_ecx" } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler {\tpause} } } */ ++/* { dg-final { scan-assembler {\tlfence} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-25.c b/gcc/testsuite/gcc.target/i386/ret-thunk-25.c +new file mode 100644 +index 00000000000..f73553c9a9f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-25.c +@@ -0,0 +1,14 @@ ++/* PR target/r84530 */ ++/* { dg-do compile { target ia32 } } */ ++/* { dg-options "-O2 -mfunction-return=thunk -fno-pic" } */ ++ ++struct s { _Complex unsigned short x; }; ++struct s gs = { 100 + 200i }; ++struct s __attribute__((noinline)) foo (void) { return gs; } ++ ++/* { dg-final { scan-assembler-times "popl\[\\t \]*%ecx" 1 } } */ ++/* { dg-final { scan-assembler "lea\[l\]?\[\\t \]*4\\(%esp\\), %esp" } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler {\tpause} } } */ ++/* { dg-final { scan-assembler {\tlfence} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-26.c b/gcc/testsuite/gcc.target/i386/ret-thunk-26.c +new file mode 100644 +index 00000000000..9144e988735 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-26.c +@@ -0,0 +1,40 @@ ++/* PR target/r84530 */ ++/* { dg-do run } */ ++/* { dg-options "-Os -mfunction-return=thunk" } */ ++ ++struct S { int i; }; ++__attribute__((const, noinline, noclone)) ++struct S foo (int x) ++{ ++ struct S s; ++ s.i = x; ++ return s; ++} ++ ++int a[2048], b[2048], c[2048], d[2048]; ++struct S e[2048]; ++ ++__attribute__((noinline, noclone)) void ++bar (void) ++{ ++ int i; ++ for (i = 0; i < 1024; i++) ++ { ++ e[i] = foo (i); ++ a[i+2] = a[i] + a[i+1]; ++ 
b[10] = b[10] + i; ++ c[i] = c[2047 - i]; ++ d[i] = d[i + 1]; ++ } ++} ++ ++int ++main () ++{ ++ int i; ++ bar (); ++ for (i = 0; i < 1024; i++) ++ if (e[i].i != i) ++ __builtin_abort (); ++ return 0; ++} diff --git a/gcc48-rh1537828-8.patch b/gcc48-rh1537828-8.patch new file mode 100644 index 0000000..4f309a7 --- /dev/null +++ b/gcc48-rh1537828-8.patch @@ -0,0 +1,175 @@ +diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +index a8238a001ee..34f27c597a2 100644 +--- a/gcc/config/i386/i386.c ++++ b/gcc/config/i386/i386.c +@@ -8770,13 +8770,16 @@ ix86_setup_frame_addresses (void) + labels in call and return thunks. */ + static int indirectlabelno; + +-/* True if call and return thunk functions are needed. */ ++/* True if call thunk function is needed. */ + static bool indirect_thunk_needed = false; + + /* Bit masks of integer registers, which contain branch target, used +- by call and return thunks functions. */ ++ by call thunk functions. */ + static int indirect_thunks_used; + ++/* True if return thunk function is needed. */ ++static bool indirect_return_needed = false; ++ + /* True if return thunk function via CX is needed. */ + static bool indirect_return_via_cx; + +@@ -8899,17 +8902,19 @@ output_indirect_thunk (int regno) + } + + /* Output a funtion with a call and return thunk for indirect branch. +- If REGNO != -1, the function address is in REGNO. Otherwise, the +- function address is on the top of stack. */ ++ If REGNO != UNVALID_REGNUM, ++ the function address is in REGNO. Otherwise, the function address is ++ on the top of stack. Thunk is used for function return if RET_P is ++ true. */ + + static void +-output_indirect_thunk_function (int regno) ++output_indirect_thunk_function (unsigned int regno, bool ret_p) + { + char name[32]; + tree decl; + + /* Create __x86_indirect_thunk. */ +- indirect_thunk_name (name, regno, false); ++ indirect_thunk_name (name, regno, ret_p); + decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, + get_identifier (name), + build_function_type_list (void_type_node, NULL_TREE)); +@@ -8953,45 +8958,6 @@ output_indirect_thunk_function (int regno) + ASM_OUTPUT_LABEL (asm_out_file, name); + } + +- /* Create alias for __x86_return_thunk or +- __x86_return_thunk_ecx. 
*/ +- bool need_alias; +- if (regno == INVALID_REGNUM) +- need_alias = true; +- else if (regno == CX_REG) +- need_alias = indirect_return_via_cx; +- else +- need_alias = false; +- +- if (need_alias) +- { +- char alias[32]; +- +- indirect_thunk_name (alias, regno, true); +-#if TARGET_MACHO +- if (TARGET_MACHO) +- { +- fputs ("\t.weak_definition\t", asm_out_file); +- assemble_name (asm_out_file, alias); +- fputs ("\n\t.private_extern\t", asm_out_file); +- assemble_name (asm_out_file, alias); +- putc ('\n', asm_out_file); +- ASM_OUTPUT_LABEL (asm_out_file, alias); +- } +-#else +- ASM_OUTPUT_DEF (asm_out_file, alias, name); +- if (USE_HIDDEN_LINKONCE) +- { +- fputs ("\t.globl\t", asm_out_file); +- assemble_name (asm_out_file, alias); +- putc ('\n', asm_out_file); +- fputs ("\t.hidden\t", asm_out_file); +- assemble_name (asm_out_file, alias); +- putc ('\n', asm_out_file); +- } +-#endif +- } +- + DECL_INITIAL (decl) = make_node (BLOCK); + current_function_decl = decl; + allocate_struct_function (decl, false); +@@ -9038,14 +9004,19 @@ ix86_code_end (void) + rtx xops[2]; + int regno; + ++ if (indirect_return_needed) ++ output_indirect_thunk_function (INVALID_REGNUM, true); ++ if (indirect_return_via_cx) ++ output_indirect_thunk_function (CX_REG, true); + if (indirect_thunk_needed) +- output_indirect_thunk_function (-1); ++ output_indirect_thunk_function (INVALID_REGNUM, false); + + for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++) + { + int i = regno - FIRST_REX_INT_REG + LAST_INT_REG + 1; + if ((indirect_thunks_used & (1 << i))) +- output_indirect_thunk_function (regno); ++ output_indirect_thunk_function (regno, false); ++ + } + + for (regno = AX_REG; regno <= SP_REG; regno++) +@@ -9054,7 +9025,7 @@ ix86_code_end (void) + tree decl; + + if ((indirect_thunks_used & (1 << regno))) +- output_indirect_thunk_function (regno); ++ output_indirect_thunk_function (regno, false); + + if (!(pic_labels_used & (1 << regno))) + continue; +@@ -24758,8 +24729,8 @@ ix86_output_function_return (bool long_p) + { + bool need_thunk = (cfun->machine->function_return_type + == indirect_branch_thunk); +- indirect_thunk_name (thunk_name, -1, true); +- indirect_thunk_needed |= need_thunk; ++ indirect_thunk_name (thunk_name, INVALID_REGNUM, true); ++ indirect_return_needed |= need_thunk; + fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name); + } + else +@@ -24783,8 +24754,6 @@ ix86_output_indirect_function_return (rtx ret_op) + if (cfun->machine->function_return_type != indirect_branch_keep) + { + char thunk_name[32]; +- enum indirect_thunk_prefix need_prefix +- = indirect_thunk_need_prefix (current_output_insn); + unsigned int regno = REGNO (ret_op); + gcc_assert (regno == CX_REG); + +@@ -24793,7 +24762,7 @@ ix86_output_indirect_function_return (rtx ret_op) + { + bool need_thunk = (cfun->machine->function_return_type + == indirect_branch_thunk); +- indirect_thunk_name (thunk_name, regno, need_prefix, true); ++ indirect_thunk_name (thunk_name, regno, true); + if (need_thunk) + { + indirect_return_via_cx = true; +@@ -24802,7 +24771,7 @@ ix86_output_indirect_function_return (rtx ret_op) + fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name); + } + else +- output_indirect_thunk (need_prefix, regno); ++ output_indirect_thunk (regno); + + return ""; + } +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-9.c b/gcc/testsuite/gcc.target/i386/ret-thunk-9.c +index d1db41cc128..a605c26c46f 100644 +--- a/gcc/testsuite/gcc.target/i386/ret-thunk-9.c ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-9.c +@@ -13,7 +13,7 @@ 
foo (void) + /* { dg-final { scan-assembler "jmp\[ \t\]*__x86_return_thunk" } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ +-/* { dg-final { scan-assembler "__x86_indirect_thunk:" } } */ ++/* { dg-final { scan-assembler "__x86_return_thunk:" } } */ + /* { dg-final { scan-assembler-times {\tpause} 1 { target { ! x32 } } } } */ + /* { dg-final { scan-assembler-times {\tlfence} 1 { target { ! x32 } } } } */ + /* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?bar" { target { { ! x32 } && *-*-linux* } } } } */ diff --git a/gcc48-rh1537828-9.patch b/gcc48-rh1537828-9.patch new file mode 100644 index 0000000..7d26b7e --- /dev/null +++ b/gcc48-rh1537828-9.patch @@ -0,0 +1,516 @@ +diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md +index 43faabb7e87..328a90f45d1 100644 +--- a/gcc/config/i386/constraints.md ++++ b/gcc/config/i386/constraints.md +@@ -135,7 +135,7 @@ + + (define_constraint "w" + "@internal Call memory operand." +- (and (not (match_test "ix86_indirect_branch_register")) ++ (and (not (match_test "TARGET_INDIRECT_BRANCH_REGISTER")) + (not (match_test "TARGET_X32")) + (match_operand 0 "memory_operand"))) + +diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h +index 8ff702615b6..95206478001 100644 +--- a/gcc/config/i386/i386.h ++++ b/gcc/config/i386/i386.h +@@ -2413,6 +2413,10 @@ extern void debug_dispatch_window (int); + #define TARGET_RECIP_VEC_DIV ((recip_mask & RECIP_MASK_VEC_DIV) != 0) + #define TARGET_RECIP_VEC_SQRT ((recip_mask & RECIP_MASK_VEC_SQRT) != 0) + ++#define TARGET_INDIRECT_BRANCH_REGISTER \ ++ (ix86_indirect_branch_register \ ++ || cfun->machine->indirect_branch_type != indirect_branch_keep) ++ + #define IX86_HLE_ACQUIRE (1 << 16) + #define IX86_HLE_RELEASE (1 << 17) + +diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md +index 3320ec233d2..7a83d079bfc 100644 +--- a/gcc/config/i386/i386.md ++++ b/gcc/config/i386/i386.md +@@ -11274,7 +11274,7 @@ + [(set (pc) (match_operand 0 "indirect_branch_operand"))] + "" + { +- if (TARGET_X32 || ix86_indirect_branch_register) ++ if (TARGET_X32 || TARGET_INDIRECT_BRANCH_REGISTER) + operands[0] = convert_memory_address (word_mode, operands[0]); + cfun->machine->has_local_indirect_jump = true; + }) +@@ -11327,7 +11327,7 @@ + OPTAB_DIRECT); + } + +- if (TARGET_X32 || ix86_indirect_branch_register) ++ if (TARGET_X32 || TARGET_INDIRECT_BRANCH_REGISTER) + operands[0] = convert_memory_address (word_mode, operands[0]); + cfun->machine->has_local_indirect_jump = true; + }) +diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md +index 6c7a593084c..f6cf50ad6f5 100644 +--- a/gcc/config/i386/predicates.md ++++ b/gcc/config/i386/predicates.md +@@ -540,7 +540,7 @@ + ;; Test for a valid operand for indirect branch. + (define_predicate "indirect_branch_operand" + (ior (match_operand 0 "register_operand") +- (and (not (match_test "ix86_indirect_branch_register")) ++ (and (not (match_test "TARGET_INDIRECT_BRANCH_REGISTER")) + (not (match_test "TARGET_X32")) + (match_operand 0 "memory_operand")))) + +@@ -550,7 +550,7 @@ + (ior (match_test "constant_call_address_operand + (op, mode == VOIDmode ? 
mode : Pmode)") + (match_operand 0 "call_register_no_elim_operand") +- (and (not (match_test "ix86_indirect_branch_register")) ++ (and (not (match_test "TARGET_INDIRECT_BRANCH_REGISTER")) + (and (not (match_test "TARGET_X32")) + (match_operand 0 "memory_operand"))))) + +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-1.c +index 321db770c35..135bc73b9ce 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-1.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-1.c +@@ -14,7 +14,7 @@ male_indirect_jump (long offset) + /* Our gcc-4.8 based compiler is not as aggressive at sibcalls + where the target is in a MEM. Thus we have to scan for different + patterns here than in newer compilers. */ +-/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" } } */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" { target *-*-linux* } } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-2.c +index d58451660f8..867df67143b 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-2.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-2.c +@@ -14,7 +14,7 @@ male_indirect_jump (long offset) + /* Our gcc-4.8 based compiler is not as aggressive at sibcalls + where the target is in a MEM. Thus we have to scan for different + patterns here than in newer compilers. */ +-/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" } } */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" { target *-*-linux* } } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-3.c +index 9e24a385387..2c7fb52b59d 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-3.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-3.c +@@ -12,9 +12,8 @@ male_indirect_jump (long offset) + return 0; + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { { ! x32 } && *-*-linux* } } } } */ +-/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ +-/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" { target *-*-linux* } } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler {\tpause} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-4.c +index 127b5d94523..0d3f895009d 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-4.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-4.c +@@ -12,9 +12,8 @@ male_indirect_jump (long offset) + return 0; + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { { ! x32 } && *-*-linux* } } } } */ +-/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! 
x32 } } } } */ +-/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" { target *-*-linux* } } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler {\tpause} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c +index 17c2d0faf88..3c72036dbaf 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c +@@ -35,9 +35,8 @@ bar (int i) + } + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*\.L\[0-9\]+\\(,%" { target { { ! x32 } && *-*-linux* } } } } */ +-/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ +-/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*\.L\[0-9\]+\\(,%" { target *-*-linux* } } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler {\tpause} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-1.c +index cd7e8d78199..e20816781f9 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-1.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-1.c +@@ -17,7 +17,7 @@ male_indirect_jump (long offset) + /* Our gcc-4.8 based compiler is not as aggressive at sibcalls + where the target is in a MEM. Thus we have to scan for different + patterns here than in newer compilers. */ +-/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" } } */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" { target *-*-linux* } } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-2.c +index 4dbd7a5e5d3..0f30d74ee37 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-2.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-2.c +@@ -15,7 +15,7 @@ male_indirect_jump (long offset) + /* Our gcc-4.8 based compiler is not as aggressive at sibcalls + where the target is in a MEM. Thus we have to scan for different + patterns here than in newer compilers. 
*/ +-/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" } } */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" { target *-*-linux* } } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-3.c +index 4aeec1833cd..89a2bac8403 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-3.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-3.c +@@ -14,10 +14,9 @@ male_indirect_jump (long offset) + return 0; + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { { ! x32 } && *-*-linux* } } } } */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" { target *-*-linux* } } } */ + /* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 2 } } */ + /* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 2 } } */ + /* { dg-final { scan-assembler {\tpause} } } */ + /* { dg-final { scan-assembler {\tlfence} } } */ + /* { dg-final { scan-assembler-not "__x86_indirect_thunk" } } */ +-/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-4.c +index ac0e5999f63..3eb83c3779a 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-4.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-4.c +@@ -13,10 +13,9 @@ male_indirect_jump (long offset) + return 0; + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { { ! x32 } && *-*-linux* } } } } */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" { target *-*-linux* } } } */ + /* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 2 } } */ + /* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 2 } } */ + /* { dg-final { scan-assembler {\tpause} } } */ + /* { dg-final { scan-assembler {\tlfence} } } */ + /* { dg-final { scan-assembler-not "__x86_indirect_thunk" } } */ +-/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-5.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-5.c +index 573cf1ef09e..0098dd1133d 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-5.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-5.c +@@ -14,9 +14,8 @@ male_indirect_jump (long offset) + return 0; + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { { ! x32 } && *-*-linux* } } } } */ +-/* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */ +-/* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */ +-/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! 
x32 } } } } */ +-/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" { target *-*-linux* } } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ + /* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */ ++/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-6.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-6.c +index b2b37fc6e2e..ece8de15a4b 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-6.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-6.c +@@ -13,9 +13,8 @@ male_indirect_jump (long offset) + return 0; + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { { ! x32 } && *-*-linux* } } } } */ +-/* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */ +-/* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */ +-/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ +-/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" { target *-*-linux* } } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ + /* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */ ++/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-7.c +index 4a43e199931..d53fc887dcc 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-7.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-7.c +@@ -36,9 +36,8 @@ bar (int i) + } + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*\.L\[0-9\]+\\(,%" { target { { ! x32 } && *-*-linux* } } } } */ +-/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */ +-/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" } } */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*\.L\[0-9\]+\\(,%" { target *-*-linux* } } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ + /* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */ + /* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-1.c +index 72de88e04aa..1f78b07f84a 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-1.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-1.c +@@ -14,7 +14,7 @@ male_indirect_jump (long offset) + /* Our gcc-4.8 based compiler is not as aggressive at sibcalls + where the target is in a MEM. Thus we have to scan for different + patterns here than in newer compilers. 
*/ +-/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" } } */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" { target *-*-linux* } } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ + /* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */ + /* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-2.c +index d4137b38a1e..5397a5874aa 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-2.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-2.c +@@ -14,7 +14,7 @@ male_indirect_jump (long offset) + /* Our gcc-4.8 based compiler is not as aggressive at sibcalls + where the target is in a MEM. Thus we have to scan for different + patterns here than in newer compilers. */ +-/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" } } */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" { target *-*-linux* } } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ + /* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */ + /* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-3.c +index d9964c25bbd..385626850a2 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-3.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-3.c +@@ -12,9 +12,8 @@ male_indirect_jump (long offset) + return 0; + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { { ! x32 } && *-*-linux* } } } } */ +-/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ +-/* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */ +-/* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */ +-/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" { target *-*-linux* } } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ + /* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */ ++/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */ ++/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-4.c +index d4dca4dc5fe..1ae49b137ca 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-4.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-4.c +@@ -12,9 +12,7 @@ male_indirect_jump (long offset) + return 0; + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { { ! x32 } && *-*-linux* } } } } */ +-/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ +-/* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */ +-/* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 1 { target { ! 
x32 } } } } */ + /* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" { target *-*-linux* } } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ + /* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c +index aece9383697..2b9a33e93dc 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c +@@ -35,9 +35,8 @@ bar (int i) + } + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*\.L\[0-9\]+\\(,%" { target { { ! x32 } && *-*-linux* } } } } */ +-/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ +-/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*\.L\[0-9\]+\\(,%" { target *-*-linux* } } } */ ++/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ + /* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */ + /* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-1.c +index e3cea3fa3c2..dbda34ab038 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-1.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-1.c +@@ -14,7 +14,7 @@ male_indirect_jump (long offset) + /* Our gcc-4.8 based compiler is not as aggressive at sibcalls + where the target is in a MEM. Thus we have to scan for different + patterns here than in newer compilers. */ +-/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" } } */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" { target *-*-linux* } } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler {\tpause} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-2.c +index 62229969c90..810824666ef 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-2.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-2.c +@@ -14,7 +14,7 @@ male_indirect_jump (long offset) + /* Our gcc-4.8 based compiler is not as aggressive at sibcalls + where the target is in a MEM. Thus we have to scan for different + patterns here than in newer compilers. */ +-/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" } } */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" { target *-*-linux* } } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler {\tpause} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-3.c +index 2eef6f35a75..4a63ebed8ab 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-3.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-3.c +@@ -12,7 +12,7 @@ male_indirect_jump (long offset) + return 0; + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { { ! 
x32 } && *-*-linux* } } } } */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" { target *-*-linux* } } } */ + /* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 2 } } */ + /* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 2 } } */ + /* { dg-final { scan-assembler-times {\tpause} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-4.c +index e825a10f14c..a395ffca018 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-4.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-4.c +@@ -12,7 +12,7 @@ male_indirect_jump (long offset) + return 0; + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { { ! x32 } && *-*-linux* } } } } */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?dispatch" { target *-*-linux* } } } */ + /* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 2 } } */ + /* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 2 } } */ + /* { dg-final { scan-assembler-times {\tpause} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c +index c67066cf197..ea009245a58 100644 +--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c ++++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c +@@ -35,8 +35,8 @@ bar (int i) + } + } + +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*\.L\[0-9\]+\\(,%" { target { { ! x32 } && *-*-linux* } } } } */ +-/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*\.L\[0-9\]+\\(,%" { target *-*-linux* } } } */ ++/* { dg-final { scan-assembler-not "pushq\[ \t\]%(r|e)ax" } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler {\tpause} } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-10.c b/gcc/testsuite/gcc.target/i386/ret-thunk-10.c +index e6fea84a4d9..af9023af613 100644 +--- a/gcc/testsuite/gcc.target/i386/ret-thunk-10.c ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-10.c +@@ -15,9 +15,6 @@ foo (void) + /* { dg-final { scan-assembler-not "jmp\[ \t\]*__x86_return_thunk" } } */ + /* { dg-final { scan-assembler-times {\tpause} 2 } } */ + /* { dg-final { scan-assembler-times {\tlfence} 2 } } */ +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?bar" { target { { ! x32 } && *-*-linux* } } } } */ +-/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ +-/* { dg-final { scan-assembler "__x86_indirect_thunk:" { target { ! 
x32 } } } } */ +-/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target { x32 } } } } */ +-/* { dg-final { scan-assembler "__x86_indirect_thunk_(r|e)ax:" { target { x32 } } } } */ +-/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ ++/* { dg-final { scan-assembler "__x86_indirect_thunk_(r|e)ax:" } } */ ++/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-11.c b/gcc/testsuite/gcc.target/i386/ret-thunk-11.c +index e239ec4542f..ba467c59b36 100644 +--- a/gcc/testsuite/gcc.target/i386/ret-thunk-11.c ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-11.c +@@ -15,9 +15,6 @@ foo (void) + /* { dg-final { scan-assembler-times {\tlfence} 1 } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?bar" { target { { ! x32 } && *-*-linux* } } } } */ +-/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ +-/* { dg-final { scan-assembler "__x86_indirect_thunk:" { target { ! x32 } } } } */ +-/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target { x32 } } } } */ +-/* { dg-final { scan-assembler "__x86_indirect_thunk_(r|e)ax:" { target { x32 } } } } */ +-/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ ++/* { dg-final { scan-assembler "__x86_indirect_thunk_(r|e)ax:" } } */ ++/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-12.c b/gcc/testsuite/gcc.target/i386/ret-thunk-12.c +index fa3181303c9..43e57cac2c3 100644 +--- a/gcc/testsuite/gcc.target/i386/ret-thunk-12.c ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-12.c +@@ -15,8 +15,6 @@ foo (void) + /* { dg-final { scan-assembler-times {\tlfence} 1 } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ +-/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ +-/* { dg-final { scan-assembler "__x86_indirect_thunk:" { target { ! x32 } } } } */ +-/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target { x32 } } } } */ +-/* { dg-final { scan-assembler "__x86_indirect_thunk_(r|e)ax:" { target { x32 } } } } */ +-/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ ++/* { dg-final { scan-assembler "__x86_indirect_thunk_(r|e)ax:" } } */ ++/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-13.c b/gcc/testsuite/gcc.target/i386/ret-thunk-13.c +index fd5b41fdd3f..55f156c4376 100644 +--- a/gcc/testsuite/gcc.target/i386/ret-thunk-13.c ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-13.c +@@ -14,9 +14,8 @@ foo (void) + /* { dg-final { scan-assembler "jmp\[ \t\]*__x86_return_thunk" } } */ + /* { dg-final { scan-assembler-times {\tpause} 2 } } */ + /* { dg-final { scan-assembler-times {\tlfence} 2 } } */ +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?bar" { target { { ! 
x32 } && *-*-linux* } } } } */ + /* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 3 } } */ + /* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 3 } } */ + /* { dg-final { scan-assembler-not "jmp\[ \t\]*__x86_indirect_thunk" } } */ +-/* { dg-final { scan-assembler-not "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target { x32 } } } } */ +-/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */ ++/* { dg-final { scan-assembler-not "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ ++/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-14.c b/gcc/testsuite/gcc.target/i386/ret-thunk-14.c +index d606373ead1..1c790436a53 100644 +--- a/gcc/testsuite/gcc.target/i386/ret-thunk-14.c ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-14.c +@@ -16,7 +16,6 @@ foo (void) + /* { dg-final { scan-assembler-not "jmp\[ \t\]*__x86_return_thunk" } } */ + /* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?bar" { target { { ! x32 } && *-*-linux* } } } } */ +-/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ +-/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target { x32 } } } } */ +-/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?bar" { target *-*-linux* } } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ ++/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-15.c b/gcc/testsuite/gcc.target/i386/ret-thunk-15.c +index 75e45e226b8..58aba319cba 100644 +--- a/gcc/testsuite/gcc.target/i386/ret-thunk-15.c ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-15.c +@@ -16,7 +16,6 @@ foo (void) + /* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler-times {\tpause} 1 } } */ + /* { dg-final { scan-assembler-times {\tlfence} 1 } } */ +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?bar" { target { { ! x32 } && *-*-linux* } } } } */ +-/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */ +-/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */ +-/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?bar" { target *-*-linux* } } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ ++/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-9.c b/gcc/testsuite/gcc.target/i386/ret-thunk-9.c +index a605c26c46f..eee230ca2f6 100644 +--- a/gcc/testsuite/gcc.target/i386/ret-thunk-9.c ++++ b/gcc/testsuite/gcc.target/i386/ret-thunk-9.c +@@ -14,11 +14,8 @@ foo (void) + /* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */ + /* { dg-final { scan-assembler "__x86_return_thunk:" } } */ +-/* { dg-final { scan-assembler-times {\tpause} 1 { target { ! x32 } } } } */ +-/* { dg-final { scan-assembler-times {\tlfence} 1 { target { ! x32 } } } } */ +-/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?bar" { target { { ! x32 } && *-*-linux* } } } } */ +-/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! 
x32 } } } } */ +-/* { dg-final { scan-assembler-times {\tpause} 2 { target { x32 } } } } */ +-/* { dg-final { scan-assembler-times {\tlfence} 2 { target { x32 } } } } */ +-/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target { x32 } } } } */ +-/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */ ++/* { dg-final { scan-assembler "mov(?:l|q)\[ \t\]*_?bar" { target *-*-linux* } } } */ ++/* { dg-final { scan-assembler-times {\tpause} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlfence} 2 } } */ ++/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" } } */ ++/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" } } */ diff --git a/gcc48-rh1546372.patch b/gcc48-rh1546372.patch new file mode 100644 index 0000000..7fb0c3f --- /dev/null +++ b/gcc48-rh1546372.patch @@ -0,0 +1,162 @@ + * cif-code.def: Add NEVER_EXECUTED. + * ipa-inline-analysis.c (reset_inline_summary, + compute_inline_parameters, estimate_calls_size_and_time, + inline_update_overall_summary): Track number of calls. + (never_executed_edge_p): New predicate. + * ipa-inline.c (want_inline_self_recursive_call_p): do not inline + recursively for calls that are not going to be executed. + (inline_small_functions): Do not inline never exeucted edge if callee + has too many calls. + * ipa-inline.h (inline_summary): Add num calls. + (never_executed_edge_p): New. + +--- gcc/cif-code.def (revision 257016) ++++ gcc/cif-code.def (working copy) +@@ -103,3 +103,6 @@ DEFCIFCODE(TARGET_OPTION_MISMATCH, N_("t + + /* We can't inline because of mismatched optimization levels. */ + DEFCIFCODE(OPTIMIZATION_MISMATCH, N_("optimization level attribute mismatch")) ++ ++/* We know that the call will be optimized out. */ ++DEFCIFCODE(NEVER_EXECUTED, N_("never executed")) +--- gcc/ipa-inline-analysis.c (revision 257016) ++++ gcc/ipa-inline-analysis.c (working copy) +@@ -990,6 +990,7 @@ reset_inline_summary (struct cgraph_node + info->stack_frame_offset = 0; + info->size = 0; + info->time = 0; ++ info->num_calls = 0; + info->growth = 0; + info->scc_no = 0; + if (info->loop_iterations) +@@ -2704,6 +2705,7 @@ compute_inline_parameters (struct cgraph + /* Inlining characteristics are maintained by the cgraph_mark_inline. 
*/ + info->time = info->self_time; + info->size = info->self_size; ++ info->num_calls = 0; + info->stack_frame_offset = 0; + info->estimated_stack_size = info->estimated_self_stack_size; + #ifdef ENABLE_CHECKING +@@ -2816,7 +2818,7 @@ estimate_edge_size_and_time (struct cgra + + static void + estimate_calls_size_and_time (struct cgraph_node *node, int *size, int *time, +- inline_hints *hints, ++ inline_hints *hints, int *num, + clause_t possible_truths, + vec known_vals, + vec known_binfos, +@@ -2826,6 +2828,7 @@ estimate_calls_size_and_time (struct cgr + for (e = node->callees; e; e = e->next_callee) + { + struct inline_edge_summary *es = inline_edge_summary (e); ++ (*num)++; + if (!es->predicate + || evaluate_predicate (es->predicate, possible_truths)) + { +@@ -2838,7 +2841,7 @@ estimate_calls_size_and_time (struct cgr + known_aggs, hints); + } + else +- estimate_calls_size_and_time (e->callee, size, time, hints, ++ estimate_calls_size_and_time (e->callee, size, time, hints, num, + possible_truths, + known_vals, known_binfos, + known_aggs); +@@ -2846,6 +2849,7 @@ estimate_calls_size_and_time (struct cgr + } + for (e = node->indirect_calls; e; e = e->next_callee) + { ++ (*num)++; + struct inline_edge_summary *es = inline_edge_summary (e); + if (!es->predicate + || evaluate_predicate (es->predicate, possible_truths)) +@@ -2936,7 +2940,8 @@ estimate_node_size_and_time (struct cgra + if (DECL_DECLARED_INLINE_P (node->symbol.decl)) + hints |= INLINE_HINT_declared_inline; + +- estimate_calls_size_and_time (node, &size, &time, &hints, possible_truths, ++ int num = 0; ++ estimate_calls_size_and_time (node, &size, &time, &hints, &num, possible_truths, + known_vals, known_binfos, known_aggs); + gcc_checking_assert (size >= 0); + gcc_checking_assert (time >= 0); +@@ -3369,13 +3374,14 @@ inline_update_overall_summary (struct cg + + info->size = 0; + info->time = 0; ++ info->num_calls = 0; + for (i = 0; vec_safe_iterate (info->entry, i, &e); i++) + { + info->size += e->size, info->time += e->time; + if (info->time > MAX_TIME * INLINE_TIME_SCALE) + info->time = MAX_TIME * INLINE_TIME_SCALE; + } +- estimate_calls_size_and_time (node, &info->size, &info->time, NULL, ++ estimate_calls_size_and_time (node, &info->size, &info->time, NULL, &info->num_calls, + ~(clause_t) (1 << predicate_false_condition), + vNULL, vNULL, vNULL); + info->time = (info->time + INLINE_TIME_SCALE / 2) / INLINE_TIME_SCALE; +@@ -3528,6 +3534,14 @@ do_estimate_edge_hints (struct cgraph_ed + return hints; + } + ++/* Return true if edge is never executed. */ ++bool ++never_executed_edge_p (struct cgraph_edge *e) ++{ ++ struct inline_edge_summary *es = inline_edge_summary (e); ++ return es->predicate && false_predicate_p (es->predicate); ++} ++ + + /* Estimate self time of the function NODE after inlining EDGE. 
*/ + +--- gcc/ipa-inline.c (revision 257016) ++++ gcc/ipa-inline.c (working copy) +@@ -656,6 +656,11 @@ want_inline_self_recursive_call_p (struc + reason = "--param max-inline-recursive-depth exceeded."; + want_inline = false; + } ++ else if (never_executed_edge_p (edge)) ++ { ++ reason = "edge is never executed."; ++ want_inline = false; ++ } + + if (outer_node->global.inlined_to) + caller_freq = outer_node->callers->frequency; +@@ -1597,6 +1602,14 @@ inline_small_functions (void) + outer_node = where, depth++; + where = where->callers->caller; + } ++ if (never_executed_edge_p (edge) ++ && inline_summary (edge->callee)->num_calls > 30) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Never executed edge\n"); ++ edge->inline_failed = CIF_NEVER_EXECUTED; ++ continue; ++ } + if (outer_node + && !want_inline_self_recursive_call_p (edge, outer_node, + true, depth)) +--- gcc/ipa-inline.h (revision 257016) ++++ gcc/ipa-inline.h (working copy) +@@ -132,6 +132,7 @@ struct GTY(()) inline_summary + /* Estimated size of the function after inlining. */ + int time; + int size; ++ int num_calls; + + /* Conditional size/time information. The summaries are being + merged during inlining. */ +@@ -226,6 +227,7 @@ inline_hints do_estimate_edge_hints (str + void initialize_growth_caches (void); + void free_growth_caches (void); + void compute_inline_parameters (struct cgraph_node *, bool); ++bool never_executed_edge_p (struct cgraph_edge *); + + /* In ipa-inline-transform.c */ + bool inline_call (struct cgraph_edge *, bool, vec *, int *, bool); + diff --git a/gcc48-rh1546728.patch b/gcc48-rh1546728.patch new file mode 100644 index 0000000..e9944e5 --- /dev/null +++ b/gcc48-rh1546728.patch @@ -0,0 +1,48 @@ +2015-09-03 Bill Schmidt + + * optabs.c (expand_binop): Don't create a broadcast vector with a + source element wider than the inner mode. + + * gcc.target/powerpc/vec-shift.c: New test. + +--- gcc/optabs.c ++++ gcc/optabs.c +@@ -1608,6 +1608,15 @@ expand_binop (machine_mode mode, optab binoptab, rtx op0, rtx op1, + + if (otheroptab && optab_handler (otheroptab, mode) != CODE_FOR_nothing) + { ++ /* The scalar may have been extended to be too wide. Truncate ++ it back to the proper size to fit in the broadcast vector. */ ++ machine_mode inner_mode = GET_MODE_INNER (mode); ++ if (!CONST_INT_P (op1) ++ && (GET_MODE_BITSIZE (inner_mode) ++ < GET_MODE_BITSIZE (GET_MODE (op1)))) ++ op1 = force_reg (inner_mode, ++ simplify_gen_unary (TRUNCATE, inner_mode, op1, ++ GET_MODE (op1))); + rtx vop1 = expand_vector_broadcast (mode, op1); + if (vop1) + { +--- /dev/null ++++ gcc/testsuite/gcc.target/powerpc/vec-shift.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile { target { powerpc*-*-* } } } */ ++/* { dg-require-effective-target powerpc_altivec_ok } */ ++/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ ++/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */ ++/* { dg-options "-mcpu=power7 -O2" } */ ++ ++/* This used to ICE. During gimplification, "i" is widened to an unsigned ++ int. We used to fail at expand time as we tried to cram an SImode item ++ into a QImode memory slot. This has been fixed to properly truncate the ++ shift amount when splatting it into a vector. 
*/ ++ ++typedef unsigned char v16ui __attribute__((vector_size(16))); ++ ++v16ui vslb(v16ui v, unsigned char i) ++{ ++ return v << i; ++} ++ ++/* { dg-final { scan-assembler "vspltb" } } */ ++/* { dg-final { scan-assembler "vslb" } } */ diff --git a/gcc48-rh1552021.patch b/gcc48-rh1552021.patch new file mode 100644 index 0000000..c931115 --- /dev/null +++ b/gcc48-rh1552021.patch @@ -0,0 +1,1966 @@ +diff -Nrup gcc/config/s390/s390.c gcc/config/s390/s390.c +--- gcc/config/s390/s390.c 2018-03-27 09:33:20.158140823 -0600 ++++ gcc/config/s390/s390.c 2018-03-27 09:33:58.826861609 -0600 +@@ -958,6 +958,35 @@ s390_expand_builtin (tree exp, rtx targe + } + + ++/* Masks per jump target register indicating which thunk need to be ++ generated. */ ++static GTY(()) int indirect_branch_prez10thunk_mask = 0; ++static GTY(()) int indirect_branch_z10thunk_mask = 0; ++ ++#define INDIRECT_BRANCH_NUM_OPTIONS 4 ++ ++enum s390_indirect_branch_option ++ { ++ s390_opt_indirect_branch_jump = 0, ++ s390_opt_indirect_branch_call, ++ s390_opt_function_return_reg, ++ s390_opt_function_return_mem ++ }; ++ ++static GTY(()) int indirect_branch_table_label_no[INDIRECT_BRANCH_NUM_OPTIONS] = { 0 }; ++const char *indirect_branch_table_label[INDIRECT_BRANCH_NUM_OPTIONS] = \ ++ { "LJUMP", "LCALL", "LRETREG", "LRETMEM" }; ++const char *indirect_branch_table_name[INDIRECT_BRANCH_NUM_OPTIONS] = \ ++ { ".s390_indirect_jump", ".s390_indirect_call", ++ ".s390_return_reg", ".s390_return_mem" }; ++ ++bool ++s390_return_addr_from_memory () ++{ ++ return (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM ++ && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM); ++} ++ + static const int s390_hotpatch_hw_max = 1000000; + static int s390_hotpatch_hw_before_label = 0; + static int s390_hotpatch_hw_after_label = 0; +@@ -2669,6 +2698,34 @@ s390_option_override (void) + if (TARGET_64BIT && !TARGET_ZARCH) + error ("64-bit ABI not supported in ESA/390 mode"); + ++ if (s390_indirect_branch != indirect_branch_keep) ++ { ++ if (!global_options_set.x_s390_indirect_branch_call) ++ s390_indirect_branch_call = s390_indirect_branch; ++ ++ if (!global_options_set.x_s390_indirect_branch_jump) ++ s390_indirect_branch_jump = s390_indirect_branch; ++ } ++ ++ if (s390_function_return != indirect_branch_keep) ++ { ++ if (!global_options_set.x_s390_function_return_reg) ++ s390_function_return_reg = s390_function_return; ++ ++ if (!global_options_set.x_s390_function_return_mem) ++ s390_function_return_mem = s390_function_return; ++ } ++ ++ if (!TARGET_CPU_ZARCH) ++ { ++ if (s390_indirect_branch_call != indirect_branch_keep ++ || s390_indirect_branch_jump != indirect_branch_keep) ++ error ("-mindirect-branch* options require -march=z900 or higher"); ++ if (s390_function_return_reg != indirect_branch_keep ++ || s390_function_return_mem != indirect_branch_keep) ++ error ("-mfunction-return* options require -march=z900 or higher"); ++ } ++ + /* Use hardware DFP if available and not explicitly disabled by + user. E.g. 
with -m31 -march=z10 -mzarch */ + if (!(target_flags_explicit & MASK_HARD_DFP) && TARGET_DFP) +@@ -10873,7 +10930,6 @@ s390_emit_epilogue (bool sibcall) + rtx frame_pointer, return_reg, cfa_restores = NULL_RTX; + int area_bottom, area_top, offset = 0; + int next_offset; +- rtvec p; + int i; + + if (TARGET_TPF_PROFILING) +@@ -11023,8 +11079,14 @@ s390_emit_epilogue (bool sibcall) + && cfun_frame_layout.last_restore_gpr > RETURN_REGNUM)) + { + int return_regnum = find_unused_clobbered_reg(); +- if (!return_regnum) +- return_regnum = 4; ++ if (!return_regnum ++ || (TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION ++ && !TARGET_CPU_Z10 ++ && return_regnum == INDIRECT_BRANCH_THUNK_REGNUM)) ++ { ++ gcc_assert (INDIRECT_BRANCH_THUNK_REGNUM != 4); ++ return_regnum = 4; ++ } + return_reg = gen_rtx_REG (Pmode, return_regnum); + + addr = plus_constant (Pmode, frame_pointer, +@@ -11054,16 +11116,7 @@ s390_emit_epilogue (bool sibcall) + } + + if (! sibcall) +- { +- +- /* Return to caller. */ +- +- p = rtvec_alloc (2); +- +- RTVEC_ELT (p, 0) = ret_rtx; +- RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg); +- emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p)); +- } ++ emit_jump_insn (gen_return_use (return_reg)); + } + + +@@ -12371,6 +12424,84 @@ s390_output_mi_thunk (FILE *file, tree t + final_end_function (); + } + ++/* Output either an indirect jump or a an indirect call ++ (RETURN_ADDR_REGNO != INVALID_REGNUM) with target register REGNO ++ using a branch trampoline disabling branch target prediction. */ ++ ++void ++s390_indirect_branch_via_thunk (unsigned int regno, ++ unsigned int return_addr_regno, ++ rtx comparison_operator, ++ enum s390_indirect_branch_type type) ++{ ++ enum s390_indirect_branch_option option; ++ ++ if (type == s390_indirect_branch_type_return) ++ { ++ if (s390_function_return_reg != indirect_branch_keep ++ && !s390_return_addr_from_memory ()) ++ option = s390_opt_function_return_reg; ++ ++ if (s390_function_return_mem != indirect_branch_keep ++ && s390_return_addr_from_memory ()) ++ option = s390_opt_function_return_mem; ++ } ++ else if (type == s390_indirect_branch_type_jump) ++ option = s390_opt_indirect_branch_jump; ++ else if (type == s390_indirect_branch_type_call) ++ option = s390_opt_indirect_branch_call; ++ else ++ gcc_unreachable (); ++ ++ if (TARGET_INDIRECT_BRANCH_TABLE) ++ { ++ char label[32]; ++ ++ ASM_GENERATE_INTERNAL_LABEL (label, ++ indirect_branch_table_label[option], ++ indirect_branch_table_label_no[option]++); ++ ASM_OUTPUT_LABEL (asm_out_file, label); ++ } ++ ++ if (return_addr_regno != INVALID_REGNUM) ++ { ++ gcc_assert (comparison_operator == NULL_RTX); ++ fprintf (asm_out_file, " \tbrasl\t%%r%d,", return_addr_regno); ++ } ++ else ++ { ++ fputs (" \tjg", asm_out_file); ++ if (comparison_operator != NULL_RTX) ++ print_operand (asm_out_file, comparison_operator, 'C'); ++ ++ fputs ("\t", asm_out_file); ++ } ++ ++ if (TARGET_CPU_Z10) ++ fprintf (asm_out_file, ++ TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL "\n", ++ regno); ++ else ++ fprintf (asm_out_file, ++ TARGET_INDIRECT_BRANCH_THUNK_NAME_EX "\n", ++ INDIRECT_BRANCH_THUNK_REGNUM, regno); ++ ++ if ((option == s390_opt_indirect_branch_jump ++ && s390_indirect_branch_jump == indirect_branch_thunk) ++ || (option == s390_opt_indirect_branch_call ++ && s390_indirect_branch_call == indirect_branch_thunk) ++ || (option == s390_opt_function_return_reg ++ && s390_function_return_reg == indirect_branch_thunk) ++ || (option == s390_opt_function_return_mem ++ && s390_function_return_mem == indirect_branch_thunk)) ++ { ++ if 
(TARGET_CPU_Z10) ++ indirect_branch_z10thunk_mask |= (1 << regno); ++ else ++ indirect_branch_prez10thunk_mask |= (1 << regno); ++ } ++} ++ + static bool + s390_valid_pointer_mode (enum machine_mode mode) + { +@@ -12476,6 +12607,14 @@ s390_function_ok_for_sibcall (tree decl, + if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl)) + return false; + ++ /* The thunks for indirect branches require r1 if no exrl is ++ available. r1 might not be available when doing a sibling ++ call. */ ++ if (TARGET_INDIRECT_BRANCH_NOBP_CALL ++ && !TARGET_CPU_Z10 ++ && !decl) ++ return false; ++ + /* Register 6 on s390 is available as an argument register but unfortunately + "caller saved". This makes functions needing this register for arguments + not suitable for sibcalls. */ +@@ -12509,9 +12648,13 @@ s390_emit_call (rtx addr_location, rtx t + { + bool plt_call = false; + rtx insn; +- rtx call; +- rtx clobber; +- rtvec vec; ++ rtx vec[4] = { NULL_RTX }; ++ int elts = 0; ++ rtx *call = &vec[0]; ++ rtx *clobber_ret_reg = &vec[1]; ++ rtx *use = &vec[2]; ++ rtx *clobber_thunk_reg = &vec[3]; ++ int i; + + /* Direct function calls need special treatment. */ + if (GET_CODE (addr_location) == SYMBOL_REF) +@@ -12520,7 +12663,7 @@ s390_emit_call (rtx addr_location, rtx t + replace the symbol itself with the PLT stub. */ + if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location)) + { +- if (retaddr_reg != NULL_RTX) ++ if (TARGET_64BIT || retaddr_reg != NULL_RTX) + { + addr_location = gen_rtx_UNSPEC (Pmode, + gen_rtvec (1, addr_location), +@@ -12563,26 +12706,57 @@ s390_emit_call (rtx addr_location, rtx t + addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM); + } + ++ if (TARGET_INDIRECT_BRANCH_NOBP_CALL ++ && GET_CODE (addr_location) != SYMBOL_REF ++ && !plt_call) ++ { ++ /* Indirect branch thunks require the target to be a single GPR. */ ++ addr_location = force_reg (Pmode, addr_location); ++ ++ /* Without exrl the indirect branch thunks need an additional ++ register for larl;ex */ ++ if (!TARGET_CPU_Z10) ++ { ++ *clobber_thunk_reg = gen_rtx_REG (Pmode, INDIRECT_BRANCH_THUNK_REGNUM); ++ *clobber_thunk_reg = gen_rtx_CLOBBER (VOIDmode, *clobber_thunk_reg); ++ } ++ } ++ + addr_location = gen_rtx_MEM (QImode, addr_location); +- call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx); ++ *call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx); + + if (result_reg != NULL_RTX) +- call = gen_rtx_SET (VOIDmode, result_reg, call); ++ *call = gen_rtx_SET (VOIDmode, result_reg, *call); + + if (retaddr_reg != NULL_RTX) + { +- clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg); ++ *clobber_ret_reg = gen_rtx_CLOBBER (VOIDmode, retaddr_reg); + + if (tls_call != NULL_RTX) +- vec = gen_rtvec (3, call, clobber, +- gen_rtx_USE (VOIDmode, tls_call)); +- else +- vec = gen_rtvec (2, call, clobber); ++ *use = gen_rtx_USE (VOIDmode, tls_call); ++ } ++ ++ for (i = 0; i < 4; i++) ++ if (vec[i] != NULL_RTX) ++ elts++; + +- call = gen_rtx_PARALLEL (VOIDmode, vec); ++ if (elts > 1) ++ { ++ rtvec v; ++ int e = 0; ++ ++ v = rtvec_alloc (elts); ++ for (i = 0; i < 4; i++) ++ if (vec[i] != NULL_RTX) ++ { ++ RTVEC_ELT (v, e) = vec[i]; ++ e++; ++ } ++ ++ *call = gen_rtx_PARALLEL (VOIDmode, v); + } + +- insn = emit_call_insn (call); ++ insn = emit_call_insn (*call); + + /* 31-bit PLT stubs and tls calls use the GOT register implicitly. 
*/ + if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX) +@@ -13819,6 +13993,190 @@ s390_asm_file_end (void) + file_end_indicate_exec_stack (); + } + ++#ifdef HAVE_GAS_HIDDEN ++# define USE_HIDDEN_LINKONCE 1 ++#else ++# define USE_HIDDEN_LINKONCE 0 ++#endif ++ ++/* Output an indirect branch trampoline for target register REGNO. */ ++ ++static void ++s390_output_indirect_thunk_function (unsigned int regno, bool z10_p) ++{ ++ tree decl; ++ char thunk_label[32]; ++ ++ int i; ++ ++ if (z10_p) ++ sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL, regno); ++ else ++ sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EX, ++ INDIRECT_BRANCH_THUNK_REGNUM, regno); ++ ++ decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, ++ get_identifier (thunk_label), ++ build_function_type_list (void_type_node, NULL_TREE)); ++ DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, ++ NULL_TREE, void_type_node); ++ TREE_PUBLIC (decl) = 1; ++ TREE_STATIC (decl) = 1; ++ DECL_IGNORED_P (decl) = 1; ++ ++ if (USE_HIDDEN_LINKONCE) ++ { ++ DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl); ++ ++ targetm.asm_out.unique_section (decl, 0); ++ switch_to_section (get_named_section (decl, NULL, 0)); ++ ++ targetm.asm_out.globalize_label (asm_out_file, thunk_label); ++ fputs ("\t.hidden\t", asm_out_file); ++ assemble_name (asm_out_file, thunk_label); ++ putc ('\n', asm_out_file); ++ ASM_DECLARE_FUNCTION_NAME (asm_out_file, thunk_label, decl); ++ } ++ else ++ { ++ switch_to_section (text_section); ++ ASM_OUTPUT_LABEL (asm_out_file, thunk_label); ++ } ++ ++ DECL_INITIAL (decl) = make_node (BLOCK); ++ current_function_decl = decl; ++ allocate_struct_function (decl, false); ++ init_function_start (decl); ++ cfun->is_thunk = true; ++ first_function_block_is_cold = false; ++ final_start_function (emit_barrier (), asm_out_file, 1); ++ ++ /* This makes CFI at least usable for indirect jumps. ++ ++ jumps: stopping in the thunk: backtrace will point to the thunk ++ target is if it was interrupted by a signal ++ ++ calls: Instead of caller->thunk the backtrace will be ++ caller->callee->thunk */ ++ if (flag_asynchronous_unwind_tables) ++ { ++ fputs ("\t.cfi_signal_frame\n", asm_out_file); ++ fprintf (asm_out_file, "\t.cfi_return_column %d\n", regno); ++ for (i = 0; i < FPR15_REGNUM; i++) ++ fprintf (asm_out_file, "\t.cfi_same_value %s\n", reg_names[i]); ++ } ++ ++ if (z10_p) ++ { ++ /* exrl 0,1f */ ++ ++ /* We generate a thunk for z10 compiled code although z10 is ++ currently not enabled. Tell the assembler to accept the ++ instruction. */ ++ if (!TARGET_CPU_Z10) ++ { ++ fputs ("\t.machine push\n", asm_out_file); ++ fputs ("\t.machine z10\n", asm_out_file); ++ } ++ /* We use exrl even if -mzarch hasn't been specified on the ++ command line so we have to tell the assembler to accept ++ it. 
*/ ++ if (!TARGET_ZARCH) ++ fputs ("\t.machinemode zarch\n", asm_out_file); ++ ++ fputs ("\texrl\t0,1f\n", asm_out_file); ++ ++ if (!TARGET_ZARCH) ++ fputs ("\t.machinemode esa\n", asm_out_file); ++ ++ if (!TARGET_CPU_Z10) ++ fputs ("\t.machine pop\n", asm_out_file); ++ } ++ else if (TARGET_CPU_ZARCH) ++ { ++ /* larl %r1,1f */ ++ fprintf (asm_out_file, "\tlarl\t%%r%d,1f\n", ++ INDIRECT_BRANCH_THUNK_REGNUM); ++ ++ /* ex 0,0(%r1) */ ++ fprintf (asm_out_file, "\tex\t0,0(%%r%d)\n", ++ INDIRECT_BRANCH_THUNK_REGNUM); ++ } ++ else ++ gcc_unreachable (); ++ ++ /* 0: j 0b */ ++ fputs ("0:\tj\t0b\n", asm_out_file); ++ ++ /* 1: br */ ++ fprintf (asm_out_file, "1:\tbr\t%%r%d\n", regno); ++ ++ final_end_function (); ++ init_insn_lengths (); ++ free_after_compilation (cfun); ++ set_cfun (NULL); ++ current_function_decl = NULL; ++} ++ ++/* Implement the asm.code_end target hook. */ ++ ++static void ++s390_code_end (void) ++{ ++ int i; ++ ++ for (i = 1; i < 16; i++) ++ { ++ if (indirect_branch_z10thunk_mask & (1 << i)) ++ s390_output_indirect_thunk_function (i, true); ++ ++ if (indirect_branch_prez10thunk_mask & (1 << i)) ++ s390_output_indirect_thunk_function (i, false); ++ } ++ ++ if (TARGET_INDIRECT_BRANCH_TABLE) ++ { ++ int o; ++ int i; ++ ++ for (o = 0; o < INDIRECT_BRANCH_NUM_OPTIONS; o++) ++ { ++ if (indirect_branch_table_label_no[o] == 0) ++ continue; ++ ++ switch_to_section (get_section (indirect_branch_table_name[o], ++ 0, ++ NULL_TREE)); ++ for (i = 0; i < indirect_branch_table_label_no[o]; i++) ++ { ++ char label_start[32]; ++ ++ ASM_GENERATE_INTERNAL_LABEL (label_start, ++ indirect_branch_table_label[o], i); ++ ++ fputs ("\t.long\t", asm_out_file); ++ assemble_name_raw (asm_out_file, label_start); ++ fputs ("-.\n", asm_out_file); ++ } ++ switch_to_section (current_function_section ()); ++ } ++ } ++} ++ ++/* Implement the TARGET_CASE_VALUES_THRESHOLD target hook. */ ++ ++unsigned int ++s390_case_values_threshold (void) ++{ ++ /* Disabling branch prediction for indirect jumps makes jump table ++ much more expensive. */ ++ if (TARGET_INDIRECT_BRANCH_NOBP_JUMP) ++ return 20; ++ ++ return default_case_values_threshold (); ++} ++ ++ + /* Initialize GCC target structure. 
*/ + + #undef TARGET_ASM_ALIGNED_HI_OP +@@ -14015,6 +14373,12 @@ s390_asm_file_end (void) + #undef TARGET_ASM_FILE_END + #define TARGET_ASM_FILE_END s390_asm_file_end + ++#undef TARGET_ASM_CODE_END ++#define TARGET_ASM_CODE_END s390_code_end ++ ++#undef TARGET_CASE_VALUES_THRESHOLD ++#define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold ++ + struct gcc_target targetm = TARGET_INITIALIZER; + + #include "gt-s390.h" +diff -Nrup gcc/config/s390/s390.h gcc/config/s390/s390.h +--- gcc/config/s390/s390.h 2018-03-27 09:33:19.762143683 -0600 ++++ gcc/config/s390/s390.h 2018-03-27 09:33:58.827861602 -0600 +@@ -1006,4 +1006,37 @@ extern const int processor_flags_table[] + s390_register_target_pragmas (); \ + } while (0) + ++ ++#define TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION \ ++ (s390_function_return_reg != indirect_branch_keep \ ++ || s390_function_return_mem != indirect_branch_keep) ++ ++#define TARGET_INDIRECT_BRANCH_NOBP_RET \ ++ ((s390_function_return_reg != indirect_branch_keep \ ++ && !s390_return_addr_from_memory ()) \ ++ || (s390_function_return_mem != indirect_branch_keep \ ++ && s390_return_addr_from_memory ())) ++ ++#define TARGET_INDIRECT_BRANCH_NOBP_JUMP \ ++ (s390_indirect_branch_jump != indirect_branch_keep) ++ ++#define TARGET_INDIRECT_BRANCH_NOBP_JUMP_THUNK \ ++ (s390_indirect_branch_jump == indirect_branch_thunk \ ++ || s390_indirect_branch_jump == indirect_branch_thunk_extern) ++ ++#define TARGET_INDIRECT_BRANCH_NOBP_JUMP_INLINE_THUNK \ ++ (s390_indirect_branch_jump == indirect_branch_thunk_inline) ++ ++#define TARGET_INDIRECT_BRANCH_NOBP_CALL \ ++ (s390_indirect_branch_call != indirect_branch_keep) ++ ++#ifndef TARGET_DEFAULT_INDIRECT_BRANCH_TABLE ++#define TARGET_DEFAULT_INDIRECT_BRANCH_TABLE 0 ++#endif ++ ++#define TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL "__s390_indirect_jump_r%d" ++#define TARGET_INDIRECT_BRANCH_THUNK_NAME_EX "__s390_indirect_jump_r%duse_r%d" ++ ++#define TARGET_INDIRECT_BRANCH_TABLE s390_indirect_branch_table ++ + #endif /* S390_H */ +diff -Nrup gcc/config/s390/s390.md gcc/config/s390/s390.md +--- gcc/config/s390/s390.md 2018-03-27 09:33:19.763143675 -0600 ++++ gcc/config/s390/s390.md 2018-03-27 09:33:58.831861573 -0600 +@@ -285,6 +285,8 @@ + [ + ; Sibling call register. + (SIBCALL_REGNUM 1) ++ ; A call-clobbered reg which can be used in indirect branch thunks ++ (INDIRECT_BRANCH_THUNK_REGNUM 1) + ; Literal pool base register. + (BASE_REGNUM 13) + ; Return address register. +@@ -304,6 +306,7 @@ + ; Floating point registers. + (FPR0_REGNUM 16) + (FPR2_REGNUM 18) ++ (FPR15_REGNUM 31) + (VR0_REGNUM 16) + (VR16_REGNUM 38) + (VR23_REGNUM 45) +@@ -402,7 +405,10 @@ + z196_cracked" + (const_string "none")) + +-(define_attr "mnemonic" "bcr_flush,unknown" (const_string "unknown")) ++; mnemonics which only get defined through if_then_else currently ++; don't get added to the list values automatically and hence need to ++; be listed here. ++(define_attr "mnemonic" "b,br,bas,bc,bcr,bcr_flush,unknown" (const_string "unknown")) + + ;; Length in bytes. 
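The s390.md hunks that follow reroute indirect jumps, calls, and returns through the thunks generated by s390_output_indirect_thunk_function above. As a rough illustration of the resulting code shape (not taken from the patch; the register choice here is an assumption, since it depends on allocation), a plain indirect call such as the one below is redirected through an out-of-line "expoline":

/* Illustration only: compiled with -march=z10 -mindirect-branch=thunk,
   the call through "fn" is expected to become roughly

       lgr    %r1,%r2                       # branch target into a GPR
       brasl  %r14,__s390_indirect_jump_r1  # via thunk, not "basr %r14,%r1"

   with the thunk (as emitted by s390_output_indirect_thunk_function
   in the s390.c hunk above) looking like

   __s390_indirect_jump_r1:
       exrl   0,1f      # execute the br, defeating branch prediction
   0:  j      0b        # mispredicted speculation lands in a self-loop
   1:  br     %r1

   Register numbers are assumptions made for this example.  */

int
call_indirect (int (*fn) (void))
{
  return fn ();
}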
+ +@@ -8436,7 +8442,7 @@ + (match_operator 1 "s390_comparison" [(reg CC_REGNUM) (const_int 0)]) + (match_operand 0 "address_operand" "ZQZR") + (pc)))] +- "" ++ "!TARGET_INDIRECT_BRANCH_NOBP_JUMP" + { + if (get_attr_op_type (insn) == OP_TYPE_RR) + return "b%C1r\t%0"; +@@ -8446,6 +8452,9 @@ + [(set (attr "op_type") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "RR") (const_string "RX"))) ++ (set (attr "mnemonic") ++ (if_then_else (match_operand 0 "register_operand" "") ++ (const_string "bcr") (const_string "bc"))) + (set_attr "type" "branch") + (set_attr "atype" "agen")]) + +@@ -8499,7 +8508,7 @@ + (match_operator 1 "s390_comparison" [(reg CC_REGNUM) (const_int 0)]) + (pc) + (match_operand 0 "address_operand" "ZQZR")))] +- "" ++ "!TARGET_INDIRECT_BRANCH_NOBP_JUMP" + { + if (get_attr_op_type (insn) == OP_TYPE_RR) + return "b%D1r\t%0"; +@@ -8509,6 +8518,9 @@ + [(set (attr "op_type") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "RR") (const_string "RX"))) ++ (set (attr "mnemonic") ++ (if_then_else (match_operand 0 "register_operand" "") ++ (const_string "bcr") (const_string "bc"))) + (set_attr "type" "branch") + (set_attr "atype" "agen")]) + +@@ -9005,29 +9017,125 @@ + ; indirect-jump instruction pattern(s). + ; + +-(define_insn "indirect_jump" +- [(set (pc) (match_operand 0 "address_operand" "ZQZR"))] +- "" ++(define_expand "indirect_jump" ++ [(set (pc) (match_operand 0 "address_operand" "ZQZR"))] ++ "" ++{ ++ if (TARGET_INDIRECT_BRANCH_NOBP_JUMP_THUNK) ++ { ++ operands[0] = force_reg (Pmode, operands[0]); ++ if (TARGET_CPU_Z10) ++ { ++ if (TARGET_64BIT) ++ emit_jump_insn (gen_indirect_jump_via_thunkdi_z10 (operands[0])); ++ else ++ emit_jump_insn (gen_indirect_jump_via_thunksi_z10 (operands[0])); ++ } ++ else ++ { ++ if (TARGET_64BIT) ++ emit_jump_insn (gen_indirect_jump_via_thunkdi (operands[0])); ++ else ++ emit_jump_insn (gen_indirect_jump_via_thunksi (operands[0])); ++ } ++ DONE; ++ } ++}) ++ ++(define_insn "*indirect_jump" ++ [(set (pc) ++ (match_operand 0 "address_operand" "ZR"))] ++ "!TARGET_INDIRECT_BRANCH_NOBP_JUMP_THUNK" + { + if (get_attr_op_type (insn) == OP_TYPE_RR) + return "br\t%0"; + else + return "b\t%a0"; + } +- [(set (attr "op_type") +- (if_then_else (match_operand 0 "register_operand" "") +- (const_string "RR") (const_string "RX"))) +- (set_attr "type" "branch") +- (set_attr "atype" "agen")]) ++ [(set (attr "op_type") ++ (if_then_else (match_operand 0 "register_operand" "") ++ (const_string "RR") (const_string "RX"))) ++ (set (attr "mnemonic") ++ (if_then_else (match_operand 0 "register_operand" "") ++ (const_string "br") (const_string "b"))) ++ (set_attr "type" "branch") ++ (set_attr "atype" "agen")]) ++ ++(define_insn "indirect_jump_via_thunk_z10" ++ [(set (pc) ++ (match_operand:P 0 "register_operand" "a"))] ++ "TARGET_INDIRECT_BRANCH_NOBP_JUMP_THUNK ++ && TARGET_CPU_Z10" ++{ ++ s390_indirect_branch_via_thunk (REGNO (operands[0]), ++ INVALID_REGNUM, ++ NULL_RTX, ++ s390_indirect_branch_type_jump); ++ return ""; ++} ++ [(set_attr "op_type" "RIL") ++ (set_attr "mnemonic" "jg") ++ (set_attr "type" "branch") ++ (set_attr "atype" "agen")]) ++ ++(define_insn "indirect_jump_via_thunk" ++ [(set (pc) ++ (match_operand:P 0 "register_operand" " a")) ++ (clobber (reg:P INDIRECT_BRANCH_THUNK_REGNUM))] ++ "TARGET_INDIRECT_BRANCH_NOBP_JUMP_THUNK ++ && !TARGET_CPU_Z10" ++{ ++ s390_indirect_branch_via_thunk (REGNO (operands[0]), ++ INVALID_REGNUM, ++ NULL_RTX, ++ s390_indirect_branch_type_jump); ++ return ""; ++} ++ [(set_attr 
"op_type" "RIL") ++ (set_attr "mnemonic" "jg") ++ (set_attr "type" "branch") ++ (set_attr "atype" "agen")]) + + ; + ; casesi instruction pattern(s). + ; + +-(define_insn "casesi_jump" +- [(set (pc) (match_operand 0 "address_operand" "ZQZR")) +- (use (label_ref (match_operand 1 "" "")))] +- "" ++(define_expand "casesi_jump" ++ [(parallel ++ [(set (pc) (match_operand 0 "address_operand")) ++ (use (label_ref (match_operand 1 "")))])] ++ "" ++{ ++ if (TARGET_INDIRECT_BRANCH_NOBP_JUMP_THUNK) ++ { ++ operands[0] = force_reg (GET_MODE (operands[0]), operands[0]); ++ ++ if (TARGET_CPU_Z10) ++ { ++ if (TARGET_64BIT) ++ emit_jump_insn (gen_casesi_jump_via_thunkdi_z10 (operands[0], ++ operands[1])); ++ else ++ emit_jump_insn (gen_casesi_jump_via_thunksi_z10 (operands[0], ++ operands[1])); ++ } ++ else ++ { ++ if (TARGET_64BIT) ++ emit_jump_insn (gen_casesi_jump_via_thunkdi (operands[0], ++ operands[1])); ++ else ++ emit_jump_insn (gen_casesi_jump_via_thunksi (operands[0], ++ operands[1])); ++ } ++ DONE; ++ } ++}) ++ ++(define_insn "*casesi_jump" ++ [(set (pc) (match_operand 0 "address_operand" "ZR")) ++ (use (label_ref (match_operand 1 "" "")))] ++ "!TARGET_INDIRECT_BRANCH_NOBP_JUMP_THUNK" + { + if (get_attr_op_type (insn) == OP_TYPE_RR) + return "br\t%0"; +@@ -9035,11 +9143,50 @@ + return "b\t%a0"; + } + [(set (attr "op_type") ++ (if_then_else (match_operand 0 "register_operand" "") ++ (const_string "RR") (const_string "RX"))) ++ (set (attr "mnemonic") + (if_then_else (match_operand 0 "register_operand" "") +- (const_string "RR") (const_string "RX"))) ++ (const_string "br") (const_string "b"))) ++ (set_attr "type" "branch") ++ (set_attr "atype" "agen")]) ++ ++(define_insn "casesi_jump_via_thunk_z10" ++ [(set (pc) (match_operand:P 0 "register_operand" "a")) ++ (use (label_ref (match_operand 1 "" "")))] ++ "TARGET_INDIRECT_BRANCH_NOBP_JUMP_THUNK ++ && TARGET_CPU_Z10" ++{ ++ s390_indirect_branch_via_thunk (REGNO (operands[0]), ++ INVALID_REGNUM, ++ NULL_RTX, ++ s390_indirect_branch_type_jump); ++ return ""; ++} ++ [(set_attr "op_type" "RIL") ++ (set_attr "mnemonic" "jg") + (set_attr "type" "branch") + (set_attr "atype" "agen")]) + ++(define_insn "casesi_jump_via_thunk" ++ [(set (pc) (match_operand:P 0 "register_operand" "a")) ++ (use (label_ref (match_operand 1 "" ""))) ++ (clobber (reg:P INDIRECT_BRANCH_THUNK_REGNUM))] ++ "TARGET_INDIRECT_BRANCH_NOBP_JUMP_THUNK ++ && !TARGET_CPU_Z10" ++{ ++ s390_indirect_branch_via_thunk (REGNO (operands[0]), ++ INVALID_REGNUM, ++ NULL_RTX, ++ s390_indirect_branch_type_jump); ++ return ""; ++} ++ [(set_attr "op_type" "RIL") ++ (set_attr "mnemonic" "jg") ++ (set_attr "type" "branch") ++ (set_attr "atype" "agen")]) ++ ++ + (define_expand "casesi" + [(match_operand:SI 0 "general_operand" "") + (match_operand:SI 1 "general_operand" "") +@@ -9141,11 +9288,30 @@ + + (define_insn "*sibcall_br" + [(call (mem:QI (reg SIBCALL_REGNUM)) +- (match_operand 0 "const_int_operand" "n"))] ++ (match_operand 0 "const_int_operand" "n"))] + "SIBLING_CALL_P (insn) +- && GET_MODE (XEXP (XEXP (PATTERN (insn), 0), 0)) == Pmode" +- "br\t%%r1" +- [(set_attr "op_type" "RR") ++ && GET_MODE (XEXP (XEXP (PATTERN (insn), 0), 0)) == Pmode" ++{ ++ if (TARGET_INDIRECT_BRANCH_NOBP_CALL) ++ { ++ gcc_assert (TARGET_CPU_Z10); ++ s390_indirect_branch_via_thunk (SIBCALL_REGNUM, ++ INVALID_REGNUM, ++ NULL_RTX, ++ s390_indirect_branch_type_call); ++ return ""; ++ } ++ else ++ return "br\t%%r1"; ++} ++ [(set (attr "op_type") ++ (if_then_else (match_test "TARGET_INDIRECT_BRANCH_NOBP_CALL") ++ (const_string "RIL") 
++ (const_string "RR"))) ++ (set (attr "mnemonic") ++ (if_then_else (match_test "TARGET_INDIRECT_BRANCH_NOBP_CALL") ++ (const_string "jg") ++ (const_string "br"))) + (set_attr "type" "branch") + (set_attr "atype" "agen")]) + +@@ -9185,8 +9351,27 @@ + (match_operand 1 "const_int_operand" "n")))] + "SIBLING_CALL_P (insn) + && GET_MODE (XEXP (XEXP (XEXP (PATTERN (insn), 1), 0), 0)) == Pmode" +- "br\t%%r1" +- [(set_attr "op_type" "RR") ++{ ++ if (TARGET_INDIRECT_BRANCH_NOBP_CALL) ++ { ++ gcc_assert (TARGET_CPU_Z10); ++ s390_indirect_branch_via_thunk (SIBCALL_REGNUM, ++ INVALID_REGNUM, ++ NULL_RTX, ++ s390_indirect_branch_type_call); ++ return ""; ++ } ++ else ++ return "br\t%%r1"; ++} ++ [(set (attr "op_type") ++ (if_then_else (match_test "TARGET_INDIRECT_BRANCH_NOBP_CALL") ++ (const_string "RIL") ++ (const_string "RR"))) ++ (set (attr "mnemonic") ++ (if_then_else (match_test "TARGET_INDIRECT_BRANCH_NOBP_CALL") ++ (const_string "jg") ++ (const_string "br"))) + (set_attr "type" "branch") + (set_attr "atype" "agen")]) + +@@ -9252,7 +9437,9 @@ + [(call (mem:QI (match_operand 0 "address_operand" "ZQZR")) + (match_operand 1 "const_int_operand" "n")) + (clobber (match_operand 2 "register_operand" "=r"))] +- "!SIBLING_CALL_P (insn) && GET_MODE (operands[2]) == Pmode" ++ "!TARGET_INDIRECT_BRANCH_NOBP_CALL ++ && !SIBLING_CALL_P (insn) ++ && GET_MODE (operands[2]) == Pmode" + { + if (get_attr_op_type (insn) == OP_TYPE_RR) + return "basr\t%2,%0"; +@@ -9262,6 +9449,50 @@ + [(set (attr "op_type") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "RR") (const_string "RX"))) ++ (set (attr "mnemonic") ++ (if_then_else (match_operand 0 "register_operand" "") ++ (const_string "basr") (const_string "bas"))) ++ (set_attr "type" "jsr") ++ (set_attr "atype" "agen") ++ (set_attr "z196prop" "z196_cracked")]) ++ ++(define_insn "*basr_via_thunk_z10" ++ [(call (mem:QI (match_operand:P 0 "register_operand" "a")) ++ (match_operand 1 "const_int_operand" "n")) ++ (clobber (match_operand:P 2 "register_operand" "=&r"))] ++ "TARGET_INDIRECT_BRANCH_NOBP_CALL ++ && TARGET_CPU_Z10 ++ && !SIBLING_CALL_P (insn)" ++{ ++ s390_indirect_branch_via_thunk (REGNO (operands[0]), ++ REGNO (operands[2]), ++ NULL_RTX, ++ s390_indirect_branch_type_call); ++ return ""; ++} ++ [(set_attr "op_type" "RIL") ++ (set_attr "mnemonic" "brasl") ++ (set_attr "type" "jsr") ++ (set_attr "atype" "agen") ++ (set_attr "z196prop" "z196_cracked")]) ++ ++(define_insn "*basr_via_thunk" ++ [(call (mem:QI (match_operand:P 0 "register_operand" "a")) ++ (match_operand 1 "const_int_operand" "n")) ++ (clobber (match_operand:P 2 "register_operand" "=&r")) ++ (clobber (reg:P INDIRECT_BRANCH_THUNK_REGNUM))] ++ "TARGET_INDIRECT_BRANCH_NOBP_CALL ++ && !TARGET_CPU_Z10 ++ && !SIBLING_CALL_P (insn)" ++{ ++ s390_indirect_branch_via_thunk (REGNO (operands[0]), ++ REGNO (operands[2]), ++ NULL_RTX, ++ s390_indirect_branch_type_call); ++ return ""; ++} ++ [(set_attr "op_type" "RIL") ++ (set_attr "mnemonic" "brasl") + (set_attr "type" "jsr") + (set_attr "atype" "agen") + (set_attr "z196prop" "z196_cracked")]) +@@ -9313,7 +9544,10 @@ + (call (mem:QI (match_operand 1 "address_operand" "ZQZR")) + (match_operand 2 "const_int_operand" "n"))) + (clobber (match_operand 3 "register_operand" "=r"))] +- "!SIBLING_CALL_P (insn) && GET_MODE (operands[3]) == Pmode" ++ "!TARGET_INDIRECT_BRANCH_NOBP_CALL ++ && !SIBLING_CALL_P (insn) ++ && GET_MODE (operands[3]) == Pmode" ++ + { + if (get_attr_op_type (insn) == OP_TYPE_RR) + return "basr\t%3,%1"; +@@ -9323,6 +9557,54 @@ + 
[(set (attr "op_type") + (if_then_else (match_operand 1 "register_operand" "") + (const_string "RR") (const_string "RX"))) ++ (set (attr "mnemonic") ++ (if_then_else (match_operand 1 "register_operand" "") ++ (const_string "basr") (const_string "bas"))) ++ (set_attr "type" "jsr") ++ (set_attr "atype" "agen") ++ (set_attr "z196prop" "z196_cracked")]) ++ ++(define_insn "*basr_r_via_thunk_z10" ++ [(set (match_operand 0 "" "") ++ (call (mem:QI (match_operand 1 "register_operand" "a")) ++ (match_operand 2 "const_int_operand" "n"))) ++ (clobber (match_operand 3 "register_operand" "=&r"))] ++ "TARGET_INDIRECT_BRANCH_NOBP_CALL ++ && TARGET_CPU_Z10 ++ && !SIBLING_CALL_P (insn) ++ && GET_MODE (operands[3]) == Pmode" ++{ ++ s390_indirect_branch_via_thunk (REGNO (operands[1]), ++ REGNO (operands[3]), ++ NULL_RTX, ++ s390_indirect_branch_type_call); ++ return ""; ++} ++ [(set_attr "op_type" "RIL") ++ (set_attr "mnemonic" "brasl") ++ (set_attr "type" "jsr") ++ (set_attr "atype" "agen") ++ (set_attr "z196prop" "z196_cracked")]) ++ ++(define_insn "*basr_r_via_thunk" ++ [(set (match_operand 0 "" "") ++ (call (mem:QI (match_operand 1 "register_operand" "a")) ++ (match_operand 2 "const_int_operand" "n"))) ++ (clobber (match_operand 3 "register_operand" "=&r")) ++ (clobber (reg:P INDIRECT_BRANCH_THUNK_REGNUM))] ++ "TARGET_INDIRECT_BRANCH_NOBP_CALL ++ && !TARGET_CPU_Z10 ++ && !SIBLING_CALL_P (insn) ++ && GET_MODE (operands[3]) == Pmode" ++{ ++ s390_indirect_branch_via_thunk (REGNO (operands[1]), ++ REGNO (operands[3]), ++ NULL_RTX, ++ s390_indirect_branch_type_call); ++ return ""; ++} ++ [(set_attr "op_type" "RIL") ++ (set_attr "mnemonic" "brasl") + (set_attr "type" "jsr") + (set_attr "atype" "agen") + (set_attr "z196prop" "z196_cracked")]) +@@ -10056,15 +10338,78 @@ + "" + "s390_emit_epilogue (true); DONE;") + +-(define_insn "*return" ++(define_expand "return_use" ++ [(parallel ++ [(return) ++ (use (match_operand 0 "register_operand" "a"))])] ++ "" ++{ ++ if (!TARGET_CPU_Z10 ++ && TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION) ++ { ++ if (TARGET_64BIT) ++ emit_jump_insn (gen_returndi_prez10 (operands[0])); ++ else ++ emit_jump_insn (gen_returnsi_prez10 (operands[0])); ++ DONE; ++ } ++}) ++ ++(define_insn "*return" + [(return) +- (use (match_operand 0 "register_operand" "a"))] +- "GET_MODE (operands[0]) == Pmode" +- "br\t%0" +- [(set_attr "op_type" "RR") ++ (use (match_operand:P 0 "register_operand" "a"))] ++ "TARGET_CPU_Z10 || !TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION" ++{ ++ if (TARGET_INDIRECT_BRANCH_NOBP_RET) ++ { ++ s390_indirect_branch_via_thunk (REGNO (operands[0]), ++ INVALID_REGNUM, ++ NULL_RTX, ++ s390_indirect_branch_type_return); ++ return ""; ++ } ++ else ++ return "br\t%0"; ++} ++ [(set (attr "op_type") ++ (if_then_else (match_test "TARGET_INDIRECT_BRANCH_NOBP_RET") ++ (const_string "RIL") ++ (const_string "RR"))) ++ (set (attr "mnemonic") ++ (if_then_else (match_test "TARGET_INDIRECT_BRANCH_NOBP_RET") ++ (const_string "jg") ++ (const_string "br"))) + (set_attr "type" "jsr") + (set_attr "atype" "agen")]) + ++(define_insn "return_prez10" ++ [(return) ++ (use (match_operand:P 0 "register_operand" "a")) ++ (clobber (reg:P INDIRECT_BRANCH_THUNK_REGNUM))] ++ "!TARGET_CPU_Z10 && TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION" ++{ ++ if (TARGET_INDIRECT_BRANCH_NOBP_RET) ++ { ++ s390_indirect_branch_via_thunk (REGNO (operands[0]), ++ INVALID_REGNUM, ++ NULL_RTX, ++ s390_indirect_branch_type_return); ++ return ""; ++ } ++ else ++ return "br\t%0"; ++} ++ [(set (attr "op_type") ++ (if_then_else (match_test 
"TARGET_INDIRECT_BRANCH_NOBP_RET") ++ (const_string "RIL") ++ (const_string "RR"))) ++ (set (attr "mnemonic") ++ (if_then_else (match_test "TARGET_INDIRECT_BRANCH_NOBP_RET") ++ (const_string "jg") ++ (const_string "br"))) ++ (set_attr "type" "jsr") ++ (set_attr "atype" "agen")]) ++ + + ;; Instruction definition to extend a 31-bit pointer into a 64-bit + ;; pointer. This is used for compatibility. +diff -Nrup gcc/config/s390/s390.opt gcc/config/s390/s390.opt +--- gcc/config/s390/s390.opt 2018-03-27 09:33:19.763143675 -0600 ++++ gcc/config/s390/s390.opt 2018-03-27 09:33:58.832861566 -0600 +@@ -175,3 +175,59 @@ Target Report Joined RejectNegative UInt + Set the branch costs for conditional branch instructions. Reasonable + values are small, non-negative integers. The default branch cost is + 1. ++ ++mindirect-branch= ++Target Report RejectNegative Joined Enum(indirect_branch) Var(s390_indirect_branch) Init(indirect_branch_keep) ++Wrap all indirect branches into execute in order to disable branch ++prediction. ++ ++mindirect-branch-jump= ++Target Report RejectNegative Joined Enum(indirect_branch) Var(s390_indirect_branch_jump) Init(indirect_branch_keep) ++Wrap indirect table jumps and computed gotos into execute in order to ++disable branch prediction. Using thunk or thunk-extern with this ++option requires the thunks to be considered signal handlers to order to ++generate correct CFI. For environments where unwinding (e.g. for ++exceptions) is required please use thunk-inline instead. ++ ++mindirect-branch-call= ++Target Report RejectNegative Joined Enum(indirect_branch) Var(s390_indirect_branch_call) Init(indirect_branch_keep) ++Wrap all indirect calls into execute in order to disable branch prediction. ++ ++mfunction-return= ++Target Report RejectNegative Joined Enum(indirect_branch) Var(s390_function_return) Init(indirect_branch_keep) ++Wrap all indirect return branches into execute in order to disable branch ++prediction. ++ ++mfunction-return-mem= ++Target Report RejectNegative Joined Enum(indirect_branch) Var(s390_function_return_mem) Init(indirect_branch_keep) ++Wrap indirect return branches into execute in order to disable branch ++prediction. This affects only branches where the return address is ++going to be restored from memory. ++ ++mfunction-return-reg= ++Target Report RejectNegative Joined Enum(indirect_branch) Var(s390_function_return_reg) Init(indirect_branch_keep) ++Wrap indirect return branches into execute in order to disable branch ++prediction. This affects only branches where the return address ++doesn't need to be restored from memory. ++ ++Enum ++Name(indirect_branch) Type(enum indirect_branch) ++Known indirect branch choices (for use with the -mindirect-branch=/-mfunction-return= options): ++ ++EnumValue ++Enum(indirect_branch) String(keep) Value(indirect_branch_keep) ++ ++EnumValue ++Enum(indirect_branch) String(thunk) Value(indirect_branch_thunk) ++ ++EnumValue ++Enum(indirect_branch) String(thunk-extern) Value(indirect_branch_thunk_extern) ++ ++mindirect-branch-table ++Target Report Var(s390_indirect_branch_table) Init(TARGET_DEFAULT_INDIRECT_BRANCH_TABLE) ++Generate sections .s390_indirect_jump, .s390_indirect_call, ++.s390_return_reg, and .s390_return_mem to contain the indirect branch ++locations which have been patched as part of using one of the ++-mindirect-branch* or -mfunction-return* options. The sections ++consist of an array of 32 bit elements. Each entry holds the offset ++from the entry to the patched location. 
+diff -Nrup gcc/config/s390/s390-opts.h gcc/config/s390/s390-opts.h +--- gcc/config/s390/s390-opts.h 2018-03-27 09:33:19.764143668 -0600 ++++ gcc/config/s390/s390-opts.h 2018-03-27 09:33:58.821861645 -0600 +@@ -39,4 +39,12 @@ enum processor_type + PROCESSOR_max + }; + ++/* Values for -mindirect-branch and -mfunction-return options. */ ++enum indirect_branch { ++ indirect_branch_unset = 0, ++ indirect_branch_keep, ++ indirect_branch_thunk, ++ indirect_branch_thunk_extern ++}; ++ + #endif +diff -Nrup gcc/config/s390/s390-protos.h gcc/config/s390/s390-protos.h +--- gcc/config/s390/s390-protos.h 2018-03-27 09:33:19.764143668 -0600 ++++ gcc/config/s390/s390-protos.h 2018-03-27 09:33:58.821861645 -0600 +@@ -41,6 +41,7 @@ extern void s390_set_has_landing_pad_p ( + extern bool s390_hard_regno_mode_ok (unsigned int, enum machine_mode); + extern bool s390_hard_regno_rename_ok (unsigned int, unsigned int); + extern int s390_class_max_nregs (enum reg_class, enum machine_mode); ++extern bool s390_return_addr_from_memory(void); + extern int s390_cannot_change_mode_class (enum machine_mode, enum machine_mode, + enum reg_class); + extern bool s390_function_arg_vector (enum machine_mode, const_tree); +@@ -124,6 +125,18 @@ extern int s390_compare_and_branch_condi + extern bool s390_extzv_shift_ok (int, int, unsigned HOST_WIDE_INT); + extern void s390_asm_output_function_label (FILE *, const char *, tree); + ++enum s390_indirect_branch_type ++ { ++ s390_indirect_branch_type_jump = 0, ++ s390_indirect_branch_type_call, ++ s390_indirect_branch_type_return ++ }; ++extern void s390_indirect_branch_via_thunk (unsigned int regno, ++ unsigned int return_addr_regno, ++ rtx comparison_operator, ++ enum s390_indirect_branch_type type); ++extern void s390_indirect_branch_via_inline_thunk (rtx execute_target); ++ + #endif /* RTX_CODE */ + + /* s390-c.c routines */ +diff -Nrup gcc/testsuite/gcc.target/s390/nobp-function-pointer-nothunk.c gcc/testsuite/gcc.target/s390/nobp-function-pointer-nothunk.c +--- gcc/testsuite/gcc.target/s390/nobp-function-pointer-nothunk.c 1969-12-31 17:00:00.000000000 -0700 ++++ gcc/testsuite/gcc.target/s390/nobp-function-pointer-nothunk.c 2018-03-27 09:33:58.832861566 -0600 +@@ -0,0 +1,59 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -march=z10 --save-temps -mindirect-branch-call=thunk-extern -mindirect-branch-table" } */ ++ ++int gl; ++ ++void __attribute__((noinline,noclone)) ++foo (int a) ++{ ++ gl = a + 40; ++} ++ ++int __attribute__((noinline,noclone)) ++foo_value (int a) ++{ ++ return a + 40; ++} ++ ++void* __attribute__((noinline,noclone)) ++get_fptr (int a) ++{ ++ switch (a) ++ { ++ case 0: return &foo; break; ++ case 1: return &foo_value; break; ++ default: __builtin_abort (); ++ } ++} ++ ++void (*f) (int); ++int (*g) (int); ++ ++int ++main () ++{ ++ int res; ++ ++ f = get_fptr(0); ++ f (2); ++ if (gl != 42) ++ __builtin_abort (); ++ ++ g = get_fptr(1); ++ if (g (2) != 42) ++ __builtin_abort (); ++ ++ return 0; ++} ++ ++/* 2 x main ++/* { dg-final { scan-assembler-times "brasl\t%r\[0-9\]*,__s390_indirect_jump" 2 } } */ ++ ++/* No thunks due to thunk-extern. 
*/ ++/* { dg-final { scan-assembler-not "exrl" } } */ ++/* { dg-final { scan-assembler-not ".globl __s390_indirect_jump" } } */ ++ ++/* { dg-final { scan-assembler-not "section\t.s390_indirect_jump" } } */ ++/* { dg-final { scan-assembler "section\t.s390_indirect_call" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_return_reg" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_return_mem" } } */ +diff -Nrup gcc/testsuite/gcc.target/s390/nobp-function-pointer-z10.c gcc/testsuite/gcc.target/s390/nobp-function-pointer-z10.c +--- gcc/testsuite/gcc.target/s390/nobp-function-pointer-z10.c 1969-12-31 17:00:00.000000000 -0700 ++++ gcc/testsuite/gcc.target/s390/nobp-function-pointer-z10.c 2018-03-27 09:33:58.833861558 -0600 +@@ -0,0 +1,56 @@ ++/* { dg-do run } */ ++/* { dg-options "-O3 -march=z10 --save-temps -mindirect-branch-call=thunk -mindirect-branch-table" } */ ++ ++int gl; ++ ++void __attribute__((noinline,noclone)) ++foo (int a) ++{ ++ gl = a + 40; ++} ++ ++int __attribute__((noinline,noclone)) ++foo_value (int a) ++{ ++ return a + 40; ++} ++ ++void* __attribute__((noinline,noclone)) ++get_fptr (int a) ++{ ++ switch (a) ++ { ++ case 0: return &foo; break; ++ case 1: return &foo_value; break; ++ default: __builtin_abort (); ++ } ++} ++ ++void (*f) (int); ++int (*g) (int); ++ ++int ++main () ++{ ++ int res; ++ ++ f = get_fptr(0); ++ f (2); ++ if (gl != 42) ++ __builtin_abort (); ++ ++ g = get_fptr(1); ++ if (g (2) != 42) ++ __builtin_abort (); ++ ++ return 0; ++} ++ ++/* 2 x main ++/* { dg-final { scan-assembler-times "brasl\t%r\[0-9\]*,__s390_indirect_jump" 2 } } */ ++/* { dg-final { scan-assembler "exrl" } } */ ++ ++/* { dg-final { scan-assembler-not "section\t.s390_indirect_jump" } } */ ++/* { dg-final { scan-assembler "section\t.s390_indirect_call" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_return_reg" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_return_mem" } } */ +diff -Nrup gcc/testsuite/gcc.target/s390/nobp-function-pointer-z900.c gcc/testsuite/gcc.target/s390/nobp-function-pointer-z900.c +--- gcc/testsuite/gcc.target/s390/nobp-function-pointer-z900.c 1969-12-31 17:00:00.000000000 -0700 ++++ gcc/testsuite/gcc.target/s390/nobp-function-pointer-z900.c 2018-03-27 09:33:58.833861558 -0600 +@@ -0,0 +1,56 @@ ++/* { dg-do run } */ ++/* { dg-options "-O3 -march=z900 --save-temps -mindirect-branch-call=thunk -mindirect-branch-table" } */ ++ ++int gl; ++ ++void __attribute__((noinline,noclone)) ++foo (int a) ++{ ++ gl = a + 40; ++} ++ ++int __attribute__((noinline,noclone)) ++foo_value (int a) ++{ ++ return a + 40; ++} ++ ++void* __attribute__((noinline,noclone)) ++get_fptr (int a) ++{ ++ switch (a) ++ { ++ case 0: return &foo; break; ++ case 1: return &foo_value; break; ++ default: __builtin_abort (); ++ } ++} ++ ++void (*f) (int); ++int (*g) (int); ++ ++int ++main () ++{ ++ int res; ++ ++ f = get_fptr(0); ++ f (2); ++ if (gl != 42) ++ __builtin_abort (); ++ ++ g = get_fptr(1); ++ if (g (2) != 42) ++ __builtin_abort (); ++ ++ return 0; ++} ++ ++/* 2 x main ++/* { dg-final { scan-assembler-times "brasl\t%r\[0-9\]*,__s390_indirect_jump" 2 } } */ ++/* { dg-final { scan-assembler "ex\t" } } */ ++ ++/* { dg-final { scan-assembler-not "section\t.s390_indirect_jump" } } */ ++/* { dg-final { scan-assembler "section\t.s390_indirect_call" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_return_reg" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_return_mem" } } */ +diff -Nrup 
gcc/testsuite/gcc.target/s390/nobp-indirect-jump-nothunk.c gcc/testsuite/gcc.target/s390/nobp-indirect-jump-nothunk.c +--- gcc/testsuite/gcc.target/s390/nobp-indirect-jump-nothunk.c 1969-12-31 17:00:00.000000000 -0700 ++++ gcc/testsuite/gcc.target/s390/nobp-indirect-jump-nothunk.c 2018-03-27 09:33:58.833861558 -0600 +@@ -0,0 +1,45 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -march=z10 --save-temps -mindirect-branch-jump=thunk-extern -mindirect-branch-table" } */ ++ ++/* This is a copy of the gcc.c-torture/execute/20040302-1.c ++ testcase. */ ++ ++int code[]={0,0,0,0,1}; ++ ++void ++foo(int x) { ++ volatile int b; ++ b = 0xffffffff; ++} ++ ++void ++bar(int *pc) { ++ static const void *l[] = {&&lab0, &&end}; ++ ++ foo(0); ++ goto *l[*pc]; ++ lab0: ++ foo(0); ++ pc++; ++ goto *l[*pc]; ++ end: ++ return; ++} ++ ++int ++main() { ++ bar(code); ++ return 0; ++} ++ ++/* 2 x bar ++/* { dg-final { scan-assembler-times "jg\t__s390_indirect_jump" 2 } } */ ++ ++/* No thunks due to thunk-extern. */ ++/* { dg-final { scan-assembler-not "exrl" } } */ ++/* { dg-final { scan-assembler-not ".globl __s390_indirect_jump" } } */ ++ ++/* { dg-final { scan-assembler "section\t.s390_indirect_jump" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_indirect_call" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_return_reg" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_return_mem" } } */ +diff -Nrup gcc/testsuite/gcc.target/s390/nobp-indirect-jump-z10.c gcc/testsuite/gcc.target/s390/nobp-indirect-jump-z10.c +--- gcc/testsuite/gcc.target/s390/nobp-indirect-jump-z10.c 1969-12-31 17:00:00.000000000 -0700 ++++ gcc/testsuite/gcc.target/s390/nobp-indirect-jump-z10.c 2018-03-27 09:33:58.834861551 -0600 +@@ -0,0 +1,42 @@ ++/* { dg-do run } */ ++/* { dg-options "-O3 -march=z10 --save-temps -mindirect-branch-jump=thunk -mindirect-branch-table" } */ ++ ++/* This is a copy of the gcc.c-torture/execute/20040302-1.c ++ testcase. */ ++ ++int code[]={0,0,0,0,1}; ++ ++void ++foo(int x) { ++ volatile int b; ++ b = 0xffffffff; ++} ++ ++void ++bar(int *pc) { ++ static const void *l[] = {&&lab0, &&end}; ++ ++ foo(0); ++ goto *l[*pc]; ++ lab0: ++ foo(0); ++ pc++; ++ goto *l[*pc]; ++ end: ++ return; ++} ++ ++int ++main() { ++ bar(code); ++ return 0; ++} ++ ++/* 2x bar */ ++/* { dg-final { scan-assembler-times "jg\t__s390_indirect_jump" 2 } } */ ++/* { dg-final { scan-assembler "exrl" } } */ ++ ++/* { dg-final { scan-assembler "section\t.s390_indirect_jump" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_indirect_call" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_return_reg" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_return_mem" } } */ +diff -Nrup gcc/testsuite/gcc.target/s390/nobp-indirect-jump-z900.c gcc/testsuite/gcc.target/s390/nobp-indirect-jump-z900.c +--- gcc/testsuite/gcc.target/s390/nobp-indirect-jump-z900.c 1969-12-31 17:00:00.000000000 -0700 ++++ gcc/testsuite/gcc.target/s390/nobp-indirect-jump-z900.c 2018-03-27 09:33:58.834861551 -0600 +@@ -0,0 +1,42 @@ ++/* { dg-do run } */ ++/* { dg-options "-O3 -march=z900 --save-temps -mindirect-branch-jump=thunk -mindirect-branch-table" } */ ++ ++/* This is a copy of the gcc.c-torture/execute/20040302-1.c ++ testcase. 
*/ ++ ++int code[]={0,0,0,0,1}; ++ ++void ++foo(int x) { ++ volatile int b; ++ b = 0xffffffff; ++} ++ ++void ++bar(int *pc) { ++ static const void *l[] = {&&lab0, &&end}; ++ ++ foo(0); ++ goto *l[*pc]; ++ lab0: ++ foo(0); ++ pc++; ++ goto *l[*pc]; ++ end: ++ return; ++} ++ ++int ++main() { ++ bar(code); ++ return 0; ++} ++ ++/* 2 x bar ++/* { dg-final { scan-assembler-times "jg\t__s390_indirect_jump" 2 } } */ ++/* { dg-final { scan-assembler "ex\t" } } */ ++ ++/* { dg-final { scan-assembler "section\t.s390_indirect_jump" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_indirect_call" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_return_reg" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_return_mem" } } */ +diff -Nrup gcc/testsuite/gcc.target/s390/nobp-return-mem-nothunk.c gcc/testsuite/gcc.target/s390/nobp-return-mem-nothunk.c +--- gcc/testsuite/gcc.target/s390/nobp-return-mem-nothunk.c 1969-12-31 17:00:00.000000000 -0700 ++++ gcc/testsuite/gcc.target/s390/nobp-return-mem-nothunk.c 2018-03-27 09:33:58.834861551 -0600 +@@ -0,0 +1,44 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -march=z10 -mzarch --save-temps -mfunction-return-mem=thunk-extern -mindirect-branch-table" } */ ++ ++int gl = 0; ++ ++int __attribute__((noinline,noclone)) ++bar (int a) ++{ ++ return a + 2; ++} ++ ++void __attribute__((noinline,noclone)) ++foo (int a) ++{ ++ int i; ++ ++ if (a == 42) ++ return; ++ ++ for (i = 0; i < a; i++) ++ gl += bar (i); ++} ++ ++int ++main () ++{ ++ foo (3); ++ if (gl != 9) ++ __builtin_abort (); ++ ++ return 0; ++} ++ ++/* 1 x foo, 1 x main ++/* { dg-final { scan-assembler-times "jg\t__s390_indirect_jump" 2 } } */ ++ ++/* No thunks due to thunk-extern. */ ++/* { dg-final { scan-assembler-not "exrl" } } */ ++/* { dg-final { scan-assembler-not ".globl __s390_indirect_jump" } } */ ++ ++/* { dg-final { scan-assembler-not "section\t.s390_indirect_jump" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_indirect_call" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_return_reg" } } */ ++/* { dg-final { scan-assembler "section\t.s390_return_mem" } } */ +diff -Nrup gcc/testsuite/gcc.target/s390/nobp-return-mem-z10.c gcc/testsuite/gcc.target/s390/nobp-return-mem-z10.c +--- gcc/testsuite/gcc.target/s390/nobp-return-mem-z10.c 1969-12-31 17:00:00.000000000 -0700 ++++ gcc/testsuite/gcc.target/s390/nobp-return-mem-z10.c 2018-03-27 09:33:58.835861544 -0600 +@@ -0,0 +1,41 @@ ++/* { dg-do run } */ ++/* { dg-options "-O3 -march=z10 -mzarch --save-temps -mfunction-return-mem=thunk -mindirect-branch-table" } */ ++ ++int gl = 0; ++ ++int __attribute__((noinline,noclone)) ++bar (int a) ++{ ++ return a + 2; ++} ++ ++void __attribute__((noinline,noclone)) ++foo (int a) ++{ ++ int i; ++ ++ if (a == 42) ++ return; ++ ++ for (i = 0; i < a; i++) ++ gl += bar (i); ++} ++ ++int ++main () ++{ ++ foo (3); ++ if (gl != 9) ++ __builtin_abort (); ++ ++ return 0; ++} ++ ++/* 1 x foo, 1 x main ++/* { dg-final { scan-assembler-times "jg\t__s390_indirect_jump" 2 } } */ ++/* { dg-final { scan-assembler "exrl" } } */ ++ ++/* { dg-final { scan-assembler-not "section\t.s390_indirect_jump" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_indirect_call" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_return_reg" } } */ ++/* { dg-final { scan-assembler "section\t.s390_return_mem" } } */ +diff -Nrup gcc/testsuite/gcc.target/s390/nobp-return-mem-z900.c gcc/testsuite/gcc.target/s390/nobp-return-mem-z900.c +--- 
gcc/testsuite/gcc.target/s390/nobp-return-mem-z900.c 1969-12-31 17:00:00.000000000 -0700 ++++ gcc/testsuite/gcc.target/s390/nobp-return-mem-z900.c 2018-03-27 09:33:58.835861544 -0600 +@@ -0,0 +1,42 @@ ++/* { dg-do run } */ ++/* { dg-options "-O3 -march=z900 --save-temps -mfunction-return-mem=thunk -mindirect-branch-table" } */ ++ ++int gl = 0; ++ ++int __attribute__((noinline,noclone)) ++bar (int a) ++{ ++ return a + 2; ++} ++ ++void __attribute__((noinline,noclone)) ++foo (int a) ++{ ++ int i; ++ ++ if (a == 42) ++ return; ++ ++ for (i = 0; i < a; i++) ++ gl += bar (i); ++} ++ ++int ++main () ++{ ++ foo (3); ++ if (gl != 9) ++ __builtin_abort (); ++ ++ return 0; ++} ++ ++/* 1 x foo, 1 x main ++/* { dg-final { scan-assembler-times "jg\t__s390_indirect_jump" 2 } } */ ++ ++/* { dg-final { scan-assembler "ex\t" } } */ ++ ++/* { dg-final { scan-assembler-not "section\t.s390_indirect_jump" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_indirect_call" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_return_reg" } } */ ++/* { dg-final { scan-assembler "section\t.s390_return_mem" } } */ +diff -Nrup gcc/testsuite/gcc.target/s390/nobp-return-reg-nothunk.c gcc/testsuite/gcc.target/s390/nobp-return-reg-nothunk.c +--- gcc/testsuite/gcc.target/s390/nobp-return-reg-nothunk.c 1969-12-31 17:00:00.000000000 -0700 ++++ gcc/testsuite/gcc.target/s390/nobp-return-reg-nothunk.c 2018-03-27 09:33:58.835861544 -0600 +@@ -0,0 +1,44 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -march=z10 --save-temps -mfunction-return-reg=thunk-extern -mindirect-branch-table" } */ ++ ++int gl = 0; ++ ++int __attribute__((noinline,noclone)) ++bar (int a) ++{ ++ return a + 2; ++} ++ ++void __attribute__((noinline,noclone)) ++foo (int a) ++{ ++ int i; ++ ++ if (a == 42) ++ return; ++ ++ for (i = 0; i < a; i++) ++ gl += bar (i); ++} ++ ++int ++main () ++{ ++ foo (3); ++ if (gl != 9) ++ __builtin_abort (); ++ ++ return 0; ++} ++ ++/* 1 x bar ++/* { dg-final { scan-assembler-times "jg\t__s390_indirect_jump" 1 } } */ ++ ++/* No thunks due to thunk-extern. 
*/ ++/* { dg-final { scan-assembler-not "exrl" } } */ ++/* { dg-final { scan-assembler-not ".globl __s390_indirect_jump" } } */ ++ ++/* { dg-final { scan-assembler-not "section\t.s390_indirect_jump" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_indirect_call" } } */ ++/* { dg-final { scan-assembler "section\t.s390_return_reg" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_return_mem" } } */ +diff -Nrup gcc/testsuite/gcc.target/s390/nobp-return-reg-z10.c gcc/testsuite/gcc.target/s390/nobp-return-reg-z10.c +--- gcc/testsuite/gcc.target/s390/nobp-return-reg-z10.c 1969-12-31 17:00:00.000000000 -0700 ++++ gcc/testsuite/gcc.target/s390/nobp-return-reg-z10.c 2018-03-27 09:33:58.836861537 -0600 +@@ -0,0 +1,41 @@ ++/* { dg-do run } */ ++/* { dg-options "-O3 -march=z10 --save-temps -mfunction-return-reg=thunk -mindirect-branch-table" } */ ++ ++int gl = 0; ++ ++int __attribute__((noinline,noclone)) ++bar (int a) ++{ ++ return a + 2; ++} ++ ++void __attribute__((noinline,noclone)) ++foo (int a) ++{ ++ int i; ++ ++ if (a == 42) ++ return; ++ ++ for (i = 0; i < a; i++) ++ gl += bar (i); ++} ++ ++int ++main () ++{ ++ foo (3); ++ if (gl != 9) ++ __builtin_abort (); ++ ++ return 0; ++} ++ ++/* 1 x bar ++/* { dg-final { scan-assembler-times "jg\t__s390_indirect_jump" 1 } } */ ++/* { dg-final { scan-assembler "exrl" } } */ ++ ++/* { dg-final { scan-assembler-not "section\t.s390_indirect_jump" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_indirect_call" } } */ ++/* { dg-final { scan-assembler "section\t.s390_return_reg" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_return_mem" } } */ +diff -Nrup gcc/testsuite/gcc.target/s390/nobp-return-reg-z900.c gcc/testsuite/gcc.target/s390/nobp-return-reg-z900.c +--- gcc/testsuite/gcc.target/s390/nobp-return-reg-z900.c 1969-12-31 17:00:00.000000000 -0700 ++++ gcc/testsuite/gcc.target/s390/nobp-return-reg-z900.c 2018-03-27 09:33:58.836861537 -0600 +@@ -0,0 +1,41 @@ ++/* { dg-do run } */ ++/* { dg-options "-O3 -march=z900 --save-temps -mfunction-return-reg=thunk -mindirect-branch-table" } */ ++ ++int gl = 0; ++ ++int __attribute__((noinline,noclone)) ++bar (int a) ++{ ++ return a + 2; ++} ++ ++void __attribute__((noinline,noclone)) ++foo (int a) ++{ ++ int i; ++ ++ if (a == 42) ++ return; ++ ++ for (i = 0; i < a; i++) ++ gl += bar (i); ++} ++ ++int ++main () ++{ ++ foo (3); ++ if (gl != 9) ++ __builtin_abort (); ++ ++ return 0; ++} ++ ++/* 1 x bar ++/* { dg-final { scan-assembler-times "jg\t__s390_indirect_jump" 1 } } */ ++/* { dg-final { scan-assembler "ex\t" } } */ ++ ++/* { dg-final { scan-assembler-not "section\t.s390_indirect_jump" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_indirect_call" } } */ ++/* { dg-final { scan-assembler "section\t.s390_return_reg" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_return_mem" } } */ +diff -Nrup gcc/testsuite/gcc.target/s390/nobp-table-jump-z10.c gcc/testsuite/gcc.target/s390/nobp-table-jump-z10.c +--- gcc/testsuite/gcc.target/s390/nobp-table-jump-z10.c 1969-12-31 17:00:00.000000000 -0700 ++++ gcc/testsuite/gcc.target/s390/nobp-table-jump-z10.c 2018-03-27 09:33:58.836861537 -0600 +@@ -0,0 +1,77 @@ ++/* { dg-do run } */ ++/* { dg-options "-O3 -march=z10 -mzarch --save-temps -mindirect-branch-jump=thunk -mindirect-branch-table" } */ ++/* case-values-threshold will be set to 20 by the back-end when jump ++ thunk are requested. 
*/ ++ ++int __attribute__((noinline,noclone)) foo1 (void) { return 1; } ++int __attribute__((noinline,noclone)) foo2 (void) { return 2; } ++int __attribute__((noinline,noclone)) foo3 (void) { return 3; } ++int __attribute__((noinline,noclone)) foo4 (void) { return 4; } ++int __attribute__((noinline,noclone)) foo5 (void) { return 5; } ++int __attribute__((noinline,noclone)) foo6 (void) { return 6; } ++int __attribute__((noinline,noclone)) foo7 (void) { return 7; } ++int __attribute__((noinline,noclone)) foo8 (void) { return 8; } ++int __attribute__((noinline,noclone)) foo9 (void) { return 9; } ++int __attribute__((noinline,noclone)) foo10 (void) { return 10; } ++int __attribute__((noinline,noclone)) foo11 (void) { return 11; } ++int __attribute__((noinline,noclone)) foo12 (void) { return 12; } ++int __attribute__((noinline,noclone)) foo13 (void) { return 13; } ++int __attribute__((noinline,noclone)) foo14 (void) { return 14; } ++int __attribute__((noinline,noclone)) foo15 (void) { return 15; } ++int __attribute__((noinline,noclone)) foo16 (void) { return 16; } ++int __attribute__((noinline,noclone)) foo17 (void) { return 17; } ++int __attribute__((noinline,noclone)) foo18 (void) { return 18; } ++int __attribute__((noinline,noclone)) foo19 (void) { return 19; } ++int __attribute__((noinline,noclone)) foo20 (void) { return 20; } ++ ++ ++int __attribute__((noinline,noclone)) ++bar (int a) ++{ ++ int ret = 0; ++ ++ switch (a) ++ { ++ case 1: ret = foo1 (); break; ++ case 2: ret = foo2 (); break; ++ case 3: ret = foo3 (); break; ++ case 4: ret = foo4 (); break; ++ case 5: ret = foo5 (); break; ++ case 6: ret = foo6 (); break; ++ case 7: ret = foo7 (); break; ++ case 8: ret = foo8 (); break; ++ case 9: ret = foo9 (); break; ++ case 10: ret = foo10 (); break; ++ case 11: ret = foo11 (); break; ++ case 12: ret = foo12 (); break; ++ case 13: ret = foo13 (); break; ++ case 14: ret = foo14 (); break; ++ case 15: ret = foo15 (); break; ++ case 16: ret = foo16 (); break; ++ case 17: ret = foo17 (); break; ++ case 18: ret = foo18 (); break; ++ case 19: ret = foo19 (); break; ++ case 20: ret = foo20 (); break; ++ default: ++ __builtin_abort (); ++ } ++ ++ return ret; ++} ++ ++int ++main () ++{ ++ if (bar (3) != 3) ++ __builtin_abort (); ++ ++ return 0; ++} ++ ++/* 1 x bar ++/* { dg-final { scan-assembler-times "exrl" 1 } } */ ++ ++/* { dg-final { scan-assembler "section\t.s390_indirect_jump" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_indirect_call" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_return_fromreg" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_return_frommem" } } */ +diff -Nrup gcc/testsuite/gcc.target/s390/nobp-table-jump-z900.c gcc/testsuite/gcc.target/s390/nobp-table-jump-z900.c +--- gcc/testsuite/gcc.target/s390/nobp-table-jump-z900.c 1969-12-31 17:00:00.000000000 -0700 ++++ gcc/testsuite/gcc.target/s390/nobp-table-jump-z900.c 2018-03-27 09:33:58.837861529 -0600 +@@ -0,0 +1,78 @@ ++/* { dg-do run } */ ++/* { dg-options "-O3 -march=z900 -mzarch --save-temps -mindirect-branch-jump=thunk -mindirect-branch-table" } */ ++ ++/* case-values-threshold will be set to 20 by the back-end when jump ++ thunk are requested. 
*/ ++ ++int __attribute__((noinline,noclone)) foo1 (void) { return 1; } ++int __attribute__((noinline,noclone)) foo2 (void) { return 2; } ++int __attribute__((noinline,noclone)) foo3 (void) { return 3; } ++int __attribute__((noinline,noclone)) foo4 (void) { return 4; } ++int __attribute__((noinline,noclone)) foo5 (void) { return 5; } ++int __attribute__((noinline,noclone)) foo6 (void) { return 6; } ++int __attribute__((noinline,noclone)) foo7 (void) { return 7; } ++int __attribute__((noinline,noclone)) foo8 (void) { return 8; } ++int __attribute__((noinline,noclone)) foo9 (void) { return 9; } ++int __attribute__((noinline,noclone)) foo10 (void) { return 10; } ++int __attribute__((noinline,noclone)) foo11 (void) { return 11; } ++int __attribute__((noinline,noclone)) foo12 (void) { return 12; } ++int __attribute__((noinline,noclone)) foo13 (void) { return 13; } ++int __attribute__((noinline,noclone)) foo14 (void) { return 14; } ++int __attribute__((noinline,noclone)) foo15 (void) { return 15; } ++int __attribute__((noinline,noclone)) foo16 (void) { return 16; } ++int __attribute__((noinline,noclone)) foo17 (void) { return 17; } ++int __attribute__((noinline,noclone)) foo18 (void) { return 18; } ++int __attribute__((noinline,noclone)) foo19 (void) { return 19; } ++int __attribute__((noinline,noclone)) foo20 (void) { return 20; } ++ ++ ++int __attribute__((noinline,noclone)) ++bar (int a) ++{ ++ int ret = 0; ++ ++ switch (a) ++ { ++ case 1: ret = foo1 (); break; ++ case 2: ret = foo2 (); break; ++ case 3: ret = foo3 (); break; ++ case 4: ret = foo4 (); break; ++ case 5: ret = foo5 (); break; ++ case 6: ret = foo6 (); break; ++ case 7: ret = foo7 (); break; ++ case 8: ret = foo8 (); break; ++ case 9: ret = foo9 (); break; ++ case 10: ret = foo10 (); break; ++ case 11: ret = foo11 (); break; ++ case 12: ret = foo12 (); break; ++ case 13: ret = foo13 (); break; ++ case 14: ret = foo14 (); break; ++ case 15: ret = foo15 (); break; ++ case 16: ret = foo16 (); break; ++ case 17: ret = foo17 (); break; ++ case 18: ret = foo18 (); break; ++ case 19: ret = foo19 (); break; ++ case 20: ret = foo20 (); break; ++ default: ++ __builtin_abort (); ++ } ++ ++ return ret; ++} ++ ++int ++main () ++{ ++ if (bar (3) != 3) ++ __builtin_abort (); ++ ++ return 0; ++} ++ ++/* 1 x bar ++/* { dg-final { scan-assembler-times "ex\t" 1 } } */ ++ ++/* { dg-final { scan-assembler "section\t.s390_indirect_jump" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_indirect_call" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_return_fromreg" } } */ ++/* { dg-final { scan-assembler-not "section\t.s390_return_frommem" } } */ diff --git a/gcc48-rh1555397.patch b/gcc48-rh1555397.patch new file mode 100644 index 0000000..317859f --- /dev/null +++ b/gcc48-rh1555397.patch @@ -0,0 +1,263 @@ +2017-06-28 Andreas Krebbel + + * config/s390/predicates.md: Use s390_rel_address_ok_p. + * config/s390/s390-protos.h: Add prototype of + s390_rel_address_ok_p. + * config/s390/s390.c (s390_got_symbol): New function. + (s390_rel_address_ok_p): New function. + (legitimize_pic_address): Use s390_rel_address_ok_p. + (s390_load_got): Use s390_got_symbol. + (s390_option_override): Issue error if + -mno-pic-data-is-text-relative is used without -fpic/-fPIC. + * config/s390/s390.h (TARGET_DEFAULT_PIC_DATA_IS_TEXT_RELATIVE): + New macro. + * config/s390/s390.opt: New option mpic-data-is-text-relative. 
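
Before the patch body itself, a short illustration of what -mpic-data-is-text-relative changes. The example is hypothetical and not part of the patch; it mirrors the nodatarel-1.c testcase added below, which scans for exactly this kind of @GOTENT access.

/* Hypothetical example; compile with
   -fpic -mno-pic-data-is-text-relative.  */

static int counter;

int
bump (void)
{
  /* With the default -mpic-data-is-text-relative, "counter" may be
     addressed relative to the code (e.g. with larl), relying on text
     and data keeping their link-time distance.  With
     -mno-pic-data-is-text-relative that assumption is not allowed, so
     the access must go through the GOT (counter@GOTENT), which is the
     same thing nodatarel-1.c checks for its static variable "a".  */
  return ++counter;
}
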
+ +--- gcc/config/s390/predicates.md 2013-08-14 07:55:12.000000000 -0400 ++++ gcc/config/s390/predicates.md 2018-04-09 21:36:49.428209951 -0400 +@@ -116,7 +116,7 @@ + if (GET_CODE (op) == SYMBOL_REF) + return (!SYMBOL_REF_ALIGN1_P (op) + && SYMBOL_REF_TLS_MODEL (op) == 0 +- && (!flag_pic || SYMBOL_REF_LOCAL_P (op))); ++ && s390_rel_address_ok_p (op)); + + /* Everything else must have a CONST, so strip it. */ + if (GET_CODE (op) != CONST) +@@ -141,7 +141,7 @@ + if (GET_CODE (op) == SYMBOL_REF) + return ((SYMBOL_REF_FLAGS (op) & SYMBOL_FLAG_ALIGN1) == 0 + && SYMBOL_REF_TLS_MODEL (op) == 0 +- && (!flag_pic || SYMBOL_REF_LOCAL_P (op))); ++ && s390_rel_address_ok_p (op)); + + /* Now we must have a @GOTENT offset or @PLT stub + or an @INDNTPOFF TLS offset. */ +--- gcc/config/s390/s390.c 2015-06-18 10:33:04.000000000 -0400 ++++ gcc/config/s390/s390.c 2018-04-09 21:32:43.489851529 -0400 +@@ -491,6 +491,23 @@ s390_label_align (rtx label) + return align_labels_log; + } + ++static GTY(()) rtx got_symbol; ++ ++/* Return the GOT table symbol. The symbol will be created when the ++ function is invoked for the first time. */ ++ ++static rtx ++s390_got_symbol (void) ++{ ++ if (!got_symbol) ++ { ++ got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); ++ SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL; ++ } ++ ++ return got_symbol; ++} ++ + static enum machine_mode + s390_libgcc_cmp_return_mode (void) + { +@@ -1863,6 +1880,9 @@ s390_option_override (void) + if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3) + flag_prefetch_loop_arrays = 1; + ++ if (!s390_pic_data_is_text_relative && !flag_pic) ++ error ("-mno-pic-data-is-text-relative cannot be used without -fpic/-fPIC"); ++ + /* Use the alternative scheduling-pressure algorithm by default. */ + maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2, + global_options.x_param_values, +@@ -3557,6 +3577,26 @@ s390_load_address (rtx dst, rtx src) + emit_insn (gen_force_la_31 (dst, src)); + } + ++/* Return true if it ok to use SYMBOL_REF in a relative address. */ ++ ++bool ++s390_rel_address_ok_p (rtx symbol_ref) ++{ ++ tree decl; ++ ++ if (symbol_ref == s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref)) ++ return true; ++ ++ decl = SYMBOL_REF_DECL (symbol_ref); ++ ++ if (!flag_pic || SYMBOL_REF_LOCAL_P (symbol_ref)) ++ return (s390_pic_data_is_text_relative ++ || (decl ++ && TREE_CODE (decl) == FUNCTION_DECL)); ++ ++ return false; ++} ++ + /* Return a legitimate reference for ORIG (an address) using the + register REG. If REG is 0, a new pseudo is generated. + +@@ -3594,7 +3634,7 @@ legitimize_pic_address (rtx orig, rtx re + } + + if ((GET_CODE (addr) == LABEL_REF +- || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr)) ++ || (GET_CODE (addr) == SYMBOL_REF && s390_rel_address_ok_p (addr)) + || (GET_CODE (addr) == UNSPEC && + (XINT (addr, 1) == UNSPEC_GOTENT + || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT)))) +@@ -8545,7 +8585,6 @@ restore_gprs (rtx base, int offset, int + + /* Return insn sequence to load the GOT register. */ + +-static GTY(()) rtx got_symbol; + rtx + s390_load_got (void) + { +@@ -8557,23 +8596,17 @@ s390_load_got (void) + aren't usable. 
*/ + rtx got_rtx = gen_rtx_REG (Pmode, 12); + +- if (!got_symbol) +- { +- got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); +- SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL; +- } +- + start_sequence (); + + if (TARGET_CPU_ZARCH) + { +- emit_move_insn (got_rtx, got_symbol); ++ emit_move_insn (got_rtx, s390_got_symbol ()); + } + else + { + rtx offset; + +- offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol), ++ offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, s390_got_symbol ()), + UNSPEC_LTREL_OFFSET); + offset = gen_rtx_CONST (Pmode, offset); + offset = force_const_mem (Pmode, offset); +--- gcc/config/s390/s390.h 2014-01-14 10:37:03.000000000 -0500 ++++ gcc/config/s390/s390.h 2018-04-09 21:21:28.076858052 -0400 +@@ -814,6 +814,10 @@ do { \ + + #define LEGITIMATE_PIC_OPERAND_P(X) legitimate_pic_operand_p (X) + ++#ifndef TARGET_DEFAULT_PIC_DATA_IS_TEXT_RELATIVE ++#define TARGET_DEFAULT_PIC_DATA_IS_TEXT_RELATIVE 1 ++#endif ++ + + /* Assembler file format. */ + +--- gcc/config/s390/s390.opt 2015-06-18 10:33:05.000000000 -0400 ++++ gcc/config/s390/s390.opt 2018-04-09 21:28:18.842465842 -0400 +@@ -158,6 +158,10 @@ mzarch + Target Report RejectNegative Negative(mesa) Mask(ZARCH) + z/Architecture + ++mpic-data-is-text-relative ++Target Report Var(s390_pic_data_is_text_relative) Init(TARGET_DEFAULT_PIC_DATA_IS_TEXT_RELATIVE) ++Assume data segments are relative to text segment. ++ + mbranch-cost= + Target Report Joined RejectNegative UInteger Var(s390_branch_cost) Init(1) + Set the branch costs for conditional branch instructions. Reasonable +--- gcc/config/s390/s390-protos.h 2014-01-14 10:37:04.000000000 -0500 ++++ gcc/config/s390/s390-protos.h 2018-04-09 21:21:28.072858046 -0400 +@@ -52,6 +52,7 @@ extern bool s390_contiguous_bitmask_p (u + extern bool s390_split_ok_p (rtx, rtx, enum machine_mode, int); + extern bool s390_overlap_p (rtx, rtx, HOST_WIDE_INT); + extern bool s390_offset_p (rtx, rtx, rtx); ++extern bool s390_rel_address_ok_p (rtx); + extern int tls_symbolic_operand (rtx); + + extern bool s390_match_ccmode (rtx, enum machine_mode); +--- gcc/testsuite/gcc.target/s390/nodatarel-1.c 1969-12-31 19:00:00.000000000 -0500 ++++ gcc/testsuite/gcc.target/s390/nodatarel-1.c 2018-04-09 21:21:28.077858053 -0400 +@@ -0,0 +1,83 @@ ++/* Test -mno-pic-data-is-text-relative option. No relative addressing ++ of elements in .data and .bss are allowed with that option. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -fno-optimize-sibling-calls -fpic -mno-pic-data-is-text-relative -march=z10 -mtune=z9-109 -mzarch" } */ ++ ++static int a = 3; ++ ++/* With -mno-pic-data-is-text-relative these must be addressed via ++ GOT. */ ++ ++int __attribute__((noinline,noclone)) ++foo () ++{ ++ return a; ++} ++ ++static int __attribute__((noinline,noclone)) ++foostatic (void) ++{ ++ return a; ++} ++ ++/* Just to make a potentially modified. */ ++ ++void ++bar (int b) ++{ ++ a = b; ++} ++ ++/* { dg-final { scan-assembler-times "a@GOTENT" 3 } } */ ++ ++/* The exrl target is a label_ref which should not be affected at ++ all. */ ++ ++void ++mymemcpy (char *dst, char *src, long size) ++{ ++ __builtin_memcpy (dst, src, size); ++} ++ ++/* { dg-final { scan-assembler "exrl" } } */ ++ ++ ++/* PLT slots can still be addressed relatively. */ ++ ++int ++callfoo () ++{ ++ return foo (); ++} ++ ++/* { dg-final { scan-assembler-times "foo@PLT" 1 } } */ ++ ++ ++/* GOT entries can still be addressed relatively. 
*/ ++ ++void * ++fooptr () ++{ ++ return &foo; ++} ++ ++/* { dg-final { scan-assembler-times "foo@GOTENT" 1 } } */ ++ ++ ++/* A static function can be addressed relatively. */ ++ ++int ++callfoostatic () ++{ ++ return foostatic (); ++} ++ ++void * ++foostaticptr () ++{ ++ return &foostatic; ++} ++ ++ ++/* { dg-final { scan-assembler-not "foostatic@" } } */ diff --git a/gcc48-rh330771.patch b/gcc48-rh330771.patch new file mode 100644 index 0000000..102730f --- /dev/null +++ b/gcc48-rh330771.patch @@ -0,0 +1,27 @@ +2007-10-16 Jakub Jelinek + + * Makefile.am (libgcj_tools_la_LIBADD): Add. + * Makefile.in: Regenerated. + +--- libjava/Makefile.am.jj 2009-05-06 08:14:50.000000000 +0200 ++++ libjava/Makefile.am 2009-05-06 10:26:43.000000000 +0200 +@@ -550,7 +550,7 @@ libgcj_tools_la_LDFLAGS = -rpath $(toole + $(LIBGCJ_LD_SYMBOLIC_FUNCTIONS) $(LIBJAVA_LDFLAGS_NOUNDEF) \ + $(LIBJAVA_LDFLAGS_LIBMATH) + +-libgcj_tools_la_LIBADD = libgcj.la ++libgcj_tools_la_LIBADD = -L$(here)/.libs libgcj.la + libgcj_tools_la_DEPENDENCIES = libgcj.la libgcj.spec \ + $(libgcj_tools_la_version_dep) + if BUILD_SUBLIBS +--- libjava/Makefile.in.jj 2009-05-06 08:14:49.000000000 +0200 ++++ libjava/Makefile.in 2009-05-06 10:27:18.000000000 +0200 +@@ -1110,7 +1110,7 @@ libgcj_tools_la_LDFLAGS = -rpath $(toole + $(LIBGCJ_LD_SYMBOLIC_FUNCTIONS) $(LIBJAVA_LDFLAGS_NOUNDEF) \ + $(LIBJAVA_LDFLAGS_LIBMATH) + +-libgcj_tools_la_LIBADD = libgcj.la ++libgcj_tools_la_LIBADD = -L$(here)/.libs libgcj.la + libgcj_tools_la_DEPENDENCIES = libgcj.la libgcj.spec \ + $(libgcj_tools_la_version_dep) $(am__append_19) + libgcj_tools_la_LINK = $(LIBLINK) $(libgcj_tools_la_LDFLAGS) \ diff --git a/gcc48-s390-z13.patch b/gcc48-s390-z13.patch new file mode 100644 index 0000000..05a3d5f --- /dev/null +++ b/gcc48-s390-z13.patch @@ -0,0 +1,16938 @@ +Backport of trunk revisions: r214898, r221047, r223367, r223368, r223369, r223393, r223395, r223396, r223397, r223398, r223399, r223400, r223403, r224227, r224867, r224868, r224869, r224870, r224871, r224872, r224873, r224874, r226671, r226672, r227058, r227635, r227636, r227637, r227780, r231153, r231154, r231155, r231156, r231157, r231158, r231159, r231809, r232972, r232973, r233548, r233549, r233550, r233552, r233553, r233554, r233555, r233556, r233623, r236067 + +2016-05-10 Andreas Krebbel + + * config/s390/s390.md ("*vec_cmpdf_cconly") + ("*fixuns_truncdfdi2_z13") + ("*fixuns_trunc2_z196") + ("*fix_truncdfdi2_bfp_z13", "*floatunsdidf2_z13") + ("*extendsfdf2_z13"): Replace TARGET_Z13 with TARGET_VX. + +2016-02-23 Andreas Krebbel + + * gcc.target/s390/md/movstr-2.c: Move and rename to ... + * gcc.target/s390/vector/stpcpy-1.c: ... this one. + +2016-02-19 Andreas Krebbel + + * config/s390/vector.md: Add missing commutative operand markers + to the patterns which qualify for one. + * config/s390/vx-builtins.md: Likewise. + +2016-02-19 Andreas Krebbel + + * config/s390/vector.md (VI, VI_QHS): Add single element vector + types to mode iterators. + (vec_double): ... and mode attribute. + * config/s390/vx-builtins.md (non_vec_int): Likewise. + +2016-02-19 Andreas Krebbel + + * config/s390/vector.md ("add3", "sub3"): + Change the predicate of op2 from nonimmediate to general and let + reload fix it if necessary. + + * gcc.target/s390/vector/int128-1.c: New test. + +2016-02-19 Andreas Krebbel + + * config/s390/vecintrin.h (vec_sub_u128): Define missing macro. + +2016-02-19 Andreas Krebbel + + * config/s390/s390.c (s390_expand_vcond): Use the compare operand + mode. 
+ + * gcc.target/s390/vector/vec-vcond-1.c: New test. + +2016-02-19 Andreas Krebbel + + * config/s390/s390-protos.h: Add s390_expand_vec_movstr prototype. + * config/s390/s390.c (s390_expand_vec_movstr): New function. + * config/s390/s390.md ("movstr"): Call + s390_expand_vec_movstr. + + * gcc.target/s390/md/movstr-2.c: New test. + +2016-02-19 Andreas Krebbel + + * config/s390/s390.md: Add missing output modifier for operand 1 + to print it as address properly. + +2016-02-19 Andreas Krebbel + + * config/s390/2827.md: Rename ooo_* insn attributes to zEC12_*. + * config/s390/2964.md: New file. + * config/s390/s390.c (s390_get_sched_attrmask): Use the right set + of insn grouping attributes depending on the CPU level. + (s390_get_unit_mask): New function. + (s390_sched_score): Remove the OOO from the scheduling macros. + Add loop to calculate a score for the instruction mix. + (s390_sched_reorder): Likewise plus improve debug output. + (s390_sched_variable_issue): Rename macros as above. Calculate + the unit distances after actually scheduling an insn. Improve + debug output. + (s390_sched_init): Clear last_scheduled_unit_distance array. + * config/s390/s390.md: Include 2964.md. + +2016-01-29 Dominik Vogt + + * config/s390/s390-c.c (s390_resolve_overloaded_builtin): Format + declaration name with %qs and print it in both error messages. + Also fix indentation. + +2016-01-29 Dominik Vogt + + PR other/69006 + * config/s390/s390-c.c (s390_resolve_overloaded_builtin): Remove + trailing blank line from error message. + +2015-12-18 Robin Dapp + + * config/s390/predicates.md: Change and rename + constm1_operand to all_ones_operand + * config/s390/s390.c (s390_expand_vcond): Use all_ones_operand + * config/s390/vector.md: Likewise + +2015-12-02 Andreas Krebbel + + * config/s390/predicates.md (const_mask_operand): New predicate. + * config/s390/s390-builtins.def: Set a smaller bitmask for a few builtins. + * config/s390/vector.md: Change predicate from immediate_operand + to either const_int_operand or const_mask_operand. Add special + insn conditions on patterns which have to exclude certain values. + * config/s390/vx-builtins.md: Likewise. + +2015-12-02 Andreas Krebbel + + * config/s390/vector.md ("*vec_set"): Change shift count + mode from DI to SI. + +2015-12-02 Andreas Krebbel + + * config/s390/s390-builtin-types.def: New builtin types added. + * config/s390/s390-builtins.def: Add s390_vec_splat_* definitions. + * config/s390/s390.c (s390_expand_builtin): Always truncate + constants to the mode in the pattern. + * config/s390/vecintrin.h: Let the vec_splat_* macros point to the + respective builtin __builtin_s390_vec_splat_*. + + * gcc.target/s390/zvector/vec-splat-2.c: New test. + +2015-12-02 Andreas Krebbel + + * config/s390/s390-builtin-types.def: Sort builtin types. + +2015-12-02 Andreas Krebbel + + * config/s390/s390-c.c (s390_get_vstring_flags): Invert the + condition for the RT flag. + +2015-12-02 Andreas Krebbel + + * config/s390/constraints.md ("jKK"): New constraint. + * config/s390/s390.c (tm-constrs.h): Include for + satisfies_constraint_*. + (s390_legitimate_constant_p): Allow jKK constants. Use + satisfies_constraint_* also for the others. + (legitimate_reload_vector_constant_p): Likewise. + (print_operand): Allow h output modifier on vectors. + * config/s390/vector.md ("mov"): Add vrepi. + + * gcc.target/s390/vector/vec-vrepi-1.c: New test. + +2015-12-02 Andreas Krebbel + + * config/s390/vector.md ("*vec_splats"): Fix constraint + latter I->K. 
+ + * gcc.target/s390/zvector/vec-splat-1.c: New test. + +2015-09-15 Andreas Krebbel + + * config/s390/s390.c (s390_const_operand_ok): Add missing + brackets. + +2015-09-10 Andreas Krebbel + + * config/s390/s390.c (s390_contiguous_bitmask_vector_p): Reject if + the vector element is bigger than 64 bit. + + * gcc.target/s390/vector/vec-genbytemask-1.c: Add check for V1TI + initialization with a byte mask. No change expected here. + * gcc.target/s390/vector/vec-genmask-1.c: Fix whitespace. + * gcc.target/s390/vector/vec-genmask-2.c: Add check for V1TI + initialization with contigious bitmask. Literal pool is expectd + to be used here. + +2015-09-10 Andreas Krebbel + + * config/s390/vx-builtins.md ("vec_vmal", "vec_vmah") + ("vec_vmalh"): Change mode iterator from VI_HW to VI_HW_QHS. + +2015-09-10 Andreas Krebbel + + * config/s390/s390.c: Add V1TImode to constant pool modes. + +2015-08-21 Dominik Vogt + + * config/s390/s390-builtins.def: Fix value range of vec_load_bndry. + + * gcc.target/s390/zvector/vec-load_bndry-1.c: New test. + +2015-08-06 Andreas Krebbel + + * config/s390/s390.c (s390_expand_tbegin): Expand either + tbegin_1_z13 or tbegin_1 depending on VX flag. + * config/s390/s390.md ("tbegin_1_z13"): New expander. + + * gcc.target/s390/htm-builtins-z13-1.c: New test. + +2015-08-06 Andreas Krebbel + + * config/s390/s390.opt: Clarify description for -mzvector + * doc/invoke.texi: Add documentation for -mhtm, -mvx, and + -mzvector. + +2015-06-24 Andreas Krebbel + + * config/s390/vx-builtins.md + ("vec_scatter_element_") + ("vec_scatter_element_SI"): Replace gf mode + attribute with bhfgq. + +2015-06-24 Andreas Krebbel + + * config/s390/s390-builtins.def: Fix vpopct instruction comments. + +2015-06-24 Andreas Krebbel + + * config/s390/s390-builtin-types.def: Add flag to indicate the + options under which the function type is needed. + * config/s390/s390-builtins.def: Add flag to indicate the options + under which the builtin is enabled. + * config/s390/s390-builtins.h: Add flags parameter to macro + definitions. + (bflags_for_builtin): New function. + (flags_for_builtin): Renamed to ... + (opflags_for_builtin): ... this. + * config/s390/s390-c.c (s390_resolve_overloaded_builtin): Rename + flags_for_builtin to bflags_for_builtin and + flags_overloaded_builtin_var to opflags_overloaded_builtin_var. + * config/s390/s390.c: Add initialization of bflags_builtin and + opflags_builtin arrays. + Remove code for flags_builtin. + (s390_init_builtins): Only create builtin function types if one of + their flags is active. + Only create builtins if all of their flags are active. + (s390_expand_builtin): Rename flags_for_builtin to + opflags_for_builtin. + +2015-06-24 Andreas Krebbel + + * config/s390/vecintrin.h: Remove internal builtins. + +2015-06-24 Andreas Krebbel + + * config/s390/s390.c (s390_secondary_reload): Fix check for + GENERAL_REGS register class. + +2015-06-24 Andreas Krebbel + + * config/s390/s390.c (s390_support_vector_misalignment): Call + default implementation for !TARGET_VX. + +2015-06-24 Andreas Krebbel + + * config/s390/s390.c (s390_legitimate_constant_p): Add + TARGET_VX check. + +2015-06-24 Andreas Krebbel + + * config/s390/s390.c (s390_vector_abi): New variable definition. + (s390_check_type_for_vector_abi): New function. + (TARGET_ASM_FILE_END): New macro definition. + (s390_asm_file_end): New function. + (s390_function_arg): Call s390_check_type_for_vector_abi. + (s390_gimplify_va_arg): Likewise. + * configure: Regenerate. 
+ * configure.ac: Check for .gnu_attribute Binutils feature. + + * gcc.target/s390/vector/vec-abi-1.c: Add gnu attribute check. + * gcc.target/s390/vector/vec-abi-attr-1.c: New test. + * gcc.target/s390/vector/vec-abi-attr-2.c: New test. + * gcc.target/s390/vector/vec-abi-attr-3.c: New test. + * gcc.target/s390/vector/vec-abi-attr-4.c: New test. + * gcc.target/s390/vector/vec-abi-attr-5.c: New test. + * gcc.target/s390/vector/vec-abi-attr-6.c: New test. + +2015-06-08 Jakub Jelinek + + * genattrtab.c (insn_alternatives): Change type from int * + to uint64_t *. + (check_attr_test): Shift ((uint64_t) 1) instead of 1 up. + (get_attr_value): Change type of num_alt to uint64_t. + (compute_alternative_mask): Change return type from + int to uint64_t, shift ((uint64_t) 1) instead of 1 up. + (make_alternative_compare, mk_attr_alt): Change argument type + from int to uint64_t. + (simplify_test_exp): Change type of i from int to uint64_t. + Shift ((uint64_t) 1) instead of 1 up. + (main): Adjust oballocvec first argument from int to uint64_t. + Shift ((uint64_t) 1) instead of 1 up. + +2015-05-19 Andreas Krebbel + + * lib/target-supports.exp: Vector do not always have natural + alignment on s390*. + +2015-05-19 Andreas Krebbel + + * gcc.dg/tree-ssa/gen-vect-11b.c: Disable vector instructions on + s390*. + * gcc.dg/tree-ssa/gen-vect-11c.c: Likewise. + +2015-05-19 Andreas Krebbel + + * gcc.target/s390/zvector/vec-dbl-math-compile-1.c: New test. + * gcc.target/s390/zvector/vec-genbytemask-1.c: New test. + * gcc.target/s390/zvector/vec-genmask-1.c: New test. + * gcc.target/s390/zvector/vec-lcbb-1.c: New test. + * gcc.target/s390/zvector/vec-overloading-1.c: New test. + * gcc.target/s390/zvector/vec-overloading-2.c: New test. + * gcc.target/s390/zvector/vec-overloading-3.c: New test. + * gcc.target/s390/zvector/vec-overloading-4.c: New test. + * gcc.target/s390/zvector/vec-test-mask-1.c: New test. + * gcc.target/s390/zvector/vec-elem-1.c: New test. + +2015-05-19 Andreas Krebbel + + * config.gcc: Add vecintrin.h to extra_headers. Add s390-c.o to + c_target_objs and cxx_target_objs. Add t-s390 to tmake_file. + * config/s390/s390-builtin-types.def: New file. + * config/s390/s390-builtins.def: New file. + * config/s390/s390-builtins.h: New file. + * config/s390/s390-c.c: New file. + * config/s390/s390-modes.def: Add modes CCVEQANY, CCVH, + CCVHANY, CCVHU, CCVHUANY, CCVFHANY, CCVFHEANY. + * config/s390/s390-protos.h (s390_expand_vec_compare_cc) + (s390_cpu_cpp_builtins, s390_register_target_pragmas): Add + prototypes. + * config/s390/s390.c (s390-builtins.h, s390-builtins.def): + Include. + (flags_builtin, flags_overloaded_builtin_var, s390_builtin_types) + (s390_builtin_fn_types, s390_builtin_decls, code_for_builtin): New + variable definitions. + (s390_const_operand_ok): New function. + (s390_expand_builtin): Rewrite. + (s390_init_builtins): New function. + (s390_handle_vectorbool_attribute): New function. + (s390_attribute_table): Add s390_vector_bool attribute. + (s390_match_ccmode_set): Handle new cc modes CCVH, CCVHU. + (s390_branch_condition_mask): Generate masks for new modes. + (s390_expand_vec_compare_cc): New function. + (s390_mangle_type): Add mangling for vector bool types. + (enum s390_builtin): Remove. + (s390_atomic_assign_expand_fenv): Rename constants for sfpc and + efpc builtins. + * config/s390/s390.h (TARGET_CPU_CPP_BUILTINS): Call + s390_cpu_cpp_builtins. + (REGISTER_TARGET_PRAGMAS): New macro. + * config/s390/s390.md: Define more UNSPEC_VEC_* constants. 
+ (insn_cmp mode attribute): Add new CC modes. + (s390_sfpc, s390_efpc): Rename patterns to sfpc and efpc. + (lcbb): New pattern definition. + * config/s390/s390intrin.h: Include vecintrin.h. + * config/s390/t-s390: New file. + * config/s390/vecintrin.h: New file. + * config/s390/vector.md: Include vx-builtins.md. + * config/s390/vx-builtins.md: New file.S/390 zvector builtin + support. + +2015-05-19 Andreas Krebbel + + * config/s390/s390-modes.def: Add new modes CCVEQ, CCVFH, and + CCVFHE. + * config/s390/s390.c (s390_match_ccmode_set): Handle new modes. + (s390_select_ccmode): Likewise. + (s390_canonicalize_comparison): Swap operands if necessary. + (s390_expand_vec_compare_scalar): Expand DFmode compare using + single element vector instructions. + (s390_emit_compare): Call s390_expand_vec_compare_scalar. + (s390_branch_condition_mask): Generate CC masks for the new modes. + * config/s390/s390.md (v0, vf, vd): New mode attributes. + (VFCMP, asm_fcmp, insn_cmp): New mode iterator and attributes. + (*vec_cmpdf_cconly, *fixuns_truncdfdi2_z13) + (*fix_trunc2_bfp, *floatunsdidf2_z13) + (*floatuns2, *extendsfdf2_z13) + (*extend2): New insn definition. + (fix_trunc2_bfp, loatuns2) + (extend2): Turn into expander. + (floatdi2, truncdfsf2, add3, sub3, mul3) + (div3, *neg2, *abs2, *negabs2) + (sqrt2): Add vector instruction. + + * gcc.target/s390/vector/vec-scalar-cmp-1.c: New test. + +2015-05-19 Andreas Krebbel + + * gcc.target/s390/s390.exp + (check_effective_target_vector): New check. + * gcc.target/s390/vector/vec-abi-1.c: New test. + * gcc.target/s390/vector/vec-abi-2.c: New test. + * gcc.target/s390/vector/vec-abi-3.c: New test. + * gcc.target/s390/vector/vec-abi-4.c: New test. + * gcc.target/s390/vector/vec-abi-align-1.c: New test. + * gcc.target/s390/vector/vec-abi-single-1.c: New test. + * gcc.target/s390/vector/vec-abi-single-2.c: New test. + * gcc.target/s390/vector/vec-abi-struct-1.c: New test. + * gcc.target/s390/vector/vec-abi-vararg-1.c: New test. + * gcc.target/s390/vector/vec-abi-vararg-2.c: New test. + * gcc.target/s390/vector/vec-clobber-1.c: New test. + * gcc.target/s390/vector/vec-cmp-1.c: New test. + * gcc.target/s390/vector/vec-cmp-2.c: New test. + * gcc.target/s390/vector/vec-dbl-math-compile-1.c: New test. + * gcc.target/s390/vector/vec-genbytemask-1.c: New test. + * gcc.target/s390/vector/vec-genbytemask-2.c: New test. + * gcc.target/s390/vector/vec-genmask-1.c: New test. + * gcc.target/s390/vector/vec-genmask-2.c: New test. + * gcc.target/s390/vector/vec-init-1.c: New test. + * gcc.target/s390/vector/vec-int-math-compile-1.c: New test. + * gcc.target/s390/vector/vec-shift-1.c: New test. + * gcc.target/s390/vector/vec-sub-1.c: New test. + +2015-05-19 Andreas Krebbel + + * config/s390/constraints.md (j00, jm1, jxx, jyy, v): New + constraints. + * config/s390/predicates.md (const0_operand, constm1_operand) + (constable_operand): Accept vector operands. + * config/s390/s390-modes.def: Add supported vector modes. + * config/s390/s390-protos.h (s390_cannot_change_mode_class) + (s390_function_arg_vector, s390_contiguous_bitmask_vector_p) + (s390_bytemask_vector_p, s390_expand_vec_strlen) + (s390_expand_vec_compare, s390_expand_vcond) + (s390_expand_vec_init): Add prototypes. + * config/s390/s390.c (VEC_ARG_NUM_REG): New macro. + (s390_vector_mode_supported_p): New function. + (s390_contiguous_bitmask_p): Mask out the irrelevant bits. + (s390_contiguous_bitmask_vector_p): New function. + (s390_bytemask_vector_p): New function. + (s390_split_ok_p): Vector regs don't work either. 
+ (regclass_map): Add VEC_REGS. + (s390_legitimate_constant_p): Handle vector constants. + (s390_cannot_force_const_mem): Handle CONST_VECTOR. + (legitimate_reload_vector_constant_p): New function. + (s390_preferred_reload_class): Handle CONST_VECTOR. + (s390_reload_symref_address): Likewise. + (s390_secondary_reload): Vector memory instructions only support + short displacements. Rename reload*_nonoffmem* to reload*_la*. + (s390_emit_ccraw_jump): New function. + (s390_expand_vec_strlen): New function. + (s390_expand_vec_compare): New function. + (s390_expand_vcond): New function. + (s390_expand_vec_init): New function. + (s390_dwarf_frame_reg_mode): New function. + (print_operand): Handle addresses with 'O' and 'R' constraints. + (NR_C_MODES, constant_modes): Add vector modes. + (s390_output_pool_entry): Handle vector constants. + (s390_hard_regno_mode_ok): Handle vector registers. + (s390_class_max_nregs): Likewise. + (s390_cannot_change_mode_class): New function. + (s390_invalid_arg_for_unprototyped_fn): New function. + (s390_function_arg_vector): New function. + (s390_function_arg_float): Remove size variable. + (s390_pass_by_reference): Handle vector arguments. + (s390_function_arg_advance): Likewise. + (s390_function_arg): Likewise. + (s390_return_in_memory): Vector values are returned in a VR if + possible. + (s390_function_and_libcall_value): Handle vector arguments. + (s390_gimplify_va_arg): Likewise. + (s390_call_saved_register_used): Consider the arguments named. + (s390_conditional_register_usage): Disable v16-v31 for non-vec + targets. + (s390_preferred_simd_mode): New function. + (s390_support_vector_misalignment): New function. + (s390_vector_alignment): New function. + (TARGET_STRICT_ARGUMENT_NAMING, TARGET_DWARF_FRAME_REG_MODE) + (TARGET_VECTOR_MODE_SUPPORTED_P) + (TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN) + (TARGET_VECTORIZE_PREFERRED_SIMD_MODE) + (TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT) + (TARGET_VECTOR_ALIGNMENT): Define target macro. + * config/s390/s390.h (FUNCTION_ARG_PADDING): Define macro. + (FIRST_PSEUDO_REGISTER): Increase value. + (VECTOR_NOFP_REGNO_P, VECTOR_REGNO_P, VECTOR_NOFP_REG_P) + (VECTOR_REG_P): Define macros. + (FIXED_REGISTERS, CALL_USED_REGISTERS) + (CALL_REALLY_USED_REGISTERS, REG_ALLOC_ORDER) + (HARD_REGNO_CALL_PART_CLOBBERED, REG_CLASS_NAMES) + (FUNCTION_ARG_REGNO_P, FUNCTION_VALUE_REGNO_P, REGISTER_NAMES): + Add vector registers. + (CANNOT_CHANGE_MODE_CLASS): Call C function. + (enum reg_class): Add VEC_REGS, ADDR_VEC_REGS, GENERAL_VEC_REGS. + (SECONDARY_MEMORY_NEEDED): Allow SF<->SI mode moves without + memory. + (DBX_REGISTER_NUMBER, FIRST_VEC_ARG_REGNO, LAST_VEC_ARG_REGNO) + (SHORT_DISP_IN_RANGE, VECTOR_STORE_FLAG_VALUE): Define macro. + * config/s390/s390.md (UNSPEC_VEC_*): New constants. + (VR*_REGNUM): New constants. + (ALL): New mode iterator. + (INTALL): Remove mode iterator. + Include vector.md. + (movti): Implement TImode moves for VRs. + Disable TImode splitter for VR targets. + Implement splitting TImode GPR<->VR moves. + (reload*_tomem_z10, reload*_toreg_z10): Replace INTALL with ALL. + (reload_nonoffmem_in, reload_nonoffmem_out): Rename to + reload_la_in, reload_la_out. + (*movdi_64, *movsi_zarch, *movhi, *movqi, *mov_64dfp) + (*mov_64, *mov_31): Add vector instructions. + (TD/TF mode splitter): Enable for GPRs only (formerly !FP). + (mov SF SD): Prefer lder, lde for loading. + Add lrl and strl instructions. + Add vector instructions. + (strlen): Rename old strlen to strlen_srst. + Call s390_expand_vec_strlen on z13. 
+ (*cc_to_int): Change predicate to nonimmediate_operand.
+ (addti3): Rename to *addti3. New expander.
+ (subti3): Rename to *subti3. New expander.
+ * config/s390/vector.md: New file.
+
+2015-05-19 Andreas Krebbel
+
+ * common/config/s390/s390-common.c (processor_flags_table): Add
+ z13.
+ * config.gcc: Add z13.
+ * config/s390/s390-opts.h (enum processor_type): Add
+ PROCESSOR_2964_Z13.
+ * config/s390/s390.c (s390_adjust_priority): Check for
+ PROCESSOR_2964_Z13.
+ (s390_reorg): Likewise.
+ (s390_sched_reorder): Likewise.
+ (s390_sched_variable_issue): Likewise.
+ (s390_loop_unroll_adjust): Likewise.
+ (s390_option_override): Likewise. Default to -mvx when available.
+ * config/s390/s390.h (enum processor_flags): Add PF_Z13 and PF_VX.
+ (TARGET_CPU_Z13, TARGET_CPU_VX, TARGET_Z13, TARGET_VX)
+ (TARGET_VX_ABI): Define macros.
+ (TARGET_DEFAULT): Add MASK_OPT_VX.
+ * config/s390/s390.md ("cpu" attribute): Add z13.
+ ("cpu_facility" attribute): Add vec.
+ * config/s390/s390.opt (processor_type): Add z13.
+ (mvx): New options.
+ * doc/invoke.texi: Add z13 option for -march.
+
+2015-05-19 Andreas Krebbel
+
+ * optabs.c (expand_vec_perm): Don't re-use SEL as target operand.
+
+2015-05-19 Andreas Krebbel
+
+ * config/s390/s390.c (s390_secondary_reload): Fix check for
+ load/store relative.
+
+2015-05-19 Andreas Krebbel
+
+ * recog.h: Increase MAX_RECOG_ALTERNATIVES. Change type of
+ alternative_mask to uint64_t.
+
+2015-02-27 Andreas Krebbel
+
+ * config/s390/s390.c (enum s390_builtin):
+ Add S390_BUILTIN_S390_SFPC and S390_BUILTIN_S390_EFPC.
+ (code_for_builtin): Add CODE_FOR_s390_sfpc and CODE_FOR_s390_efpc.
+ (s390_init_builtins): Generate new builtin functions.
+ * config/s390/s390.md (UNSPECV_SFPC, UNSPECV_EFPC): New constants.
+ (s390_sfpc, s390_efpc): New pattern definitions.
+
+2014-09-03 Matthew Fortune
+
+ * target.def (TARGET_DWARF_FRAME_REG_MODE): New target hook.
+ * targhooks.c (default_dwarf_frame_reg_mode): New function.
+ * targhooks.h (default_dwarf_frame_reg_mode): New prototype.
+ * doc/tm.texi.in (TARGET_DWARF_FRAME_REG_MODE): Document.
+ * doc/tm.texi: Regenerate.
+ * dwarf2cfi.c (expand_builtin_init_dwarf_reg_sizes): Abstract mode
+ selection logic to default_dwarf_frame_reg_mode.
+
+--- gcc/common/config/s390/s390-common.c 2013-08-14 13:55:13.000000000 +0200
++++ gcc/common/config/s390/s390-common.c 2016-05-11 15:53:24.000000000 +0200
+@@ -42,7 +42,10 @@ EXPORTED_CONST int processor_flags_table
+ /* z196 */ PF_IEEE_FLOAT | PF_ZARCH | PF_LONG_DISPLACEMENT
+ | PF_EXTIMM | PF_DFP | PF_Z10 | PF_Z196,
+ /* zEC12 */ PF_IEEE_FLOAT | PF_ZARCH | PF_LONG_DISPLACEMENT
++ | PF_EXTIMM | PF_DFP | PF_Z10 | PF_Z196 | PF_ZEC12 | PF_TX,
++ /* z13 */ PF_IEEE_FLOAT | PF_ZARCH | PF_LONG_DISPLACEMENT
+ | PF_EXTIMM | PF_DFP | PF_Z10 | PF_Z196 | PF_ZEC12 | PF_TX
++ | PF_Z13 | PF_VX
+ };
+
+ /* Change optimizations to be performed, depending on the
+--- gcc/config/s390/2827.md 2015-06-18 17:09:04.000000000 +0200
++++ gcc/config/s390/2827.md 2016-05-11 18:03:45.000000000 +0200
+@@ -18,20 +18,19 @@
+ ;; along with GCC; see the file COPYING3. If not see
+ ;; <http://www.gnu.org/licenses/>.
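[Editor's note: the 2015-02-27 entry above adds builtins for the floating
point control (FPC) register. A minimal C usage sketch follows; it is
illustrative only, not part of the patch, and the rounding-mode mask is
an assumption of the example.]

/* unsigned int __builtin_s390_efpc (void) extracts the FPC
   (BT_FN_UINT in s390-builtins.def below); void
   __builtin_s390_sfpc (unsigned int) loads it (BT_FN_VOID_UINT).  */
static unsigned int
fpc_set_round_toward_zero (void)
{
  unsigned int old_fpc = __builtin_s390_efpc ();
  /* Assumed for illustration: the BFP rounding mode sits in the low
     two FPC bits and value 1 selects round toward zero.  */
  __builtin_s390_sfpc ((old_fpc & ~3u) | 1u);
  return old_fpc;  /* caller restores via __builtin_s390_sfpc */
}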
+
+-
+-(define_attr "ooo_cracked" ""
++(define_attr "zEC12_cracked" ""
+ (cond [(eq_attr "mnemonic" "cgdbr,clfxtr,cdgtr,celfbr,cxgtr,clfebr,clc,lngfr,cs,cfxbr,xc,clfdbr,basr,ex,cxlgtr,clfdtr,srdl,lpgfr,cdlgbr,cgxtr,cxlftr,nc,cxftr,cdfbr,clfxbr,cdftr,clgxbr,cgdtr,cxlgbr,mvc,clgdtr,cegbr,cfebr,cdlftr,sldl,cdlgtr,csg,chhsi,clgebr,cxgbr,cxfbr,cdlfbr,cgebr,lzxr,oc,cdgbr,brasl,cgxbr,cxlfbr,clgxtr,exrl,cfdbr,celgbr,clgdbr,lxr,cpsdr,lcgfr,bras,srda,cefbr") (const_int 1)]
+ (const_int 0)))
+
+-(define_attr "ooo_expanded" ""
++(define_attr "zEC12_expanded" ""
+ (cond [(eq_attr "mnemonic" "dlr,dsgr,d,dsgf,stam,dsgfr,dlgr,dsg,cds,dr,stm,mvc,dl,cdsg,stmy,dlg,stmg,lam") (const_int 1)]
+ (const_int 0)))
+
+-(define_attr "ooo_endgroup" ""
++(define_attr "zEC12_endgroup" ""
+ (cond [(eq_attr "mnemonic" "ipm") (const_int 1)]
+ (const_int 0)))
+
+-(define_attr "ooo_groupalone" ""
++(define_attr "zEC12_groupalone" ""
+ (cond [(eq_attr "mnemonic" "lnxbr,madb,ltxtr,clc,axtr,msebr,slbgr,xc,alcr,lpxbr,slbr,maebr,mlg,mfy,lxdtr,maeb,lxeb,nc,mxtr,sxtr,dxbr,alc,msdbr,ltxbr,lxdb,madbr,lxdbr,lxebr,mvc,m,mseb,mlr,mlgr,slb,tcxb,msdb,sqxbr,alcgr,oc,flogr,alcg,mxbr,dxtr,axbr,mr,sxbr,slbg,ml,lcxbr,bcr_flush") (const_int 1)]
+ (const_int 0)))
+
+--- gcc/config/s390/2964.md 1970-01-01 01:00:00.000000000 +0100
++++ gcc/config/s390/2964.md 2016-05-11 18:03:45.000000000 +0200
+@@ -0,0 +1,232 @@
++;; Scheduling description for z13.
++;; Copyright (C) 2016 Free Software Foundation, Inc.
++;; Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com)
++
++;; This file is part of GCC.
++
++;; GCC is free software; you can redistribute it and/or modify it under
++;; the terms of the GNU General Public License as published by the Free
++;; Software Foundation; either version 3, or (at your option) any later
++;; version.
++
++;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
++;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
++;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++;; for more details.
++
++;; You should have received a copy of the GNU General Public License
++;; along with GCC; see the file COPYING3. If not see
++;; <http://www.gnu.org/licenses/>.
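[Editor's note: the new 2964.md file below is the z13 pipeline
description used for -mtune=z13 scheduling. For orientation, an
illustrative loop, not taken from the patch, that the new z13 support
can vectorize: built with gcc -O3 -march=z13 (which, per the ChangeLog
above, enables -mvx by default), it turns into vl/vfadb/vst sequences
whose latencies the reservations below model.]

void
vadd_f64 (double *restrict dst, const double *restrict a,
          const double *restrict b, int n)
{
  int i;
  /* Each pair of iterations is a candidate for the two-element
     V2DF vector add (vfadb).  */
  for (i = 0; i < n; i++)
    dst[i] = a[i] + b[i];
}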
++ ++ ++; generator options: vector_ecycs=12 cracked_ecycs=6 scale_ecycs=5 ++ ++(define_attr "z13_cracked" "" ++ (cond [(eq_attr "mnemonic" "celgbr,vscef,vsceg,exrl,clfebr,cefbr,chhsi,\ ++vgef,vgeg,cdlftr,lcgfr,cfdbr,cgdbr,lzxr,cfxbr,rnsbg,cgdtr,cegbr,rxsbg,ex,\ ++cgxtr,clfxtr,cdlgtr,brasl,efpc,cfebr,tbeginc,celfbr,clgxbr,vsteb,vsteh,\ ++clfdtr,cdfbr,lngfr,clgebr,stpq,cs,lpgfr,cdlgbr,lpq,cdgtr,d,cgxbr,cdftr,\ ++rosbg,clgdbr,cdgbr,bras,tbegin,clfdbr,cdlfbr,cgebr,clfxbr,lxr,csy,csg,clgdtr,\ ++clgxtr") (const_int 1)] ++ (const_int 0))) ++ ++(define_attr "z13_expanded" "" ++ (cond [(eq_attr "mnemonic" "cxlftr,cdsg,cdsy,stam,lam,dsgf,lmg,cxlgtr,\ ++dl,cxftr,sldl,dsg,cxlfbr,cxgtr,stmg,stmy,stm,lm,cds,lmy,cxfbr,cxlgbr,srda,\ ++srdl,cxgbr,dlg") (const_int 1)] ++ (const_int 0))) ++ ++(define_attr "z13_groupalone" "" ++ (cond [(eq_attr "mnemonic" "mvc,dxbr,lxebr,axtr,cxtr,alcr,lxdb,lxeb,mxtr,\ ++mfy,cxbr,dsgr,lcxbr,slb,mr,dr,alc,slbr,maebr,mlgr,dsgfr,sxtr,tdcxt,tabort,\ ++msebr,lxdtr,ltxtr,slbg,ml,mxbr,maeb,oc,dxtr,msdb,sqxbr,mseb,xc,m,clc,mlg,\ ++mlr,fixbra,alcgr,nc,sfpc,dlgr,fixbr,slbgr,fixtr,lpxbr,axbr,lxdbr,ltxbr,\ ++tcxb,dlr,lnxbr,sxbr,flogr,alcg,tend,madb,bcr_flush") (const_int 1)] ++ (const_int 0))) ++ ++(define_attr "z13_endgroup" "" ++ (cond [(eq_attr "mnemonic" "ipm") (const_int 1)] ++ (const_int 0))) ++ ++(define_attr "z13_unit_lsu" "" ++ (cond [(eq_attr "mnemonic" "vlbb,mvc,llgc,llc,llhrl,vl,llghrl,vlrepf,\ ++vlrepg,vlreph,lde,ldy,tabort,l,llh,ld,lg,ly,vlrepb,vllezb,vllezf,vllezg,\ ++vllezh,oc,xc,clc,lrl,ear,nc,lgrl,sfpc,llgf,llgfrl,llgh,llgt,lcbb,vll,sar") (const_int 1)] ++ (const_int 0))) ++ ++(define_attr "z13_unit_fxu" "" ++ (cond [(eq_attr "mnemonic" "s,lcgr,x,nop,oiy,ppa,ng,msy,sgrk,vstl,aghik,\ ++msgf,ipm,mvi,stocg,rll,srlg,cghsi,clgit,srlk,alrk,sg,sh,sl,st,sy,vst,ark,\ ++xgr,agsi,tm,nrk,shy,llhr,agf,alcr,slgfr,sr,clgrt,laa,lder,sgf,lan,llilf,\ ++llilh,ag,llill,lay,al,n,laxg,ar,ahi,sgr,ntstg,ay,stcy,nopr,mfy,ngrk,lbr,\ ++br,dsgr,stdy,ork,ldgr,lcr,cg,ch,lgfrl,cl,stoc,cr,agfr,stgrl,cy,alfi,xg,\ ++cgfi,xi,clfhsi,cgfr,xr,slb,mghi,clfi,slg,clhhsi,agfi,clfit,sly,mr,ldr,nihf,\ ++nihh,algfi,dr,nihl,algf,algfr,algr,clgf,clgr,clgt,aghi,alc,alg,locg,alr,\ ++locr,cghi,aly,alghsik,slbr,clgfrl,mhy,cit,nr,ny,xiy,mlgr,sthy,cly,dsgfr,\ ++rllg,cgit,lgb,lgf,clgrl,lgh,lrvgr,cliy,cgrl,lgr,slrk,clrt,icy,laog,og,agr,\ ++mvhi,lhrl,or,lhr,vlvgp,lhy,nilf,oy,nilh,nill,lcdfr,mviy,tmhh,tmhl,sthrl,\ ++ltgf,ltgr,srk,clghrl,ahy,vstef,vsteg,ah,vlgvb,llgcr,tmh,tml,clmy,slr,cfi,\ ++stc,std,ste,stg,sth,locgr,slbg,sty,tmlh,la,lb,mvghi,lh,risbgn,lrvg,lr,asi,\ ++lt,ahik,lrvr,cgf,cgh,cgr,clhrl,lzdr,tmll,mh,ml,vlvgb,ms,lrv,vlvgf,xgrk,\ ++vlvgg,llgfr,vlvgh,slfi,chi,chy,mhi,lzer,alhsik,ni,ltgfr,loc,icm,oi,cgfrl,\ ++agrk,lgat,oilh,llghr,lghrl,oill,xihf,lpgr,cgrt,clrl,sgfr,lpr,lgbr,strl,\ ++algrk,alsi,srak,slgf,a,c,slgr,m,o,algsi,icmh,srag,iilf,ogrk,clg,icmy,\ ++cli,clm,clr,clt,slgrk,mlg,lao,mlr,risbg,mvhhi,lat,etnd,lax,iihf,sra,alcgr,\ ++msgr,clghsi,stey,ngr,xilf,laag,oihf,oihh,oihl,ltg,ltr,niy,lgfi,dlgr,lgfr,\ ++slgfi,llcr,slbgr,chrl,lgdr,pfpo,lang,basr,sllg,sllk,lghi,lghr,vlgvf,vlgvg,\ ++vlgvh,vlr,chsi,lngr,cghrl,srl,lhi,oilf,crl,crt,afi,xrk,llgtr,llihf,llihh,\ ++llihl,dlr,msgfi,msgfr,msg,flogr,xy,msr,clgfi,clgfr,ogr,popcnt,alcg,lndfr,\ ++larl,sll,tmy,msfi,ic,lpdfr,tend,lnr") (const_int 1)] ++ (const_int 0))) ++ ++(define_attr "z13_unit_vfu" "" ++ (cond [(eq_attr "mnemonic" "seb,vcksm,vfadb,vleib,vchgs,vleif,vleig,vleih,\ ++vgbm,verimb,vone,verimf,verimg,verimh,dxbr,verllvb,lpebr,verllvf,verllvg,\ 
++verllvh,vfeneb,wcdgb,vfenef,vfeneh,vchhs,vctzb,vctzf,vctzg,vctzh,vlcb,aeb,\ ++vlcf,vlcg,vlch,vfmsdb,vgfmab,ltebr,vgfmaf,vgfmag,vgfmah,vmaeh,vsb,vsf,vsg,\ ++vsh,vsl,vsq,lxebr,cdtr,fiebr,vupllb,vupllf,vupllh,vmrhb,madbr,vtm,vmrhf,\ ++vmrhg,vmrhh,axtr,fiebra,vleb,cxtr,vlef,vleg,vleh,vpkf,vpkg,vpkh,vmlob,vmlof,\ ++vmloh,lxdb,ldeb,mdtr,vceqfs,adb,wflndb,lxeb,vn,vo,vchlb,vx,mxtr,vchlf,vchlg,\ ++vchlh,vfcedbs,vfcedb,vceqgs,cxbr,msdbr,vcdgb,debr,vceqhs,meeb,lcxbr,vavglb,\ ++vavglf,vavglg,vavglh,wfcedbs,vmrlb,vmrlf,vmrlg,vmrlh,wfchedbs,vmxb,tcdb,\ ++vmahh,vsrlb,wcgdb,lcdbr,vistrbs,vrepb,wfmdb,vrepf,vrepg,vreph,ler,wcdlgb,\ ++ley,vistrb,vistrf,vistrh,tceb,wfsqdb,sqeb,vsumqf,vsumqg,vesrlb,vfeezbs,\ ++maebr,vesrlf,vesrlg,vesrlh,vmeb,vmef,vmeh,meebr,vflcdb,wfmadb,vperm,sxtr,\ ++vclzf,vgm,vgmb,vgmf,vgmg,vgmh,tdcxt,vzero,msebr,veslb,veslf,veslg,vfenezb,\ ++vfenezf,vfenezh,vistrfs,vchf,vchg,vchh,vmhb,vmhf,vmhh,cdb,veslvb,ledbr,\ ++veslvf,veslvg,veslvh,wclgdb,vfmdb,vmnlb,vmnlf,vmnlg,vmnlh,vclzb,vfeezfs,\ ++vclzg,vclzh,mdb,vmxlb,vmxlf,vmxlg,vmxlh,ltdtr,vsbcbiq,ceb,wfddb,sebr,vistrhs,\ ++lxdtr,lcebr,vab,vaf,vag,vah,ltxtr,vlpf,vlpg,vsegb,vaq,vsegf,vsegh,wfchdbs,\ ++sdtr,cdbr,vfeezhs,le,wldeb,vfmadb,vchlbs,vacccq,vmaleb,vsel,vmalef,vmaleh,\ ++vflndb,mdbr,vmlb,wflpdb,ldetr,vpksfs,vpksf,vpksg,vpksh,sqdb,mxbr,sqdbr,\ ++vmaeb,veslh,vmaef,vpklsf,vpklsg,vpklsh,verllb,vchb,ddtr,verllf,verllg,verllh,\ ++wfsdb,maeb,vclgdb,vftcidb,vpksgs,vmxf,vmxg,vmxh,fidbra,vmnb,vmnf,vmng,vfchedbs,\ ++lnebr,vfidb,dxtr,ddb,msdb,vmalhb,vfddb,vmalhf,vmalhh,vpkshs,vfsdb,sqxbr,\ ++vmalhw,ltdbr,vmob,vmof,vmoh,deb,vchlfs,mseb,vcdlgb,vlpb,wfmsdb,vlph,vmahb,\ ++vldeb,vmahf,vgfmb,fidbr,vfsqdb,aebr,wledb,vchlgs,vesravb,vfchdbs,cebr,vesravf,\ ++vesravg,vesravh,vcgdb,fixbra,vrepib,vrepif,vrepig,vrepih,tdcdt,vchlhs,vceqb,\ ++vscbib,vceqf,vceqg,vscbif,vscbig,vscbih,vmlhw,vscbiq,vuphb,vuphf,vuphh,\ ++vfchedb,tdcet,vslb,vpklsfs,adbr,sqebr,vfchdb,fixbr,vpklsgs,vsldb,vmleb,\ ++vmlef,vmleh,cpsdr,vmalb,vmalf,vavgb,vmlf,vavgf,vavgg,vavgh,vgfmf,vgfmg,\ ++vgfmh,fidtr,vpklshs,lndbr,vno,lpdbr,vacq,vledb,vchbs,vfeeb,vfeef,vfeeh,\ ++fixtr,vaccb,wfadb,vaccf,vaccg,vacch,vnot,vmalob,vaccq,vmalof,vmaloh,lpxbr,\ ++ledtr,vuplb,vuplf,axbr,lxdbr,ltxbr,vpopct,vpdi,vmlhb,vmlhf,vmlhh,sdbr,vnc,\ ++vsumb,vsrab,vsumh,vmaob,vmaof,vmaoh,vesrlvb,vesrlvf,vesrlvg,vesrlvh,tcxb,\ ++vceqbs,vceqh,lnxbr,sxbr,vesrab,wflcdb,vesraf,vesrag,vesrah,vflpdb,vmnh,\ ++vsbiq,adtr,vsra,vsrl,vuplhb,sdb,vuplhf,vuplhh,vsumgf,vsumgh,ldebr,vuplhw,\ ++vchfs,madb,ddbr") (const_int 1)] ++ (const_int 0))) ++ ++(define_insn_reservation "z13_0" 0 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "s,lcgr,x,nop,oiy,vlbb,ppa,ng,sgrk,vstl,aghik,\ ++mvc,ipm,llgc,mvi,stocg,rll,jg,srlg,cghsi,clgit,srlk,alrk,sg,sh,sl,st,sy,\ ++vst,ark,xgr,agsi,tm,nrk,shy,llhr,agf,alcr,slgfr,sr,clgrt,llc,laa,lder,sgf,\ ++lan,llhrl,llilf,llilh,ag,llill,lay,al,n,laxg,ar,ahi,sgr,ntstg,ay,stcy,vl,\ ++nopr,ngrk,lbr,br,stdy,ork,ldgr,lcr,cg,ch,llghrl,lgfrl,cl,stoc,cr,agfr,stgrl,\ ++cy,alfi,xg,cgfi,xi,vlrepf,vlrepg,vlreph,clfhsi,cgfr,xr,slb,mghi,clfi,slg,\ ++lde,clhhsi,agfi,clfit,sly,ldr,ldy,nihf,nihh,algfi,nihl,algf,algfr,algr,\ ++clgf,clgr,clgt,aghi,alc,alg,locg,alr,locr,cghi,aly,alghsik,slbr,clgfrl,\ ++mhy,cit,nr,ny,xiy,sthy,cly,rllg,cgit,lgb,lgf,clgrl,lgh,lrvgr,cliy,cgrl,\ ++lgr,slrk,clrt,icy,laog,og,agr,mvhi,lhrl,or,lhr,vlvgp,lhy,nilf,oy,nilh,tabort,\ ++nill,lcdfr,mviy,tmhh,tmhl,sthrl,ltgf,ltgr,srk,clghrl,ahy,vstef,vsteg,ah,\ ++vlgvb,llgcr,tmh,tml,clmy,slr,cfi,stc,std,ste,stg,sth,l,locgr,llh,slbg,sty,\ 
++tmlh,la,lb,ld,mvghi,lg,lh,risbgn,lrvg,lr,asi,lt,ahik,ly,lrvr,vlrepb,vllezb,\ ++cgf,cgh,vllezf,vllezg,vllezh,cgr,clhrl,lzdr,tmll,mh,vlvgb,lrv,vlvgf,xgrk,\ ++vlvgg,llgfr,vlvgh,slfi,chi,chy,mhi,lzer,alhsik,ni,ltgfr,loc,icm,oc,oi,cgfrl,\ ++agrk,lgat,oilh,llghr,lghrl,oill,xihf,lpgr,cgrt,clrl,sgfr,lpr,lgbr,strl,\ ++algrk,alsi,srak,brcl,slgf,xc,a,c,slgr,j,o,algsi,icmh,srag,iilf,ogrk,clc,\ ++clg,icmy,cli,clm,clr,clt,slgrk,lrl,lao,risbg,mvhhi,lat,etnd,lax,iihf,sra,\ ++alcgr,clghsi,ear,nc,lgrl,stey,ngr,xilf,laag,oihf,oihh,oihl,ltg,ltr,niy,\ ++lgfi,sfpc,lgfr,slgfi,llcr,llgf,llgfrl,llgh,slbgr,llgt,chrl,lgdr,pfpo,lang,\ ++basr,lcbb,sllg,sllk,lghi,vll,lghr,vlgvf,vlgvg,vlgvh,vlr,chsi,lngr,cghrl,\ ++srl,sar,lhi,oilf,crl,crt,afi,xrk,llgtr,llihf,llihh,llihl,xy,clgfi,clgfr,\ ++ogr,popcnt,alcg,lndfr,larl,sll,tmy,ic,lpdfr,tend,lnr,bcr_flush")) "nothing") ++ ++(define_insn_reservation "z13_1" 1 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "celgbr,vscef,vsceg,msy,msgf,cxlftr,cdsg,cdsy,\ ++exrl,clfebr,cefbr,chhsi,stam,vgef,vgeg,cdlftr,lam,mfy,lcgfr,cfdbr,dsgf,\ ++cgdbr,lzxr,lmg,cfxbr,rnsbg,cxlgtr,mr,dl,cxftr,sldl,cgdtr,cegbr,rxsbg,ex,\ ++cgxtr,clfxtr,mlgr,cdlgtr,brasl,dsg,efpc,cfebr,tbeginc,celfbr,clgxbr,vsteb,\ ++vsteh,cxlfbr,clfdtr,cxgtr,stmg,stmy,stm,lm,cds,cdfbr,ml,ms,lngfr,clgebr,\ ++stpq,lmy,cs,lpgfr,cdlgbr,lpq,cxfbr,cxlgbr,cdgtr,d,m,mlg,mlr,cgxbr,cdftr,\ ++msgr,rosbg,clgdbr,cdgbr,srda,bras,srdl,tbegin,clfdbr,cdlfbr,cxgbr,cgebr,\ ++dlg,clfxbr,lxr,csy,msgfi,msgfr,msg,flogr,msr,csg,msfi,clgdtr,clgxtr")) "nothing") ++ ++(define_insn_reservation "z13_2" 2 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "seb,vcksm,vfadb,vleib,vchgs,vleif,vleig,vleih,\ ++vgbm,verimb,vone,verimf,verimg,verimh,verllvb,lpebr,verllvf,verllvg,verllvh,\ ++vfeneb,wcdgb,vfenef,vfeneh,vchhs,vctzb,vctzf,vctzg,vctzh,vlcb,aeb,vlcf,\ ++vlcg,vlch,vfmsdb,vgfmab,ltebr,vgfmaf,vgfmag,vgfmah,vmaeh,vsb,vsf,vsg,vsh,\ ++vsl,vsq,lxebr,cdtr,fiebr,vupllb,vupllf,vupllh,vmrhb,madbr,vtm,vmrhf,vmrhg,\ ++vmrhh,axtr,fiebra,vleb,cxtr,vlef,vleg,vleh,vpkf,vpkg,vpkh,vmlob,vmlof,vmloh,\ ++lxdb,ldeb,vceqfs,adb,wflndb,lxeb,vn,vo,vchlb,vx,vchlf,vchlg,vchlh,vfcedbs,\ ++vfcedb,vceqgs,cxbr,msdbr,vcdgb,vceqhs,meeb,lcxbr,vavglb,vavglf,vavglg,vavglh,\ ++wfcedbs,vmrlb,vmrlf,vmrlg,vmrlh,wfchedbs,vmxb,tcdb,vmahh,vsrlb,wcgdb,lcdbr,\ ++vistrbs,vrepb,wfmdb,vrepf,vrepg,vreph,ler,wcdlgb,ley,vistrb,vistrf,vistrh,\ ++tceb,vsumqf,vsumqg,vesrlb,vfeezbs,maebr,vesrlf,vesrlg,vesrlh,vmeb,vmef,\ ++vmeh,meebr,vflcdb,wfmadb,vperm,sxtr,vclzf,vgm,vgmb,vgmf,vgmg,vgmh,tdcxt,\ ++vzero,msebr,veslb,veslf,veslg,vfenezb,vfenezf,vfenezh,vistrfs,vchf,vchg,\ ++vchh,vmhb,vmhf,vmhh,cdb,veslvb,ledbr,veslvf,veslvg,veslvh,wclgdb,vfmdb,\ ++vmnlb,vmnlf,vmnlg,vmnlh,vclzb,vfeezfs,vclzg,vclzh,mdb,vmxlb,vmxlf,vmxlg,\ ++vmxlh,ltdtr,vsbcbiq,ceb,sebr,vistrhs,lxdtr,lcebr,vab,vaf,vag,vah,ltxtr,\ ++vlpf,vlpg,vsegb,vaq,vsegf,vsegh,wfchdbs,sdtr,cdbr,vfeezhs,le,wldeb,vfmadb,\ ++vchlbs,vacccq,vmaleb,vsel,vmalef,vmaleh,vflndb,mdbr,vmlb,wflpdb,ldetr,vpksfs,\ ++vpksf,vpksg,vpksh,vmaeb,veslh,vmaef,vpklsf,vpklsg,vpklsh,verllb,vchb,verllf,\ ++verllg,verllh,wfsdb,maeb,vclgdb,vftcidb,vpksgs,vmxf,vmxg,vmxh,fidbra,vmnb,\ ++vmnf,vmng,vfchedbs,lnebr,vfidb,msdb,vmalhb,vmalhf,vmalhh,vpkshs,vfsdb,vmalhw,\ ++ltdbr,vmob,vmof,vmoh,vchlfs,mseb,vcdlgb,vlpb,wfmsdb,vlph,vmahb,vldeb,vmahf,\ ++vgfmb,fidbr,aebr,wledb,vchlgs,vesravb,vfchdbs,cebr,vesravf,vesravg,vesravh,\ ++vcgdb,fixbra,vrepib,vrepif,vrepig,vrepih,tdcdt,vchlhs,vceqb,vscbib,vceqf,\ ++vceqg,vscbif,vscbig,vscbih,vmlhw,vscbiq,vuphb,vuphf,vuphh,vfchedb,tdcet,\ 
++vslb,vpklsfs,adbr,vfchdb,fixbr,vpklsgs,vsldb,vmleb,vmlef,vmleh,cpsdr,vmalb,\ ++vmalf,vavgb,vmlf,vavgf,vavgg,vavgh,vgfmf,vgfmg,vgfmh,fidtr,vpklshs,lndbr,\ ++vno,lpdbr,vacq,vledb,vchbs,vfeeb,vfeef,vfeeh,fixtr,vaccb,wfadb,vaccf,vaccg,\ ++vacch,vnot,vmalob,vaccq,vmalof,vmaloh,lpxbr,vuplb,vuplf,axbr,lxdbr,ltxbr,\ ++vpopct,vpdi,vmlhb,vmlhf,vmlhh,sdbr,vnc,vsumb,vsrab,vsumh,vmaob,vmaof,vmaoh,\ ++vesrlvb,vesrlvf,vesrlvg,vesrlvh,tcxb,vceqbs,vceqh,lnxbr,sxbr,vesrab,wflcdb,\ ++vesraf,vesrag,vesrah,vflpdb,vmnh,vsbiq,adtr,vsra,vsrl,vuplhb,sdb,vuplhf,\ ++vuplhh,vsumgf,vsumgh,ldebr,vuplhw,vchfs,madb")) "nothing") ++ ++(define_insn_reservation "z13_3" 3 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "ledtr")) "nothing") ++ ++(define_insn_reservation "z13_4" 4 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "dr,mxbr,dlr")) "nothing") ++ ++(define_insn_reservation "z13_6" 6 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "debr,sqeb,deb,sqebr")) "nothing") ++ ++(define_insn_reservation "z13_7" 7 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "mdtr")) "nothing") ++ ++(define_insn_reservation "z13_8" 8 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "wfddb,ddb,vfddb,ddbr")) "nothing") ++ ++(define_insn_reservation "z13_9" 9 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "dsgr,wfsqdb,dsgfr,sqdb,sqdbr,vfsqdb")) "nothing") ++ ++(define_insn_reservation "z13_13" 13 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "mxtr,ddtr")) "nothing") ++ ++(define_insn_reservation "z13_16" 16 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "sqxbr")) "nothing") ++ ++(define_insn_reservation "z13_17" 17 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "dxtr")) "nothing") ++ ++(define_insn_reservation "z13_20" 20 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "dxbr,dlgr")) "nothing") ++ +--- gcc/config/s390/constraints.md 2013-01-21 16:11:50.000000000 +0100 ++++ gcc/config/s390/constraints.md 2016-05-11 18:40:20.880008612 +0200 +@@ -29,7 +29,15 @@ + ;; c -- Condition code register 33. + ;; d -- Any register from 0 to 15. + ;; f -- Floating point registers. ++;; j -- Multiple letter constraint for constant scalar and vector values ++;; j00: constant zero scalar or vector ++;; jm1: constant scalar or vector with all bits set ++;; jxx: contiguous bitmask of 0 or 1 in all vector elements ++;; jyy: constant consisting of byte chunks being either 0 or 0xff ++;; jKK: constant vector with all elements having the same value and ++;; matching K constraint + ;; t -- Access registers 36 and 37. ++;; v -- Vector registers v0-v31. + ;; C -- A signed 8-bit constant (-128..127) + ;; D -- An unsigned 16-bit constant (0..65535) + ;; G -- Const double zero operand +@@ -109,6 +117,11 @@ + Access registers 36 and 37") + + ++(define_register_constraint "v" ++ "VEC_REGS" ++ "Vector registers v0-v31") ++ ++ + ;; + ;; General constraints for constants. + ;; +@@ -374,6 +387,33 @@ + (match_test "s390_O_constraint_str ('n', ival)"))) + + ++;; ++;; Vector constraints follow. 
++;;
++
++(define_constraint "j00"
++ "Zero scalar or vector constant"
++ (match_test "op == CONST0_RTX (GET_MODE (op))"))
++
++(define_constraint "jm1"
++ "All one bit scalar or vector constant"
++ (match_test "op == CONSTM1_RTX (GET_MODE (op))"))
++
++(define_constraint "jxx"
++ "@internal"
++ (and (match_code "const_vector")
++ (match_test "s390_contiguous_bitmask_vector_p (op, NULL, NULL)")))
++
++(define_constraint "jyy"
++ "@internal"
++ (and (match_code "const_vector")
++ (match_test "s390_bytemask_vector_p (op, NULL)")))
++
++(define_constraint "jKK"
++ "@internal"
++ (and (and (match_code "const_vector")
++ (match_test "s390_const_vec_duplicate_p (op)"))
++ (match_test "satisfies_constraint_K (XVECEXP (op, 0, 0))")))
+
+
+ ;;
+--- gcc/config/s390/predicates.md 2013-08-14 13:55:12.000000000 +0200
++++ gcc/config/s390/predicates.md 2016-05-11 18:17:42.508662564 +0200
+@@ -24,16 +24,26 @@
+
+ ;; operands --------------------------------------------------------------
+
+-;; Return true if OP a (const_int 0) operand.
+-
++;; Return true if OP is a const 0 operand (int/float/vector).
+ (define_predicate "const0_operand"
+- (and (match_code "const_int, const_double")
++ (and (match_code "const_int,const_double,const_vector")
+ (match_test "op == CONST0_RTX (mode)")))
+
++;; Return true if OP is an all ones operand (int/vector).
++(define_predicate "all_ones_operand"
++ (and (match_code "const_int, const_double, const_vector")
++ (match_test "INTEGRAL_MODE_P (GET_MODE (op))")
++ (match_test "op == CONSTM1_RTX (mode)")))
++
++;; Return true if OP is a 4 bit mask operand.
++(define_predicate "const_mask_operand"
++ (and (match_code "const_int")
++ (match_test "UINTVAL (op) < 16")))
++
+ ;; Return true if OP is constant.
+
+ (define_special_predicate "consttable_operand"
+- (and (match_code "symbol_ref, label_ref, const, const_int, const_double")
++ (and (match_code "symbol_ref, label_ref, const, const_int, const_double, const_vector")
+ (match_test "CONSTANT_P (op)")))
+
+ ;; Return true if OP is a valid S-type operand.
+--- gcc/config/s390/s390-builtins.def 1970-01-01 01:00:00.000000000 +0100
++++ gcc/config/s390/s390-builtins.def 2016-05-11 17:53:57.000000000 +0200
+@@ -0,0 +1,2488 @@
++/* Builtin definitions for IBM S/390 and zSeries
++ Copyright (C) 2015 Free Software Foundation, Inc.
++
++ Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
++
++ This file is part of GCC.
++
++ GCC is free software; you can redistribute it and/or modify it
++ under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 3, or (at your option)
++ any later version.
++
++ GCC is distributed in the hope that it will be useful, but WITHOUT
++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
++ License for more details.
++
++ You should have received a copy of the GNU General Public License
++ along with GCC; see the file COPYING3. If not see
++ <http://www.gnu.org/licenses/>.
*/
++
++#undef O_U1
++#undef O_U2
++#undef O_U3
++#undef O_U4
++#undef O_U5
++#undef O_U8
++#undef O_U12
++#undef O_U16
++#undef O_U32
++
++#undef O_S2
++#undef O_S3
++#undef O_S4
++#undef O_S5
++#undef O_S8
++#undef O_S12
++#undef O_S16
++#undef O_S32
++#undef O_ELEM
++#undef O_LIT
++
++#undef O1_U1
++#undef O2_U1
++#undef O3_U1
++#undef O4_U1
++
++#undef O1_U2
++#undef O2_U2
++#undef O3_U2
++#undef O4_U2
++
++#undef O1_U3
++#undef O2_U3
++#undef O3_U3
++#undef O4_U3
++
++#undef O1_U4
++#undef O2_U4
++#undef O3_U4
++#undef O4_U4
++
++#undef O1_U5
++#undef O2_U5
++#undef O3_U5
++#undef O4_U5
++
++#undef O1_U8
++#undef O2_U8
++#undef O3_U8
++#undef O4_U8
++
++#undef O1_U12
++#undef O2_U12
++#undef O3_U12
++#undef O4_U12
++
++#undef O1_U16
++#undef O2_U16
++#undef O3_U16
++#undef O4_U16
++
++#undef O1_U32
++#undef O2_U32
++#undef O3_U32
++#undef O4_U32
++
++#undef O1_S2
++#undef O2_S2
++#undef O3_S2
++#undef O4_S2
++
++#undef O1_S3
++#undef O2_S3
++#undef O3_S3
++#undef O4_S3
++
++#undef O1_S4
++#undef O2_S4
++#undef O3_S4
++#undef O4_S4
++
++#undef O1_S5
++#undef O2_S5
++#undef O3_S5
++#undef O4_S5
++
++#undef O1_S8
++#undef O2_S8
++#undef O3_S8
++#undef O4_S8
++
++#undef O1_S12
++#undef O2_S12
++#undef O3_S12
++#undef O4_S12
++
++#undef O1_S16
++#undef O2_S16
++#undef O3_S16
++#undef O4_S16
++
++#undef O1_S32
++#undef O2_S32
++#undef O3_S32
++#undef O4_S32
++
++#undef O1_ELEM
++#undef O2_ELEM
++#undef O3_ELEM
++#undef O4_ELEM
++
++#undef O1_LIT
++#undef O2_LIT
++#undef O3_LIT
++#undef O4_LIT
++
++#undef O_SHIFT
++#undef O_IMM_P
++#undef O_UIMM_P
++#undef O_SIMM_P
++
++#define O_U1 1 /* unsigned 1 bit literal */
++#define O_U2 2 /* unsigned 2 bit literal */
++#define O_U3 3 /* unsigned 3 bit literal */
++#define O_U4 4 /* unsigned 4 bit literal */
++#define O_U5 5 /* unsigned 5 bit literal */
++#define O_U8 6 /* unsigned 8 bit literal */
++#define O_U12 7 /* unsigned 12 bit literal */
++#define O_U16 8 /* unsigned 16 bit literal */
++#define O_U32 9 /* unsigned 32 bit literal */
++
++#define O_S2 10 /* signed 2 bit literal */
++#define O_S3 11 /* signed 3 bit literal */
++#define O_S4 12 /* signed 4 bit literal */
++#define O_S5 13 /* signed 5 bit literal */
++#define O_S8 14 /* signed 8 bit literal */
++#define O_S12 15 /* signed 12 bit literal */
++#define O_S16 16 /* signed 16 bit literal */
++#define O_S32 17 /* signed 32 bit literal */
++
++#define O_ELEM 18 /* Element selector requiring modulo arithmetic. */
++#define O_LIT 19 /* Operand must be a literal fitting the target type.
*/ ++ ++#define O_SHIFT 5 ++ ++#define O_UIMM_P(X) ((X) >= O_U1 && (X) <= O_U32) ++#define O_SIMM_P(X) ((X) >= O_S2 && (X) <= O_S32) ++#define O_IMM_P(X) ((X) == O_LIT || ((X) >= O_U1 && (X) <= O_S32)) ++ ++#define O1_U1 O_U1 ++#define O2_U1 (O_U1 << O_SHIFT) ++#define O3_U1 (O_U1 << (2 * O_SHIFT)) ++#define O4_U1 (O_U1 << (3 * O_SHIFT)) ++ ++#define O1_U2 O_U2 ++#define O2_U2 (O_U2 << O_SHIFT) ++#define O3_U2 (O_U2 << (2 * O_SHIFT)) ++#define O4_U2 (O_U2 << (3 * O_SHIFT)) ++ ++#define O1_U3 O_U3 ++#define O2_U3 (O_U3 << O_SHIFT) ++#define O3_U3 (O_U3 << (2 * O_SHIFT)) ++#define O4_U3 (O_U3 << (3 * O_SHIFT)) ++ ++#define O1_U4 O_U4 ++#define O2_U4 (O_U4 << O_SHIFT) ++#define O3_U4 (O_U4 << (2 * O_SHIFT)) ++#define O4_U4 (O_U4 << (3 * O_SHIFT)) ++ ++#define O1_U5 O_U5 ++#define O2_U5 (O_U5 << O_SHIFT) ++#define O3_U5 (O_U5 << (2 * O_SHIFT)) ++#define O4_U5 (O_U5 << (3 * O_SHIFT)) ++ ++#define O1_U8 O_U8 ++#define O2_U8 (O_U8 << O_SHIFT) ++#define O3_U8 (O_U8 << (2 * O_SHIFT)) ++#define O4_U8 (O_U8 << (3 * O_SHIFT)) ++ ++#define O1_U12 O_U12 ++#define O2_U12 (O_U12 << O_SHIFT) ++#define O3_U12 (O_U12 << (2 * O_SHIFT)) ++#define O4_U12 (O_U12 << (3 * O_SHIFT)) ++ ++#define O1_U16 O_U16 ++#define O2_U16 (O_U16 << O_SHIFT) ++#define O3_U16 (O_U16 << (2 * O_SHIFT)) ++#define O4_U16 (O_U16 << (3 * O_SHIFT)) ++ ++#define O1_U32 O_U32 ++#define O2_U32 (O_U32 << O_SHIFT) ++#define O3_U32 (O_U32 << (2 * O_SHIFT)) ++#define O4_U32 (O_U32 << (3 * O_SHIFT)) ++ ++ ++#define O1_S2 O_S2 ++#define O2_S2 (O_S2 << O_SHIFT) ++#define O3_S2 (O_S2 << (2 * O_SHIFT)) ++#define O4_S2 (O_S2 << (3 * O_SHIFT)) ++ ++#define O1_S3 O_S3 ++#define O2_S3 (O_S3 << O_SHIFT) ++#define O3_S3 (O_S3 << (2 * O_SHIFT)) ++#define O4_S3 (O_S3 << (3 * O_SHIFT)) ++ ++#define O1_S4 O_S4 ++#define O2_S4 (O_S4 << O_SHIFT) ++#define O3_S4 (O_S4 << (2 * O_SHIFT)) ++#define O4_S4 (O_S4 << (3 * O_SHIFT)) ++ ++#define O1_S5 O_S5 ++#define O2_S5 (O_S5 << O_SHIFT) ++#define O3_S5 (O_S5 << (2 * O_SHIFT)) ++#define O4_S5 (O_S5 << (3 * O_SHIFT)) ++ ++#define O1_S8 O_S8 ++#define O2_S8 (O_S8 << O_SHIFT) ++#define O3_S8 (O_S8 << (2 * O_SHIFT)) ++#define O4_S8 (O_S8 << (3 * O_SHIFT)) ++ ++#define O1_S12 O_S12 ++#define O2_S12 (O_S12 << O_SHIFT) ++#define O3_S12 (O_S12 << (2 * O_SHIFT)) ++#define O4_S12 (O_S12 << (3 * O_SHIFT)) ++ ++#define O1_S16 O_S16 ++#define O2_S16 (O_S16 << O_SHIFT) ++#define O3_S16 (O_S16 << (2 * O_SHIFT)) ++#define O4_S16 (O_S16 << (3 * O_SHIFT)) ++ ++#define O1_S32 O_S32 ++#define O2_S32 (O_S32 << O_SHIFT) ++#define O3_S32 (O_S32 << (2 * O_SHIFT)) ++#define O4_S32 (O_S32 << (3 * O_SHIFT)) ++ ++#define O1_ELEM O_ELEM ++#define O2_ELEM (O_ELEM << O_SHIFT) ++#define O3_ELEM (O_ELEM << (2 * O_SHIFT)) ++#define O4_ELEM (O_ELEM << (3 * O_SHIFT)) ++ ++#define O1_LIT O_LIT ++#define O2_LIT (O_LIT << O_SHIFT) ++#define O3_LIT (O_LIT << (2 * O_SHIFT)) ++#define O4_LIT (O_LIT << (3 * O_SHIFT)) ++ ++ ++/* Builtin flags. Flags applying to the whole builtin definition. */ ++ ++#undef B_INT ++#undef B_HTM ++#undef B_VX ++ ++#undef BFLAGS_MASK_INIT ++#define BFLAGS_MASK_INIT (B_INT) ++ ++#define B_INT (1 << 0) /* Internal builtins. This builtin cannot be used in user programs. */ ++#define B_HTM (1 << 1) /* Builtins requiring the transactional execution facility. */ ++#define B_VX (1 << 2) /* Builtins requiring the z13 vector extensions. 
*/
++
++
++/* B_DEF defines a standard (not overloaded) builtin
++ B_DEF (<name>, <pattern>, <attributes>, <builtin flags>, <operand flags>, <fntype>)
++
++ OB_DEF defines an overloaded builtin
++ OB_DEF (<name>, <first variant>, <last variant>, <builtin flags>, <fntype>)
++
++ OB_DEF_VAR defines a variant of an overloaded builtin
++ OB_DEF_VAR (<name>, <builtin>, <operand flags>, <fntype>) */
++
++
++B_DEF (tbeginc, tbeginc, 0, B_HTM, 0, BT_FN_INT)
++B_DEF (tbegin, tbegin, returns_twice_attr, B_HTM, 0, BT_FN_INT_VOIDPTR)
++B_DEF (tbegin_nofloat, tbegin_nofloat, returns_twice_attr, B_HTM, 0, BT_FN_INT_VOIDPTR)
++B_DEF (tbegin_retry, tbegin_retry, returns_twice_attr, B_HTM, 0, BT_FN_INT_VOIDPTR_INT)
++B_DEF (tbegin_retry_nofloat, tbegin_retry_nofloat,returns_twice_attr,B_HTM, 0, BT_FN_INT_VOIDPTR_INT)
++B_DEF (tend, tend, 0, B_HTM, 0, BT_FN_INT)
++B_DEF (tabort, tabort, noreturn_attr, B_HTM, 0, BT_FN_VOID_INT)
++B_DEF (tx_nesting_depth, etnd, 0, B_HTM, 0, BT_FN_INT)
++B_DEF (non_tx_store, ntstg, 0, B_HTM, 0, BT_FN_VOID_UINT64PTR_UINT64)
++B_DEF (tx_assist, tx_assist, 0, B_HTM, 0, BT_FN_VOID_INT)
++B_DEF (s390_sfpc, sfpc, 0, 0, 0, BT_FN_VOID_UINT)
++B_DEF (s390_efpc, efpc, 0, 0, 0, BT_FN_UINT)
++B_DEF (s390_lcbb, lcbb, 0, B_VX, O2_U4, BT_FN_UINT_VOIDCONSTPTR_INT)
++
++OB_DEF (s390_vec_step, MAX, MAX, B_VX, BT_FN_INT_INT)
++
++OB_DEF (s390_vec_gather_element, s390_vec_gather_element_s32,s390_vec_gather_element_dbl,B_VX,BT_FN_OV4SI_OV4SI_OUV4SI_INTCONSTPTR_UCHAR)
++OB_DEF_VAR (s390_vec_gather_element_s32,s390_vgef, O4_U2, BT_OV_V4SI_V4SI_UV4SI_INTCONSTPTR_UCHAR)
++OB_DEF_VAR (s390_vec_gather_element_b32,s390_vgef, O4_U2, BT_OV_BV4SI_BV4SI_UV4SI_UINTCONSTPTR_UCHAR)
++OB_DEF_VAR (s390_vec_gather_element_u32,s390_vgef, O4_U2, BT_OV_UV4SI_UV4SI_UV4SI_UINTCONSTPTR_UCHAR)
++OB_DEF_VAR (s390_vec_gather_element_s64,s390_vgeg, O4_U1, BT_OV_V2DI_V2DI_UV2DI_LONGLONGCONSTPTR_UCHAR)
++OB_DEF_VAR (s390_vec_gather_element_b64,s390_vgeg, O4_U1, BT_OV_BV2DI_BV2DI_UV2DI_ULONGLONGCONSTPTR_UCHAR)
++OB_DEF_VAR (s390_vec_gather_element_u64,s390_vgeg, O4_U1, BT_OV_UV2DI_UV2DI_UV2DI_ULONGLONGCONSTPTR_UCHAR)
++OB_DEF_VAR (s390_vec_gather_element_dbl,s390_vgeg, O4_U1, BT_OV_V2DF_V2DF_UV2DI_DBLCONSTPTR_UCHAR)
++
++B_DEF (s390_vgef, vec_gather_elementv4si,0, B_VX, O4_U2, BT_FN_UV4SI_UV4SI_UV4SI_UINTCONSTPTR_UCHAR)
++B_DEF (s390_vgeg, vec_gather_elementv2di,0, B_VX, O4_U1, BT_FN_UV2DI_UV2DI_UV2DI_ULONGLONGCONSTPTR_UCHAR)
++B_DEF (s390_vgbm, vec_genbytemaskv16qi,0, B_VX, O1_U16, BT_FN_UV16QI_USHORT)
++B_DEF (s390_vgmb, vec_genmaskv16qi, 0, B_VX, O1_U8 | O2_U8, BT_FN_UV16QI_UCHAR_UCHAR)
++B_DEF (s390_vgmh, vec_genmaskv8hi, 0, B_VX, O1_U8 | O2_U8, BT_FN_UV8HI_UCHAR_UCHAR)
++B_DEF (s390_vgmf, vec_genmaskv4si, 0, B_VX, O1_U8 | O2_U8, BT_FN_UV4SI_UCHAR_UCHAR)
++B_DEF (s390_vgmg, vec_genmaskv2di, 0, B_VX, O1_U8 | O2_U8, BT_FN_UV2DI_UCHAR_UCHAR)
++
++OB_DEF (s390_vec_xld2, s390_vec_xld2_s8, s390_vec_xld2_dbl, B_VX, BT_FN_V4SI_INT_VOIDPTR)
++OB_DEF_VAR (s390_vec_xld2_s8, MAX, O1_LIT, BT_OV_V16QI_LONG_SCHARPTR) /* vl */
++OB_DEF_VAR (s390_vec_xld2_u8, MAX, O1_LIT, BT_OV_UV16QI_LONG_UCHARPTR) /* vl */
++OB_DEF_VAR (s390_vec_xld2_s16, MAX, O1_LIT, BT_OV_V8HI_LONG_SHORTPTR) /* vl */
++OB_DEF_VAR (s390_vec_xld2_u16, MAX, O1_LIT, BT_OV_UV8HI_LONG_USHORTPTR) /* vl */
++OB_DEF_VAR (s390_vec_xld2_s32, MAX, O1_LIT, BT_OV_V4SI_LONG_INTPTR) /* vl */
++OB_DEF_VAR (s390_vec_xld2_u32, MAX, O1_LIT, BT_OV_UV4SI_LONG_UINTPTR) /* vl */
++OB_DEF_VAR (s390_vec_xld2_s64, MAX, O1_LIT, BT_OV_V2DI_LONG_LONGLONGPTR) /* vl */
++OB_DEF_VAR (s390_vec_xld2_u64, MAX, O1_LIT, BT_OV_UV2DI_LONG_ULONGLONGPTR) /* vl */
++OB_DEF_VAR (s390_vec_xld2_dbl, MAX, O1_LIT, BT_OV_V2DF_LONG_DBLPTR) /* vl */
++
++OB_DEF (s390_vec_xlw4,
s390_vec_xlw4_s8, s390_vec_xlw4_u32, B_VX, BT_FN_V4SI_INT_VOIDPTR) ++OB_DEF_VAR (s390_vec_xlw4_s8, MAX, O1_LIT, BT_OV_V16QI_LONG_SCHARPTR) /* vl */ ++OB_DEF_VAR (s390_vec_xlw4_u8, MAX, O1_LIT, BT_OV_UV16QI_LONG_UCHARPTR) /* vl */ ++OB_DEF_VAR (s390_vec_xlw4_s16, MAX, O1_LIT, BT_OV_V8HI_LONG_SHORTPTR) /* vl */ ++OB_DEF_VAR (s390_vec_xlw4_u16, MAX, O1_LIT, BT_OV_UV8HI_LONG_USHORTPTR) /* vl */ ++OB_DEF_VAR (s390_vec_xlw4_s32, MAX, O1_LIT, BT_OV_V4SI_LONG_INTPTR) /* vl */ ++OB_DEF_VAR (s390_vec_xlw4_u32, MAX, O1_LIT, BT_OV_UV4SI_LONG_UINTPTR) /* vl */ ++ ++OB_DEF (s390_vec_splats, s390_vec_splats_s8, s390_vec_splats_dbl,B_VX, BT_FN_OV4SI_INT) ++OB_DEF_VAR (s390_vec_splats_s8, s390_vlrepb, 0, BT_OV_V16QI_SCHAR) ++OB_DEF_VAR (s390_vec_splats_u8, s390_vlrepb, 0, BT_OV_UV16QI_UCHAR) ++OB_DEF_VAR (s390_vec_splats_s16, s390_vlreph, 0, BT_OV_V8HI_SHORT) ++OB_DEF_VAR (s390_vec_splats_u16, s390_vlreph, 0, BT_OV_UV8HI_USHORT) ++OB_DEF_VAR (s390_vec_splats_s32, s390_vlrepf, 0, BT_OV_V4SI_INT) ++OB_DEF_VAR (s390_vec_splats_u32, s390_vlrepf, 0, BT_OV_UV4SI_UINT) ++OB_DEF_VAR (s390_vec_splats_s64, s390_vlrepg, 0, BT_OV_V2DI_LONGLONG) ++OB_DEF_VAR (s390_vec_splats_u64, s390_vlrepg, 0, BT_OV_UV2DI_ULONGLONG) ++OB_DEF_VAR (s390_vec_splats_dbl, s390_vlrepg_dbl, 0, BT_OV_V2DF_DBL) /* vlrepg */ ++ ++B_DEF (s390_vlrepb, vec_splatsv16qi, 0, B_VX, 0, BT_FN_UV16QI_UCHAR) ++B_DEF (s390_vlreph, vec_splatsv8hi, 0, B_VX, 0, BT_FN_UV8HI_USHORT) ++B_DEF (s390_vlrepf, vec_splatsv4si, 0, B_VX, 0, BT_FN_UV4SI_UINT) ++B_DEF (s390_vlrepg, vec_splatsv2di, 0, B_VX, 0, BT_FN_UV2DI_ULONGLONG) ++B_DEF (s390_vlrepg_dbl, vec_splatsv2df, 0, B_VX | B_INT, 0, BT_FN_V2DF_DBL) ++B_DEF (s390_vrepib, vec_splatsv16qi, 0, B_VX, O1_U8, BT_FN_V16QI_UCHAR) ++B_DEF (s390_vrepih, vec_splatsv8hi, 0, B_VX, O1_S16, BT_FN_V8HI_SHORT) ++B_DEF (s390_vrepif, vec_splatsv4si, 0, B_VX, O1_S16, BT_FN_V4SI_SHORT) ++B_DEF (s390_vrepig, vec_splatsv2di, 0, B_VX, O1_S16, BT_FN_V2DI_SHORT) ++ ++B_DEF (s390_vec_splat_u8, vec_splatsv16qi, 0, B_VX, O1_U8, BT_FN_UV16QI_UCHAR) ++B_DEF (s390_vec_splat_s8, vec_splatsv16qi, 0, B_VX, O1_S8, BT_FN_V16QI_SCHAR) ++B_DEF (s390_vec_splat_u16, vec_splatsv8hi, 0, B_VX, O1_U16, BT_FN_UV8HI_USHORT) ++B_DEF (s390_vec_splat_s16, vec_splatsv8hi, 0, B_VX, O1_S16, BT_FN_V8HI_SHORT) ++B_DEF (s390_vec_splat_u32, vec_splatsv4si, 0, B_VX, O1_U16, BT_FN_UV4SI_USHORT) ++B_DEF (s390_vec_splat_s32, vec_splatsv4si, 0, B_VX, O1_S16, BT_FN_V4SI_SHORT) ++B_DEF (s390_vec_splat_u64, vec_splatsv2di, 0, B_VX, O1_U16, BT_FN_UV2DI_USHORT) ++B_DEF (s390_vec_splat_s64, vec_splatsv2di, 0, B_VX, O1_S16, BT_FN_V2DI_SHORT) ++ ++OB_DEF (s390_vec_insert, s390_vec_insert_s8, s390_vec_insert_dbl,B_VX, BT_FN_OV4SI_INT_OV4SI_INT) ++OB_DEF_VAR (s390_vec_insert_s8, s390_vlvgb, O3_ELEM, BT_OV_V16QI_SCHAR_V16QI_INT) ++OB_DEF_VAR (s390_vec_insert_u8, s390_vlvgb, O3_ELEM, BT_OV_UV16QI_UCHAR_UV16QI_INT) ++OB_DEF_VAR (s390_vec_insert_b8, s390_vlvgb, O3_ELEM, BT_OV_UV16QI_UCHAR_BV16QI_INT) ++OB_DEF_VAR (s390_vec_insert_s16, s390_vlvgh, O3_ELEM, BT_OV_V8HI_SHORT_V8HI_INT) ++OB_DEF_VAR (s390_vec_insert_u16, s390_vlvgh, O3_ELEM, BT_OV_UV8HI_USHORT_UV8HI_INT) ++OB_DEF_VAR (s390_vec_insert_b16, s390_vlvgh, O3_ELEM, BT_OV_UV8HI_USHORT_BV8HI_INT) ++OB_DEF_VAR (s390_vec_insert_s32, s390_vlvgf, O3_ELEM, BT_OV_V4SI_INT_V4SI_INT) ++OB_DEF_VAR (s390_vec_insert_u32, s390_vlvgf, O3_ELEM, BT_OV_UV4SI_UINT_UV4SI_INT) ++OB_DEF_VAR (s390_vec_insert_b32, s390_vlvgf, O3_ELEM, BT_OV_UV4SI_UINT_BV4SI_INT) ++OB_DEF_VAR (s390_vec_insert_s64, s390_vlvgg, O3_ELEM, BT_OV_V2DI_LONGLONG_V2DI_INT) 
++OB_DEF_VAR (s390_vec_insert_u64, s390_vlvgg, O3_ELEM, BT_OV_UV2DI_ULONGLONG_UV2DI_INT) ++OB_DEF_VAR (s390_vec_insert_b64, s390_vlvgg, O3_ELEM, BT_OV_UV2DI_ULONGLONG_BV2DI_INT) ++OB_DEF_VAR (s390_vec_insert_dbl, s390_vlvgg_dbl, O3_ELEM, BT_OV_V2DF_DBL_V2DF_INT) ++ ++B_DEF (s390_vlvgb, vec_insertv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UCHAR_INT) ++B_DEF (s390_vlvgh, vec_insertv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_USHORT_INT) ++B_DEF (s390_vlvgf, vec_insertv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UINT_INT) ++B_DEF (s390_vlvgg, vec_insertv2di, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_ULONGLONG_INT) ++B_DEF (s390_vlvgg_dbl, vec_insertv2df, 0, B_VX | B_INT, 0, BT_FN_V2DF_V2DF_DBL_INT) ++ ++OB_DEF (s390_vec_promote, s390_vec_promote_s8,s390_vec_promote_dbl,B_VX, BT_FN_OV4SI_INT_INT) ++OB_DEF_VAR (s390_vec_promote_s8, s390_vlvgb_noin, O2_ELEM, BT_OV_V16QI_SCHAR_INT) /* vlvgb */ ++OB_DEF_VAR (s390_vec_promote_u8, s390_vlvgb_noin, O2_ELEM, BT_OV_UV16QI_UCHAR_INT) /* vlvgb */ ++OB_DEF_VAR (s390_vec_promote_s16, s390_vlvgh_noin, O2_ELEM, BT_OV_V8HI_SHORT_INT) /* vlvgh */ ++OB_DEF_VAR (s390_vec_promote_u16, s390_vlvgh_noin, O2_ELEM, BT_OV_UV8HI_USHORT_INT) /* vlvgh */ ++OB_DEF_VAR (s390_vec_promote_s32, s390_vlvgf_noin, O2_ELEM, BT_OV_V4SI_INT_INT) /* vlvgf */ ++OB_DEF_VAR (s390_vec_promote_u32, s390_vlvgf_noin, O2_ELEM, BT_OV_UV4SI_UINT_INT) /* vlvgf */ ++OB_DEF_VAR (s390_vec_promote_s64, s390_vlvgg_noin, O2_ELEM, BT_OV_V2DI_LONGLONG_INT) /* vlvgg */ ++OB_DEF_VAR (s390_vec_promote_u64, s390_vlvgg_noin, O2_ELEM, BT_OV_UV2DI_ULONGLONG_INT) /* vlvgg */ ++OB_DEF_VAR (s390_vec_promote_dbl, s390_vlvgg_dbl_noin,O2_ELEM, BT_OV_V2DF_DBL_INT) /* vlvgg */ ++ ++B_DEF (s390_vlvgb_noin, vec_promotev16qi, 0, B_VX | B_INT, 0, BT_FN_UV16QI_UCHAR_INT) ++B_DEF (s390_vlvgh_noin, vec_promotev8hi, 0, B_VX | B_INT, 0, BT_FN_UV8HI_USHORT_INT) ++B_DEF (s390_vlvgf_noin, vec_promotev4si, 0, B_VX | B_INT, 0, BT_FN_UV4SI_UINT_INT) ++B_DEF (s390_vlvgg_noin, vec_promotev2di, 0, B_VX | B_INT, 0, BT_FN_UV2DI_ULONGLONG_INT) ++B_DEF (s390_vlvgg_dbl_noin, vec_promotev2df, 0, B_VX | B_INT, 0, BT_FN_V2DF_DBL_INT) ++ ++OB_DEF (s390_vec_extract, s390_vec_extract_s8,s390_vec_extract_dbl,B_VX, BT_FN_INT_OV4SI_INT) ++OB_DEF_VAR (s390_vec_extract_s8, s390_vlgvb, O2_ELEM, BT_OV_SCHAR_V16QI_INT) ++OB_DEF_VAR (s390_vec_extract_u8, s390_vlgvb, O2_ELEM, BT_OV_UCHAR_UV16QI_INT) ++OB_DEF_VAR (s390_vec_extract_b8, s390_vlgvb, O2_ELEM, BT_OV_UCHAR_BV16QI_INT) ++OB_DEF_VAR (s390_vec_extract_s16, s390_vlgvh, O2_ELEM, BT_OV_SHORT_V8HI_INT) ++OB_DEF_VAR (s390_vec_extract_u16, s390_vlgvh, O2_ELEM, BT_OV_USHORT_UV8HI_INT) ++OB_DEF_VAR (s390_vec_extract_b16, s390_vlgvh, O2_ELEM, BT_OV_USHORT_BV8HI_INT) ++OB_DEF_VAR (s390_vec_extract_s32, s390_vlgvf, O2_ELEM, BT_OV_INT_V4SI_INT) ++OB_DEF_VAR (s390_vec_extract_u32, s390_vlgvf, O2_ELEM, BT_OV_UINT_UV4SI_INT) ++OB_DEF_VAR (s390_vec_extract_b32, s390_vlgvf, O2_ELEM, BT_OV_UINT_BV4SI_INT) ++OB_DEF_VAR (s390_vec_extract_s64, s390_vlgvg, O2_ELEM, BT_OV_LONGLONG_V2DI_INT) ++OB_DEF_VAR (s390_vec_extract_u64, s390_vlgvg, O2_ELEM, BT_OV_ULONGLONG_UV2DI_INT) ++OB_DEF_VAR (s390_vec_extract_b64, s390_vlgvg, O2_ELEM, BT_OV_ULONGLONG_BV2DI_INT) ++OB_DEF_VAR (s390_vec_extract_dbl, s390_vlgvg_dbl, O2_ELEM, BT_OV_DBL_V2DF_INT) /* vlgvg */ ++ ++B_DEF (s390_vlgvb, vec_extractv16qi, 0, B_VX, 0, BT_FN_UCHAR_UV16QI_INT) ++B_DEF (s390_vlgvh, vec_extractv8hi, 0, B_VX, 0, BT_FN_USHORT_UV8HI_INT) ++B_DEF (s390_vlgvf, vec_extractv4si, 0, B_VX, 0, BT_FN_UINT_UV4SI_INT) ++B_DEF (s390_vlgvg, vec_extractv2di, 0, B_VX, 0, BT_FN_ULONGLONG_UV2DI_INT) ++B_DEF 
(s390_vlgvg_dbl, vec_extractv2df, 0, B_VX | B_INT, 0, BT_FN_DBL_V2DF_INT) ++ ++OB_DEF (s390_vec_insert_and_zero, s390_vec_insert_and_zero_s8,s390_vec_insert_and_zero_dbl,B_VX,BT_FN_OV4SI_INTCONSTPTR) ++OB_DEF_VAR (s390_vec_insert_and_zero_s8,s390_vllezb, 0, BT_OV_V16QI_SCHARCONSTPTR) ++OB_DEF_VAR (s390_vec_insert_and_zero_u8,s390_vllezb, 0, BT_OV_UV16QI_UCHARCONSTPTR) ++OB_DEF_VAR (s390_vec_insert_and_zero_s16,s390_vllezh, 0, BT_OV_V8HI_SHORTCONSTPTR) ++OB_DEF_VAR (s390_vec_insert_and_zero_u16,s390_vllezh, 0, BT_OV_UV8HI_USHORTCONSTPTR) ++OB_DEF_VAR (s390_vec_insert_and_zero_s32,s390_vllezf, 0, BT_OV_V4SI_INTCONSTPTR) ++OB_DEF_VAR (s390_vec_insert_and_zero_u32,s390_vllezf, 0, BT_OV_UV4SI_UINTCONSTPTR) ++OB_DEF_VAR (s390_vec_insert_and_zero_s64,s390_vllezg, 0, BT_OV_V2DI_LONGLONGCONSTPTR) ++OB_DEF_VAR (s390_vec_insert_and_zero_u64,s390_vllezg, 0, BT_OV_UV2DI_ULONGLONGCONSTPTR) ++OB_DEF_VAR (s390_vec_insert_and_zero_dbl,s390_vllezg, 0, BT_OV_V2DF_DBLCONSTPTR) ++ ++B_DEF (s390_vllezb, vec_insert_and_zerov16qi,0, B_VX, 0, BT_FN_UV16QI_UCHARCONSTPTR) ++B_DEF (s390_vllezh, vec_insert_and_zerov8hi,0, B_VX, 0, BT_FN_UV8HI_USHORTCONSTPTR) ++B_DEF (s390_vllezf, vec_insert_and_zerov4si,0, B_VX, 0, BT_FN_UV4SI_UINTCONSTPTR) ++B_DEF (s390_vllezg, vec_insert_and_zerov2di,0, B_VX, 0, BT_FN_UV2DI_ULONGLONGCONSTPTR) ++ ++OB_DEF (s390_vec_load_bndry, s390_vec_load_bndry_s8,s390_vec_load_bndry_dbl,B_VX, BT_FN_OV4SI_INTCONSTPTR_INT) ++OB_DEF_VAR (s390_vec_load_bndry_s8, s390_vlbb, O2_U16, BT_OV_V16QI_SCHARCONSTPTR_USHORT) ++OB_DEF_VAR (s390_vec_load_bndry_u8, s390_vlbb, O2_U16, BT_OV_UV16QI_UCHARCONSTPTR_USHORT) ++OB_DEF_VAR (s390_vec_load_bndry_s16, s390_vlbb, O2_U16, BT_OV_V8HI_SHORTCONSTPTR_USHORT) ++OB_DEF_VAR (s390_vec_load_bndry_u16, s390_vlbb, O2_U16, BT_OV_UV8HI_USHORTCONSTPTR_USHORT) ++OB_DEF_VAR (s390_vec_load_bndry_s32, s390_vlbb, O2_U16, BT_OV_V4SI_INTCONSTPTR_USHORT) ++OB_DEF_VAR (s390_vec_load_bndry_u32, s390_vlbb, O2_U16, BT_OV_UV4SI_UINTCONSTPTR_USHORT) ++OB_DEF_VAR (s390_vec_load_bndry_s64, s390_vlbb, O2_U16, BT_OV_V2DI_LONGLONGCONSTPTR_USHORT) ++OB_DEF_VAR (s390_vec_load_bndry_u64, s390_vlbb, O2_U16, BT_OV_UV2DI_ULONGLONGCONSTPTR_USHORT) ++OB_DEF_VAR (s390_vec_load_bndry_dbl, s390_vlbb, O2_U16, BT_OV_V2DF_DBLCONSTPTR_USHORT) ++ ++B_DEF (s390_vlbb, vlbb, 0, B_VX, O2_U3, BT_FN_UV16QI_UCHARCONSTPTR_USHORT) ++ ++OB_DEF (s390_vec_load_pair, s390_vec_load_pair_s64,s390_vec_load_pair_u64,B_VX, BT_FN_OV2DI_LONGLONG_LONGLONG) ++OB_DEF_VAR (s390_vec_load_pair_s64, MAX, 0, BT_OV_V2DI_LONGLONG_LONGLONG) /* vlvgp */ ++OB_DEF_VAR (s390_vec_load_pair_u64, MAX, 0, BT_OV_UV2DI_ULONGLONG_ULONGLONG) /* vlvgp */ ++ ++OB_DEF (s390_vec_load_len, s390_vec_load_len_s8,s390_vec_load_len_dbl,B_VX, BT_FN_OV4SI_INTCONSTPTR_UINT) ++OB_DEF_VAR (s390_vec_load_len_s8, s390_vll, 0, BT_OV_V16QI_SCHARCONSTPTR_UINT) ++OB_DEF_VAR (s390_vec_load_len_u8, s390_vll, 0, BT_OV_UV16QI_UCHARCONSTPTR_UINT) ++OB_DEF_VAR (s390_vec_load_len_s16, s390_vll, 0, BT_OV_V8HI_SHORTCONSTPTR_UINT) ++OB_DEF_VAR (s390_vec_load_len_u16, s390_vll, 0, BT_OV_UV8HI_USHORTCONSTPTR_UINT) ++OB_DEF_VAR (s390_vec_load_len_s32, s390_vll, 0, BT_OV_V4SI_INTCONSTPTR_UINT) ++OB_DEF_VAR (s390_vec_load_len_u32, s390_vll, 0, BT_OV_UV4SI_UINTCONSTPTR_UINT) ++OB_DEF_VAR (s390_vec_load_len_s64, s390_vll, 0, BT_OV_V2DI_LONGLONGCONSTPTR_UINT) ++OB_DEF_VAR (s390_vec_load_len_u64, s390_vll, 0, BT_OV_UV2DI_ULONGLONGCONSTPTR_UINT) ++OB_DEF_VAR (s390_vec_load_len_dbl, s390_vll, 0, BT_OV_V2DF_DBLCONSTPTR_UINT) ++ ++B_DEF (s390_vll, vllv16qi, 0, B_VX, 0, 
BT_FN_V16QI_UINT_VOIDCONSTPTR) ++ ++OB_DEF (s390_vec_mergeh, s390_vec_mergeh_s8, s390_vec_mergeh_dbl,B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_mergeh_s8, s390_vmrhb, 0, BT_OV_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_mergeh_u8, s390_vmrhb, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_mergeh_b8, s390_vmrhb, 0, BT_OV_BV16QI_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_mergeh_s16, s390_vmrhh, 0, BT_OV_V8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_mergeh_u16, s390_vmrhh, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_mergeh_b16, s390_vmrhh, 0, BT_OV_BV8HI_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_mergeh_s32, s390_vmrhf, 0, BT_OV_V4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_mergeh_u32, s390_vmrhf, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_mergeh_b32, s390_vmrhf, 0, BT_OV_BV4SI_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_mergeh_s64, s390_vmrhg, 0, BT_OV_V2DI_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_mergeh_u64, s390_vmrhg, 0, BT_OV_UV2DI_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_mergeh_b64, s390_vmrhg, 0, BT_OV_BV2DI_BV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_mergeh_dbl, s390_vmrhg, 0, BT_OV_V2DF_V2DF_V2DF) ++ ++B_DEF (s390_vmrhb, vec_mergehv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++B_DEF (s390_vmrhh, vec_mergehv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI) ++B_DEF (s390_vmrhf, vec_mergehv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI) ++B_DEF (s390_vmrhg, vec_mergehv2di, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UV2DI) ++ ++OB_DEF (s390_vec_mergel, s390_vec_mergel_s8, s390_vec_mergel_dbl,B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_mergel_s8, s390_vmrlb, 0, BT_OV_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_mergel_u8, s390_vmrlb, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_mergel_b8, s390_vmrlb, 0, BT_OV_BV16QI_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_mergel_s16, s390_vmrlh, 0, BT_OV_V8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_mergel_u16, s390_vmrlh, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_mergel_b16, s390_vmrlh, 0, BT_OV_BV8HI_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_mergel_s32, s390_vmrlf, 0, BT_OV_V4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_mergel_u32, s390_vmrlf, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_mergel_b32, s390_vmrlf, 0, BT_OV_BV4SI_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_mergel_s64, s390_vmrlg, 0, BT_OV_V2DI_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_mergel_u64, s390_vmrlg, 0, BT_OV_UV2DI_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_mergel_b64, s390_vmrlg, 0, BT_OV_BV2DI_BV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_mergel_dbl, s390_vmrlg, 0, BT_OV_V2DF_V2DF_V2DF) ++ ++B_DEF (s390_vmrlb, vec_mergelv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++B_DEF (s390_vmrlh, vec_mergelv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI) ++B_DEF (s390_vmrlf, vec_mergelv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI) ++B_DEF (s390_vmrlg, vec_mergelv2di, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UV2DI) ++ ++OB_DEF (s390_vec_pack, s390_vec_pack_s16, s390_vec_pack_b64, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_pack_s16, s390_vpkh, 0, BT_OV_V16QI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_pack_u16, s390_vpkh, 0, BT_OV_UV16QI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_pack_b16, s390_vpkh, 0, BT_OV_BV16QI_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_pack_s32, s390_vpkf, 0, BT_OV_V8HI_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_pack_u32, s390_vpkf, 0, BT_OV_UV8HI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_pack_b32, s390_vpkf, 0, BT_OV_BV8HI_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_pack_s64, s390_vpkg, 0, BT_OV_V4SI_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_pack_u64, s390_vpkg, 0, BT_OV_UV4SI_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_pack_b64, s390_vpkg, 0, BT_OV_BV4SI_BV2DI_BV2DI) ++ ++B_DEF (s390_vpkh, vec_packv8hi, 0, 
B_VX, 0, BT_FN_UV16QI_UV8HI_UV8HI) ++B_DEF (s390_vpkf, vec_packv4si, 0, B_VX, 0, BT_FN_UV8HI_UV4SI_UV4SI) ++B_DEF (s390_vpkg, vec_packv2di, 0, B_VX, 0, BT_FN_UV4SI_UV2DI_UV2DI) ++ ++OB_DEF (s390_vec_packs, s390_vec_packs_s16, s390_vec_packs_u64, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_packs_s16, s390_vpksh, 0, BT_OV_V16QI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_packs_u16, s390_vpklsh, 0, BT_OV_UV16QI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_packs_s32, s390_vpksf, 0, BT_OV_V8HI_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_packs_u32, s390_vpklsf, 0, BT_OV_UV8HI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_packs_s64, s390_vpksg, 0, BT_OV_V4SI_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_packs_u64, s390_vpklsg, 0, BT_OV_UV4SI_UV2DI_UV2DI) ++ ++B_DEF (s390_vpksh, vec_packsv8hi, 0, B_VX, 0, BT_FN_V16QI_V8HI_V8HI) ++B_DEF (s390_vpklsh, vec_packsuv8hi, 0, B_VX, 0, BT_FN_UV16QI_UV8HI_UV8HI) ++B_DEF (s390_vpksf, vec_packsv4si, 0, B_VX, 0, BT_FN_V8HI_V4SI_V4SI) ++B_DEF (s390_vpklsf, vec_packsuv4si, 0, B_VX, 0, BT_FN_UV8HI_UV4SI_UV4SI) ++B_DEF (s390_vpksg, vec_packsv2di, 0, B_VX, 0, BT_FN_V4SI_V2DI_V2DI) ++B_DEF (s390_vpklsg, vec_packsuv2di, 0, B_VX, 0, BT_FN_UV4SI_UV2DI_UV2DI) ++ ++OB_DEF (s390_vec_packs_cc, s390_vec_packs_cc_s16,s390_vec_packs_cc_u64,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_INTPTR) ++OB_DEF_VAR (s390_vec_packs_cc_s16, s390_vpkshs, 0, BT_OV_V16QI_V8HI_V8HI_INTPTR) ++OB_DEF_VAR (s390_vec_packs_cc_u16, s390_vpklshs, 0, BT_OV_UV16QI_UV8HI_UV8HI_INTPTR) ++OB_DEF_VAR (s390_vec_packs_cc_s32, s390_vpksfs, 0, BT_OV_V8HI_V4SI_V4SI_INTPTR) ++OB_DEF_VAR (s390_vec_packs_cc_u32, s390_vpklsfs, 0, BT_OV_UV8HI_UV4SI_UV4SI_INTPTR) ++OB_DEF_VAR (s390_vec_packs_cc_s64, s390_vpksgs, 0, BT_OV_V4SI_V2DI_V2DI_INTPTR) ++OB_DEF_VAR (s390_vec_packs_cc_u64, s390_vpklsgs, 0, BT_OV_UV4SI_UV2DI_UV2DI_INTPTR) ++ ++B_DEF (s390_vpkshs, vec_packs_ccv8hi, 0, B_VX, 0, BT_FN_V16QI_V8HI_V8HI_INTPTR) ++B_DEF (s390_vpklshs, vec_packsu_ccv8hi, 0, B_VX, 0, BT_FN_UV16QI_UV8HI_UV8HI_INTPTR) ++B_DEF (s390_vpksfs, vec_packs_ccv4si, 0, B_VX, 0, BT_FN_V8HI_V4SI_V4SI_INTPTR) ++B_DEF (s390_vpklsfs, vec_packsu_ccv4si, 0, B_VX, 0, BT_FN_UV8HI_UV4SI_UV4SI_INTPTR) ++B_DEF (s390_vpksgs, vec_packs_ccv2di, 0, B_VX, 0, BT_FN_V4SI_V2DI_V2DI_INTPTR) ++B_DEF (s390_vpklsgs, vec_packsu_ccv2di, 0, B_VX, 0, BT_FN_UV4SI_UV2DI_UV2DI_INTPTR) ++ ++OB_DEF (s390_vec_packsu, s390_vec_packsu_s16,s390_vec_packsu_u64,B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_packsu_s16, s390_vec_packsu_u16,0, BT_OV_UV16QI_V8HI_V8HI) /* vpklsh */ ++OB_DEF_VAR (s390_vec_packsu_u16, s390_vpklsh, 0, BT_OV_UV16QI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_packsu_s32, s390_vec_packsu_u32,0, BT_OV_UV8HI_V4SI_V4SI) /* vpklsf */ ++OB_DEF_VAR (s390_vec_packsu_u32, s390_vpklsf, 0, BT_OV_UV8HI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_packsu_s64, s390_vec_packsu_u64,0, BT_OV_UV4SI_V2DI_V2DI) /* vpklsg */ ++OB_DEF_VAR (s390_vec_packsu_u64, s390_vpklsg, 0, BT_OV_UV4SI_UV2DI_UV2DI) ++ ++B_DEF (s390_vec_packsu_u16, vec_packsu_uv8hi, 0, B_VX | B_INT, 0, BT_FN_UV16QI_UV8HI_UV8HI) /* vpklsh */ ++B_DEF (s390_vec_packsu_u32, vec_packsu_uv4si, 0, B_VX | B_INT, 0, BT_FN_UV8HI_UV4SI_UV4SI) /* vpklsf */ ++B_DEF (s390_vec_packsu_u64, vec_packsu_uv2di, 0, B_VX | B_INT, 0, BT_FN_UV4SI_UV2DI_UV2DI) /* vpklsg */ ++ ++OB_DEF (s390_vec_packsu_cc, s390_vec_packsu_cc_u16,s390_vec_packsu_cc_u64,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_INTPTR) ++OB_DEF_VAR (s390_vec_packsu_cc_u16, s390_vpklshs, 0, BT_OV_UV16QI_UV8HI_UV8HI_INTPTR) ++OB_DEF_VAR (s390_vec_packsu_cc_u32, s390_vpklsfs, 0, BT_OV_UV8HI_UV4SI_UV4SI_INTPTR) ++OB_DEF_VAR (s390_vec_packsu_cc_u64, 
s390_vpklsgs, 0, BT_OV_UV4SI_UV2DI_UV2DI_INTPTR) ++ ++OB_DEF (s390_vec_perm, s390_vec_perm_s8, s390_vec_perm_dbl, B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_perm_s8, s390_vperm, 0, BT_OV_V16QI_V16QI_V16QI_UV16QI) ++OB_DEF_VAR (s390_vec_perm_b8, s390_vperm, 0, BT_OV_BV16QI_BV16QI_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_perm_u8, s390_vperm, 0, BT_OV_UV16QI_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_perm_s16, s390_vperm, 0, BT_OV_V8HI_V8HI_V8HI_UV16QI) ++OB_DEF_VAR (s390_vec_perm_b16, s390_vperm, 0, BT_OV_BV8HI_BV8HI_BV8HI_UV16QI) ++OB_DEF_VAR (s390_vec_perm_u16, s390_vperm, 0, BT_OV_UV8HI_UV8HI_UV8HI_UV16QI) ++OB_DEF_VAR (s390_vec_perm_s32, s390_vperm, 0, BT_OV_V4SI_V4SI_V4SI_UV16QI) ++OB_DEF_VAR (s390_vec_perm_b32, s390_vperm, 0, BT_OV_BV4SI_BV4SI_BV4SI_UV16QI) ++OB_DEF_VAR (s390_vec_perm_u32, s390_vperm, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV16QI) ++OB_DEF_VAR (s390_vec_perm_s64, s390_vperm, 0, BT_OV_V2DI_V2DI_V2DI_UV16QI) ++OB_DEF_VAR (s390_vec_perm_b64, s390_vperm, 0, BT_OV_BV2DI_BV2DI_BV2DI_UV16QI) ++OB_DEF_VAR (s390_vec_perm_u64, s390_vperm, 0, BT_OV_UV2DI_UV2DI_UV2DI_UV16QI) ++OB_DEF_VAR (s390_vec_perm_dbl, s390_vperm, 0, BT_OV_V2DF_V2DF_V2DF_UV16QI) ++ ++B_DEF (s390_vperm, vec_permv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI) ++ ++OB_DEF (s390_vec_permi, s390_vec_permi_s64, s390_vec_permi_dbl, B_VX, BT_FN_OV4SI_OV4SI_OV4SI_INT) ++OB_DEF_VAR (s390_vec_permi_s64, s390_vpdi, O3_U2, BT_OV_V2DI_V2DI_V2DI_INT) ++OB_DEF_VAR (s390_vec_permi_b64, s390_vpdi, O3_U2, BT_OV_BV2DI_BV2DI_BV2DI_INT) ++OB_DEF_VAR (s390_vec_permi_u64, s390_vpdi, O3_U2, BT_OV_UV2DI_UV2DI_UV2DI_INT) ++OB_DEF_VAR (s390_vec_permi_dbl, s390_vpdi, O3_U2, BT_OV_V2DF_V2DF_V2DF_INT) ++ ++B_DEF (s390_vpdi, vec_permiv2di, 0, B_VX, O3_U2, BT_FN_UV2DI_UV2DI_UV2DI_INT) ++ ++OB_DEF (s390_vec_splat, s390_vec_splat2_s8, s390_vec_splat2_dbl,B_VX, BT_FN_OV4SI_OV4SI_UCHAR) ++OB_DEF_VAR (s390_vec_splat2_s8, s390_vrepb, O2_U4, BT_OV_V16QI_V16QI_UCHAR) ++OB_DEF_VAR (s390_vec_splat2_b8, s390_vrepb, O2_U4, BT_OV_BV16QI_BV16QI_UCHAR) ++OB_DEF_VAR (s390_vec_splat2_u8, s390_vrepb, O2_U4, BT_OV_UV16QI_UV16QI_UCHAR) ++OB_DEF_VAR (s390_vec_splat2_s16, s390_vreph, O2_U3, BT_OV_V8HI_V8HI_UCHAR) ++OB_DEF_VAR (s390_vec_splat2_b16, s390_vreph, O2_U3, BT_OV_BV8HI_BV8HI_UCHAR) ++OB_DEF_VAR (s390_vec_splat2_u16, s390_vreph, O2_U3, BT_OV_UV8HI_UV8HI_UCHAR) ++OB_DEF_VAR (s390_vec_splat2_s32, s390_vrepf, O2_U2, BT_OV_V4SI_V4SI_UCHAR) ++OB_DEF_VAR (s390_vec_splat2_b32, s390_vrepf, O2_U2, BT_OV_BV4SI_BV4SI_UCHAR) ++OB_DEF_VAR (s390_vec_splat2_u32, s390_vrepf, O2_U2, BT_OV_UV4SI_UV4SI_UCHAR) ++OB_DEF_VAR (s390_vec_splat2_s64, s390_vrepg, O2_U1, BT_OV_V2DI_V2DI_UCHAR) ++OB_DEF_VAR (s390_vec_splat2_b64, s390_vrepg, O2_U1, BT_OV_BV2DI_BV2DI_UCHAR) ++OB_DEF_VAR (s390_vec_splat2_u64, s390_vrepg, O2_U1, BT_OV_UV2DI_UV2DI_UCHAR) ++OB_DEF_VAR (s390_vec_splat2_dbl, s390_vrepg, O2_U1, BT_OV_V2DF_V2DF_UCHAR) ++ ++B_DEF (s390_vrepb, vec_splatv16qi, 0, B_VX, O2_U4, BT_FN_UV16QI_UV16QI_UCHAR) ++B_DEF (s390_vreph, vec_splatv8hi, 0, B_VX, O2_U3, BT_FN_UV8HI_UV8HI_UCHAR) ++B_DEF (s390_vrepf, vec_splatv4si, 0, B_VX, O2_U2, BT_FN_UV4SI_UV4SI_UCHAR) ++B_DEF (s390_vrepg, vec_splatv2di, 0, B_VX, O2_U1, BT_FN_UV2DI_UV2DI_UCHAR) ++ ++OB_DEF (s390_vec_scatter_element, s390_vec_scatter_element_s32,s390_vec_scatter_element_dbl,B_VX,BT_FN_VOID_V4SI_V4SI_INTPTR_ULONGLONG) ++OB_DEF_VAR (s390_vec_scatter_element_s32,s390_vscef, O4_U2, BT_OV_VOID_V4SI_UV4SI_INTPTR_ULONGLONG) ++OB_DEF_VAR (s390_vec_scatter_element_b32,s390_vscef, O4_U2, BT_OV_VOID_BV4SI_UV4SI_UINTPTR_ULONGLONG) 
++OB_DEF_VAR (s390_vec_scatter_element_u32,s390_vscef, O4_U2, BT_OV_VOID_UV4SI_UV4SI_UINTPTR_ULONGLONG) ++OB_DEF_VAR (s390_vec_scatter_element_s64,s390_vsceg, O4_U1, BT_OV_VOID_V2DI_UV2DI_LONGLONGPTR_ULONGLONG) ++OB_DEF_VAR (s390_vec_scatter_element_b64,s390_vsceg, O4_U1, BT_OV_VOID_BV2DI_UV2DI_ULONGLONGPTR_ULONGLONG) ++OB_DEF_VAR (s390_vec_scatter_element_u64,s390_vsceg, O4_U1, BT_OV_VOID_UV2DI_UV2DI_ULONGLONGPTR_ULONGLONG) ++OB_DEF_VAR (s390_vec_scatter_element_dbl,s390_vsceg, O4_U1, BT_OV_VOID_V2DF_UV2DI_DBLPTR_ULONGLONG) ++ ++B_DEF (s390_vscef, vec_scatter_elementv4si,0, B_VX, O4_U2, BT_FN_VOID_UV4SI_UV4SI_UINTPTR_ULONGLONG) ++B_DEF (s390_vsceg, vec_scatter_elementv2di,0, B_VX, O4_U1, BT_FN_VOID_UV2DI_UV2DI_ULONGLONGPTR_ULONGLONG) ++ ++OB_DEF (s390_vec_sel, s390_vec_sel_b8_a, s390_vec_sel_dbl_b, B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_sel_b8_a, s390_vsel, 0, BT_OV_BV16QI_BV16QI_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_sel_b8_b, s390_vsel, 0, BT_OV_BV16QI_BV16QI_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_sel_s8_a, s390_vsel, 0, BT_OV_V16QI_V16QI_V16QI_UV16QI) ++OB_DEF_VAR (s390_vec_sel_s8_b, s390_vsel, 0, BT_OV_V16QI_V16QI_V16QI_BV16QI) ++OB_DEF_VAR (s390_vec_sel_u8_a, s390_vsel, 0, BT_OV_UV16QI_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_sel_u8_b, s390_vsel, 0, BT_OV_UV16QI_UV16QI_UV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_sel_b16_a, s390_vsel, 0, BT_OV_BV8HI_BV8HI_BV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_sel_b16_b, s390_vsel, 0, BT_OV_BV8HI_BV8HI_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_sel_s16_a, s390_vsel, 0, BT_OV_V8HI_V8HI_V8HI_UV8HI) ++OB_DEF_VAR (s390_vec_sel_s16_b, s390_vsel, 0, BT_OV_V8HI_V8HI_V8HI_BV8HI) ++OB_DEF_VAR (s390_vec_sel_u16_a, s390_vsel, 0, BT_OV_UV8HI_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_sel_u16_b, s390_vsel, 0, BT_OV_UV8HI_UV8HI_UV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_sel_b32_a, s390_vsel, 0, BT_OV_BV4SI_BV4SI_BV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_sel_b32_b, s390_vsel, 0, BT_OV_BV4SI_BV4SI_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_sel_s32_a, s390_vsel, 0, BT_OV_V4SI_V4SI_V4SI_UV4SI) ++OB_DEF_VAR (s390_vec_sel_s32_b, s390_vsel, 0, BT_OV_V4SI_V4SI_V4SI_BV4SI) ++OB_DEF_VAR (s390_vec_sel_u32_a, s390_vsel, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_sel_u32_b, s390_vsel, 0, BT_OV_UV4SI_UV4SI_UV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_sel_b64_a, s390_vsel, 0, BT_OV_BV2DI_BV2DI_BV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_sel_b64_b, s390_vsel, 0, BT_OV_BV2DI_BV2DI_BV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_sel_s64_a, s390_vsel, 0, BT_OV_V2DI_V2DI_V2DI_UV2DI) ++OB_DEF_VAR (s390_vec_sel_s64_b, s390_vsel, 0, BT_OV_V2DI_V2DI_V2DI_BV2DI) ++OB_DEF_VAR (s390_vec_sel_u64_a, s390_vsel, 0, BT_OV_UV2DI_UV2DI_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_sel_u64_b, s390_vsel, 0, BT_OV_UV2DI_UV2DI_UV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_sel_dbl_a, s390_vsel, 0, BT_OV_V2DF_V2DF_V2DF_UV2DI) ++OB_DEF_VAR (s390_vec_sel_dbl_b, s390_vsel, 0, BT_OV_V2DF_V2DF_V2DF_BV2DI) ++ ++B_DEF (s390_vsel, vec_selv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI) ++ ++OB_DEF (s390_vec_extend_s64, s390_vec_extend_s64_s8,s390_vec_extend_s64_s32,B_VX, BT_FN_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_extend_s64_s8, s390_vsegb, 0, BT_OV_V2DI_V16QI) ++OB_DEF_VAR (s390_vec_extend_s64_s16, s390_vsegh, 0, BT_OV_V2DI_V8HI) ++OB_DEF_VAR (s390_vec_extend_s64_s32, s390_vsegf, 0, BT_OV_V2DI_V4SI) ++ ++B_DEF (s390_vsegb, vec_extendv16qi, 0, B_VX, 0, BT_FN_V2DI_V16QI) ++B_DEF (s390_vsegh, vec_extendv8hi, 0, B_VX, 0, BT_FN_V2DI_V8HI) ++B_DEF (s390_vsegf, vec_extendv4si, 0, B_VX, 0, BT_FN_V2DI_V4SI) ++ ++OB_DEF (s390_vec_xstd2, s390_vec_xstd2_s8, 
s390_vec_xstd2_dbl, B_VX, BT_FN_VOID_OV4SI_INT_VOIDPTR)
++OB_DEF_VAR (s390_vec_xstd2_s8, MAX, O2_LIT, BT_OV_VOID_V16QI_LONG_SCHARPTR) /* vst */
++OB_DEF_VAR (s390_vec_xstd2_u8, MAX, O2_LIT, BT_OV_VOID_UV16QI_LONG_UCHARPTR) /* vst */
++OB_DEF_VAR (s390_vec_xstd2_s16, MAX, O2_LIT, BT_OV_VOID_V8HI_LONG_SHORTPTR) /* vst */
++OB_DEF_VAR (s390_vec_xstd2_u16, MAX, O2_LIT, BT_OV_VOID_UV8HI_LONG_USHORTPTR) /* vst */
++OB_DEF_VAR (s390_vec_xstd2_s32, MAX, O2_LIT, BT_OV_VOID_V4SI_LONG_INTPTR) /* vst */
++OB_DEF_VAR (s390_vec_xstd2_u32, MAX, O2_LIT, BT_OV_VOID_UV4SI_LONG_UINTPTR) /* vst */
++OB_DEF_VAR (s390_vec_xstd2_s64, MAX, O2_LIT, BT_OV_VOID_V2DI_LONG_LONGLONGPTR) /* vst */
++OB_DEF_VAR (s390_vec_xstd2_u64, MAX, O2_LIT, BT_OV_VOID_UV2DI_LONG_ULONGLONGPTR) /* vst */
++OB_DEF_VAR (s390_vec_xstd2_dbl, MAX, O2_LIT, BT_OV_VOID_V2DF_LONG_DBLPTR) /* vst */
++
++OB_DEF (s390_vec_xstw4, s390_vec_xstw4_s8, s390_vec_xstw4_u32, B_VX, BT_FN_VOID_OV4SI_INT_VOIDPTR)
++OB_DEF_VAR (s390_vec_xstw4_s8, MAX, O2_LIT, BT_OV_VOID_V16QI_LONG_SCHARPTR) /* vst */
++OB_DEF_VAR (s390_vec_xstw4_u8, MAX, O2_LIT, BT_OV_VOID_UV16QI_LONG_UCHARPTR) /* vst */
++OB_DEF_VAR (s390_vec_xstw4_s16, MAX, O2_LIT, BT_OV_VOID_V8HI_LONG_SHORTPTR) /* vst */
++OB_DEF_VAR (s390_vec_xstw4_u16, MAX, O2_LIT, BT_OV_VOID_UV8HI_LONG_USHORTPTR) /* vst */
++OB_DEF_VAR (s390_vec_xstw4_s32, MAX, O2_LIT, BT_OV_VOID_V4SI_LONG_INTPTR) /* vst */
++OB_DEF_VAR (s390_vec_xstw4_u32, MAX, O2_LIT, BT_OV_VOID_UV4SI_LONG_UINTPTR) /* vst */
++
++OB_DEF (s390_vec_store_len, s390_vec_store_len_s8,s390_vec_store_len_dbl,B_VX, BT_FN_VOID_OV4SI_VOIDPTR_UINT)
++OB_DEF_VAR (s390_vec_store_len_s8, s390_vstl, 0, BT_OV_VOID_V16QI_SCHARPTR_UINT)
++OB_DEF_VAR (s390_vec_store_len_u8, s390_vstl, 0, BT_OV_VOID_UV16QI_UCHARPTR_UINT)
++OB_DEF_VAR (s390_vec_store_len_s16, s390_vstl, 0, BT_OV_VOID_V8HI_SHORTPTR_UINT)
++OB_DEF_VAR (s390_vec_store_len_u16, s390_vstl, 0, BT_OV_VOID_UV8HI_USHORTPTR_UINT)
++OB_DEF_VAR (s390_vec_store_len_s32, s390_vstl, 0, BT_OV_VOID_V4SI_INTPTR_UINT)
++OB_DEF_VAR (s390_vec_store_len_u32, s390_vstl, 0, BT_OV_VOID_UV4SI_UINTPTR_UINT)
++OB_DEF_VAR (s390_vec_store_len_s64, s390_vstl, 0, BT_OV_VOID_V2DI_LONGLONGPTR_UINT)
++OB_DEF_VAR (s390_vec_store_len_u64, s390_vstl, 0, BT_OV_VOID_UV2DI_ULONGLONGPTR_UINT)
++OB_DEF_VAR (s390_vec_store_len_dbl, s390_vstl, 0, BT_OV_VOID_V2DF_DBLPTR_UINT)
++
++B_DEF (s390_vstl, vstlv16qi, 0, B_VX, 0, BT_FN_VOID_V16QI_UINT_VOIDPTR)
++
++OB_DEF (s390_vec_unpackh, s390_vec_unpackh_s8,s390_vec_unpackh_u32,B_VX, BT_FN_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_unpackh_s8, s390_vuphb, 0, BT_OV_V8HI_V16QI)
++OB_DEF_VAR (s390_vec_unpackh_b8, s390_vuphb, 0, BT_OV_BV8HI_BV16QI)
++OB_DEF_VAR (s390_vec_unpackh_u8, s390_vuplhb, 0, BT_OV_UV8HI_UV16QI)
++OB_DEF_VAR (s390_vec_unpackh_s16, s390_vuphh, 0, BT_OV_V4SI_V8HI)
++OB_DEF_VAR (s390_vec_unpackh_b16, s390_vuphh, 0, BT_OV_BV4SI_BV8HI)
++OB_DEF_VAR (s390_vec_unpackh_u16, s390_vuplhh, 0, BT_OV_UV4SI_UV8HI)
++OB_DEF_VAR (s390_vec_unpackh_s32, s390_vuphf, 0, BT_OV_V2DI_V4SI)
++OB_DEF_VAR (s390_vec_unpackh_b32, s390_vuphf, 0, BT_OV_BV2DI_BV4SI)
++OB_DEF_VAR (s390_vec_unpackh_u32, s390_vuplhf, 0, BT_OV_UV2DI_UV4SI)
++
++B_DEF (s390_vuphb, vec_unpackhv16qi, 0, B_VX, 0, BT_FN_V8HI_V16QI)
++B_DEF (s390_vuplhb, vec_unpackh_lv16qi, 0, B_VX, 0, BT_FN_UV8HI_UV16QI)
++B_DEF (s390_vuphh, vec_unpackhv8hi, 0, B_VX, 0, BT_FN_V4SI_V8HI)
++B_DEF (s390_vuplhh, vec_unpackh_lv8hi, 0, B_VX, 0, BT_FN_UV4SI_UV8HI)
++B_DEF (s390_vuphf, vec_unpackhv4si, 0, B_VX, 0, BT_FN_V2DI_V4SI)
++B_DEF (s390_vuplhf, vec_unpackh_lv4si, 0, B_VX, 0, BT_FN_UV2DI_UV4SI)
++
++OB_DEF (s390_vec_unpackl, s390_vec_unpackl_s8,s390_vec_unpackl_u32,B_VX, BT_FN_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_unpackl_s8, s390_vuplb, 0, BT_OV_V8HI_V16QI)
++OB_DEF_VAR (s390_vec_unpackl_b8, s390_vuplb, 0, BT_OV_BV8HI_BV16QI)
++OB_DEF_VAR (s390_vec_unpackl_u8, s390_vupllb, 0, BT_OV_UV8HI_UV16QI)
++OB_DEF_VAR (s390_vec_unpackl_s16, s390_vuplhw, 0, BT_OV_V4SI_V8HI)
++OB_DEF_VAR (s390_vec_unpackl_b16, s390_vupllh, 0, BT_OV_BV4SI_BV8HI)
++OB_DEF_VAR (s390_vec_unpackl_u16, s390_vupllh, 0, BT_OV_UV4SI_UV8HI)
++OB_DEF_VAR (s390_vec_unpackl_s32, s390_vuplf, 0, BT_OV_V2DI_V4SI)
++OB_DEF_VAR (s390_vec_unpackl_b32, s390_vuplf, 0, BT_OV_BV2DI_BV4SI)
++OB_DEF_VAR (s390_vec_unpackl_u32, s390_vupllf, 0, BT_OV_UV2DI_UV4SI)
++
++B_DEF (s390_vuplb, vec_unpacklv16qi, 0, B_VX, 0, BT_FN_V8HI_V16QI)
++B_DEF (s390_vupllb, vec_unpackl_lv16qi, 0, B_VX, 0, BT_FN_UV8HI_UV16QI)
++B_DEF (s390_vuplhw, vec_unpacklv8hi, 0, B_VX, 0, BT_FN_V4SI_V8HI)
++B_DEF (s390_vupllh, vec_unpackl_lv8hi, 0, B_VX, 0, BT_FN_UV4SI_UV8HI)
++B_DEF (s390_vuplf, vec_unpacklv4si, 0, B_VX, 0, BT_FN_V2DI_V4SI)
++B_DEF (s390_vupllf, vec_unpackl_lv4si, 0, B_VX, 0, BT_FN_UV2DI_UV4SI)
++B_DEF (s390_vaq, vec_add_u128, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++
++OB_DEF (s390_vec_addc, s390_vec_addc_u8, s390_vec_addc_u64, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_addc_u8, s390_vaccb, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_addc_u16, s390_vacch, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_addc_u32, s390_vaccf, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_addc_u64, s390_vaccg, 0, BT_OV_UV2DI_UV2DI_UV2DI)
++
++B_DEF (s390_vaccb, vec_addcv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF (s390_vacch, vec_addcv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI)
++B_DEF (s390_vaccf, vec_addcv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF (s390_vaccg, vec_addcv2di, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UV2DI)
++B_DEF (s390_vaccq, vec_addc_u128, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF (s390_vacq, vec_adde_u128, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI)
++B_DEF (s390_vacccq, vec_addec_u128, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI)
++
++OB_DEF (s390_vec_and, s390_vec_and_b8, s390_vec_and_dbl_c, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_and_b8, s390_vn, 0, BT_OV_BV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_and_s8_a, s390_vn, 0, BT_OV_V16QI_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_and_s8_b, s390_vn, 0, BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_and_s8_c, s390_vn, 0, BT_OV_V16QI_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_and_u8_a, s390_vn, 0, BT_OV_UV16QI_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_and_u8_b, s390_vn, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_and_u8_c, s390_vn, 0, BT_OV_UV16QI_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_and_b16, s390_vn, 0, BT_OV_BV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_and_s16_a, s390_vn, 0, BT_OV_V8HI_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_and_s16_b, s390_vn, 0, BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_and_s16_c, s390_vn, 0, BT_OV_V8HI_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_and_u16_a, s390_vn, 0, BT_OV_UV8HI_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_and_u16_b, s390_vn, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_and_u16_c, s390_vn, 0, BT_OV_UV8HI_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_and_b32, s390_vn, 0, BT_OV_BV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_and_s32_a, s390_vn, 0, BT_OV_V4SI_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_and_s32_b, s390_vn, 0, BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_and_s32_c, s390_vn, 0, BT_OV_V4SI_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_and_u32_a, s390_vn, 0, BT_OV_UV4SI_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_and_u32_b, s390_vn, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_and_u32_c, s390_vn, 0, BT_OV_UV4SI_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_and_b64, s390_vn, 0, BT_OV_BV2DI_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_and_s64_a, s390_vn, 0, BT_OV_V2DI_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_and_s64_b, s390_vn, 0, BT_OV_V2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_and_s64_c, s390_vn, 0, BT_OV_V2DI_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_and_u64_a, s390_vn, 0, BT_OV_UV2DI_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_and_u64_b, s390_vn, 0, BT_OV_UV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_and_u64_c, s390_vn, 0, BT_OV_UV2DI_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_and_dbl_a, s390_vn, 0, BT_OV_V2DF_BV2DI_V2DF)
++OB_DEF_VAR (s390_vec_and_dbl_b, s390_vn, 0, BT_OV_V2DF_V2DF_V2DF)
++OB_DEF_VAR (s390_vec_and_dbl_c, s390_vn, 0, BT_OV_V2DF_V2DF_BV2DI)
++
++B_DEF (s390_vn, andv16qi3, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++
++OB_DEF (s390_vec_andc, s390_vec_andc_b8, s390_vec_andc_dbl_c,B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_andc_b8, s390_vnc, 0, BT_OV_BV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_andc_s8_a, s390_vnc, 0, BT_OV_V16QI_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_andc_s8_b, s390_vnc, 0, BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_andc_s8_c, s390_vnc, 0, BT_OV_V16QI_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_andc_u8_a, s390_vnc, 0, BT_OV_UV16QI_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_andc_u8_b, s390_vnc, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_andc_u8_c, s390_vnc, 0, BT_OV_UV16QI_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_andc_b16, s390_vnc, 0, BT_OV_BV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_andc_s16_a, s390_vnc, 0, BT_OV_V8HI_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_andc_s16_b, s390_vnc, 0, BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_andc_s16_c, s390_vnc, 0, BT_OV_V8HI_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_andc_u16_a, s390_vnc, 0, BT_OV_UV8HI_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_andc_u16_b, s390_vnc, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_andc_u16_c, s390_vnc, 0, BT_OV_UV8HI_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_andc_b32, s390_vnc, 0, BT_OV_BV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_andc_s32_a, s390_vnc, 0, BT_OV_V4SI_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_andc_s32_b, s390_vnc, 0, BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_andc_s32_c, s390_vnc, 0, BT_OV_V4SI_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_andc_u32_a, s390_vnc, 0, BT_OV_UV4SI_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_andc_u32_b, s390_vnc, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_andc_u32_c, s390_vnc, 0, BT_OV_UV4SI_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_andc_b64, s390_vnc, 0, BT_OV_BV2DI_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_andc_s64_a, s390_vnc, 0, BT_OV_V2DI_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_andc_s64_b, s390_vnc, 0, BT_OV_V2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_andc_s64_c, s390_vnc, 0, BT_OV_V2DI_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_andc_u64_a, s390_vnc, 0, BT_OV_UV2DI_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_andc_u64_b, s390_vnc, 0, BT_OV_UV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_andc_u64_c, s390_vnc, 0, BT_OV_UV2DI_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_andc_dbl_a, s390_vnc, 0, BT_OV_V2DF_BV2DI_V2DF)
++OB_DEF_VAR (s390_vec_andc_dbl_b, s390_vnc, 0, BT_OV_V2DF_V2DF_V2DF)
++OB_DEF_VAR (s390_vec_andc_dbl_c, s390_vnc, 0, BT_OV_V2DF_V2DF_BV2DI)
++
++B_DEF (s390_vnc, vec_andcv16qi3, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++
++OB_DEF (s390_vec_avg, s390_vec_avg_s8, s390_vec_avg_u64, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_avg_s8, s390_vavgb, 0, BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_avg_u8, s390_vavglb, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_avg_s16, s390_vavgh, 0, BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_avg_u16, s390_vavglh, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_avg_s32, s390_vavgf, 0, BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_avg_u32, s390_vavglf, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_avg_s64, s390_vavgg, 0, BT_OV_V2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_avg_u64, s390_vavglg, 0, BT_OV_UV2DI_UV2DI_UV2DI)
++
++B_DEF (s390_vavgb, vec_avgv16qi, 0, B_VX, 0, BT_FN_V16QI_V16QI_V16QI)
++B_DEF (s390_vavglb, vec_avguv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF (s390_vavgh, vec_avgv8hi, 0, B_VX, 0, BT_FN_V8HI_V8HI_V8HI)
++B_DEF (s390_vavglh, vec_avguv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI)
++B_DEF (s390_vavgf, vec_avgv4si, 0, B_VX, 0, BT_FN_V4SI_V4SI_V4SI)
++B_DEF (s390_vavglf, vec_avguv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF (s390_vavgg, vec_avgv2di, 0, B_VX, 0, BT_FN_V2DI_V2DI_V2DI)
++B_DEF (s390_vavglg, vec_avguv2di, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UV2DI)
++B_DEF (s390_vcksm, vec_checksum, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF (s390_vceqbs, vec_cmpeqv16qi_cc, 0, B_VX, 0, BT_FN_V16QI_UV16QI_UV16QI_INTPTR)
++B_DEF (s390_vceqhs, vec_cmpeqv8hi_cc, 0, B_VX, 0, BT_FN_V8HI_UV8HI_UV8HI_INTPTR)
++B_DEF (s390_vceqfs, vec_cmpeqv4si_cc, 0, B_VX, 0, BT_FN_V4SI_UV4SI_UV4SI_INTPTR)
++B_DEF (s390_vceqgs, vec_cmpeqv2di_cc, 0, B_VX, 0, BT_FN_V2DI_UV2DI_UV2DI_INTPTR)
++B_DEF (s390_vfcedbs, vec_cmpeqv2df_cc, 0, B_VX, 0, BT_FN_V2DI_V2DF_V2DF_INTPTR)
++B_DEF (s390_vchbs, vec_cmphv16qi_cc, 0, B_VX, 0, BT_FN_V16QI_V16QI_V16QI_INTPTR)
++B_DEF (s390_vchlbs, vec_cmphlv16qi_cc, 0, B_VX, 0, BT_FN_V16QI_UV16QI_UV16QI_INTPTR)
++B_DEF (s390_vchhs, vec_cmphv8hi_cc, 0, B_VX, 0, BT_FN_V8HI_V8HI_V8HI_INTPTR)
++B_DEF (s390_vchlhs, vec_cmphlv8hi_cc, 0, B_VX, 0, BT_FN_V8HI_UV8HI_UV8HI_INTPTR)
++B_DEF (s390_vchfs, vec_cmphv4si_cc, 0, B_VX, 0, BT_FN_V4SI_V4SI_V4SI_INTPTR)
++B_DEF (s390_vchlfs, vec_cmphlv4si_cc, 0, B_VX, 0, BT_FN_V4SI_UV4SI_UV4SI_INTPTR)
++B_DEF (s390_vchgs, vec_cmphv2di_cc, 0, B_VX, 0, BT_FN_V2DI_V2DI_V2DI_INTPTR)
++B_DEF (s390_vchlgs, vec_cmphlv2di_cc, 0, B_VX, 0, BT_FN_V2DI_UV2DI_UV2DI_INTPTR)
++B_DEF (s390_vfchdbs, vec_cmphv2df_cc, 0, B_VX, 0, BT_FN_V2DI_V2DF_V2DF_INTPTR)
++B_DEF (s390_vfchedbs, vec_cmphev2df_cc, 0, B_VX, 0, BT_FN_V2DI_V2DF_V2DF_INTPTR)
++B_DEF (vec_all_eqv16qi, vec_all_eqv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_UV16QI_UV16QI)
++B_DEF (vec_all_eqv8hi, vec_all_eqv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_UV8HI_UV8HI)
++B_DEF (vec_all_eqv4si, vec_all_eqv4si, 0, B_VX | B_INT, 0, BT_FN_INT_UV4SI_UV4SI)
++B_DEF (vec_all_eqv2di, vec_all_eqv2di, 0, B_VX | B_INT, 0, BT_FN_INT_UV2DI_UV2DI)
++B_DEF (vec_all_eqv2df, vec_all_eqv2df, 0, B_VX | B_INT, 0, BT_FN_INT_V2DF_V2DF)
++B_DEF (vec_all_nev16qi, vec_all_nev16qi, 0, B_VX | B_INT, 0, BT_FN_INT_UV16QI_UV16QI)
++B_DEF (vec_all_nev8hi, vec_all_nev8hi, 0, B_VX | B_INT, 0, BT_FN_INT_UV8HI_UV8HI)
++B_DEF (vec_all_nev4si, vec_all_nev4si, 0, B_VX | B_INT, 0, BT_FN_INT_UV4SI_UV4SI)
++B_DEF (vec_all_nev2di, vec_all_nev2di, 0, B_VX | B_INT, 0, BT_FN_INT_UV2DI_UV2DI)
++B_DEF (vec_all_nev2df, vec_all_nev2df, 0, B_VX | B_INT, 0, BT_FN_INT_V2DF_V2DF)
++B_DEF (vec_all_gev16qi, vec_all_gev16qi, 0, B_VX | B_INT, 0, BT_FN_INT_V16QI_V16QI)
++B_DEF (vec_all_geuv16qi, vec_all_geuv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_UV16QI_UV16QI)
++B_DEF (vec_all_gev8hi, vec_all_gev8hi, 0, B_VX | B_INT, 0, BT_FN_INT_V8HI_V8HI)
++B_DEF (vec_all_geuv8hi, vec_all_geuv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_UV8HI_UV8HI)
++B_DEF (vec_all_gev4si, vec_all_gev4si, 0, B_VX | B_INT, 0, BT_FN_INT_V4SI_V4SI)
++B_DEF (vec_all_geuv4si, vec_all_geuv4si, 0, B_VX | B_INT, 0, BT_FN_INT_UV4SI_UV4SI)
++B_DEF (vec_all_gev2di, vec_all_gev2di, 0, B_VX | B_INT, 0, BT_FN_INT_V2DI_V2DI)
++B_DEF (vec_all_geuv2di, vec_all_geuv2di, 0, B_VX | B_INT, 0, BT_FN_INT_UV2DI_UV2DI)
++B_DEF (vec_all_gev2df, vec_all_gev2df, 0, B_VX | B_INT, 0, BT_FN_INT_V2DF_V2DF)
++B_DEF (vec_all_gtv16qi, vec_all_gtv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_V16QI_V16QI)
++B_DEF (vec_all_gtuv16qi, vec_all_gtuv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_UV16QI_UV16QI)
++B_DEF (vec_all_gtv8hi, vec_all_gtv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_V8HI_V8HI)
++B_DEF (vec_all_gtuv8hi, vec_all_gtuv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_UV8HI_UV8HI)
++B_DEF (vec_all_gtv4si, vec_all_gtv4si, 0, B_VX | B_INT, 0, BT_FN_INT_V4SI_V4SI)
++B_DEF (vec_all_gtuv4si, vec_all_gtuv4si, 0, B_VX | B_INT, 0, BT_FN_INT_UV4SI_UV4SI)
++B_DEF (vec_all_gtv2di, vec_all_gtv2di, 0, B_VX | B_INT, 0, BT_FN_INT_V2DI_V2DI)
++B_DEF (vec_all_gtuv2di, vec_all_gtuv2di, 0, B_VX | B_INT, 0, BT_FN_INT_UV2DI_UV2DI)
++B_DEF (vec_all_gtv2df, vec_all_gtv2df, 0, B_VX | B_INT, 0, BT_FN_INT_V2DF_V2DF)
++B_DEF (vec_all_lev16qi, vec_all_lev16qi, 0, B_VX | B_INT, 0, BT_FN_INT_V16QI_V16QI)
++B_DEF (vec_all_leuv16qi, vec_all_leuv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_UV16QI_UV16QI)
++B_DEF (vec_all_lev8hi, vec_all_lev8hi, 0, B_VX | B_INT, 0, BT_FN_INT_V8HI_V8HI)
++B_DEF (vec_all_leuv8hi, vec_all_leuv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_UV8HI_UV8HI)
++B_DEF (vec_all_lev4si, vec_all_lev4si, 0, B_VX | B_INT, 0, BT_FN_INT_V4SI_V4SI)
++B_DEF (vec_all_leuv4si, vec_all_leuv4si, 0, B_VX | B_INT, 0, BT_FN_INT_UV4SI_UV4SI)
++B_DEF (vec_all_lev2di, vec_all_lev2di, 0, B_VX | B_INT, 0, BT_FN_INT_V2DI_V2DI)
++B_DEF (vec_all_leuv2di, vec_all_leuv2di, 0, B_VX | B_INT, 0, BT_FN_INT_UV2DI_UV2DI)
++B_DEF (vec_all_lev2df, vec_all_lev2df, 0, B_VX | B_INT, 0, BT_FN_INT_V2DF_V2DF)
++B_DEF (vec_all_ltv16qi, vec_all_ltv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_V16QI_V16QI)
++B_DEF (vec_all_ltuv16qi, vec_all_ltuv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_UV16QI_UV16QI)
++B_DEF (vec_all_ltv8hi, vec_all_ltv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_V8HI_V8HI)
++B_DEF (vec_all_ltuv8hi, vec_all_ltuv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_UV8HI_UV8HI)
++B_DEF (vec_all_ltv4si, vec_all_ltv4si, 0, B_VX | B_INT, 0, BT_FN_INT_V4SI_V4SI)
++B_DEF (vec_all_ltuv4si, vec_all_ltuv4si, 0, B_VX | B_INT, 0, BT_FN_INT_UV4SI_UV4SI)
++B_DEF (vec_all_ltv2di, vec_all_ltv2di, 0, B_VX | B_INT, 0, BT_FN_INT_V2DI_V2DI)
++B_DEF (vec_all_ltuv2di, vec_all_ltuv2di, 0, B_VX | B_INT, 0, BT_FN_INT_UV2DI_UV2DI)
++B_DEF (vec_all_ltv2df, vec_all_ltv2df, 0, B_VX | B_INT, 0, BT_FN_INT_V2DF_V2DF)
++
++OB_DEF (s390_vec_all_eq, s390_vec_all_eq_s8_a,s390_vec_all_eq_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_all_eq_s8_a, vec_all_eqv16qi, 0, BT_OV_INT_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_all_eq_s8_b, vec_all_eqv16qi, 0, BT_OV_INT_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_eq_b8_a, vec_all_eqv16qi, 0, BT_OV_INT_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_eq_b8_b, vec_all_eqv16qi, 0, BT_OV_INT_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_all_eq_b8_c, vec_all_eqv16qi, 0, BT_OV_INT_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_all_eq_u8_a, vec_all_eqv16qi, 0, BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_all_eq_u8_b, vec_all_eqv16qi, 0, BT_OV_INT_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_eq_s16_a, vec_all_eqv8hi, 0, BT_OV_INT_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_all_eq_s16_b, vec_all_eqv8hi, 0, BT_OV_INT_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_eq_b16_a, vec_all_eqv8hi, 0, BT_OV_INT_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_eq_b16_b, vec_all_eqv8hi, 0, BT_OV_INT_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_all_eq_b16_c, vec_all_eqv8hi, 0, BT_OV_INT_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_all_eq_u16_a, vec_all_eqv8hi, 0, BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_all_eq_u16_b, vec_all_eqv8hi, 0, BT_OV_INT_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_eq_s32_a, vec_all_eqv4si, 0, BT_OV_INT_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_all_eq_s32_b, vec_all_eqv4si, 0, BT_OV_INT_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_eq_b32_a, vec_all_eqv4si, 0, BT_OV_INT_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_eq_b32_b, vec_all_eqv4si, 0, BT_OV_INT_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_all_eq_b32_c, vec_all_eqv4si, 0, BT_OV_INT_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_all_eq_u32_a, vec_all_eqv4si, 0, BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_all_eq_u32_b, vec_all_eqv4si, 0, BT_OV_INT_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_eq_s64_a, vec_all_eqv2di, 0, BT_OV_INT_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_all_eq_s64_b, vec_all_eqv2di, 0, BT_OV_INT_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_eq_b64_a, vec_all_eqv2di, 0, BT_OV_INT_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_eq_b64_b, vec_all_eqv2di, 0, BT_OV_INT_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_all_eq_b64_c, vec_all_eqv2di, 0, BT_OV_INT_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_all_eq_u64_a, vec_all_eqv2di, 0, BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_all_eq_u64_b, vec_all_eqv2di, 0, BT_OV_INT_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_eq_dbl, vec_all_eqv2df, 0, BT_OV_INT_V2DF_V2DF)
++
++OB_DEF (s390_vec_all_ne, s390_vec_all_ne_s8_a,s390_vec_all_ne_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_all_ne_s8_a, vec_all_nev16qi, 0, BT_OV_INT_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_all_ne_s8_b, vec_all_nev16qi, 0, BT_OV_INT_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_ne_b8_a, vec_all_nev16qi, 0, BT_OV_INT_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_ne_b8_b, vec_all_nev16qi, 0, BT_OV_INT_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_all_ne_b8_c, vec_all_nev16qi, 0, BT_OV_INT_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_all_ne_u8_a, vec_all_nev16qi, 0, BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_all_ne_u8_b, vec_all_nev16qi, 0, BT_OV_INT_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_ne_s16_a, vec_all_nev8hi, 0, BT_OV_INT_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_all_ne_s16_b, vec_all_nev8hi, 0, BT_OV_INT_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_ne_b16_a, vec_all_nev8hi, 0, BT_OV_INT_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_ne_b16_b, vec_all_nev8hi, 0, BT_OV_INT_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_all_ne_b16_c, vec_all_nev8hi, 0, BT_OV_INT_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_all_ne_u16_a, vec_all_nev8hi, 0, BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_all_ne_u16_b, vec_all_nev8hi, 0, BT_OV_INT_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_ne_s32_a, vec_all_nev4si, 0, BT_OV_INT_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_all_ne_s32_b, vec_all_nev4si, 0, BT_OV_INT_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_ne_b32_a, vec_all_nev4si, 0, BT_OV_INT_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_ne_b32_b, vec_all_nev4si, 0, BT_OV_INT_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_all_ne_b32_c, vec_all_nev4si, 0, BT_OV_INT_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_all_ne_u32_a, vec_all_nev4si, 0, BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_all_ne_u32_b, vec_all_nev4si, 0, BT_OV_INT_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_ne_s64_a, vec_all_nev2di, 0, BT_OV_INT_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_all_ne_s64_b, vec_all_nev2di, 0, BT_OV_INT_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_ne_b64_a, vec_all_nev2di, 0, BT_OV_INT_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_ne_b64_b, vec_all_nev2di, 0, BT_OV_INT_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_all_ne_b64_c, vec_all_nev2di, 0, BT_OV_INT_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_all_ne_u64_a, vec_all_nev2di, 0, BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_all_ne_u64_b, vec_all_nev2di, 0, BT_OV_INT_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_ne_dbl, vec_all_nev2df, 0, BT_OV_INT_V2DF_V2DF)
++
++OB_DEF (s390_vec_all_ge, s390_vec_all_ge_s8_a,s390_vec_all_ge_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_all_ge_s8_a, vec_all_gev16qi, 0, BT_OV_INT_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_all_ge_s8_b, vec_all_gev16qi, 0, BT_OV_INT_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_ge_b8_a, vec_all_geuv16qi, 0, BT_OV_INT_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_ge_b8_b, vec_all_gev16qi, 0, BT_OV_INT_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_all_ge_b8_c, vec_all_geuv16qi, 0, BT_OV_INT_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_all_ge_u8_a, vec_all_geuv16qi, 0, BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_all_ge_u8_b, vec_all_geuv16qi, 0, BT_OV_INT_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_ge_s16_a, vec_all_gev8hi, 0, BT_OV_INT_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_all_ge_s16_b, vec_all_gev8hi, 0, BT_OV_INT_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_ge_b16_a, vec_all_geuv8hi, 0, BT_OV_INT_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_ge_b16_b, vec_all_gev8hi, 0, BT_OV_INT_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_all_ge_b16_c, vec_all_geuv8hi, 0, BT_OV_INT_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_all_ge_u16_a, vec_all_geuv8hi, 0, BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_all_ge_u16_b, vec_all_geuv8hi, 0, BT_OV_INT_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_ge_s32_a, vec_all_gev4si, 0, BT_OV_INT_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_all_ge_s32_b, vec_all_gev4si, 0, BT_OV_INT_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_ge_b32_a, vec_all_geuv4si, 0, BT_OV_INT_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_ge_b32_b, vec_all_gev4si, 0, BT_OV_INT_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_all_ge_b32_c, vec_all_geuv4si, 0, BT_OV_INT_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_all_ge_u32_a, vec_all_geuv4si, 0, BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_all_ge_u32_b, vec_all_geuv4si, 0, BT_OV_INT_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_ge_s64_a, vec_all_gev2di, 0, BT_OV_INT_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_all_ge_s64_b, vec_all_gev2di, 0, BT_OV_INT_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_ge_b64_a, vec_all_geuv2di, 0, BT_OV_INT_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_ge_b64_b, vec_all_gev2di, 0, BT_OV_INT_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_all_ge_b64_c, vec_all_geuv2di, 0, BT_OV_INT_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_all_ge_u64_a, vec_all_geuv2di, 0, BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_all_ge_u64_b, vec_all_geuv2di, 0, BT_OV_INT_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_ge_dbl, vec_all_gev2df, 0, BT_OV_INT_V2DF_V2DF)
++
++OB_DEF (s390_vec_all_gt, s390_vec_all_gt_s8_a,s390_vec_all_gt_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_all_gt_s8_a, vec_all_gtv16qi, 0, BT_OV_INT_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_all_gt_s8_b, vec_all_gtv16qi, 0, BT_OV_INT_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_gt_b8_a, vec_all_gtuv16qi, 0, BT_OV_INT_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_gt_b8_b, vec_all_gtv16qi, 0, BT_OV_INT_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_all_gt_b8_c, vec_all_gtuv16qi, 0, BT_OV_INT_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_all_gt_u8_a, vec_all_gtuv16qi, 0, BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_all_gt_u8_b, vec_all_gtuv16qi, 0, BT_OV_INT_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_gt_s16_a, vec_all_gtv8hi, 0, BT_OV_INT_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_all_gt_s16_b, vec_all_gtv8hi, 0, BT_OV_INT_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_gt_b16_a, vec_all_gtuv8hi, 0, BT_OV_INT_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_gt_b16_b, vec_all_gtv8hi, 0, BT_OV_INT_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_all_gt_b16_c, vec_all_gtuv8hi, 0, BT_OV_INT_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_all_gt_u16_a, vec_all_gtuv8hi, 0, BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_all_gt_u16_b, vec_all_gtuv8hi, 0, BT_OV_INT_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_gt_s32_a, vec_all_gtv4si, 0, BT_OV_INT_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_all_gt_s32_b, vec_all_gtv4si, 0, BT_OV_INT_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_gt_b32_a, vec_all_gtuv4si, 0, BT_OV_INT_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_gt_b32_b, vec_all_gtv4si, 0, BT_OV_INT_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_all_gt_b32_c, vec_all_gtuv4si, 0, BT_OV_INT_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_all_gt_u32_a, vec_all_gtuv4si, 0, BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_all_gt_u32_b, vec_all_gtuv4si, 0, BT_OV_INT_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_gt_s64_a, vec_all_gtv2di, 0, BT_OV_INT_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_all_gt_s64_b, vec_all_gtv2di, 0, BT_OV_INT_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_gt_b64_a, vec_all_gtuv2di, 0, BT_OV_INT_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_gt_b64_b, vec_all_gtv2di, 0, BT_OV_INT_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_all_gt_b64_c, vec_all_gtuv2di, 0, BT_OV_INT_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_all_gt_u64_a, vec_all_gtuv2di, 0, BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_all_gt_u64_b, vec_all_gtuv2di, 0, BT_OV_INT_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_gt_dbl, vec_all_gtv2df, 0, BT_OV_INT_V2DF_V2DF)
++
++OB_DEF (s390_vec_all_le, s390_vec_all_le_s8_a,s390_vec_all_le_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_all_le_s8_a, vec_all_lev16qi, 0, BT_OV_INT_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_all_le_s8_b, vec_all_lev16qi, 0, BT_OV_INT_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_le_b8_a, vec_all_leuv16qi, 0, BT_OV_INT_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_le_b8_b, vec_all_lev16qi, 0, BT_OV_INT_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_all_le_b8_c, vec_all_leuv16qi, 0, BT_OV_INT_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_all_le_u8_a, vec_all_leuv16qi, 0, BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_all_le_u8_b, vec_all_leuv16qi, 0, BT_OV_INT_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_le_s16_a, vec_all_lev8hi, 0, BT_OV_INT_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_all_le_s16_b, vec_all_lev8hi, 0, BT_OV_INT_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_le_b16_a, vec_all_leuv8hi, 0, BT_OV_INT_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_le_b16_b, vec_all_lev8hi, 0, BT_OV_INT_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_all_le_b16_c, vec_all_leuv8hi, 0, BT_OV_INT_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_all_le_u16_a, vec_all_leuv8hi, 0, BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_all_le_u16_b, vec_all_leuv8hi, 0, BT_OV_INT_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_le_s32_a, vec_all_lev4si, 0, BT_OV_INT_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_all_le_s32_b, vec_all_lev4si, 0, BT_OV_INT_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_le_b32_a, vec_all_leuv4si, 0, BT_OV_INT_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_le_b32_b, vec_all_lev4si, 0, BT_OV_INT_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_all_le_b32_c, vec_all_leuv4si, 0, BT_OV_INT_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_all_le_u32_a, vec_all_leuv4si, 0, BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_all_le_u32_b, vec_all_leuv4si, 0, BT_OV_INT_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_le_s64_a, vec_all_lev2di, 0, BT_OV_INT_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_all_le_s64_b, vec_all_lev2di, 0, BT_OV_INT_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_le_b64_a, vec_all_leuv2di, 0, BT_OV_INT_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_le_b64_b, vec_all_lev2di, 0, BT_OV_INT_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_all_le_b64_c, vec_all_leuv2di, 0, BT_OV_INT_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_all_le_u64_a, vec_all_leuv2di, 0, BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_all_le_u64_b, vec_all_leuv2di, 0, BT_OV_INT_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_le_dbl, vec_all_lev2df, 0, BT_OV_INT_V2DF_V2DF)
++
++OB_DEF (s390_vec_all_lt, s390_vec_all_lt_s8_a,s390_vec_all_lt_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_all_lt_s8_a, vec_all_ltv16qi, 0, BT_OV_INT_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_all_lt_s8_b, vec_all_ltv16qi, 0, BT_OV_INT_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_lt_b8_a, vec_all_ltuv16qi, 0, BT_OV_INT_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_lt_b8_b, vec_all_ltv16qi, 0, BT_OV_INT_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_all_lt_b8_c, vec_all_ltuv16qi, 0, BT_OV_INT_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_all_lt_u8_a, vec_all_ltuv16qi, 0, BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_all_lt_u8_b, vec_all_ltuv16qi, 0, BT_OV_INT_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_lt_s16_a, vec_all_ltv8hi, 0, BT_OV_INT_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_all_lt_s16_b, vec_all_ltv8hi, 0, BT_OV_INT_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_lt_b16_a, vec_all_ltuv8hi, 0, BT_OV_INT_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_lt_b16_b, vec_all_ltv8hi, 0, BT_OV_INT_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_all_lt_b16_c, vec_all_ltuv8hi, 0, BT_OV_INT_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_all_lt_u16_a, vec_all_ltuv8hi, 0, BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_all_lt_u16_b, vec_all_ltuv8hi, 0, BT_OV_INT_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_lt_s32_a, vec_all_ltv4si, 0, BT_OV_INT_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_all_lt_s32_b, vec_all_ltv4si, 0, BT_OV_INT_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_lt_b32_a, vec_all_ltuv4si, 0, BT_OV_INT_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_lt_b32_b, vec_all_ltv4si, 0, BT_OV_INT_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_all_lt_b32_c, vec_all_ltuv4si, 0, BT_OV_INT_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_all_lt_u32_a, vec_all_ltuv4si, 0, BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_all_lt_u32_b, vec_all_ltuv4si, 0, BT_OV_INT_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_lt_s64_a, vec_all_ltv2di, 0, BT_OV_INT_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_all_lt_s64_b, vec_all_ltv2di, 0, BT_OV_INT_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_lt_b64_a, vec_all_ltuv2di, 0, BT_OV_INT_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_lt_b64_b, vec_all_ltv2di, 0, BT_OV_INT_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_all_lt_b64_c, vec_all_ltuv2di, 0, BT_OV_INT_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_all_lt_u64_a, vec_all_ltuv2di, 0, BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_all_lt_u64_b, vec_all_ltuv2di, 0, BT_OV_INT_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_lt_dbl, vec_all_ltv2df, 0, BT_OV_INT_V2DF_V2DF)
++
++B_DEF (vec_any_eqv16qi, vec_any_eqv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_UV16QI_UV16QI)
++B_DEF (vec_any_eqv8hi, vec_any_eqv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_UV8HI_UV8HI)
++B_DEF (vec_any_eqv4si, vec_any_eqv4si, 0, B_VX | B_INT, 0, BT_FN_INT_UV4SI_UV4SI)
++B_DEF (vec_any_eqv2di, vec_any_eqv2di, 0, B_VX | B_INT, 0, BT_FN_INT_UV2DI_UV2DI)
++B_DEF (vec_any_eqv2df, vec_any_eqv2df, 0, B_VX | B_INT, 0, BT_FN_INT_V2DF_V2DF)
++B_DEF (vec_any_nev16qi, vec_any_nev16qi, 0, B_VX | B_INT, 0, BT_FN_INT_UV16QI_UV16QI)
++B_DEF (vec_any_nev8hi, vec_any_nev8hi, 0, B_VX | B_INT, 0, BT_FN_INT_UV8HI_UV8HI)
++B_DEF (vec_any_nev4si, vec_any_nev4si, 0, B_VX | B_INT, 0, BT_FN_INT_UV4SI_UV4SI)
++B_DEF (vec_any_nev2di, vec_any_nev2di, 0, B_VX | B_INT, 0, BT_FN_INT_UV2DI_UV2DI)
++B_DEF (vec_any_nev2df, vec_any_nev2df, 0, B_VX | B_INT, 0, BT_FN_INT_V2DF_V2DF)
++B_DEF (vec_any_gev16qi, vec_any_gev16qi, 0, B_VX | B_INT, 0, BT_FN_INT_V16QI_V16QI)
++B_DEF (vec_any_geuv16qi, vec_any_geuv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_UV16QI_UV16QI)
++B_DEF (vec_any_gev8hi, vec_any_gev8hi, 0, B_VX | B_INT, 0, BT_FN_INT_V8HI_V8HI)
++B_DEF (vec_any_geuv8hi, vec_any_geuv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_UV8HI_UV8HI)
++B_DEF (vec_any_gev4si, vec_any_gev4si, 0, B_VX | B_INT, 0, BT_FN_INT_V4SI_V4SI)
++B_DEF (vec_any_geuv4si, vec_any_geuv4si, 0, B_VX | B_INT, 0, BT_FN_INT_UV4SI_UV4SI)
++B_DEF (vec_any_gev2di, vec_any_gev2di, 0, B_VX | B_INT, 0, BT_FN_INT_V2DI_V2DI)
++B_DEF (vec_any_geuv2di, vec_any_geuv2di, 0, B_VX | B_INT, 0, BT_FN_INT_UV2DI_UV2DI)
++B_DEF (vec_any_gev2df, vec_any_gev2df, 0, B_VX | B_INT, 0, BT_FN_INT_V2DF_V2DF)
++B_DEF (vec_any_gtv16qi, vec_any_gtv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_V16QI_V16QI)
++B_DEF (vec_any_gtuv16qi, vec_any_gtuv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_UV16QI_UV16QI)
++B_DEF (vec_any_gtv8hi, vec_any_gtv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_V8HI_V8HI)
++B_DEF (vec_any_gtuv8hi, vec_any_gtuv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_UV8HI_UV8HI)
++B_DEF (vec_any_gtv4si, vec_any_gtv4si, 0, B_VX | B_INT, 0, BT_FN_INT_V4SI_V4SI)
++B_DEF (vec_any_gtuv4si, vec_any_gtuv4si, 0, B_VX | B_INT, 0, BT_FN_INT_UV4SI_UV4SI)
++B_DEF (vec_any_gtv2di, vec_any_gtv2di, 0, B_VX | B_INT, 0, BT_FN_INT_V2DI_V2DI)
++B_DEF (vec_any_gtuv2di, vec_any_gtuv2di, 0, B_VX | B_INT, 0, BT_FN_INT_UV2DI_UV2DI)
++B_DEF (vec_any_gtv2df, vec_any_gtv2df, 0, B_VX | B_INT, 0, BT_FN_INT_V2DF_V2DF)
++B_DEF (vec_any_lev16qi, vec_any_lev16qi, 0, B_VX | B_INT, 0, BT_FN_INT_V16QI_V16QI)
++B_DEF (vec_any_leuv16qi, vec_any_leuv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_UV16QI_UV16QI)
++B_DEF (vec_any_lev8hi, vec_any_lev8hi, 0, B_VX | B_INT, 0, BT_FN_INT_V8HI_V8HI)
++B_DEF (vec_any_leuv8hi, vec_any_leuv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_UV8HI_UV8HI)
++B_DEF (vec_any_lev4si, vec_any_lev4si, 0, B_VX | B_INT, 0, BT_FN_INT_V4SI_V4SI)
++B_DEF (vec_any_leuv4si, vec_any_leuv4si, 0, B_VX | B_INT, 0, BT_FN_INT_UV4SI_UV4SI)
++B_DEF (vec_any_lev2di, vec_any_lev2di, 0, B_VX | B_INT, 0, BT_FN_INT_V2DI_V2DI)
++B_DEF (vec_any_leuv2di, vec_any_leuv2di, 0, B_VX | B_INT, 0, BT_FN_INT_UV2DI_UV2DI)
++B_DEF (vec_any_lev2df, vec_any_lev2df, 0, B_VX | B_INT, 0, BT_FN_INT_V2DF_V2DF)
++B_DEF (vec_any_ltv16qi, vec_any_ltv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_V16QI_V16QI)
++B_DEF (vec_any_ltuv16qi, vec_any_ltuv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_UV16QI_UV16QI)
++B_DEF (vec_any_ltv8hi, vec_any_ltv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_V8HI_V8HI)
++B_DEF (vec_any_ltuv8hi, vec_any_ltuv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_UV8HI_UV8HI)
++B_DEF (vec_any_ltv4si, vec_any_ltv4si, 0, B_VX | B_INT, 0, BT_FN_INT_V4SI_V4SI)
++B_DEF (vec_any_ltuv4si, vec_any_ltuv4si, 0, B_VX | B_INT, 0, BT_FN_INT_UV4SI_UV4SI)
++B_DEF (vec_any_ltv2di, vec_any_ltv2di, 0, B_VX | B_INT, 0, BT_FN_INT_V2DI_V2DI)
++B_DEF (vec_any_ltuv2di, vec_any_ltuv2di, 0, B_VX | B_INT, 0, BT_FN_INT_UV2DI_UV2DI)
++B_DEF (vec_any_ltv2df, vec_any_ltv2df, 0, B_VX | B_INT, 0, BT_FN_INT_V2DF_V2DF)
++
++OB_DEF (s390_vec_any_eq, s390_vec_any_eq_s8_a,s390_vec_any_eq_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_any_eq_s8_a, vec_any_eqv16qi, 0, BT_OV_INT_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_any_eq_s8_b, vec_any_eqv16qi, 0, BT_OV_INT_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_eq_b8_a, vec_any_eqv16qi, 0, BT_OV_INT_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_eq_b8_b, vec_any_eqv16qi, 0, BT_OV_INT_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_any_eq_b8_c, vec_any_eqv16qi, 0, BT_OV_INT_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_any_eq_u8_a, vec_any_eqv16qi, 0, BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_any_eq_u8_b, vec_any_eqv16qi, 0, BT_OV_INT_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_eq_s16_a, vec_any_eqv8hi, 0, BT_OV_INT_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_any_eq_s16_b, vec_any_eqv8hi, 0, BT_OV_INT_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_eq_b16_a, vec_any_eqv8hi, 0, BT_OV_INT_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_eq_b16_b, vec_any_eqv8hi, 0, BT_OV_INT_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_any_eq_b16_c, vec_any_eqv8hi, 0, BT_OV_INT_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_any_eq_u16_a, vec_any_eqv8hi, 0, BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_any_eq_u16_b, vec_any_eqv8hi, 0, BT_OV_INT_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_eq_s32_a, vec_any_eqv4si, 0, BT_OV_INT_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_any_eq_s32_b, vec_any_eqv4si, 0, BT_OV_INT_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_eq_b32_a, vec_any_eqv4si, 0, BT_OV_INT_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_eq_b32_b, vec_any_eqv4si, 0, BT_OV_INT_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_any_eq_b32_c, vec_any_eqv4si, 0, BT_OV_INT_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_any_eq_u32_a, vec_any_eqv4si, 0, BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_any_eq_u32_b, vec_any_eqv4si, 0, BT_OV_INT_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_eq_s64_a, vec_any_eqv2di, 0, BT_OV_INT_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_any_eq_s64_b, vec_any_eqv2di, 0, BT_OV_INT_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_eq_b64_a, vec_any_eqv2di, 0, BT_OV_INT_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_eq_b64_b, vec_any_eqv2di, 0, BT_OV_INT_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_any_eq_b64_c, vec_any_eqv2di, 0, BT_OV_INT_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_any_eq_u64_a, vec_any_eqv2di, 0, BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_any_eq_u64_b, vec_any_eqv2di, 0, BT_OV_INT_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_eq_dbl, vec_any_eqv2df, 0, BT_OV_INT_V2DF_V2DF)
++
++OB_DEF (s390_vec_any_ne, s390_vec_any_ne_s8_a,s390_vec_any_ne_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_any_ne_s8_a, vec_any_nev16qi, 0, BT_OV_INT_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_any_ne_s8_b, vec_any_nev16qi, 0, BT_OV_INT_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_ne_b8_a, vec_any_nev16qi, 0, BT_OV_INT_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_ne_b8_b, vec_any_nev16qi, 0, BT_OV_INT_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_any_ne_b8_c, vec_any_nev16qi, 0, BT_OV_INT_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_any_ne_u8_a, vec_any_nev16qi, 0, BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_any_ne_u8_b, vec_any_nev16qi, 0, BT_OV_INT_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_ne_s16_a, vec_any_nev8hi, 0, BT_OV_INT_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_any_ne_s16_b, vec_any_nev8hi, 0, BT_OV_INT_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_ne_b16_a, vec_any_nev8hi, 0, BT_OV_INT_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_ne_b16_b, vec_any_nev8hi, 0, BT_OV_INT_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_any_ne_b16_c, vec_any_nev8hi, 0, BT_OV_INT_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_any_ne_u16_a, vec_any_nev8hi, 0, BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_any_ne_u16_b, vec_any_nev8hi, 0, BT_OV_INT_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_ne_s32_a, vec_any_nev4si, 0, BT_OV_INT_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_any_ne_s32_b, vec_any_nev4si, 0, BT_OV_INT_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_ne_b32_a, vec_any_nev4si, 0, BT_OV_INT_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_ne_b32_b, vec_any_nev4si, 0, BT_OV_INT_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_any_ne_b32_c, vec_any_nev4si, 0, BT_OV_INT_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_any_ne_u32_a, vec_any_nev4si, 0, BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_any_ne_u32_b, vec_any_nev4si, 0, BT_OV_INT_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_ne_s64_a, vec_any_nev2di, 0, BT_OV_INT_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_any_ne_s64_b, vec_any_nev2di, 0, BT_OV_INT_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_ne_b64_a, vec_any_nev2di, 0, BT_OV_INT_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_ne_b64_b, vec_any_nev2di, 0, BT_OV_INT_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_any_ne_b64_c, vec_any_nev2di, 0, BT_OV_INT_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_any_ne_u64_a, vec_any_nev2di, 0, BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_any_ne_u64_b, vec_any_nev2di, 0, BT_OV_INT_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_ne_dbl, vec_any_nev2df, 0, BT_OV_INT_V2DF_V2DF)
++
++OB_DEF (s390_vec_any_ge, s390_vec_any_ge_s8_a,s390_vec_any_ge_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_any_ge_s8_a, vec_any_gev16qi, 0, BT_OV_INT_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_any_ge_s8_b, vec_any_gev16qi, 0, BT_OV_INT_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_ge_b8_a, vec_any_geuv16qi, 0, BT_OV_INT_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_ge_b8_b, vec_any_gev16qi, 0, BT_OV_INT_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_any_ge_b8_c, vec_any_geuv16qi, 0, BT_OV_INT_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_any_ge_u8_a, vec_any_geuv16qi, 0, BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_any_ge_u8_b, vec_any_geuv16qi, 0, BT_OV_INT_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_ge_s16_a, vec_any_gev8hi, 0, BT_OV_INT_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_any_ge_s16_b, vec_any_gev8hi, 0, BT_OV_INT_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_ge_b16_a, vec_any_geuv8hi, 0, BT_OV_INT_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_ge_b16_b, vec_any_gev8hi, 0, BT_OV_INT_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_any_ge_b16_c, vec_any_geuv8hi, 0, BT_OV_INT_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_any_ge_u16_a, vec_any_geuv8hi, 0, BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_any_ge_u16_b, vec_any_geuv8hi, 0, BT_OV_INT_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_ge_s32_a, vec_any_gev4si, 0, BT_OV_INT_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_any_ge_s32_b, vec_any_gev4si, 0, BT_OV_INT_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_ge_b32_a, vec_any_geuv4si, 0, BT_OV_INT_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_ge_b32_b, vec_any_gev4si, 0, BT_OV_INT_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_any_ge_b32_c, vec_any_geuv4si, 0, BT_OV_INT_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_any_ge_u32_a, vec_any_geuv4si, 0, BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_any_ge_u32_b, vec_any_geuv4si, 0, BT_OV_INT_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_ge_s64_a, vec_any_gev2di, 0, BT_OV_INT_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_any_ge_s64_b, vec_any_gev2di, 0, BT_OV_INT_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_ge_b64_a, vec_any_geuv2di, 0, BT_OV_INT_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_ge_b64_b, vec_any_gev2di, 0, BT_OV_INT_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_any_ge_b64_c, vec_any_geuv2di, 0, BT_OV_INT_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_any_ge_u64_a, vec_any_geuv2di, 0, BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_any_ge_u64_b, vec_any_geuv2di, 0, BT_OV_INT_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_ge_dbl, vec_any_gev2df, 0, BT_OV_INT_V2DF_V2DF)
++
++OB_DEF (s390_vec_any_gt, s390_vec_any_gt_s8_a,s390_vec_any_gt_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_any_gt_s8_a, vec_any_gtv16qi, 0, BT_OV_INT_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_any_gt_s8_b, vec_any_gtv16qi, 0, BT_OV_INT_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_gt_b8_a, vec_any_gtuv16qi, 0, BT_OV_INT_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_gt_b8_b, vec_any_gtv16qi, 0, BT_OV_INT_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_any_gt_b8_c, vec_any_gtuv16qi, 0, BT_OV_INT_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_any_gt_u8_a, vec_any_gtuv16qi, 0, BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_any_gt_u8_b, vec_any_gtuv16qi, 0, BT_OV_INT_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_gt_s16_a, vec_any_gtv8hi, 0, BT_OV_INT_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_any_gt_s16_b, vec_any_gtv8hi, 0, BT_OV_INT_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_gt_b16_a, vec_any_gtuv8hi, 0, BT_OV_INT_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_gt_b16_b, vec_any_gtv8hi, 0, BT_OV_INT_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_any_gt_b16_c, vec_any_gtuv8hi, 0, BT_OV_INT_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_any_gt_u16_a, vec_any_gtuv8hi, 0, BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_any_gt_u16_b, vec_any_gtuv8hi, 0, BT_OV_INT_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_gt_s32_a, vec_any_gtv4si, 0, BT_OV_INT_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_any_gt_s32_b, vec_any_gtv4si, 0, BT_OV_INT_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_gt_b32_a, vec_any_gtuv4si, 0, BT_OV_INT_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_gt_b32_b, vec_any_gtv4si, 0, BT_OV_INT_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_any_gt_b32_c, vec_any_gtuv4si, 0, BT_OV_INT_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_any_gt_u32_a, vec_any_gtuv4si, 0, BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_any_gt_u32_b, vec_any_gtuv4si, 0, BT_OV_INT_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_gt_s64_a, vec_any_gtv2di, 0, BT_OV_INT_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_any_gt_s64_b, vec_any_gtv2di, 0, BT_OV_INT_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_gt_b64_a, vec_any_gtuv2di, 0, BT_OV_INT_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_gt_b64_b, vec_any_gtv2di, 0, BT_OV_INT_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_any_gt_b64_c, vec_any_gtuv2di, 0, BT_OV_INT_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_any_gt_u64_a, vec_any_gtuv2di, 0, BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_any_gt_u64_b, vec_any_gtuv2di, 0, BT_OV_INT_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_gt_dbl, vec_any_gtv2df, 0, BT_OV_INT_V2DF_V2DF)
++
++OB_DEF (s390_vec_any_le, s390_vec_any_le_s8_a,s390_vec_any_le_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_any_le_s8_a, vec_any_lev16qi, 0, BT_OV_INT_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_any_le_s8_b, vec_any_lev16qi, 0, BT_OV_INT_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_le_b8_a, vec_any_leuv16qi, 0, BT_OV_INT_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_le_b8_b, vec_any_lev16qi, 0, BT_OV_INT_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_any_le_b8_c, vec_any_leuv16qi, 0, BT_OV_INT_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_any_le_u8_a, vec_any_leuv16qi, 0, BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_any_le_u8_b, vec_any_leuv16qi, 0, BT_OV_INT_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_le_s16_a, vec_any_lev8hi, 0, BT_OV_INT_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_any_le_s16_b, vec_any_lev8hi, 0, BT_OV_INT_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_le_b16_a, vec_any_leuv8hi, 0, BT_OV_INT_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_le_b16_b, vec_any_lev8hi, 0, BT_OV_INT_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_any_le_b16_c, vec_any_leuv8hi, 0, BT_OV_INT_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_any_le_u16_a, vec_any_leuv8hi, 0, BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_any_le_u16_b, vec_any_leuv8hi, 0, BT_OV_INT_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_le_s32_a, vec_any_lev4si, 0, BT_OV_INT_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_any_le_s32_b, vec_any_lev4si, 0, BT_OV_INT_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_le_b32_a, vec_any_leuv4si, 0, BT_OV_INT_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_le_b32_b, vec_any_lev4si, 0, BT_OV_INT_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_any_le_b32_c, vec_any_leuv4si, 0, BT_OV_INT_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_any_le_u32_a, vec_any_leuv4si, 0, BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_any_le_u32_b, vec_any_leuv4si, 0, BT_OV_INT_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_le_s64_a, vec_any_lev2di, 0, BT_OV_INT_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_any_le_s64_b, vec_any_lev2di, 0, BT_OV_INT_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_le_b64_a, vec_any_leuv2di, 0, BT_OV_INT_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_le_b64_b, vec_any_lev2di, 0, BT_OV_INT_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_any_le_b64_c, vec_any_leuv2di, 0, BT_OV_INT_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_any_le_u64_a, vec_any_leuv2di, 0, BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_any_le_u64_b, vec_any_leuv2di, 0, BT_OV_INT_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_le_dbl, vec_any_lev2df, 0, BT_OV_INT_V2DF_V2DF)
++
++OB_DEF (s390_vec_any_lt, s390_vec_any_lt_s8_a,s390_vec_any_lt_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_any_lt_s8_a, vec_any_ltv16qi, 0, BT_OV_INT_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_any_lt_s8_b, vec_any_ltv16qi, 0, BT_OV_INT_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_lt_b8_a, vec_any_ltuv16qi, 0, BT_OV_INT_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_lt_b8_b, vec_any_ltv16qi, 0, BT_OV_INT_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_any_lt_b8_c, vec_any_ltuv16qi, 0, BT_OV_INT_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_any_lt_u8_a, vec_any_ltuv16qi, 0, BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_any_lt_u8_b, vec_any_ltuv16qi, 0, BT_OV_INT_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_lt_s16_a, vec_any_ltv8hi, 0, BT_OV_INT_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_any_lt_s16_b, vec_any_ltv8hi, 0, BT_OV_INT_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_lt_b16_a, vec_any_ltuv8hi, 0, BT_OV_INT_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_lt_b16_b, vec_any_ltv8hi, 0, BT_OV_INT_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_any_lt_b16_c, vec_any_ltuv8hi, 0, BT_OV_INT_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_any_lt_u16_a, vec_any_ltuv8hi, 0, BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_any_lt_u16_b, vec_any_ltuv8hi, 0, BT_OV_INT_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_lt_s32_a, vec_any_ltv4si, 0, BT_OV_INT_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_any_lt_s32_b, vec_any_ltv4si, 0, BT_OV_INT_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_lt_b32_a, vec_any_ltuv4si, 0, BT_OV_INT_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_lt_b32_b, vec_any_ltv4si, 0, BT_OV_INT_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_any_lt_b32_c, vec_any_ltuv4si, 0, BT_OV_INT_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_any_lt_u32_a, vec_any_ltuv4si, 0, BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_any_lt_u32_b, vec_any_ltuv4si, 0, BT_OV_INT_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_lt_s64_a, vec_any_ltv2di, 0, BT_OV_INT_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_any_lt_s64_b, vec_any_ltv2di, 0, BT_OV_INT_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_lt_b64_a, vec_any_ltuv2di, 0, BT_OV_INT_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_lt_b64_b, vec_any_ltv2di, 0, BT_OV_INT_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_any_lt_b64_c, vec_any_ltuv2di, 0, BT_OV_INT_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_any_lt_u64_a, vec_any_ltuv2di, 0, BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_any_lt_u64_b, vec_any_ltuv2di, 0, BT_OV_INT_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_lt_dbl, vec_any_ltv2df, 0, BT_OV_INT_V2DF_V2DF)
++
++OB_DEF (s390_vec_cmpeq, s390_vec_cmpeq_s8, s390_vec_cmpeq_dbl, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_cmpeq_s8, s390_vceqb, 0, BT_OV_BV16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_cmpeq_u8, s390_vceqb, 0, BT_OV_BV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_cmpeq_b8, s390_vceqb, 0, BT_OV_BV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_cmpeq_s16, s390_vceqh, 0, BT_OV_BV8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_cmpeq_u16, s390_vceqh, 0, BT_OV_BV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_cmpeq_b16, s390_vceqh, 0, BT_OV_BV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_cmpeq_s32, s390_vceqf, 0, BT_OV_BV4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_cmpeq_u32, s390_vceqf, 0, BT_OV_BV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_cmpeq_b32, s390_vceqf, 0, BT_OV_BV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_cmpeq_s64, s390_vceqg, 0, BT_OV_BV2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_cmpeq_u64, s390_vceqg, 0, BT_OV_BV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_cmpeq_b64, s390_vceqg, 0, BT_OV_BV2DI_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_cmpeq_dbl, s390_vfcedb, 0, BT_OV_BV2DI_V2DF_V2DF)
++
++B_DEF (s390_vceqb, vec_cmpeqv16qi, 0, B_VX, 0, BT_FN_V16QI_UV16QI_UV16QI)
++B_DEF (s390_vceqh, vec_cmpeqv8hi, 0, B_VX, 0, BT_FN_V8HI_UV8HI_UV8HI)
++B_DEF (s390_vceqf, vec_cmpeqv4si, 0, B_VX, 0, BT_FN_V4SI_UV4SI_UV4SI)
++B_DEF (s390_vceqg, vec_cmpeqv2di, 0, B_VX, 0, BT_FN_V2DI_UV2DI_UV2DI)
++B_DEF (s390_vfcedb, vec_cmpeqv2df, 0, B_VX, 0, BT_FN_V2DI_V2DF_V2DF)
++
++OB_DEF (s390_vec_cmpge, s390_vec_cmpge_s8, s390_vec_cmpge_dbl, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_cmpge_s8, vec_cmpgev16qi, 0, BT_OV_BV16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_cmpge_u8, vec_cmpgeuv16qi, 0, BT_OV_BV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_cmpge_s16, vec_cmpgev8hi, 0, BT_OV_BV8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_cmpge_u16, vec_cmpgeuv8hi, 0, BT_OV_BV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_cmpge_s32, vec_cmpgev4si, 0, BT_OV_BV4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_cmpge_u32, vec_cmpgeuv4si, 0, BT_OV_BV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_cmpge_s64, vec_cmpgev2di, 0, BT_OV_BV2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_cmpge_u64, vec_cmpgeuv2di, 0, BT_OV_BV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_cmpge_dbl, s390_vfchedb, 0, BT_OV_BV2DI_V2DF_V2DF)
++
++B_DEF (vec_cmpgev16qi, vec_cmpgev16qi, 0, B_VX | B_INT, 0, BT_FN_V16QI_UV16QI_UV16QI)
++B_DEF (vec_cmpgeuv16qi, vec_cmpgeuv16qi, 0, B_VX | B_INT, 0, BT_FN_V16QI_UV16QI_UV16QI)
++B_DEF (vec_cmpgev8hi, vec_cmpgev8hi, 0, B_VX | B_INT, 0, BT_FN_V8HI_UV8HI_UV8HI)
++B_DEF (vec_cmpgeuv8hi, vec_cmpgeuv8hi, 0, B_VX | B_INT, 0, BT_FN_V8HI_UV8HI_UV8HI)
++B_DEF (vec_cmpgev4si, vec_cmpgev4si, 0, B_VX | B_INT, 0, BT_FN_V4SI_UV4SI_UV4SI)
++B_DEF (vec_cmpgeuv4si, vec_cmpgeuv4si, 0, B_VX | B_INT, 0, BT_FN_V4SI_UV4SI_UV4SI)
++B_DEF (vec_cmpgev2di, vec_cmpgev2di, 0, B_VX | B_INT, 0, BT_FN_V2DI_UV2DI_UV2DI)
++B_DEF (vec_cmpgeuv2di, vec_cmpgeuv2di, 0, B_VX | B_INT, 0, BT_FN_V2DI_UV2DI_UV2DI)
++B_DEF (s390_vfchedb, vec_cmpgev2df, 0, B_VX, 0, BT_FN_V2DI_V2DF_V2DF)
++
++OB_DEF (s390_vec_cmpgt, s390_vec_cmpgt_s8, s390_vec_cmpgt_dbl, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_cmpgt_s8, s390_vchb, 0, BT_OV_BV16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_cmpgt_u8, s390_vchlb, 0, BT_OV_BV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_cmpgt_s16, s390_vchh, 0, BT_OV_BV8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_cmpgt_u16, s390_vchlh, 0, BT_OV_BV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_cmpgt_s32, s390_vchf, 0, BT_OV_BV4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_cmpgt_u32, s390_vchlf, 0, BT_OV_BV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_cmpgt_s64, s390_vchg, 0, BT_OV_BV2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_cmpgt_u64, s390_vchlg, 0, BT_OV_BV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_cmpgt_dbl, s390_vfchdb, 0, BT_OV_BV2DI_V2DF_V2DF)
++
++B_DEF (s390_vchb, vec_cmpgtv16qi, 0, B_VX, 0, BT_FN_V16QI_V16QI_V16QI)
++B_DEF (s390_vchlb, vec_cmpgtuv16qi, 0, B_VX, 0, BT_FN_V16QI_UV16QI_UV16QI)
++B_DEF (s390_vchh, vec_cmpgtv8hi, 0, B_VX, 0, BT_FN_V8HI_V8HI_V8HI)
++B_DEF (s390_vchlh, vec_cmpgtuv8hi, 0, B_VX, 0, BT_FN_V8HI_UV8HI_UV8HI)
++B_DEF (s390_vchf, vec_cmpgtv4si, 0, B_VX, 0, BT_FN_V4SI_V4SI_V4SI)
++B_DEF (s390_vchlf, vec_cmpgtuv4si, 0, B_VX, 0, BT_FN_V4SI_UV4SI_UV4SI)
++B_DEF (s390_vchg, vec_cmpgtv2di, 0, B_VX, 0, BT_FN_V2DI_V2DI_V2DI)
++B_DEF (s390_vchlg, vec_cmpgtuv2di, 0, B_VX, 0, BT_FN_V2DI_UV2DI_UV2DI)
++B_DEF (s390_vfchdb, vec_cmpgtv2df, 0, B_VX, 0, BT_FN_V2DI_V2DF_V2DF)
++
++OB_DEF (s390_vec_cmple, s390_vec_cmple_s8, s390_vec_cmple_dbl, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_cmple_s8, vec_cmplev16qi, 0, BT_OV_BV16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_cmple_u8, vec_cmpleuv16qi, 0, BT_OV_BV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_cmple_s16, vec_cmplev8hi, 0, BT_OV_BV8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_cmple_u16, vec_cmpleuv8hi, 0, BT_OV_BV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_cmple_s32, vec_cmplev4si, 0, BT_OV_BV4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_cmple_u32, vec_cmpleuv4si, 0, BT_OV_BV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_cmple_s64, vec_cmplev2di, 0, BT_OV_BV2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_cmple_u64, vec_cmpleuv2di, 0, BT_OV_BV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_cmple_dbl, vec_cmplev2df, 0, BT_OV_BV2DI_V2DF_V2DF)
++
++B_DEF (vec_cmplev16qi, vec_cmplev16qi, 0, B_VX | B_INT, 0, BT_FN_V16QI_UV16QI_UV16QI)
++B_DEF (vec_cmpleuv16qi, vec_cmpleuv16qi, 0, B_VX | B_INT, 0, BT_FN_V16QI_UV16QI_UV16QI)
++B_DEF (vec_cmplev8hi, vec_cmplev8hi, 0, B_VX | B_INT, 0, BT_FN_V8HI_UV8HI_UV8HI)
++B_DEF (vec_cmpleuv8hi, vec_cmpleuv8hi, 0, B_VX | B_INT, 0, BT_FN_V8HI_UV8HI_UV8HI)
++B_DEF (vec_cmplev4si, vec_cmplev4si, 0, B_VX | B_INT, 0, BT_FN_V4SI_UV4SI_UV4SI)
++B_DEF (vec_cmpleuv4si, vec_cmpleuv4si, 0, B_VX | B_INT, 0, BT_FN_V4SI_UV4SI_UV4SI)
++B_DEF (vec_cmplev2di, vec_cmplev2di, 0, B_VX | B_INT, 0, BT_FN_V2DI_UV2DI_UV2DI)
++B_DEF (vec_cmpleuv2di, vec_cmpleuv2di, 0, B_VX | B_INT, 0, BT_FN_V2DI_UV2DI_UV2DI)
++B_DEF (vec_cmplev2df, vec_cmplev2df, 0, B_VX | B_INT, 0, BT_FN_V2DI_V2DF_V2DF)
++
++OB_DEF (s390_vec_cmplt, s390_vec_cmplt_s8, s390_vec_cmplt_dbl, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_cmplt_s8, vec_cmpltv16qi, 0, BT_OV_BV16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_cmplt_u8, vec_cmpltuv16qi, 0, BT_OV_BV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_cmplt_s16, vec_cmpltv8hi, 0, BT_OV_BV8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_cmplt_u16, vec_cmpltuv8hi, 0, BT_OV_BV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_cmplt_s32, vec_cmpltv4si, 0, BT_OV_BV4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_cmplt_u32, vec_cmpltuv4si, 0, BT_OV_BV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_cmplt_s64, vec_cmpltv2di, 0, BT_OV_BV2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_cmplt_u64, vec_cmpltuv2di, 0, BT_OV_BV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_cmplt_dbl, vec_cmpltv2df, 0, BT_OV_BV2DI_V2DF_V2DF)
++
++B_DEF (vec_cmpltv16qi, vec_cmpltv16qi, 0, B_VX | B_INT, 0, BT_FN_V16QI_UV16QI_UV16QI)
++B_DEF (vec_cmpltuv16qi, vec_cmpltuv16qi, 0, B_VX | B_INT, 0, BT_FN_V16QI_UV16QI_UV16QI)
++B_DEF (vec_cmpltv8hi, vec_cmpltv8hi, 0, B_VX | B_INT, 0, BT_FN_V8HI_UV8HI_UV8HI)
++B_DEF (vec_cmpltuv8hi, vec_cmpltuv8hi, 0, B_VX | B_INT, 0, BT_FN_V8HI_UV8HI_UV8HI)
++B_DEF (vec_cmpltv4si, vec_cmpltv4si, 0, B_VX | B_INT, 0, BT_FN_V4SI_UV4SI_UV4SI)
++B_DEF (vec_cmpltuv4si, vec_cmpltuv4si, 0, B_VX | B_INT, 0, BT_FN_V4SI_UV4SI_UV4SI)
++B_DEF (vec_cmpltv2di, vec_cmpltv2di, 0, B_VX | B_INT, 0, BT_FN_V2DI_UV2DI_UV2DI)
++B_DEF (vec_cmpltuv2di, vec_cmpltuv2di, 0, B_VX | B_INT, 0, BT_FN_V2DI_UV2DI_UV2DI)
++B_DEF (vec_cmpltv2df, vec_cmpltv2df, 0, B_VX | B_INT, 0, BT_FN_V2DI_V2DF_V2DF)
++
++OB_DEF (s390_vec_cntlz, s390_vec_cntlz_s8, s390_vec_cntlz_u64, B_VX, BT_FN_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_cntlz_s8, s390_vclzb, 0, BT_OV_UV16QI_V16QI)
++OB_DEF_VAR (s390_vec_cntlz_u8, s390_vclzb, 0, BT_OV_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_cntlz_s16, s390_vclzh, 0, BT_OV_UV8HI_V8HI)
++OB_DEF_VAR (s390_vec_cntlz_u16, s390_vclzh, 0, BT_OV_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_cntlz_s32, s390_vclzf, 0, BT_OV_UV4SI_V4SI)
++OB_DEF_VAR (s390_vec_cntlz_u32, s390_vclzf, 0, BT_OV_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_cntlz_s64, s390_vclzg, 0, BT_OV_UV2DI_V2DI)
++OB_DEF_VAR (s390_vec_cntlz_u64, s390_vclzg, 0, BT_OV_UV2DI_UV2DI)
++
++B_DEF (s390_vclzb, clzv16qi2, 0, B_VX, 0, BT_FN_UV16QI_UV16QI)
++B_DEF (s390_vclzh, clzv8hi2, 0, B_VX, 0, BT_FN_UV8HI_UV8HI)
++B_DEF (s390_vclzf, clzv4si2, 0, B_VX, 0, BT_FN_UV4SI_UV4SI)
++B_DEF (s390_vclzg, clzv2di2, 0, B_VX, 0, BT_FN_UV2DI_UV2DI)
++
++OB_DEF (s390_vec_cnttz, s390_vec_cnttz_s8, s390_vec_cnttz_u64, B_VX, BT_FN_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_cnttz_s8, s390_vctzb, 0, BT_OV_UV16QI_V16QI)
++OB_DEF_VAR (s390_vec_cnttz_u8, s390_vctzb, 0, BT_OV_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_cnttz_s16, s390_vctzh, 0, BT_OV_UV8HI_V8HI)
++OB_DEF_VAR (s390_vec_cnttz_u16, s390_vctzh, 0, BT_OV_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_cnttz_s32, s390_vctzf, 0, BT_OV_UV4SI_V4SI)
++OB_DEF_VAR (s390_vec_cnttz_u32, s390_vctzf, 0, BT_OV_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_cnttz_s64, s390_vctzg, 0, BT_OV_UV2DI_V2DI)
++OB_DEF_VAR (s390_vec_cnttz_u64, s390_vctzg, 0, BT_OV_UV2DI_UV2DI)
++
++B_DEF (s390_vctzb, ctzv16qi2, 0, B_VX, 0, BT_FN_UV16QI_UV16QI)
++B_DEF (s390_vctzh, ctzv8hi2, 0, B_VX, 0, BT_FN_UV8HI_UV8HI)
++B_DEF (s390_vctzf, ctzv4si2, 0, B_VX, 0, BT_FN_UV4SI_UV4SI)
++B_DEF (s390_vctzg, ctzv2di2, 0, B_VX, 0, BT_FN_UV2DI_UV2DI)
++
++OB_DEF (s390_vec_xor, s390_vec_xor_b8, s390_vec_xor_dbl_c, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_xor_b8, s390_vx, 0, BT_OV_BV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_xor_s8_a, s390_vx, 0, BT_OV_V16QI_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_xor_s8_b, s390_vx, 0, BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_xor_s8_c, s390_vx, 0, BT_OV_V16QI_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_xor_u8_a, s390_vx, 0, BT_OV_UV16QI_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_xor_u8_b, s390_vx, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_xor_u8_c, s390_vx, 0, BT_OV_UV16QI_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_xor_b16, s390_vx, 0, BT_OV_BV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_xor_s16_a, s390_vx, 0, BT_OV_V8HI_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_xor_s16_b, s390_vx, 0, BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_xor_s16_c, s390_vx, 0, BT_OV_V8HI_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_xor_u16_a, s390_vx, 0, BT_OV_UV8HI_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_xor_u16_b, s390_vx, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_xor_u16_c, s390_vx, 0, BT_OV_UV8HI_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_xor_b32, s390_vx, 0, BT_OV_BV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_xor_s32_a, s390_vx, 0, BT_OV_V4SI_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_xor_s32_b, s390_vx, 0, BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_xor_s32_c, s390_vx, 0, BT_OV_V4SI_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_xor_u32_a, s390_vx, 0, BT_OV_UV4SI_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_xor_u32_b, s390_vx, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_xor_u32_c, s390_vx, 0, BT_OV_UV4SI_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_xor_b64, s390_vx, 0, BT_OV_BV2DI_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_xor_s64_a, s390_vx, 0, BT_OV_V2DI_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_xor_s64_b, s390_vx, 0, BT_OV_V2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_xor_s64_c, s390_vx, 0, BT_OV_V2DI_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_xor_u64_a, s390_vx, 0, BT_OV_UV2DI_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_xor_u64_b, s390_vx, 0, BT_OV_UV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_xor_u64_c, s390_vx, 0, BT_OV_UV2DI_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_xor_dbl_a, s390_vx, 0, BT_OV_V2DF_BV2DI_V2DF)
++OB_DEF_VAR (s390_vec_xor_dbl_b, s390_vx, 0, BT_OV_V2DF_V2DF_V2DF)
++OB_DEF_VAR (s390_vec_xor_dbl_c, s390_vx, 0, BT_OV_V2DF_V2DF_BV2DI)
++
++B_DEF (s390_vx, xorv16qi3, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++
++OB_DEF (s390_vec_gfmsum, s390_vec_gfmsum_u8, s390_vec_gfmsum_u32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_gfmsum_u8, s390_vgfmb, 0, BT_OV_UV8HI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_gfmsum_u16, s390_vgfmh, 0, BT_OV_UV4SI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_gfmsum_u32, s390_vgfmf, 0, BT_OV_UV2DI_UV4SI_UV4SI)
++
++B_DEF (s390_vgfmb, vec_gfmsumv16qi, 0, B_VX, 0, BT_FN_UV8HI_UV16QI_UV16QI)
++B_DEF (s390_vgfmh, vec_gfmsumv8hi, 0, B_VX, 0, BT_FN_UV4SI_UV8HI_UV8HI)
++B_DEF (s390_vgfmf, vec_gfmsumv4si, 0, B_VX, 0, BT_FN_UV2DI_UV4SI_UV4SI)
++B_DEF (s390_vgfmg, vec_gfmsum_128, 0, B_VX, 0, BT_FN_UV16QI_UV2DI_UV2DI)
++
++OB_DEF (s390_vec_gfmsum_accum, s390_vec_gfmsum_accum_u8,s390_vec_gfmsum_accum_u32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_gfmsum_accum_u8, s390_vgfmab, 0, BT_OV_UV8HI_UV16QI_UV16QI_UV8HI)
++OB_DEF_VAR (s390_vec_gfmsum_accum_u16, s390_vgfmah, 0, BT_OV_UV4SI_UV8HI_UV8HI_UV4SI)
++OB_DEF_VAR (s390_vec_gfmsum_accum_u32, s390_vgfmaf, 0, BT_OV_UV2DI_UV4SI_UV4SI_UV2DI)
++
++B_DEF (s390_vgfmab, vec_gfmsum_accumv16qi,0, B_VX, 0, BT_FN_UV8HI_UV16QI_UV16QI_UV8HI)
++B_DEF (s390_vgfmah, vec_gfmsum_accumv8hi,0, B_VX, 0, BT_FN_UV4SI_UV8HI_UV8HI_UV4SI)
++B_DEF (s390_vgfmaf, vec_gfmsum_accumv4si,0, B_VX, 0, BT_FN_UV2DI_UV4SI_UV4SI_UV2DI)
++B_DEF (s390_vgfmag, vec_gfmsum_accum_128,0, B_VX, 0, BT_FN_UV16QI_UV2DI_UV2DI_UV16QI)
++
++OB_DEF (s390_vec_abs, s390_vec_abs_s8, s390_vec_abs_dbl, B_VX, BT_FN_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_abs_s8, s390_vlpb, 0, BT_OV_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_abs_s16, s390_vlph, 0, BT_OV_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_abs_s32, s390_vlpf, 0, BT_OV_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_abs_s64, s390_vlpg, 0, BT_OV_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_abs_dbl, s390_vflpdb, 0, BT_OV_V2DF_V2DF)
++
++B_DEF (s390_vlpb, absv16qi2, 0, B_VX, 0, BT_FN_V16QI_V16QI)
++B_DEF (s390_vlph, absv8hi2, 0, B_VX, 0, BT_FN_V8HI_V8HI)
++B_DEF (s390_vlpf, absv4si2, 0, B_VX, 0, BT_FN_V4SI_V4SI)
++B_DEF (s390_vlpg, absv2di2, 0, B_VX, 0, BT_FN_V2DI_V2DI)
++B_DEF (s390_vflpdb, absv2df2, 0, B_VX, 0, BT_FN_V2DF_V2DF)
++
++OB_DEF (s390_vec_max, s390_vec_max_s8_a, s390_vec_max_dbl, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_max_s8_a, s390_vmxb, 0, BT_OV_V16QI_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_max_s8_b, s390_vmxb, 0, BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_max_s8_c, s390_vmxb, 0, BT_OV_V16QI_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_max_u8_a, s390_vmxlb, 0, BT_OV_UV16QI_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_max_u8_b, s390_vmxlb, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_max_u8_c, s390_vmxlb, 0, BT_OV_UV16QI_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_max_s16_a, s390_vmxh, 0, BT_OV_V8HI_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_max_s16_b, s390_vmxh, 0, BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_max_s16_c, s390_vmxh, 0, BT_OV_V8HI_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_max_u16_a, s390_vmxlh, 0, BT_OV_UV8HI_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_max_u16_b, s390_vmxlh, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_max_u16_c, s390_vmxlh, 0, BT_OV_UV8HI_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_max_s32_a, s390_vmxf, 0, BT_OV_V4SI_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_max_s32_b, s390_vmxf, 0, BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_max_s32_c, s390_vmxf, 0, BT_OV_V4SI_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_max_u32_a, s390_vmxlf, 0, BT_OV_UV4SI_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_max_u32_b, s390_vmxlf, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_max_u32_c, s390_vmxlf, 0, BT_OV_UV4SI_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_max_s64_a, s390_vmxg, 0, BT_OV_V2DI_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_max_s64_b, s390_vmxg, 0, BT_OV_V2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_max_s64_c, s390_vmxg, 0, BT_OV_V2DI_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_max_u64_a, s390_vmxlg, 0, BT_OV_UV2DI_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_max_u64_b, s390_vmxlg, 0, BT_OV_UV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_max_u64_c, s390_vmxlg, 0, BT_OV_UV2DI_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_max_dbl, s390_vec_max_dbl, 0, BT_OV_V2DF_V2DF_V2DF)
++
++B_DEF (s390_vmxb, smaxv16qi3, 0, B_VX, 0, BT_FN_V16QI_BV16QI_V16QI)
++B_DEF (s390_vmxlb, umaxv16qi3, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF (s390_vmxh, smaxv8hi3, 0, B_VX, 0, BT_FN_V8HI_BV8HI_V8HI)
++B_DEF (s390_vmxlh, umaxv8hi3, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI)
++B_DEF (s390_vmxf, smaxv4si3, 0, B_VX, 0, BT_FN_V4SI_BV4SI_V4SI)
++B_DEF (s390_vmxlf, umaxv4si3, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF (s390_vmxg, smaxv2di3, 0, B_VX, 0, BT_FN_V2DI_BV2DI_V2DI)
++B_DEF (s390_vmxlg, umaxv2di3, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UV2DI)
++B_DEF (s390_vec_max_dbl, smaxv2df3, 0, B_VX | B_INT, 0, BT_FN_V2DF_V2DF_V2DF)
++
++OB_DEF (s390_vec_min, s390_vec_min_s8_a, s390_vec_min_dbl, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_min_s8_a, s390_vmnb, 0, BT_OV_V16QI_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_min_s8_b, s390_vmnb, 0, BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_min_s8_c, s390_vmnb, 0, BT_OV_V16QI_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_min_u8_a, s390_vmnlb, 0, BT_OV_UV16QI_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_min_u8_b, s390_vmnlb, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_min_u8_c, s390_vmnlb, 0, BT_OV_UV16QI_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_min_s16_a, s390_vmnh, 0, BT_OV_V8HI_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_min_s16_b, s390_vmnh, 0, BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_min_s16_c, s390_vmnh, 0, BT_OV_V8HI_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_min_u16_a, s390_vmnlh, 0, BT_OV_UV8HI_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_min_u16_b, s390_vmnlh, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_min_u16_c, s390_vmnlh, 0, BT_OV_UV8HI_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_min_s32_a, s390_vmnf, 0, BT_OV_V4SI_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_min_s32_b, s390_vmnf, 0, BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_min_s32_c, s390_vmnf, 0, BT_OV_V4SI_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_min_u32_a, s390_vmnlf, 0, BT_OV_UV4SI_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_min_u32_b, s390_vmnlf, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_min_u32_c, s390_vmnlf, 0, BT_OV_UV4SI_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_min_s64_a, s390_vmng, 0, BT_OV_V2DI_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_min_s64_b, s390_vmng, 0, BT_OV_V2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_min_s64_c, s390_vmng, 0, BT_OV_V2DI_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_min_u64_a, s390_vmnlg, 0, BT_OV_UV2DI_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_min_u64_b, s390_vmnlg, 0, BT_OV_UV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_min_u64_c, s390_vmnlg, 0, BT_OV_UV2DI_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_min_dbl, s390_vec_min_dbl, 0, BT_OV_V2DF_V2DF_V2DF)
++
++B_DEF (s390_vmnb, sminv16qi3, 0, B_VX, 0, BT_FN_V16QI_BV16QI_V16QI)
++B_DEF (s390_vmnlb, uminv16qi3, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF (s390_vmnh, sminv8hi3, 0, B_VX, 0, BT_FN_V8HI_BV8HI_V8HI)
++B_DEF (s390_vmnlh, uminv8hi3, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI)
++B_DEF (s390_vmnf, sminv4si3, 0, B_VX, 0, BT_FN_V4SI_BV4SI_V4SI)
++B_DEF (s390_vmnlf, uminv4si3, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF (s390_vmng, sminv2di3, 0, B_VX, 0, BT_FN_V2DI_BV2DI_V2DI)
++B_DEF (s390_vmnlg, uminv2di3, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UV2DI)
++B_DEF (s390_vec_min_dbl, sminv2df3, 0, B_VX | B_INT, 0, BT_FN_V2DF_V2DF_V2DF)
++
++OB_DEF (s390_vec_mladd, s390_vec_mladd_u8, s390_vec_mladd_s32_c,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_mladd_u8, s390_vmalb, 0, BT_OV_UV16QI_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_mladd_s8_a, s390_vmalb, 0, BT_OV_V16QI_UV16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_mladd_s8_b, s390_vmalb, 0, BT_OV_V16QI_V16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_mladd_s8_c, s390_vmalb, 0, BT_OV_V16QI_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_mladd_u16, s390_vmalhw, 0, BT_OV_UV8HI_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_mladd_s16_a, s390_vmalhw, 0, BT_OV_V8HI_UV8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_mladd_s16_b, s390_vmalhw, 0, BT_OV_V8HI_V8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_mladd_s16_c, s390_vmalhw, 0, BT_OV_V8HI_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_mladd_u32, s390_vmalf, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_mladd_s32_a, s390_vmalf, 0, BT_OV_V4SI_UV4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_mladd_s32_b, s390_vmalf, 0, BT_OV_V4SI_V4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_mladd_s32_c, s390_vmalf, 0, BT_OV_V4SI_V4SI_V4SI_V4SI)
++
++B_DEF (s390_vmalb, vec_vmalv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI)
++B_DEF (s390_vmalhw, vec_vmalv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI_UV8HI)
++B_DEF (s390_vmalf, vec_vmalv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI_UV4SI)
++
++OB_DEF (s390_vec_mhadd, s390_vec_mhadd_u8, s390_vec_mhadd_s32, B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_mhadd_u8, s390_vmalhb, 0, BT_OV_UV16QI_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_mhadd_s8, s390_vmahb, 0, BT_OV_V16QI_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_mhadd_u16, s390_vmalhh, 0, BT_OV_UV8HI_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_mhadd_s16, s390_vmahh, 0, BT_OV_V8HI_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_mhadd_u32, s390_vmalhf, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_mhadd_s32, s390_vmahf, 0, BT_OV_V4SI_V4SI_V4SI_V4SI)
++
++B_DEF (s390_vmalhb, vec_vmalhv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI)
++B_DEF (s390_vmahb, vec_vmahv16qi, 0, B_VX, 0, BT_FN_V16QI_V16QI_V16QI_V16QI)
++B_DEF (s390_vmalhh, vec_vmalhv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI_UV8HI)
++B_DEF (s390_vmahh, vec_vmahv8hi, 0, B_VX, 0, BT_FN_V8HI_V8HI_V8HI_V8HI)
++B_DEF (s390_vmalhf, vec_vmalhv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI_UV4SI)
++B_DEF (s390_vmahf, vec_vmahv4si, 0, B_VX, 0, BT_FN_V4SI_V4SI_V4SI_V4SI)
++
++OB_DEF (s390_vec_meadd, s390_vec_meadd_u8, s390_vec_meadd_s32, B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_meadd_u8, s390_vmaleb, 0, BT_OV_UV8HI_UV16QI_UV16QI_UV8HI)
++OB_DEF_VAR (s390_vec_meadd_s8, s390_vmaeb, 0, BT_OV_V8HI_V16QI_V16QI_V8HI)
++OB_DEF_VAR (s390_vec_meadd_u16, s390_vmaleh, 0, BT_OV_UV4SI_UV8HI_UV8HI_UV4SI)
++OB_DEF_VAR (s390_vec_meadd_s16, s390_vmaeh, 0, BT_OV_V4SI_V8HI_V8HI_V4SI)
++OB_DEF_VAR (s390_vec_meadd_u32, s390_vmalef, 0, BT_OV_UV2DI_UV4SI_UV4SI_UV2DI)
++OB_DEF_VAR (s390_vec_meadd_s32, s390_vmaef, 0, BT_OV_V2DI_V4SI_V4SI_V2DI)
++
++B_DEF (s390_vmaleb, vec_vmalev16qi, 0, B_VX, 0, BT_FN_UV8HI_UV16QI_UV16QI_UV8HI)
++B_DEF (s390_vmaeb, vec_vmaev16qi, 0, B_VX, 0, BT_FN_V8HI_V16QI_V16QI_V8HI)
++B_DEF (s390_vmaleh, vec_vmalev8hi, 0, B_VX, 0, BT_FN_UV4SI_UV8HI_UV8HI_UV4SI)
++B_DEF (s390_vmaeh, vec_vmaev8hi, 0, B_VX, 0, BT_FN_V4SI_V8HI_V8HI_V4SI)
++B_DEF (s390_vmalef, vec_vmalev4si, 0, B_VX, 0, BT_FN_UV2DI_UV4SI_UV4SI_UV2DI)
++B_DEF (s390_vmaef, vec_vmaev4si, 0, B_VX, 0, BT_FN_V2DI_V4SI_V4SI_V2DI)
++
++OB_DEF (s390_vec_moadd, s390_vec_moadd_u8, s390_vec_moadd_s32, B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_moadd_u8, s390_vmalob, 0, BT_OV_UV8HI_UV16QI_UV16QI_UV8HI)
++OB_DEF_VAR (s390_vec_moadd_s8, s390_vmaob, 0, BT_OV_V8HI_V16QI_V16QI_V8HI)
++OB_DEF_VAR (s390_vec_moadd_u16, s390_vmaloh, 0, BT_OV_UV4SI_UV8HI_UV8HI_UV4SI)
++OB_DEF_VAR (s390_vec_moadd_s16, s390_vmaoh, 0, BT_OV_V4SI_V8HI_V8HI_V4SI)
++OB_DEF_VAR (s390_vec_moadd_u32, s390_vmalof, 0, BT_OV_UV2DI_UV4SI_UV4SI_UV2DI)
++OB_DEF_VAR (s390_vec_moadd_s32, s390_vmaof, 0, BT_OV_V2DI_V4SI_V4SI_V2DI)
++
++B_DEF (s390_vmalob, vec_vmalov16qi, 0, B_VX, 0, BT_FN_UV8HI_UV16QI_UV16QI_UV8HI)
++B_DEF (s390_vmaob, vec_vmaov16qi, 0, B_VX, 0, BT_FN_V8HI_V16QI_V16QI_V8HI)
++B_DEF (s390_vmaloh, vec_vmalov8hi, 0, B_VX, 0, BT_FN_UV4SI_UV8HI_UV8HI_UV4SI)
++B_DEF (s390_vmaoh, vec_vmaov8hi, 0, B_VX, 0, BT_FN_V4SI_V8HI_V8HI_V4SI)
++B_DEF (s390_vmalof, vec_vmalov4si, 0, B_VX, 0, BT_FN_UV2DI_UV4SI_UV4SI_UV2DI)
++B_DEF (s390_vmaof, vec_vmaov4si, 0, B_VX, 0, BT_FN_V2DI_V4SI_V4SI_V2DI)
++
++OB_DEF (s390_vec_mulh, s390_vec_mulh_u8, s390_vec_mulh_s32, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_mulh_u8, s390_vmlhb, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_mulh_s8, s390_vmhb, 0, BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_mulh_u16, s390_vmlhh, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_mulh_s16, s390_vmhh, 0, BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_mulh_u32, s390_vmlhf, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_mulh_s32, s390_vmhf, 0, BT_OV_V4SI_V4SI_V4SI)
++
++B_DEF (s390_vmlhb, vec_umulhv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF (s390_vmhb, vec_smulhv16qi, 0, B_VX, 0, BT_FN_V16QI_V16QI_V16QI)
++B_DEF (s390_vmlhh, vec_umulhv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI)
++B_DEF (s390_vmhh, vec_smulhv8hi, 0, B_VX, 0, BT_FN_V8HI_V8HI_V8HI)
++B_DEF (s390_vmlhf, vec_umulhv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF (s390_vmhf, vec_smulhv4si, 0, B_VX, 0, BT_FN_V4SI_V4SI_V4SI)
++
++OB_DEF (s390_vec_mule, s390_vec_mule_u8, s390_vec_mule_s32, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_mule_u8, s390_vmleb, 0, BT_OV_UV8HI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_mule_s8, s390_vmeb, 0, BT_OV_V8HI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_mule_u16, s390_vmleh, 0, BT_OV_UV4SI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_mule_s16, s390_vmeh, 0, BT_OV_V4SI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_mule_u32, s390_vmlef, 0, BT_OV_UV2DI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_mule_s32, s390_vmef, 0, BT_OV_V2DI_V4SI_V4SI)
++
++B_DEF (s390_vmleb, vec_widen_umult_even_v16qi,0, B_VX, 0, BT_FN_UV8HI_UV16QI_UV16QI)
++B_DEF (s390_vmeb, vec_widen_smult_even_v16qi,0, B_VX, 0, BT_FN_V8HI_V16QI_V16QI)
++B_DEF (s390_vmleh, vec_widen_umult_even_v8hi,0, B_VX, 0, BT_FN_UV4SI_UV8HI_UV8HI)
++B_DEF (s390_vmeh, vec_widen_smult_even_v8hi,0, B_VX, 0, BT_FN_V4SI_V8HI_V8HI)
++B_DEF (s390_vmlef, vec_widen_umult_even_v4si,0, B_VX, 0, BT_FN_UV2DI_UV4SI_UV4SI)
++B_DEF (s390_vmef, vec_widen_smult_even_v4si,0, B_VX, 0, BT_FN_V2DI_V4SI_V4SI)
++
++OB_DEF (s390_vec_mulo, s390_vec_mulo_u8, s390_vec_mulo_s32, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_mulo_u8, s390_vmlob, 0, BT_OV_UV8HI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_mulo_s8, s390_vmob, 0, BT_OV_V8HI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_mulo_u16, s390_vmloh, 0, BT_OV_UV4SI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_mulo_s16, s390_vmoh, 0, BT_OV_V4SI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_mulo_u32, s390_vmlof, 0, BT_OV_UV2DI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_mulo_s32, s390_vmof, 0, BT_OV_V2DI_V4SI_V4SI)
++
++B_DEF (s390_vmlob, vec_widen_umult_odd_v16qi,0, B_VX, 0, BT_FN_UV8HI_UV16QI_UV16QI)
++B_DEF (s390_vmob, vec_widen_smult_odd_v16qi,0, B_VX, 0, BT_FN_V8HI_V16QI_V16QI)
++B_DEF (s390_vmloh, vec_widen_umult_odd_v8hi,0, B_VX, 0, BT_FN_UV4SI_UV8HI_UV8HI)
++B_DEF (s390_vmoh, vec_widen_smult_odd_v8hi,0, B_VX, 0, BT_FN_V4SI_V8HI_V8HI)
++B_DEF (s390_vmlof, vec_widen_umult_odd_v4si,0, B_VX, 0, BT_FN_UV2DI_UV4SI_UV4SI)
++B_DEF (s390_vmof, vec_widen_smult_odd_v4si,0, B_VX, 0, BT_FN_V2DI_V4SI_V4SI)
++
++OB_DEF (s390_vec_nor, s390_vec_nor_b8, s390_vec_nor_dbl_c, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_nor_b8, s390_vno, 0, BT_OV_BV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_nor_s8_a, s390_vno, 0, BT_OV_V16QI_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_nor_s8_b, s390_vno, 0, BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_nor_s8_c, s390_vno, 0, BT_OV_V16QI_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_nor_u8_a, s390_vno, 0, BT_OV_UV16QI_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_nor_u8_b, s390_vno, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_nor_u8_c, s390_vno, 0, BT_OV_UV16QI_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_nor_b16, s390_vno, 0, BT_OV_BV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_nor_s16_a, s390_vno, 0, BT_OV_V8HI_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_nor_s16_b, s390_vno, 0, BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_nor_s16_c, s390_vno, 0, BT_OV_V8HI_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_nor_u16_a, s390_vno, 0, BT_OV_UV8HI_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_nor_u16_b, s390_vno, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_nor_u16_c, s390_vno, 0, BT_OV_UV8HI_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_nor_b32, s390_vno, 0, BT_OV_BV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_nor_s32_a, s390_vno, 0, BT_OV_V4SI_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_nor_s32_b, s390_vno, 0, BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_nor_s32_c, s390_vno, 0, BT_OV_V4SI_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_nor_u32_a, s390_vno, 0, BT_OV_UV4SI_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_nor_u32_b, s390_vno, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_nor_u32_c, s390_vno, 0, BT_OV_UV4SI_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_nor_b64, s390_vno, 0, BT_OV_BV2DI_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_nor_s64_a, s390_vno, 0, BT_OV_V2DI_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_nor_s64_b, s390_vno, 0, BT_OV_V2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_nor_s64_c, s390_vno, 0, BT_OV_V2DI_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_nor_u64_a, s390_vno, 0, BT_OV_UV2DI_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_nor_u64_b, s390_vno, 0, BT_OV_UV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_nor_u64_c, s390_vno, 0, BT_OV_UV2DI_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_nor_dbl_a, s390_vno, 0, BT_OV_V2DF_BV2DI_V2DF)
++OB_DEF_VAR (s390_vec_nor_dbl_b, s390_vno, 0, BT_OV_V2DF_V2DF_V2DF)
++OB_DEF_VAR (s390_vec_nor_dbl_c, s390_vno, 0, BT_OV_V2DF_V2DF_BV2DI)
++
++B_DEF (s390_vno, vec_norv16qi3, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++
++OB_DEF (s390_vec_or, s390_vec_or_b8, s390_vec_or_dbl_c, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_or_b8, s390_vo, 0, BT_OV_BV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_or_s8_a, s390_vo, 0, BT_OV_V16QI_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_or_s8_b, s390_vo, 0, BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_or_s8_c, s390_vo, 0, BT_OV_V16QI_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_or_u8_a, s390_vo, 0, BT_OV_UV16QI_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_or_u8_b, s390_vo, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_or_u8_c, s390_vo, 0, BT_OV_UV16QI_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_or_b16, s390_vo, 0, BT_OV_BV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_or_s16_a, s390_vo, 0, BT_OV_V8HI_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_or_s16_b, s390_vo, 0, BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_or_s16_c, s390_vo, 0, BT_OV_V8HI_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_or_u16_a, s390_vo, 0, BT_OV_UV8HI_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_or_u16_b, s390_vo, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_or_u16_c, s390_vo, 0, BT_OV_UV8HI_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_or_b32, s390_vo, 0, BT_OV_BV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_or_s32_a, s390_vo, 0, BT_OV_V4SI_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_or_s32_b, s390_vo, 0, BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_or_s32_c, s390_vo, 0, BT_OV_V4SI_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_or_u32_a, s390_vo, 0, BT_OV_UV4SI_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_or_u32_b, s390_vo, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_or_u32_c, s390_vo, 0, BT_OV_UV4SI_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_or_b64, s390_vo, 0, BT_OV_BV2DI_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_or_s64_a, s390_vo, 0, BT_OV_V2DI_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_or_s64_b, s390_vo, 0, BT_OV_V2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_or_s64_c, s390_vo, 0, BT_OV_V2DI_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_or_u64_a, s390_vo, 0, BT_OV_UV2DI_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_or_u64_b, s390_vo, 0, BT_OV_UV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_or_u64_c, s390_vo, 0, BT_OV_UV2DI_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_or_dbl_a, s390_vo, 0, BT_OV_V2DF_BV2DI_V2DF)
++OB_DEF_VAR (s390_vec_or_dbl_b, s390_vo, 0, BT_OV_V2DF_V2DF_V2DF)
++OB_DEF_VAR (s390_vec_or_dbl_c, s390_vo, 0, BT_OV_V2DF_V2DF_BV2DI)
++
++B_DEF (s390_vo, iorv16qi3, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++
++OB_DEF (s390_vec_popcnt, s390_vec_popcnt_s8, s390_vec_popcnt_u64,B_VX, BT_FN_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_popcnt_s8, s390_vpopctb, 0, BT_OV_UV16QI_V16QI) /* vpopct */
++OB_DEF_VAR (s390_vec_popcnt_u8, s390_vpopctb, 0, BT_OV_UV16QI_UV16QI) /* vpopct */
++OB_DEF_VAR (s390_vec_popcnt_s16, s390_vpopcth, 0, BT_OV_UV8HI_V8HI) /* vpopct */
++OB_DEF_VAR (s390_vec_popcnt_u16, s390_vpopcth, 0, BT_OV_UV8HI_UV8HI) /* vpopct */
++OB_DEF_VAR (s390_vec_popcnt_s32, s390_vpopctf, 0, BT_OV_UV4SI_V4SI) /* vpopct vsumb */
++OB_DEF_VAR (s390_vec_popcnt_u32, s390_vpopctf, 0, BT_OV_UV4SI_UV4SI) /* vpopct vsumb */
++OB_DEF_VAR (s390_vec_popcnt_s64, s390_vpopctg, 0, BT_OV_UV2DI_V2DI) /* vpopct vsumb vsumgf */
++OB_DEF_VAR (s390_vec_popcnt_u64, s390_vpopctg, 0, BT_OV_UV2DI_UV2DI) /* vpopct vsumb vsumgf */
++
++B_DEF (s390_vpopctb, popcountv16qi2, 0, B_VX, 0, BT_FN_UV16QI_UV16QI) /* vpopct */
++B_DEF (s390_vpopcth, popcountv8hi2, 0, B_VX, 0, BT_FN_UV8HI_UV8HI) /* vpopct */
++B_DEF (s390_vpopctf, popcountv4si2, 0, B_VX, 0, BT_FN_UV4SI_UV4SI) /* vpopct vsumb */
++B_DEF (s390_vpopctg, popcountv2di2, 0, B_VX, 0, BT_FN_UV2DI_UV2DI) /* vpopct vsumb vsumgf */
++
++OB_DEF (s390_vec_rl, s390_vec_rl_u8, s390_vec_rl_s64, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_rl_u8, s390_verllvb, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_rl_s8, s390_verllvb, 0, BT_OV_V16QI_V16QI_UV16QI)
++OB_DEF_VAR (s390_vec_rl_u16, s390_verllvh, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_rl_s16, s390_verllvh, 0, BT_OV_V8HI_V8HI_UV8HI)
++OB_DEF_VAR (s390_vec_rl_u32, s390_verllvf, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_rl_s32, s390_verllvf, 0, BT_OV_V4SI_V4SI_UV4SI)
++OB_DEF_VAR (s390_vec_rl_u64, s390_verllvg, 0, BT_OV_UV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_rl_s64, s390_verllvg, 0, BT_OV_V2DI_V2DI_UV2DI)
++
++B_DEF (s390_verllvb, vrotlv16qi3, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF (s390_verllvh, vrotlv8hi3, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI)
++B_DEF (s390_verllvf, vrotlv4si3, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF (s390_verllvg, vrotlv2di3, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UV2DI)
++
++OB_DEF (s390_vec_rli, s390_vec_rli_u8, s390_vec_rli_s64, B_VX, BT_FN_OV4SI_OV4SI_ULONG)
++OB_DEF_VAR (s390_vec_rli_u8, s390_verllb, 0, BT_OV_UV16QI_UV16QI_ULONG)
++OB_DEF_VAR (s390_vec_rli_s8, s390_verllb, 0, BT_OV_V16QI_V16QI_ULONG)
++OB_DEF_VAR (s390_vec_rli_u16, s390_verllh, 0, BT_OV_UV8HI_UV8HI_ULONG)
++OB_DEF_VAR (s390_vec_rli_s16, s390_verllh, 0, BT_OV_V8HI_V8HI_ULONG)
++OB_DEF_VAR (s390_vec_rli_u32, s390_verllf, 0, BT_OV_UV4SI_UV4SI_ULONG)
++OB_DEF_VAR (s390_vec_rli_s32, s390_verllf, 0, BT_OV_V4SI_V4SI_ULONG)
++OB_DEF_VAR (s390_vec_rli_u64, s390_verllg, 0, BT_OV_UV2DI_UV2DI_ULONG)
++OB_DEF_VAR (s390_vec_rli_s64, s390_verllg, 0, BT_OV_V2DI_V2DI_ULONG)
++
++B_DEF (s390_verllb, rotlv16qi3, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UINT)
++B_DEF (s390_verllh, rotlv8hi3, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UINT)
++B_DEF (s390_verllf, rotlv4si3, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UINT)
++B_DEF (s390_verllg, rotlv2di3, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UINT)
++
++OB_DEF (s390_vec_rl_mask, s390_vec_rl_mask_s8,s390_vec_rl_mask_u64,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_UCHAR)
++OB_DEF_VAR (s390_vec_rl_mask_s8, s390_verimb, O3_U8, BT_OV_V16QI_V16QI_UV16QI_UCHAR)
++OB_DEF_VAR (s390_vec_rl_mask_u8, s390_verimb, O3_U8, BT_OV_UV16QI_UV16QI_UV16QI_UCHAR)
++OB_DEF_VAR (s390_vec_rl_mask_s16, s390_verimh, O3_U8, BT_OV_V8HI_V8HI_UV8HI_UCHAR)
++OB_DEF_VAR (s390_vec_rl_mask_u16, s390_verimh, O3_U8, BT_OV_UV8HI_UV8HI_UV8HI_UCHAR)
++OB_DEF_VAR (s390_vec_rl_mask_s32, s390_verimf, O3_U8, BT_OV_V4SI_V4SI_UV4SI_UCHAR)
++OB_DEF_VAR (s390_vec_rl_mask_u32, s390_verimf, O3_U8, BT_OV_UV4SI_UV4SI_UV4SI_UCHAR)
++OB_DEF_VAR (s390_vec_rl_mask_s64, s390_verimg, O3_U8, BT_OV_V2DI_V2DI_UV2DI_UCHAR)
++OB_DEF_VAR (s390_vec_rl_mask_u64, s390_verimg, O3_U8, BT_OV_UV2DI_UV2DI_UV2DI_UCHAR)
++
++B_DEF (s390_verimb, verimv16qi, 0, B_VX, O4_U8, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI_INT)
++B_DEF (s390_verimh, verimv8hi, 0, B_VX, O4_U8, BT_FN_UV8HI_UV8HI_UV8HI_UV8HI_INT)
++B_DEF (s390_verimf, verimv4si, 0, B_VX, O4_U8, BT_FN_UV4SI_UV4SI_UV4SI_UV4SI_INT)
++B_DEF (s390_verimg, verimv2di, 0, B_VX, O4_U8, BT_FN_UV2DI_UV2DI_UV2DI_UV2DI_INT)
++
++OB_DEF (s390_vec_sll, s390_vec_sll_u8q, s390_vec_sll_b64s, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_sll_u8q, s390_vsl, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_sll_u8h, s390_vsl, 0, BT_OV_UV16QI_UV16QI_UV8HI)
++OB_DEF_VAR (s390_vec_sll_u8s, s390_vsl, 0, BT_OV_UV16QI_UV16QI_UV4SI)
++OB_DEF_VAR (s390_vec_sll_s8q, s390_vsl, 0, BT_OV_V16QI_V16QI_UV16QI)
++OB_DEF_VAR (s390_vec_sll_s8h, s390_vsl, 0, BT_OV_V16QI_V16QI_UV8HI)
++OB_DEF_VAR (s390_vec_sll_s8s, s390_vsl, 0, BT_OV_V16QI_V16QI_UV4SI)
++OB_DEF_VAR (s390_vec_sll_b8q, s390_vsl, 0, BT_OV_BV16QI_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_sll_b8h, s390_vsl, 0, BT_OV_BV16QI_BV16QI_UV8HI)
++OB_DEF_VAR (s390_vec_sll_b8s, s390_vsl, 0, BT_OV_BV16QI_BV16QI_UV4SI)
++OB_DEF_VAR (s390_vec_sll_u16q, s390_vsl, 0, BT_OV_UV8HI_UV8HI_UV16QI)
++OB_DEF_VAR (s390_vec_sll_u16h, s390_vsl, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_sll_u16s, s390_vsl, 0, BT_OV_UV8HI_UV8HI_UV4SI)
++OB_DEF_VAR (s390_vec_sll_s16q, s390_vsl, 0, BT_OV_V8HI_V8HI_UV16QI)
++OB_DEF_VAR (s390_vec_sll_s16h, s390_vsl, 0, BT_OV_V8HI_V8HI_UV8HI)
++OB_DEF_VAR (s390_vec_sll_s16s, s390_vsl, 0, BT_OV_V8HI_V8HI_UV4SI)
++OB_DEF_VAR (s390_vec_sll_b16q, s390_vsl, 0, BT_OV_BV8HI_BV8HI_UV16QI)
++OB_DEF_VAR (s390_vec_sll_b16h, s390_vsl, 0, BT_OV_BV8HI_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_sll_b16s, s390_vsl, 0, BT_OV_BV8HI_BV8HI_UV4SI)
++OB_DEF_VAR (s390_vec_sll_u32q, s390_vsl, 0, BT_OV_UV4SI_UV4SI_UV16QI)
++OB_DEF_VAR (s390_vec_sll_u32h, s390_vsl, 0, BT_OV_UV4SI_UV4SI_UV8HI)
++OB_DEF_VAR (s390_vec_sll_u32s, s390_vsl, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_sll_s32q, s390_vsl, 0, BT_OV_V4SI_V4SI_UV16QI)
++OB_DEF_VAR (s390_vec_sll_s32h, s390_vsl, 0, BT_OV_V4SI_V4SI_UV8HI)
++OB_DEF_VAR (s390_vec_sll_s32s, s390_vsl, 0, BT_OV_V4SI_V4SI_UV4SI)
++OB_DEF_VAR (s390_vec_sll_b32q, s390_vsl, 0, BT_OV_BV4SI_BV4SI_UV16QI)
++OB_DEF_VAR (s390_vec_sll_b32h, s390_vsl, 0, BT_OV_BV4SI_BV4SI_UV8HI)
++OB_DEF_VAR (s390_vec_sll_b32s, s390_vsl, 0, BT_OV_BV4SI_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_sll_u64q, s390_vsl, 0, BT_OV_UV2DI_UV2DI_UV16QI)
++OB_DEF_VAR (s390_vec_sll_u64h, s390_vsl, 0, BT_OV_UV2DI_UV2DI_UV8HI)
++OB_DEF_VAR (s390_vec_sll_u64s, s390_vsl, 0, BT_OV_UV2DI_UV2DI_UV4SI)
++OB_DEF_VAR (s390_vec_sll_s64q, s390_vsl, 0, BT_OV_V2DI_V2DI_UV16QI)
++OB_DEF_VAR (s390_vec_sll_s64h, s390_vsl, 0, BT_OV_V2DI_V2DI_UV8HI)
++OB_DEF_VAR (s390_vec_sll_s64s, s390_vsl, 0, BT_OV_V2DI_V2DI_UV4SI)
++OB_DEF_VAR (s390_vec_sll_b64q, s390_vsl, 0, BT_OV_BV2DI_BV2DI_UV16QI)
++OB_DEF_VAR (s390_vec_sll_b64h, s390_vsl, 0, BT_OV_BV2DI_BV2DI_UV8HI)
++OB_DEF_VAR (s390_vec_sll_b64s, s390_vsl, 0, BT_OV_BV2DI_BV2DI_UV4SI)
++
++B_DEF (s390_vsl, vec_sllv16qiv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++
++OB_DEF (s390_vec_slb, s390_vec_slb_u8_u8, s390_vec_slb_dbl_s64,B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_slb_u8_u8, s390_vslb, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_slb_u8_s8, s390_vslb, 0, BT_OV_UV16QI_UV16QI_V16QI)
++OB_DEF_VAR (s390_vec_slb_s8_u8, s390_vslb, 0, BT_OV_V16QI_V16QI_UV16QI)
++OB_DEF_VAR (s390_vec_slb_s8_s8, s390_vslb, 0, BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_slb_u16_u16, s390_vslb, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_slb_u16_s16, s390_vslb, 0, BT_OV_UV8HI_UV8HI_V8HI)
++OB_DEF_VAR (s390_vec_slb_s16_u16, s390_vslb, 0, BT_OV_V8HI_V8HI_UV8HI)
++OB_DEF_VAR (s390_vec_slb_s16_s16, s390_vslb, 0, BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_slb_u32_u32, s390_vslb, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_slb_u32_s32, s390_vslb, 0, BT_OV_UV4SI_UV4SI_V4SI)
++OB_DEF_VAR (s390_vec_slb_s32_u32, s390_vslb, 0, BT_OV_V4SI_V4SI_UV4SI)
++OB_DEF_VAR (s390_vec_slb_s32_s32, s390_vslb, 0, BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_slb_u64_u64, s390_vslb, 0, BT_OV_UV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_slb_u64_s64, s390_vslb, 0, BT_OV_UV2DI_UV2DI_V2DI)
++OB_DEF_VAR (s390_vec_slb_s64_u64, s390_vslb, 0, BT_OV_V2DI_V2DI_UV2DI)
++OB_DEF_VAR (s390_vec_slb_s64_s64, s390_vslb, 0, BT_OV_V2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_slb_dbl_u64, s390_vslb, 0, BT_OV_V2DF_V2DF_UV2DI)
++OB_DEF_VAR (s390_vec_slb_dbl_s64, s390_vslb, 0, BT_OV_V2DF_V2DF_V2DI)
++
++B_DEF (s390_vslb, vec_slbv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++
++OB_DEF (s390_vec_sld, s390_vec_sld_s8, s390_vec_sld_dbl, B_VX, BT_FN_OV4SI_OV4SI_OV4SI_ULONGLONG)
++OB_DEF_VAR (s390_vec_sld_s8, s390_vsldb, O3_U4, BT_OV_V16QI_V16QI_V16QI_ULONGLONG)
++OB_DEF_VAR (s390_vec_sld_u8, s390_vsldb, O3_U4, BT_OV_UV16QI_UV16QI_UV16QI_ULONGLONG)
++OB_DEF_VAR (s390_vec_sld_s16, s390_vsldb, O3_U4, BT_OV_V8HI_V8HI_V8HI_ULONGLONG)
++OB_DEF_VAR (s390_vec_sld_u16, s390_vsldb, O3_U4, BT_OV_UV8HI_UV8HI_UV8HI_ULONGLONG)
++OB_DEF_VAR (s390_vec_sld_s32, s390_vsldb, O3_U4, BT_OV_V4SI_V4SI_V4SI_ULONGLONG)
++OB_DEF_VAR (s390_vec_sld_u32, s390_vsldb, O3_U4, BT_OV_UV4SI_UV4SI_UV4SI_ULONGLONG)
++OB_DEF_VAR (s390_vec_sld_s64, s390_vsldb, O3_U4, BT_OV_V2DI_V2DI_V2DI_ULONGLONG)
++OB_DEF_VAR (s390_vec_sld_u64, s390_vsldb, O3_U4, BT_OV_UV2DI_UV2DI_UV2DI_ULONGLONG)
++OB_DEF_VAR (s390_vec_sld_dbl, s390_vsldb, O3_U4, BT_OV_V2DF_V2DF_V2DF_ULONGLONG)
++
++B_DEF (s390_vsldb, vec_sldv16qi, 0, B_VX, O3_U4, BT_FN_UV16QI_UV16QI_UV16QI_INT)
++
++OB_DEF (s390_vec_sldw, s390_vec_sldw_s8, s390_vec_sldw_dbl, B_VX, BT_FN_OV4SI_OV4SI_OV4SI_INT)
++OB_DEF_VAR (s390_vec_sldw_s8, s390_vsldb, O3_U4, BT_OV_V16QI_V16QI_V16QI_INT)
++OB_DEF_VAR (s390_vec_sldw_u8, s390_vsldb, O3_U4, BT_OV_UV16QI_UV16QI_UV16QI_INT)
++OB_DEF_VAR (s390_vec_sldw_s16, s390_vsldb, O3_U4, BT_OV_V8HI_V8HI_V8HI_INT)
++OB_DEF_VAR (s390_vec_sldw_u16, s390_vsldb, O3_U4, BT_OV_UV8HI_UV8HI_UV8HI_INT)
++OB_DEF_VAR (s390_vec_sldw_s32, s390_vsldb, O3_U4, BT_OV_V4SI_V4SI_V4SI_INT)
++OB_DEF_VAR (s390_vec_sldw_u32, s390_vsldb, O3_U4, BT_OV_UV4SI_UV4SI_UV4SI_INT)
++OB_DEF_VAR (s390_vec_sldw_s64, s390_vsldb, O3_U4, BT_OV_V2DI_V2DI_V2DI_INT)
++OB_DEF_VAR (s390_vec_sldw_u64, s390_vsldb, O3_U4, BT_OV_UV2DI_UV2DI_UV2DI_INT)
++OB_DEF_VAR (s390_vec_sldw_dbl, s390_vsldb, O3_U4, BT_OV_V2DF_V2DF_V2DF_INT)
++
++OB_DEF (s390_vec_sral, s390_vec_sral_u8q, s390_vec_sral_b64s, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_sral_u8q, s390_vsra, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_sral_u8h, s390_vsra, 0, BT_OV_UV16QI_UV16QI_UV8HI)
++OB_DEF_VAR (s390_vec_sral_u8s, s390_vsra, 0, BT_OV_UV16QI_UV16QI_UV4SI)
++OB_DEF_VAR (s390_vec_sral_s8q, s390_vsra, 0, BT_OV_V16QI_V16QI_UV16QI)
++OB_DEF_VAR (s390_vec_sral_s8h, s390_vsra, 0, BT_OV_V16QI_V16QI_UV8HI)
++OB_DEF_VAR (s390_vec_sral_s8s, s390_vsra, 0, BT_OV_V16QI_V16QI_UV4SI)
++OB_DEF_VAR (s390_vec_sral_b8q, s390_vsra, 0, BT_OV_BV16QI_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_sral_b8h, s390_vsra, 0, BT_OV_BV16QI_BV16QI_UV8HI)
++OB_DEF_VAR (s390_vec_sral_b8s, s390_vsra, 0, BT_OV_BV16QI_BV16QI_UV4SI)
++OB_DEF_VAR (s390_vec_sral_u16q, s390_vsra, 0, BT_OV_UV8HI_UV8HI_UV16QI)
++OB_DEF_VAR (s390_vec_sral_u16h, s390_vsra, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_sral_u16s, s390_vsra, 0, BT_OV_UV8HI_UV8HI_UV4SI)
++OB_DEF_VAR (s390_vec_sral_s16q, s390_vsra, 0, BT_OV_V8HI_V8HI_UV16QI)
++OB_DEF_VAR (s390_vec_sral_s16h, s390_vsra, 0, BT_OV_V8HI_V8HI_UV8HI)
++OB_DEF_VAR (s390_vec_sral_s16s, s390_vsra, 0, BT_OV_V8HI_V8HI_UV4SI)
++OB_DEF_VAR (s390_vec_sral_b16q, s390_vsra, 0, BT_OV_BV8HI_BV8HI_UV16QI)
++OB_DEF_VAR (s390_vec_sral_b16h, s390_vsra, 0, BT_OV_BV8HI_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_sral_b16s, s390_vsra, 0, BT_OV_BV8HI_BV8HI_UV4SI)
++OB_DEF_VAR (s390_vec_sral_u32q, s390_vsra, 0, BT_OV_UV4SI_UV4SI_UV16QI)
++OB_DEF_VAR (s390_vec_sral_u32h, s390_vsra, 0, BT_OV_UV4SI_UV4SI_UV8HI)
++OB_DEF_VAR (s390_vec_sral_u32s, s390_vsra, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_sral_s32q, s390_vsra, 0, BT_OV_V4SI_V4SI_UV16QI)
++OB_DEF_VAR (s390_vec_sral_s32h, s390_vsra, 0, BT_OV_V4SI_V4SI_UV8HI)
++OB_DEF_VAR (s390_vec_sral_s32s, s390_vsra, 0, BT_OV_V4SI_V4SI_UV4SI)
++OB_DEF_VAR (s390_vec_sral_b32q, s390_vsra, 0, BT_OV_BV4SI_BV4SI_UV16QI)
++OB_DEF_VAR (s390_vec_sral_b32h, s390_vsra, 0, BT_OV_BV4SI_BV4SI_UV8HI)
++OB_DEF_VAR (s390_vec_sral_b32s, s390_vsra, 0, BT_OV_BV4SI_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_sral_u64q, s390_vsra, 0, BT_OV_UV2DI_UV2DI_UV16QI)
++OB_DEF_VAR (s390_vec_sral_u64h, s390_vsra, 0, BT_OV_UV2DI_UV2DI_UV8HI)
++OB_DEF_VAR (s390_vec_sral_u64s, s390_vsra, 0, BT_OV_UV2DI_UV2DI_UV4SI)
++OB_DEF_VAR (s390_vec_sral_s64q, s390_vsra, 0, BT_OV_V2DI_V2DI_UV16QI)
++OB_DEF_VAR (s390_vec_sral_s64h, s390_vsra, 0, BT_OV_V2DI_V2DI_UV8HI)
++OB_DEF_VAR (s390_vec_sral_s64s, s390_vsra, 0, BT_OV_V2DI_V2DI_UV4SI)
++OB_DEF_VAR (s390_vec_sral_b64q, s390_vsra, 0, BT_OV_BV2DI_BV2DI_UV16QI)
++OB_DEF_VAR (s390_vec_sral_b64h, s390_vsra, 0, BT_OV_BV2DI_BV2DI_UV8HI)
++OB_DEF_VAR (s390_vec_sral_b64s, s390_vsra, 0, BT_OV_BV2DI_BV2DI_UV4SI)
++
++B_DEF (s390_vsra, vec_sralv16qiv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++
++OB_DEF (s390_vec_srab, s390_vec_srab_u8_u8,s390_vec_srab_dbl_s64,B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_srab_u8_u8, s390_vsrab, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_srab_u8_s8, s390_vsrab, 0, BT_OV_UV16QI_UV16QI_V16QI)
++OB_DEF_VAR (s390_vec_srab_s8_u8, s390_vsrab, 0, BT_OV_V16QI_V16QI_UV16QI)
++OB_DEF_VAR (s390_vec_srab_s8_s8, s390_vsrab, 0, BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_srab_u16_u16, s390_vsrab, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_srab_u16_s16, s390_vsrab, 0, BT_OV_UV8HI_UV8HI_V8HI)
++OB_DEF_VAR (s390_vec_srab_s16_u16, s390_vsrab, 0, BT_OV_V8HI_V8HI_UV8HI)
++OB_DEF_VAR (s390_vec_srab_s16_s16, s390_vsrab, 0, BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_srab_u32_u32, s390_vsrab, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_srab_u32_s32, s390_vsrab, 0, BT_OV_UV4SI_UV4SI_V4SI)
++OB_DEF_VAR (s390_vec_srab_s32_u32, s390_vsrab, 0, BT_OV_V4SI_V4SI_UV4SI)
++OB_DEF_VAR (s390_vec_srab_s32_s32, s390_vsrab, 0, BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_srab_u64_u64, s390_vsrab, 0, BT_OV_UV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_srab_u64_s64, s390_vsrab, 0, BT_OV_UV2DI_UV2DI_V2DI)
++OB_DEF_VAR (s390_vec_srab_s64_u64, s390_vsrab, 0, BT_OV_V2DI_V2DI_UV2DI)
++OB_DEF_VAR (s390_vec_srab_s64_s64, s390_vsrab, 0, BT_OV_V2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_srab_dbl_u64, s390_vsrab, 0, BT_OV_V2DF_V2DF_UV2DI)
++OB_DEF_VAR (s390_vec_srab_dbl_s64, s390_vsrab, 0, BT_OV_V2DF_V2DF_V2DI)
++
++B_DEF (s390_vsrab, vec_srabv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++
++OB_DEF (s390_vec_srl, s390_vec_srl_u8q, s390_vec_srl_b64s, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_srl_u8q, s390_vsrl, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_srl_u8h, s390_vsrl, 0, BT_OV_UV16QI_UV16QI_UV8HI)
++OB_DEF_VAR (s390_vec_srl_u8s, s390_vsrl, 0, BT_OV_UV16QI_UV16QI_UV4SI)
++OB_DEF_VAR (s390_vec_srl_s8q, s390_vsrl, 0, BT_OV_V16QI_V16QI_UV16QI)
++OB_DEF_VAR (s390_vec_srl_s8h, s390_vsrl, 0, BT_OV_V16QI_V16QI_UV8HI)
++OB_DEF_VAR (s390_vec_srl_s8s, s390_vsrl, 0, BT_OV_V16QI_V16QI_UV4SI)
++OB_DEF_VAR (s390_vec_srl_b8q, s390_vsrl, 0, BT_OV_BV16QI_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_srl_b8h, s390_vsrl, 0, BT_OV_BV16QI_BV16QI_UV8HI)
++OB_DEF_VAR (s390_vec_srl_b8s, s390_vsrl, 0, BT_OV_BV16QI_BV16QI_UV4SI)
++OB_DEF_VAR (s390_vec_srl_u16q, s390_vsrl, 0, BT_OV_UV8HI_UV8HI_UV16QI)
++OB_DEF_VAR (s390_vec_srl_u16h, s390_vsrl, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_srl_u16s, s390_vsrl, 0, BT_OV_UV8HI_UV8HI_UV4SI)
++OB_DEF_VAR (s390_vec_srl_s16q, s390_vsrl, 0, BT_OV_V8HI_V8HI_UV16QI)
++OB_DEF_VAR (s390_vec_srl_s16h, s390_vsrl, 0, BT_OV_V8HI_V8HI_UV8HI)
++OB_DEF_VAR (s390_vec_srl_s16s, s390_vsrl, 0, BT_OV_V8HI_V8HI_UV4SI)
++OB_DEF_VAR (s390_vec_srl_b16q, s390_vsrl, 0, BT_OV_BV8HI_BV8HI_UV16QI)
++OB_DEF_VAR (s390_vec_srl_b16h, s390_vsrl, 0, BT_OV_BV8HI_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_srl_b16s, s390_vsrl, 0, BT_OV_BV8HI_BV8HI_UV4SI)
++OB_DEF_VAR (s390_vec_srl_u32q, s390_vsrl, 0, BT_OV_UV4SI_UV4SI_UV16QI)
++OB_DEF_VAR (s390_vec_srl_u32h, s390_vsrl, 0, BT_OV_UV4SI_UV4SI_UV8HI)
++OB_DEF_VAR (s390_vec_srl_u32s, s390_vsrl, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_srl_s32q, s390_vsrl, 0, BT_OV_V4SI_V4SI_UV16QI)
++OB_DEF_VAR (s390_vec_srl_s32h, s390_vsrl, 0, BT_OV_V4SI_V4SI_UV8HI)
++OB_DEF_VAR (s390_vec_srl_s32s, s390_vsrl, 0, BT_OV_V4SI_V4SI_UV4SI)
++OB_DEF_VAR (s390_vec_srl_b32q, s390_vsrl, 0, BT_OV_BV4SI_BV4SI_UV16QI)
++OB_DEF_VAR (s390_vec_srl_b32h, s390_vsrl, 0, BT_OV_BV4SI_BV4SI_UV8HI)
++OB_DEF_VAR (s390_vec_srl_b32s, s390_vsrl, 0, BT_OV_BV4SI_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_srl_u64q, s390_vsrl, 0, BT_OV_UV2DI_UV2DI_UV16QI)
++OB_DEF_VAR (s390_vec_srl_u64h, s390_vsrl, 0, BT_OV_UV2DI_UV2DI_UV8HI)
++OB_DEF_VAR (s390_vec_srl_u64s, s390_vsrl, 0, BT_OV_UV2DI_UV2DI_UV4SI)
++OB_DEF_VAR (s390_vec_srl_s64q, s390_vsrl, 0, BT_OV_V2DI_V2DI_UV16QI)
++OB_DEF_VAR (s390_vec_srl_s64h, s390_vsrl, 0, BT_OV_V2DI_V2DI_UV8HI)
++OB_DEF_VAR (s390_vec_srl_s64s, s390_vsrl, 0, BT_OV_V2DI_V2DI_UV4SI)
++OB_DEF_VAR (s390_vec_srl_b64q, s390_vsrl, 0, BT_OV_BV2DI_BV2DI_UV16QI)
++OB_DEF_VAR (s390_vec_srl_b64h, s390_vsrl, 0, BT_OV_BV2DI_BV2DI_UV8HI)
++OB_DEF_VAR (s390_vec_srl_b64s, s390_vsrl, 0, BT_OV_BV2DI_BV2DI_UV4SI)
++
++B_DEF (s390_vsrl, vec_srlv16qiv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++
++OB_DEF (s390_vec_srb, s390_vec_srb_u8_u8, s390_vec_srb_dbl_s64,B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_srb_u8_u8, s390_vsrlb, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_srb_u8_s8, s390_vsrlb, 0, BT_OV_UV16QI_UV16QI_V16QI)
++OB_DEF_VAR (s390_vec_srb_s8_u8, s390_vsrlb, 0, BT_OV_V16QI_V16QI_UV16QI)
++OB_DEF_VAR (s390_vec_srb_s8_s8, s390_vsrlb, 0, BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_srb_u16_u16, s390_vsrlb, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_srb_u16_s16, s390_vsrlb, 0, BT_OV_UV8HI_UV8HI_V8HI)
++OB_DEF_VAR (s390_vec_srb_s16_u16, s390_vsrlb, 0, BT_OV_V8HI_V8HI_UV8HI)
++OB_DEF_VAR (s390_vec_srb_s16_s16, s390_vsrlb, 0, BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_srb_u32_u32, s390_vsrlb, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_srb_u32_s32, s390_vsrlb, 0, BT_OV_UV4SI_UV4SI_V4SI)
++OB_DEF_VAR (s390_vec_srb_s32_u32, s390_vsrlb, 0, BT_OV_V4SI_V4SI_UV4SI)
++OB_DEF_VAR (s390_vec_srb_s32_s32, s390_vsrlb, 0, BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_srb_u64_u64, s390_vsrlb, 0, BT_OV_UV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_srb_u64_s64, s390_vsrlb, 0, BT_OV_UV2DI_UV2DI_V2DI)
++OB_DEF_VAR (s390_vec_srb_s64_u64, s390_vsrlb, 0, BT_OV_V2DI_V2DI_UV2DI)
++OB_DEF_VAR (s390_vec_srb_s64_s64, s390_vsrlb, 0, BT_OV_V2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_srb_dbl_u64, s390_vsrlb, 0, BT_OV_V2DF_V2DF_UV2DI)
++OB_DEF_VAR (s390_vec_srb_dbl_s64, s390_vsrlb, 0, BT_OV_V2DF_V2DF_V2DI)
++
++B_DEF (s390_vsrlb, vec_srbv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF (s390_vsq, vec_sub_u128, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++
++OB_DEF (s390_vec_subc, s390_vec_subc_u8, s390_vec_subc_u64, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_subc_u8, s390_vscbib, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_subc_u16, s390_vscbih, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_subc_u32, s390_vscbif, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_subc_u64, s390_vscbig, 0, BT_OV_UV2DI_UV2DI_UV2DI)
++
++B_DEF (s390_vscbib, vec_subcv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF (s390_vscbih, vec_subcv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI)
++B_DEF (s390_vscbif, vec_subcv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF (s390_vscbig, vec_subcv2di, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UV2DI)
++B_DEF (s390_vscbiq, vec_subc_u128, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF (s390_vsbiq, vec_sube_u128, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI)
++B_DEF (s390_vsbcbiq, vec_subec_u128, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI)
++
++OB_DEF (s390_vec_sum2, s390_vec_sum2_u16, s390_vec_sum2_u32, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_sum2_u16, s390_vsumgh, 0, BT_OV_UV2DI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_sum2_u32, s390_vsumgf, 0, BT_OV_UV2DI_UV4SI_UV4SI)
++
++B_DEF (s390_vsumgh, vec_sum2v8hi, 0, B_VX, 0, BT_FN_UV2DI_UV8HI_UV8HI)
++B_DEF (s390_vsumgf, vec_sum2v4si, 0, B_VX, 0, BT_FN_UV2DI_UV4SI_UV4SI)
++
++OB_DEF (s390_vec_sum_u128, s390_vec_sum_u128_u32,s390_vec_sum_u128_u64,B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_sum_u128_u32, s390_vsumqf, 0, BT_OV_UV16QI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_sum_u128_u64, s390_vsumqg, 0, BT_OV_UV16QI_UV2DI_UV2DI)
++
++B_DEF (s390_vsumqf, vec_sum_u128v4si, 0, B_VX, 0, BT_FN_UV16QI_UV4SI_UV4SI)
++B_DEF (s390_vsumqg, vec_sum_u128v2di, 0, B_VX, 0, BT_FN_UV16QI_UV2DI_UV2DI)
++
++OB_DEF (s390_vec_sum4, s390_vec_sum4_u8, s390_vec_sum4_u16, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_sum4_u8, s390_vsumb, 0, BT_OV_UV4SI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_sum4_u16, s390_vsumh, 0, BT_OV_UV4SI_UV8HI_UV8HI)
++
++B_DEF (s390_vsumb, vec_sum4v16qi, 0, B_VX, 0, BT_FN_UV4SI_UV16QI_UV16QI)
++B_DEF (s390_vsumh, vec_sum4v8hi, 0, B_VX, 0, BT_FN_UV4SI_UV8HI_UV8HI)
++
++OB_DEF (s390_vec_test_mask, s390_vec_test_mask_s8,s390_vec_test_mask_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_test_mask_s8, s390_vtm, 0, BT_OV_INT_V16QI_UV16QI)
++OB_DEF_VAR (s390_vec_test_mask_u8, s390_vtm, 0, BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_test_mask_s16, s390_vtm, 0, BT_OV_INT_V8HI_UV8HI)
++OB_DEF_VAR (s390_vec_test_mask_u16, s390_vtm, 0, BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_test_mask_s32, s390_vtm, 0, BT_OV_INT_V4SI_UV4SI)
++OB_DEF_VAR (s390_vec_test_mask_u32, s390_vtm, 0, BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_test_mask_s64, s390_vtm, 0, BT_OV_INT_V2DI_UV2DI)
++OB_DEF_VAR (s390_vec_test_mask_u64, s390_vtm, 0, BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_test_mask_dbl, s390_vtm, 0, BT_OV_INT_V2DF_UV2DI)
++
++B_DEF (s390_vtm, vec_test_mask_intv16qi,0, B_VX, 0, BT_FN_INT_UV16QI_UV16QI)
++B_DEF (s390_vfaeb, vfaev16qi, 0, B_VX, O3_U4, BT_FN_UV16QI_UV16QI_UV16QI_INT)
++B_DEF (s390_vfaeh, vfaev8hi, 0, B_VX, O3_U4, BT_FN_UV8HI_UV8HI_UV8HI_INT)
++B_DEF (s390_vfaef, vfaev4si, 0, B_VX, O3_U4, BT_FN_UV4SI_UV4SI_UV4SI_INT)
++B_DEF (s390_vfaezb, vfaezv16qi, 0, B_VX, O3_U4, BT_FN_UV16QI_UV16QI_UV16QI_INT)
++B_DEF (s390_vfaezh, vfaezv8hi, 0, B_VX, O3_U4, BT_FN_UV8HI_UV8HI_UV8HI_INT)
++B_DEF (s390_vfaezf, vfaezv4si, 0, B_VX, O3_U4, BT_FN_UV4SI_UV4SI_UV4SI_INT)
++B_DEF (s390_vfaebs, vfaesv16qi, 0, B_VX, O3_U4, BT_FN_UV16QI_UV16QI_UV16QI_INT_INTPTR)
++B_DEF (s390_vfaehs, vfaesv8hi, 0, B_VX, O3_U4, BT_FN_UV8HI_UV8HI_UV8HI_INT_INTPTR)
++B_DEF (s390_vfaefs, vfaesv4si, 0, B_VX, O3_U4, BT_FN_UV4SI_UV4SI_UV4SI_INT_INTPTR)
++B_DEF (s390_vfaezbs, vfaezsv16qi, 0, B_VX, O3_U4, BT_FN_UV16QI_UV16QI_UV16QI_INT_INTPTR)
++B_DEF (s390_vfaezhs, vfaezsv8hi, 0, B_VX, O3_U4, BT_FN_UV8HI_UV8HI_UV8HI_INT_INTPTR)
++B_DEF (s390_vfaezfs, vfaezsv4si, 0, B_VX, O3_U4, BT_FN_UV4SI_UV4SI_UV4SI_INT_INTPTR)
++
++OB_DEF (s390_vec_find_any_eq_idx, s390_vfaeb_idx_s8, s390_vfaef_idx_u32b,B_VX, BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vfaeb_idx_s8, s390_vfaeb, 0, BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vfaeb_idx_u8a, s390_vfaeb, 0, BT_OV_UV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vfaeb_idx_u8b, s390_vfaeb, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vfaeh_idx_s16, s390_vfaeh, 0, BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vfaeh_idx_u16a, s390_vfaeh, 0, BT_OV_UV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vfaeh_idx_u16b, s390_vfaeh, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vfaef_idx_s32, s390_vfaef, 0, BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vfaef_idx_u32a, s390_vfaef, 0, BT_OV_UV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vfaef_idx_u32b, s390_vfaef, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++
++OB_DEF (s390_vec_find_any_ne_idx, s390_vfaeb_inv_idx_s8,s390_vfaef_inv_idx_u32b,B_VX, BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vfaeb_inv_idx_s8, s390_vfaeb, 0, BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vfaeb_inv_idx_u8a, s390_vfaeb, 0, BT_OV_UV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vfaeb_inv_idx_u8b, s390_vfaeb, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vfaeh_inv_idx_s16, s390_vfaeh, 0, BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vfaeh_inv_idx_u16a, s390_vfaeh, 0, BT_OV_UV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vfaeh_inv_idx_u16b, s390_vfaeh, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vfaef_inv_idx_s32, s390_vfaef, 0, BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vfaef_inv_idx_u32a, s390_vfaef, 0, BT_OV_UV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vfaef_inv_idx_u32b, s390_vfaef, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++
++OB_DEF (s390_vec_find_any_eq_or_0_idx,s390_vfaezb_idx_s8,s390_vfaezf_idx_u32b,B_VX, BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vfaezb_idx_s8, s390_vfaezb, 0, BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vfaezb_idx_u8a, s390_vfaezb, 0, BT_OV_UV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vfaezb_idx_u8b, s390_vfaezb, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vfaezh_idx_s16, s390_vfaezh, 0, BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vfaezh_idx_u16a, s390_vfaezh, 0, BT_OV_UV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vfaezh_idx_u16b, s390_vfaezh, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vfaezf_idx_s32, s390_vfaezf, 0, BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vfaezf_idx_u32a, s390_vfaezf, 0, BT_OV_UV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vfaezf_idx_u32b, s390_vfaezf, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++
++OB_DEF (s390_vec_find_any_ne_or_0_idx,s390_vfaezb_inv_idx_s8,s390_vfaezf_inv_idx_u32b,B_VX, BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vfaezb_inv_idx_s8, s390_vfaezb, 0, BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vfaezb_inv_idx_u8a, s390_vfaezb, 0, BT_OV_UV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vfaezb_inv_idx_u8b, s390_vfaezb, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vfaezh_inv_idx_s16, s390_vfaezh, 0, BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vfaezh_inv_idx_u16a, s390_vfaezh, 0, BT_OV_UV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vfaezh_inv_idx_u16b, s390_vfaezh, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vfaezf_inv_idx_s32, s390_vfaezf, 0, BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vfaezf_inv_idx_u32a, s390_vfaezf, 0, BT_OV_UV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vfaezf_inv_idx_u32b, s390_vfaezf, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++
++OB_DEF (s390_vec_find_any_eq, s390_vfaeb_s8, s390_vfaef_b32, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vfaeb_s8, s390_vfaeb, 0, BT_OV_BV16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vfaeb_u8, s390_vfaeb, 0, BT_OV_BV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vfaeb_b8, s390_vfaeb, 0, BT_OV_BV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vfaeh_s16, s390_vfaeh, 0, BT_OV_BV8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vfaeh_u16, s390_vfaeh, 0, BT_OV_BV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vfaeh_b16, s390_vfaeh, 0, BT_OV_BV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vfaef_s32, s390_vfaef, 0, BT_OV_BV4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vfaef_u32, s390_vfaef, 0, BT_OV_BV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vfaef_b32, s390_vfaef, 0, BT_OV_BV4SI_UV4SI_UV4SI)
++
++OB_DEF (s390_vec_find_any_ne, s390_vfaeb_inv_s8, s390_vfaef_inv_b32, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vfaeb_inv_s8, s390_vfaeb, 0, BT_OV_BV16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vfaeb_inv_u8, s390_vfaeb, 0, BT_OV_BV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vfaeb_inv_b8, s390_vfaeb, 0, BT_OV_BV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vfaeh_inv_s16, s390_vfaeh, 0, BT_OV_BV8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vfaeh_inv_u16, s390_vfaeh, 0, BT_OV_BV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vfaeh_inv_b16, s390_vfaeh, 0, BT_OV_BV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vfaef_inv_s32, s390_vfaef, 0, BT_OV_BV4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vfaef_inv_u32, s390_vfaef, 0, BT_OV_BV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vfaef_inv_b32, s390_vfaef, 0, BT_OV_BV4SI_UV4SI_UV4SI)
++
++OB_DEF (s390_vec_find_any_eq_idx_cc,s390_vfaebs_idx_s8, s390_vfaefs_idx_u32b,B_VX, BT_FN_INT_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vfaebs_idx_s8, s390_vfaebs, 0, BT_OV_V16QI_V16QI_V16QI_INTPTR)
++OB_DEF_VAR (s390_vfaebs_idx_u8a, s390_vfaebs, 0, BT_OV_UV16QI_BV16QI_BV16QI_INTPTR)
++OB_DEF_VAR (s390_vfaebs_idx_u8b, s390_vfaebs, 0, BT_OV_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vfaehs_idx_s16, s390_vfaehs, 0, BT_OV_V8HI_V8HI_V8HI_INTPTR)
++OB_DEF_VAR (s390_vfaehs_idx_u16a, s390_vfaehs, 0, BT_OV_UV8HI_BV8HI_BV8HI_INTPTR)
++OB_DEF_VAR (s390_vfaehs_idx_u16b, s390_vfaehs, 0, BT_OV_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vfaefs_idx_s32, s390_vfaefs, 0, BT_OV_V4SI_V4SI_V4SI_INTPTR)
++OB_DEF_VAR (s390_vfaefs_idx_u32a, s390_vfaefs, 0, BT_OV_UV4SI_BV4SI_BV4SI_INTPTR)
++OB_DEF_VAR (s390_vfaefs_idx_u32b, s390_vfaefs, 0, BT_OV_UV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF (s390_vec_find_any_ne_idx_cc,s390_vfaebs_inv_idx_s8,s390_vfaefs_inv_idx_u32b,B_VX, BT_FN_INT_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vfaebs_inv_idx_s8, s390_vfaebs, 0, BT_OV_V16QI_V16QI_V16QI_INTPTR)
++OB_DEF_VAR (s390_vfaebs_inv_idx_u8a, s390_vfaebs, 0, BT_OV_UV16QI_BV16QI_BV16QI_INTPTR)
++OB_DEF_VAR (s390_vfaebs_inv_idx_u8b, s390_vfaebs, 0, BT_OV_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vfaehs_inv_idx_s16, s390_vfaehs, 0, BT_OV_V8HI_V8HI_V8HI_INTPTR)
++OB_DEF_VAR (s390_vfaehs_inv_idx_u16a, s390_vfaehs, 0, BT_OV_UV8HI_BV8HI_BV8HI_INTPTR)
++OB_DEF_VAR (s390_vfaehs_inv_idx_u16b, s390_vfaehs, 0, BT_OV_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vfaefs_inv_idx_s32, s390_vfaefs, 0, BT_OV_V4SI_V4SI_V4SI_INTPTR)
++OB_DEF_VAR (s390_vfaefs_inv_idx_u32a, s390_vfaefs, 0, BT_OV_UV4SI_BV4SI_BV4SI_INTPTR)
++OB_DEF_VAR (s390_vfaefs_inv_idx_u32b, s390_vfaefs, 0, BT_OV_UV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF (s390_vec_find_any_eq_or_0_idx_cc,s390_vfaezbs_idx_s8,s390_vfaezfs_idx_u32b,B_VX, BT_FN_INT_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vfaezbs_idx_s8, s390_vfaezbs, 0, BT_OV_V16QI_V16QI_V16QI_INTPTR)
++OB_DEF_VAR (s390_vfaezbs_idx_u8a, s390_vfaezbs, 0, BT_OV_UV16QI_BV16QI_BV16QI_INTPTR)
++OB_DEF_VAR (s390_vfaezbs_idx_u8b, s390_vfaezbs, 0, BT_OV_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vfaezhs_idx_s16, s390_vfaezhs, 0, BT_OV_V8HI_V8HI_V8HI_INTPTR)
++OB_DEF_VAR (s390_vfaezhs_idx_u16a, s390_vfaezhs, 0, BT_OV_UV8HI_BV8HI_BV8HI_INTPTR)
++OB_DEF_VAR (s390_vfaezhs_idx_u16b, s390_vfaezhs, 0, BT_OV_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vfaezfs_idx_s32, s390_vfaezfs, 0, BT_OV_V4SI_V4SI_V4SI_INTPTR)
++OB_DEF_VAR (s390_vfaezfs_idx_u32a, s390_vfaezfs, 0, BT_OV_UV4SI_BV4SI_BV4SI_INTPTR)
++OB_DEF_VAR (s390_vfaezfs_idx_u32b, s390_vfaezfs, 0, BT_OV_UV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF (s390_vec_find_any_ne_or_0_idx_cc,s390_vfaezbs_inv_idx_s8,s390_vfaezfs_inv_idx_u32b,B_VX,BT_FN_INT_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vfaezbs_inv_idx_s8, s390_vfaezbs, 0, BT_OV_V16QI_V16QI_V16QI_INTPTR)
++OB_DEF_VAR (s390_vfaezbs_inv_idx_u8a, s390_vfaezbs, 0, BT_OV_UV16QI_BV16QI_BV16QI_INTPTR)
++OB_DEF_VAR (s390_vfaezbs_inv_idx_u8b, s390_vfaezbs, 0, BT_OV_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vfaezhs_inv_idx_s16, s390_vfaezhs, 0, BT_OV_V8HI_V8HI_V8HI_INTPTR)
++OB_DEF_VAR (s390_vfaezhs_inv_idx_u16a, s390_vfaezhs, 0, BT_OV_UV8HI_BV8HI_BV8HI_INTPTR)
++OB_DEF_VAR (s390_vfaezhs_inv_idx_u16b, s390_vfaezhs, 0, BT_OV_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vfaezfs_inv_idx_s32, s390_vfaezfs, 0, BT_OV_V4SI_V4SI_V4SI_INTPTR)
++OB_DEF_VAR (s390_vfaezfs_inv_idx_u32a, s390_vfaezfs, 0, BT_OV_UV4SI_BV4SI_BV4SI_INTPTR)
++OB_DEF_VAR (s390_vfaezfs_inv_idx_u32b, s390_vfaezfs, 0, BT_OV_UV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF (s390_vec_find_any_eq_cc, s390_vfaebs_s8, s390_vfaefs_b32, B_VX, BT_FN_OV4SI_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vfaebs_s8, s390_vfaebs, 0, BT_OV_BV16QI_V16QI_V16QI_INTPTR)
++OB_DEF_VAR (s390_vfaebs_u8, s390_vfaebs, 0, BT_OV_BV16QI_BV16QI_BV16QI_INTPTR)
++OB_DEF_VAR (s390_vfaebs_b8, s390_vfaebs, 0, BT_OV_BV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vfaehs_s16, s390_vfaehs, 0, BT_OV_BV8HI_V8HI_V8HI_INTPTR)
++OB_DEF_VAR (s390_vfaehs_u16, s390_vfaehs, 0, BT_OV_BV8HI_BV8HI_BV8HI_INTPTR)
++OB_DEF_VAR (s390_vfaehs_b16, s390_vfaehs, 0, BT_OV_BV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vfaefs_s32, s390_vfaefs, 0, BT_OV_BV4SI_V4SI_V4SI_INTPTR)
++OB_DEF_VAR (s390_vfaefs_u32, s390_vfaefs, 0, BT_OV_BV4SI_BV4SI_BV4SI_INTPTR)
++OB_DEF_VAR (s390_vfaefs_b32, s390_vfaefs, 0, BT_OV_BV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF (s390_vec_find_any_ne_cc, s390_vfaebs_inv_s8, s390_vfaefs_inv_b32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vfaebs_inv_s8, s390_vfaebs, 0, BT_OV_BV16QI_V16QI_V16QI_INTPTR)
++OB_DEF_VAR (s390_vfaebs_inv_u8, s390_vfaebs, 0, BT_OV_BV16QI_BV16QI_BV16QI_INTPTR)
++OB_DEF_VAR (s390_vfaebs_inv_b8, s390_vfaebs, 0, BT_OV_BV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vfaehs_inv_s16, s390_vfaehs, 0, BT_OV_BV8HI_V8HI_V8HI_INTPTR)
++OB_DEF_VAR (s390_vfaehs_inv_u16, s390_vfaehs, 0, BT_OV_BV8HI_BV8HI_BV8HI_INTPTR)
++OB_DEF_VAR (s390_vfaehs_inv_b16, s390_vfaehs, 0, BT_OV_BV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vfaefs_inv_s32, s390_vfaefs, 0, BT_OV_BV4SI_V4SI_V4SI_INTPTR)
++OB_DEF_VAR (s390_vfaefs_inv_u32, s390_vfaefs, 0, BT_OV_BV4SI_BV4SI_BV4SI_INTPTR)
++OB_DEF_VAR (s390_vfaefs_inv_b32, s390_vfaefs, 0, BT_OV_BV4SI_UV4SI_UV4SI_INTPTR)
++
++B_DEF (s390_vfeeb, vfeev16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF (s390_vfeeh, vfeev8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI)
++B_DEF (s390_vfeef, vfeev4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF (s390_vfeezb, vfeezv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF (s390_vfeezh, vfeezv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI)
++B_DEF (s390_vfeezf, vfeezv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF (s390_vfeebs, vfeesv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_INTPTR)
++B_DEF (s390_vfeehs, vfeesv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI_INTPTR)
++B_DEF (s390_vfeefs, vfeesv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI_INTPTR)
++B_DEF (s390_vfeezbs, vfeezsv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_INTPTR)
++B_DEF (s390_vfeezhs, vfeezsv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI_INTPTR)
++B_DEF (s390_vfeezfs, vfeezsv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF (s390_vec_cmpeq_idx, s390_vfeeb_s8, s390_vfeef_u32b, B_VX, BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vfeeb_s8, s390_vfeeb, 0, BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vfeeb_u8a, s390_vfeeb, 0, BT_OV_UV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vfeeb_u8b, s390_vfeeb, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vfeeh_s16, s390_vfeeh, 0, BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vfeeh_u16a, s390_vfeeh, 0, BT_OV_UV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vfeeh_u16b, s390_vfeeh, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vfeef_s32, s390_vfeef, 0, BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vfeef_u32a, s390_vfeef, 0, BT_OV_UV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vfeef_u32b, s390_vfeef, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++
++OB_DEF (s390_vec_cmpeq_or_0_idx, s390_vfeezb_s8, s390_vfeezf_u32b, B_VX, BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vfeezb_s8, s390_vfeezb, 0, BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vfeezb_u8a, s390_vfeezb, 0, BT_OV_UV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vfeezb_u8b, s390_vfeezb, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vfeezh_s16, s390_vfeezh, 0, BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vfeezh_u16a, s390_vfeezh, 0, BT_OV_UV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vfeezh_u16b, s390_vfeezh, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vfeezf_s32, s390_vfeezf, 0, BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vfeezf_u32a, s390_vfeezf, 0, BT_OV_UV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vfeezf_u32b, s390_vfeezf, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++
++OB_DEF (s390_vec_cmpeq_idx_cc, s390_vfeebs_s8, s390_vfeefs_u32b, B_VX, BT_FN_INT_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vfeebs_s8, s390_vfeebs, 0, BT_OV_V16QI_V16QI_V16QI_INTPTR)
++OB_DEF_VAR (s390_vfeebs_u8a, s390_vfeebs, 0, BT_OV_UV16QI_BV16QI_BV16QI_INTPTR)
++OB_DEF_VAR (s390_vfeebs_u8b, s390_vfeebs, 0, BT_OV_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vfeehs_s16, s390_vfeehs, 0, BT_OV_V8HI_V8HI_V8HI_INTPTR)
++OB_DEF_VAR (s390_vfeehs_u16a, s390_vfeehs, 0, BT_OV_UV8HI_BV8HI_BV8HI_INTPTR)
++OB_DEF_VAR (s390_vfeehs_u16b, s390_vfeehs, 0, BT_OV_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vfeefs_s32, s390_vfeefs, 0, BT_OV_V4SI_V4SI_V4SI_INTPTR)
++OB_DEF_VAR (s390_vfeefs_u32a, s390_vfeefs, 0, BT_OV_UV4SI_BV4SI_BV4SI_INTPTR)
++OB_DEF_VAR (s390_vfeefs_u32b, s390_vfeefs, 0, BT_OV_UV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF (s390_vec_cmpeq_or_0_idx_cc, s390_vfeezbs_s8, s390_vfeezfs_u32b, B_VX, BT_FN_INT_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vfeezbs_s8, s390_vfeezbs, 0, BT_OV_V16QI_V16QI_V16QI_INTPTR)
++OB_DEF_VAR (s390_vfeezbs_u8a, s390_vfeezbs, 0, BT_OV_UV16QI_BV16QI_BV16QI_INTPTR)
++OB_DEF_VAR (s390_vfeezbs_u8b, s390_vfeezbs, 0, BT_OV_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vfeezhs_s16, s390_vfeezhs, 0, BT_OV_V8HI_V8HI_V8HI_INTPTR)
++OB_DEF_VAR (s390_vfeezhs_u16a, s390_vfeezhs, 0, BT_OV_UV8HI_BV8HI_BV8HI_INTPTR)
++OB_DEF_VAR (s390_vfeezhs_u16b, s390_vfeezhs, 0, BT_OV_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vfeezfs_s32, s390_vfeezfs, 0, BT_OV_V4SI_V4SI_V4SI_INTPTR)
++OB_DEF_VAR (s390_vfeezfs_u32a, s390_vfeezfs, 0, BT_OV_UV4SI_BV4SI_BV4SI_INTPTR)
++OB_DEF_VAR (s390_vfeezfs_u32b, s390_vfeezfs, 0, BT_OV_UV4SI_UV4SI_UV4SI_INTPTR)
++
++B_DEF (s390_vfeneb, vfenev16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF (s390_vfeneh, vfenev8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI)
++B_DEF (s390_vfenef, vfenev4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF (s390_vfenezb, vfenezv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF (s390_vfenezh, vfenezv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI)
++B_DEF (s390_vfenezf, vfenezv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF (s390_vfenebs, vfenesv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_INTPTR)
++B_DEF (s390_vfenehs, vfenesv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI_INTPTR)
++B_DEF (s390_vfenefs, vfenesv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI_INTPTR)
++B_DEF (s390_vfenezbs, vfenezsv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_INTPTR)
++B_DEF (s390_vfenezhs, vfenezsv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI_INTPTR)
++B_DEF (s390_vfenezfs, vfenezsv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF (s390_vec_cmpne_idx, s390_vfeneb_s8, s390_vfenef_u32b, B_VX, BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vfeneb_s8, s390_vfeneb, 0, BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vfeneb_u8a, s390_vfeneb, 0, BT_OV_UV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vfeneb_u8b, s390_vfeneb, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vfeneh_s16, s390_vfeneh, 0, BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vfeneh_u16a, s390_vfeneh, 0, BT_OV_UV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vfeneh_u16b, s390_vfeneh, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vfenef_s32, s390_vfenef, 0, BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vfenef_u32a, s390_vfenef, 0, BT_OV_UV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vfenef_u32b, s390_vfenef, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++
++OB_DEF (s390_vec_cmpne_or_0_idx, s390_vfenezb_s8, s390_vfenezf_u32b, B_VX, BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vfenezb_s8, s390_vfenezb, 0, BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vfenezb_u8a, s390_vfenezb, 0, BT_OV_UV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vfenezb_u8b, s390_vfenezb, 0, BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vfenezh_s16, s390_vfenezh, 0, BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vfenezh_u16a, s390_vfenezh, 0, BT_OV_UV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vfenezh_u16b, s390_vfenezh, 0, BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vfenezf_s32, s390_vfenezf, 0, BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vfenezf_u32a, s390_vfenezf, 0, BT_OV_UV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vfenezf_u32b, s390_vfenezf, 0, BT_OV_UV4SI_UV4SI_UV4SI)
++
++OB_DEF (s390_vec_cmpne_idx_cc, s390_vfenebs_s8, s390_vfenefs_u32b, B_VX, BT_FN_INT_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vfenebs_s8, s390_vfenebs, 0, BT_OV_V16QI_V16QI_V16QI_INTPTR)
++OB_DEF_VAR (s390_vfenebs_u8a, s390_vfenebs, 0, BT_OV_UV16QI_BV16QI_BV16QI_INTPTR)
++OB_DEF_VAR (s390_vfenebs_u8b, s390_vfenebs, 0, BT_OV_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vfenehs_s16, s390_vfenehs, 0, BT_OV_V8HI_V8HI_V8HI_INTPTR)
++OB_DEF_VAR (s390_vfenehs_u16a, s390_vfenehs, 0, BT_OV_UV8HI_BV8HI_BV8HI_INTPTR)
++OB_DEF_VAR (s390_vfenehs_u16b, s390_vfenehs, 0, BT_OV_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vfenefs_s32, s390_vfenefs, 0, BT_OV_V4SI_V4SI_V4SI_INTPTR)
++OB_DEF_VAR (s390_vfenefs_u32a, s390_vfenefs, 0, BT_OV_UV4SI_BV4SI_BV4SI_INTPTR)
++OB_DEF_VAR (s390_vfenefs_u32b, s390_vfenefs, 0, BT_OV_UV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF (s390_vec_cmpne_or_0_idx_cc, s390_vfenezbs_s8, s390_vfenezfs_u32b, B_VX, BT_FN_INT_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vfenezbs_s8, s390_vfenezbs, 0, BT_OV_V16QI_V16QI_V16QI_INTPTR)
++OB_DEF_VAR (s390_vfenezbs_u8a, s390_vfenezbs, 0, BT_OV_UV16QI_BV16QI_BV16QI_INTPTR)
++OB_DEF_VAR (s390_vfenezbs_u8b, s390_vfenezbs, 0, BT_OV_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vfenezhs_s16, s390_vfenezhs, 0, BT_OV_V8HI_V8HI_V8HI_INTPTR)
++OB_DEF_VAR (s390_vfenezhs_u16a, s390_vfenezhs, 0, BT_OV_UV8HI_BV8HI_BV8HI_INTPTR)
++OB_DEF_VAR (s390_vfenezhs_u16b, s390_vfenezhs, 0, BT_OV_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vfenezfs_s32, s390_vfenezfs, 0, BT_OV_V4SI_V4SI_V4SI_INTPTR)
++OB_DEF_VAR (s390_vfenezfs_u32a, s390_vfenezfs, 0, BT_OV_UV4SI_BV4SI_BV4SI_INTPTR)
++OB_DEF_VAR (s390_vfenezfs_u32b, s390_vfenezfs, 0, BT_OV_UV4SI_UV4SI_UV4SI_INTPTR)
++
++B_DEF (s390_vistrb, vistrv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI)
++B_DEF (s390_vistrh, vistrv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI)
++B_DEF (s390_vistrf, vistrv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI)
++B_DEF (s390_vistrbs, vistrsv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_INTPTR)
++B_DEF (s390_vistrhs, vistrsv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_INTPTR)
++B_DEF (s390_vistrfs, vistrsv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_INTPTR)
++
++OB_DEF (s390_vec_cp_until_zero, s390_vistrb_s8, s390_vistrf_u32, B_VX, BT_FN_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vistrb_s8, s390_vistrb, 0, BT_OV_V16QI_V16QI)
++OB_DEF_VAR (s390_vistrb_b8, s390_vistrb, 0, BT_OV_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vistrb_u8, s390_vistrb, 0, BT_OV_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vistrh_s16, s390_vistrh, 0, BT_OV_V8HI_V8HI)
++OB_DEF_VAR (s390_vistrh_b16, s390_vistrh, 0, BT_OV_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vistrh_u16, s390_vistrh, 0, BT_OV_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vistrf_s32, s390_vistrf, 0, BT_OV_V4SI_V4SI)
++OB_DEF_VAR (s390_vistrf_b32, s390_vistrf, 0, BT_OV_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vistrf_u32, s390_vistrf, 0, BT_OV_UV4SI_UV4SI)
++
++OB_DEF (s390_vec_cp_until_zero_cc, s390_vistrbs_s8, s390_vistrfs_u32, B_VX, BT_FN_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vistrbs_s8, s390_vistrbs, 0, BT_OV_V16QI_V16QI_INTPTR)
++OB_DEF_VAR (s390_vistrbs_b8, s390_vistrbs, 0, BT_OV_BV16QI_BV16QI_INTPTR)
++OB_DEF_VAR (s390_vistrbs_u8, s390_vistrbs, 0, BT_OV_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vistrhs_s16, s390_vistrhs, 0, BT_OV_V8HI_V8HI_INTPTR)
++OB_DEF_VAR (s390_vistrhs_b16, s390_vistrhs, 0, BT_OV_BV8HI_BV8HI_INTPTR)
++OB_DEF_VAR (s390_vistrhs_u16, s390_vistrhs, 0, BT_OV_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vistrfs_s32, s390_vistrfs, 0, BT_OV_V4SI_V4SI_INTPTR)
++OB_DEF_VAR (s390_vistrfs_b32, s390_vistrfs, 0, BT_OV_BV4SI_BV4SI_INTPTR)
++OB_DEF_VAR (s390_vistrfs_u32, s390_vistrfs, 0, BT_OV_UV4SI_UV4SI_INTPTR)
++
++B_DEF (s390_vstrcb, vstrcv16qi, 0, B_VX, O4_U4, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI_INT)
++B_DEF (s390_vstrch, vstrcv8hi, 0, B_VX, O4_U4, BT_FN_UV8HI_UV8HI_UV8HI_UV8HI_INT)
++B_DEF (s390_vstrcf, vstrcv4si, 0, B_VX, O4_U4, BT_FN_UV4SI_UV4SI_UV4SI_UV4SI_INT)
++B_DEF (s390_vstrczb, vstrczv16qi, 0, B_VX, O4_U4, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI_INT)
++B_DEF (s390_vstrczh, vstrczv8hi, 0, B_VX, O4_U4, BT_FN_UV8HI_UV8HI_UV8HI_UV8HI_INT)
++B_DEF (s390_vstrczf, vstrczv4si, 0, B_VX, O4_U4, BT_FN_UV4SI_UV4SI_UV4SI_UV4SI_INT)
++B_DEF (s390_vstrcbs, vstrcsv16qi, 0, B_VX, O4_U4, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI_INT_INTPTR)
++B_DEF (s390_vstrchs, vstrcsv8hi, 0, B_VX, O4_U4, BT_FN_UV8HI_UV8HI_UV8HI_UV8HI_INT_INTPTR)
++B_DEF (s390_vstrcfs, vstrcsv4si, 0, B_VX, O4_U4, BT_FN_UV4SI_UV4SI_UV4SI_UV4SI_INT_INTPTR)
++B_DEF (s390_vstrczbs, vstrczsv16qi, 0, B_VX, O4_U4, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI_INT_INTPTR)
++B_DEF (s390_vstrczhs, vstrczsv8hi, 0, B_VX, O4_U4, BT_FN_UV8HI_UV8HI_UV8HI_UV8HI_INT_INTPTR)
++B_DEF (s390_vstrczfs, vstrczsv4si, 0, B_VX, O4_U4, BT_FN_UV4SI_UV4SI_UV4SI_UV4SI_INT_INTPTR)
++
++OB_DEF (s390_vec_cmprg_idx, s390_vstrcb_idx_u8, s390_vstrcf_idx_u32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vstrcb_idx_u8, s390_vstrcb, 0, BT_OV_UV16QI_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vstrch_idx_u16, s390_vstrch, 0, BT_OV_UV8HI_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vstrcf_idx_u32, s390_vstrcf, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV4SI)
++
++OB_DEF (s390_vec_cmpnrg_idx, s390_vstrcb_inv_idx_u8,s390_vstrcf_inv_idx_u32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vstrcb_inv_idx_u8, s390_vstrcb, 0, BT_OV_UV16QI_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vstrch_inv_idx_u16, s390_vstrch, 0, BT_OV_UV8HI_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vstrcf_inv_idx_u32, s390_vstrcf, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV4SI)
++
++OB_DEF (s390_vec_cmprg_or_0_idx, s390_vstrczb_idx_u8,s390_vstrczf_idx_u32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vstrczb_idx_u8, s390_vstrczb, 0, BT_OV_UV16QI_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vstrczh_idx_u16, s390_vstrczh, 0, BT_OV_UV8HI_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vstrczf_idx_u32, s390_vstrczf, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV4SI)
++
++OB_DEF (s390_vec_cmpnrg_or_0_idx, s390_vstrczb_inv_idx_u8,s390_vstrczf_inv_idx_u32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vstrczb_inv_idx_u8, s390_vstrczb, 0, BT_OV_UV16QI_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vstrczh_inv_idx_u16, s390_vstrczh, 0, BT_OV_UV8HI_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vstrczf_inv_idx_u32, s390_vstrczf, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV4SI)
++
++OB_DEF (s390_vec_cmprg, s390_vstrcb_u8, s390_vstrcf_u32, B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vstrcb_u8, s390_vstrcb, 0, BT_OV_BV16QI_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vstrch_u16, s390_vstrch, 0, BT_OV_BV8HI_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vstrcf_u32, s390_vstrcf, 0, BT_OV_BV4SI_UV4SI_UV4SI_UV4SI)
++
++OB_DEF (s390_vec_cmpnrg, s390_vstrcb_inv_u8, s390_vstrcf_inv_u32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vstrcb_inv_u8, s390_vstrcb, 0, BT_OV_BV16QI_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vstrch_inv_u16, s390_vstrch, 0, BT_OV_BV8HI_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vstrcf_inv_u32, s390_vstrcf, 0, BT_OV_BV4SI_UV4SI_UV4SI_UV4SI)
++
++OB_DEF (s390_vec_cmprg_idx_cc, s390_vstrcbs_idx_u8,s390_vstrcfs_idx_u32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vstrcbs_idx_u8, s390_vstrcbs, 0, BT_OV_UV16QI_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vstrchs_idx_u16, s390_vstrchs, 0, BT_OV_UV8HI_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vstrcfs_idx_u32, s390_vstrcfs, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF (s390_vec_cmpnrg_idx_cc, s390_vstrcbs_inv_idx_u8,s390_vstrcfs_inv_idx_u32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vstrcbs_inv_idx_u8, s390_vstrcbs, 0, BT_OV_UV16QI_UV16QI_UV16QI_UV16QI_INTPTR) /* vstrcb */
++OB_DEF_VAR (s390_vstrchs_inv_idx_u16, s390_vstrchs, 0, BT_OV_UV8HI_UV8HI_UV8HI_UV8HI_INTPTR) /* vstrch */
++OB_DEF_VAR (s390_vstrcfs_inv_idx_u32, s390_vstrcfs, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV4SI_INTPTR) /* vstrcf */
++
++OB_DEF (s390_vec_cmprg_or_0_idx_cc, s390_vstrczbs_idx_u8,s390_vstrczfs_idx_u32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vstrczbs_idx_u8, s390_vstrczbs, 0, BT_OV_UV16QI_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vstrczhs_idx_u16, s390_vstrczhs, 0, BT_OV_UV8HI_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vstrczfs_idx_u32, s390_vstrczfs, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF (s390_vec_cmpnrg_or_0_idx_cc,s390_vstrczbs_inv_idx_u8,s390_vstrczfs_inv_idx_u32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vstrczbs_inv_idx_u8, s390_vstrczbs, 0, BT_OV_UV16QI_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vstrczhs_inv_idx_u16, s390_vstrczhs, 0, BT_OV_UV8HI_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vstrczfs_inv_idx_u32, s390_vstrczfs, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF (s390_vec_cmprg_cc, s390_vstrcbs_u8, s390_vstrcfs_u32, B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vstrcbs_u8, s390_vstrcbs, 0, BT_OV_BV16QI_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vstrchs_u16, s390_vstrchs, 0, BT_OV_BV8HI_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vstrcfs_u32, s390_vstrcfs, 0, BT_OV_BV4SI_UV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF (s390_vec_cmpnrg_cc, s390_vstrcbs_inv_u8,s390_vstrcfs_inv_u32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vstrcbs_inv_u8, s390_vstrcbs, 0, BT_OV_BV16QI_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vstrchs_inv_u16, s390_vstrchs, 0, BT_OV_BV8HI_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vstrcfs_inv_u32, s390_vstrcfs, 0, BT_OV_BV4SI_UV4SI_UV4SI_UV4SI_INTPTR)
++
++B_DEF (s390_vec_all_nge, vec_all_unltv2df, 0, B_VX, 0, BT_FN_INT_V2DF_V2DF)
++B_DEF (s390_vec_all_ngt, vec_all_unlev2df, 0, B_VX, 0, BT_FN_INT_V2DF_V2DF)
++B_DEF (s390_vec_any_nge, vec_any_unltv2df, 0, B_VX, 0, BT_FN_INT_V2DF_V2DF)
++B_DEF (s390_vec_any_ngt, vec_any_unlev2df, 0, B_VX, 0, BT_FN_INT_V2DF_V2DF)
++
++OB_DEF (s390_vec_ctd, s390_vec_ctd_s64, s390_vec_ctd_u64, B_VX, BT_FN_V2DF_UV4SI_INT)
++OB_DEF_VAR (s390_vec_ctd_s64, s390_vec_ctd_s64, O2_U5, BT_OV_V2DF_V2DI_INT) /* vcdgb */
++OB_DEF_VAR (s390_vec_ctd_u64, s390_vec_ctd_u64, O2_U5, BT_OV_V2DF_UV2DI_INT) /* vcdlgb */
++
++B_DEF (s390_vec_ctd_s64, vec_ctd_s64, 0, B_VX, O2_U3, BT_FN_V2DF_V2DI_INT) /* vcdgb */
++B_DEF (s390_vec_ctd_u64, vec_ctd_u64, 0, B_VX, O2_U3, BT_FN_V2DF_UV2DI_INT) /* vcdlgb */
++B_DEF (s390_vcdgb, vec_di_to_df_s64, 0, B_VX, O2_U3, BT_FN_V2DF_V2DI_INT) /* vcdgb */
++B_DEF (s390_vcdlgb, vec_di_to_df_u64, 0, B_VX, O2_U3, BT_FN_V2DF_UV2DI_INT) /* vcdlgb */
++B_DEF (s390_vec_ctsl, vec_ctsl, 0, B_VX, O2_U3, BT_FN_V2DI_V2DF_INT) /* vcgdb */
++B_DEF (s390_vec_ctul, vec_ctul, 0, B_VX, O2_U3, BT_FN_UV2DI_V2DF_INT) /* vclgdb */
++B_DEF (s390_vcgdb, vec_df_to_di_s64, 0, B_VX, O2_U3, BT_FN_V2DI_V2DF_INT) /* vcgdb */
++B_DEF (s390_vclgdb, vec_df_to_di_u64, 0, B_VX, O2_U3, BT_FN_UV2DI_V2DF_INT) /* vclgdb */
++B_DEF (s390_vfidb, vfidb, 0, B_VX, O2_U4 | O3_U3, BT_FN_V2DF_V2DF_UCHAR_UCHAR)
++B_DEF (s390_vec_ld2f, vec_ld2f, 0, B_VX, 0, BT_FN_V2DF_FLTCONSTPTR) /* vldeb */
++B_DEF (s390_vec_st2f, vec_st2f, 0, B_VX, 0, BT_FN_VOID_V2DF_FLTPTR) /* vledb */
++B_DEF (s390_vfmadb, fmav2df4, 0, B_VX, 0, BT_FN_V2DF_V2DF_V2DF_V2DF)
++B_DEF (s390_vfmsdb, fmsv2df4, 0, B_VX, 0, BT_FN_V2DF_V2DF_V2DF_V2DF)
++B_DEF (s390_vflndb, vec_nabs, 0, B_VX, 0, BT_FN_V2DF_V2DF)
++B_DEF (s390_vfsqdb, sqrtv2df2, 0, B_VX, 0, BT_FN_V2DF_V2DF)
++B_DEF (s390_vftcidb, vftcidb, 0, B_VX, O2_U12,
BT_FN_V2DI_V2DF_INT_INTPTR) +--- gcc/config/s390/s390-builtins.h 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/config/s390/s390-builtins.h 2016-05-11 17:33:27.000000000 +0200 +@@ -0,0 +1,175 @@ ++/* Common data structures used for builtin handling on S/390. ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ ++ Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com). ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. */ ++ ++/* This file contains data structure definitions which can be used by ++ s390-builtins.c as well as s390-c.c. Since the latter is ++ considered to be part of the front-end we have to be careful not ++ to use any of the tree and rtx like data structures. */ ++ ++/* Builtin types, data and prototypes. */ ++ ++enum s390_builtin_type_index ++{ ++#undef DEF_TYPE ++#undef DEF_POINTER_TYPE ++#undef DEF_DISTINCT_TYPE ++#undef DEF_VECTOR_TYPE ++#undef DEF_OPAQUE_VECTOR_TYPE ++#undef DEF_FN_TYPE ++#undef DEF_OV_TYPE ++#define DEF_TYPE(INDEX, ...) INDEX, ++#define DEF_POINTER_TYPE(INDEX, ...) INDEX, ++#define DEF_DISTINCT_TYPE(INDEX, ...) INDEX, ++#define DEF_VECTOR_TYPE(INDEX, ...) INDEX, ++#define DEF_OPAQUE_VECTOR_TYPE(INDEX, ...) INDEX, ++#define DEF_FN_TYPE(...) ++#define DEF_OV_TYPE(...) ++#include "s390-builtin-types.def" ++ BT_MAX ++}; ++ ++enum s390_builtin_fn_type_index ++{ ++#undef DEF_TYPE ++#undef DEF_POINTER_TYPE ++#undef DEF_DISTINCT_TYPE ++#undef DEF_VECTOR_TYPE ++#undef DEF_OPAQUE_VECTOR_TYPE ++#undef DEF_FN_TYPE ++#undef DEF_OV_TYPE ++#define DEF_TYPE(...) ++#define DEF_POINTER_TYPE(...) ++#define DEF_DISTINCT_TYPE(...) ++#define DEF_VECTOR_TYPE(...) ++#define DEF_OPAQUE_VECTOR_TYPE(...) ++#define DEF_FN_TYPE(INDEX, ...) INDEX, ++#define DEF_OV_TYPE(...) ++#include "s390-builtin-types.def" ++ BT_FN_MAX ++}; ++ ++enum s390_builtin_ov_type_index ++{ ++#undef DEF_TYPE ++#undef DEF_POINTER_TYPE ++#undef DEF_DISTINCT_TYPE ++#undef DEF_VECTOR_TYPE ++#undef DEF_OPAQUE_VECTOR_TYPE ++#undef DEF_FN_TYPE ++#undef DEF_OV_TYPE ++#define DEF_TYPE(...) ++#define DEF_POINTER_TYPE(...) ++#define DEF_DISTINCT_TYPE(...) ++#define DEF_VECTOR_TYPE(...) ++#define DEF_OPAQUE_VECTOR_TYPE(...) ++#define DEF_FN_TYPE(...) ++#define DEF_OV_TYPE(INDEX, ...) INDEX, ++#include "s390-builtin-types.def" ++ BT_OV_MAX ++}; ++ ++#define MAX_OV_OPERANDS 6 ++ ++extern tree s390_builtin_types[BT_MAX]; ++extern tree s390_builtin_fn_types[BT_FN_MAX]; ++ ++ /* Builtins. */ ++ ++enum s390_builtins { ++#undef B_DEF ++#undef OB_DEF ++#undef OB_DEF_VAR ++#define B_DEF(NAME, ...) S390_BUILTIN_##NAME, ++#define OB_DEF(...) ++#define OB_DEF_VAR(...) ++ ++#include "s390-builtins.def" ++ S390_BUILTIN_MAX ++}; ++ ++ ++/* Generate an enumeration of all overloaded builtins defined with ++ OB_DEF in s390-builtins.def. */ ++enum s390_overloaded_builtins { ++#undef B_DEF ++#undef OB_DEF ++#undef OB_DEF_VAR ++#define B_DEF(...) ++#define OB_DEF(NAME, ...) S390_OVERLOADED_BUILTIN_##NAME, ++#define OB_DEF_VAR(...)
++#include "s390-builtins.def" ++S390_OVERLOADED_BUILTIN_MAX ++}; ++ ++/* Generate an enumeration of all variants of overloaded builtins ++ defined with OB_DEF_VAR in s390-builtins.def. */ ++enum s390_overloaded_builtin_vars { ++#undef B_DEF ++#undef OB_DEF ++#undef OB_DEF_VAR ++#define B_DEF(...) ++#define OB_DEF(...) ++#define OB_DEF_VAR(NAME, ...) S390_OVERLOADED_BUILTIN_VAR_##NAME, ++#include "s390-builtins.def" ++S390_OVERLOADED_BUILTIN_VAR_MAX ++}; ++ ++#define S390_OVERLOADED_BUILTIN_OFFSET S390_BUILTIN_MAX ++#define S390_OVERLOADED_BUILTIN_VAR_OFFSET \ ++ (S390_BUILTIN_MAX + S390_OVERLOADED_BUILTIN_MAX) ++#define S390_ALL_BUILTIN_MAX \ ++ (S390_BUILTIN_MAX + S390_OVERLOADED_BUILTIN_MAX + \ ++ S390_OVERLOADED_BUILTIN_VAR_MAX) ++ ++extern const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1]; ++extern const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1]; ++ ++extern const unsigned int ++ bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1]; ++extern const unsigned int ++ opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1]; ++ ++static inline unsigned int ++bflags_for_builtin (int fcode) ++{ ++ if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET) ++ gcc_unreachable (); ++ else if (fcode >= S390_OVERLOADED_BUILTIN_OFFSET) ++ return bflags_overloaded_builtin[fcode - S390_BUILTIN_MAX]; ++ else ++ return bflags_builtin[fcode]; ++} ++ ++static inline unsigned int ++opflags_for_builtin (int fcode) ++{ ++ if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET) ++ return opflags_overloaded_builtin_var[fcode - ++ S390_OVERLOADED_BUILTIN_VAR_OFFSET]; ++ else if (fcode >= S390_OVERLOADED_BUILTIN_OFFSET) ++ gcc_unreachable (); ++ else ++ return opflags_builtin[fcode]; ++} ++ ++extern tree s390_builtin_decls[S390_BUILTIN_MAX + ++ S390_OVERLOADED_BUILTIN_MAX + ++ S390_OVERLOADED_BUILTIN_VAR_MAX]; +--- gcc/config/s390/s390-builtin-types.def 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/config/s390/s390-builtin-types.def 2016-05-11 17:53:39.000000000 +0200 +@@ -0,0 +1,755 @@ ++/* Builtin type definitions for IBM S/390 and zSeries ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ ++ Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com). ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . 
++ <http://www.gnu.org/licenses/>.
*/ ++ ++#define DEF_FN_TYPE_1(FN_TYPE, FLAGS, T1) \ ++ DEF_FN_TYPE (FN_TYPE, \ ++ FLAGS, \ ++ s390_builtin_types[T1]) ++#define DEF_FN_TYPE_2(FN_TYPE, FLAGS, T1, T2) \ ++ DEF_FN_TYPE (FN_TYPE, \ ++ FLAGS, \ ++ s390_builtin_types[T1], \ ++ s390_builtin_types[T2]) ++#define DEF_FN_TYPE_3(FN_TYPE, FLAGS, T1, T2, T3) \ ++ DEF_FN_TYPE (FN_TYPE, \ ++ FLAGS, \ ++ s390_builtin_types[T1], \ ++ s390_builtin_types[T2], \ ++ s390_builtin_types[T3]) ++#define DEF_FN_TYPE_4(FN_TYPE, FLAGS, T1, T2, T3, T4) \ ++ DEF_FN_TYPE (FN_TYPE, \ ++ FLAGS, \ ++ s390_builtin_types[T1], \ ++ s390_builtin_types[T2], \ ++ s390_builtin_types[T3], \ ++ s390_builtin_types[T4]) ++#define DEF_FN_TYPE_5(FN_TYPE, FLAGS, T1, T2, T3, T4, T5) \ ++ DEF_FN_TYPE (FN_TYPE, \ ++ FLAGS, \ ++ s390_builtin_types[T1], \ ++ s390_builtin_types[T2], \ ++ s390_builtin_types[T3], \ ++ s390_builtin_types[T4], \ ++ s390_builtin_types[T5]) ++#define DEF_FN_TYPE_6(FN_TYPE, FLAGS, T1, T2, T3, T4, T5, T6) \ ++ DEF_FN_TYPE (FN_TYPE, \ ++ FLAGS, \ ++ s390_builtin_types[T1], \ ++ s390_builtin_types[T2], \ ++ s390_builtin_types[T3], \ ++ s390_builtin_types[T4], \ ++ s390_builtin_types[T5], \ ++ s390_builtin_types[T6]) ++DEF_TYPE (BT_INT, B_HTM | B_VX, integer_type_node, 0) ++DEF_TYPE (BT_VOID, 0, void_type_node, 0) ++DEF_TYPE (BT_FLTCONST, B_VX, float_type_node, 1) ++DEF_TYPE (BT_UINT64, B_HTM, c_uint64_type_node, 0) ++DEF_TYPE (BT_FLT, B_VX, float_type_node, 0) ++DEF_TYPE (BT_UINT, 0, unsigned_type_node, 0) ++DEF_TYPE (BT_VOIDCONST, B_VX, void_type_node, 1) ++DEF_TYPE (BT_ULONG, B_VX, long_unsigned_type_node, 0) ++DEF_TYPE (BT_USHORTCONST, B_VX, short_unsigned_type_node, 1) ++DEF_TYPE (BT_SHORTCONST, B_VX, short_integer_type_node, 1) ++DEF_TYPE (BT_INTCONST, B_VX, integer_type_node, 1) ++DEF_TYPE (BT_UCHARCONST, B_VX, unsigned_char_type_node, 1) ++DEF_TYPE (BT_UCHAR, B_VX, unsigned_char_type_node, 0) ++DEF_TYPE (BT_SCHARCONST, B_VX, signed_char_type_node, 1) ++DEF_TYPE (BT_SHORT, B_VX, short_integer_type_node, 0) ++DEF_TYPE (BT_LONG, B_VX, long_integer_type_node, 0) ++DEF_TYPE (BT_SCHAR, B_VX, signed_char_type_node, 0) ++DEF_TYPE (BT_ULONGLONGCONST, B_VX, long_long_unsigned_type_node, 1) ++DEF_TYPE (BT_USHORT, B_VX, short_unsigned_type_node, 0) ++DEF_TYPE (BT_LONGLONG, B_VX, long_long_integer_type_node, 0) ++DEF_TYPE (BT_DBLCONST, B_VX, double_type_node, 1) ++DEF_TYPE (BT_ULONGLONG, B_VX, long_long_unsigned_type_node, 0) ++DEF_TYPE (BT_DBL, B_VX, double_type_node, 0) ++DEF_TYPE (BT_LONGLONGCONST, B_VX, long_long_integer_type_node, 1) ++DEF_TYPE (BT_UINTCONST, B_VX, unsigned_type_node, 1) ++DEF_VECTOR_TYPE (BT_UV2DI, B_VX, BT_ULONGLONG, 2) ++DEF_VECTOR_TYPE (BT_V4SI, B_VX, BT_INT, 4) ++DEF_VECTOR_TYPE (BT_V8HI, B_VX, BT_SHORT, 8) ++DEF_VECTOR_TYPE (BT_UV4SI, B_VX, BT_UINT, 4) ++DEF_VECTOR_TYPE (BT_V16QI, B_VX, BT_SCHAR, 16) ++DEF_VECTOR_TYPE (BT_V2DF, B_VX, BT_DBL, 2) ++DEF_VECTOR_TYPE (BT_V2DI, B_VX, BT_LONGLONG, 2) ++DEF_VECTOR_TYPE (BT_UV8HI, B_VX, BT_USHORT, 8) ++DEF_VECTOR_TYPE (BT_UV16QI, B_VX, BT_UCHAR, 16) ++DEF_POINTER_TYPE (BT_UCHARPTR, B_VX, BT_UCHAR) ++DEF_POINTER_TYPE (BT_DBLCONSTPTR, B_VX, BT_DBLCONST) ++DEF_POINTER_TYPE (BT_VOIDPTR, B_HTM | B_VX, BT_VOID) ++DEF_POINTER_TYPE (BT_FLTPTR, B_VX, BT_FLT) ++DEF_POINTER_TYPE (BT_UINT64PTR, B_HTM, BT_UINT64) ++DEF_POINTER_TYPE (BT_SCHARPTR, B_VX, BT_SCHAR) ++DEF_POINTER_TYPE (BT_UINTCONSTPTR, B_VX, BT_UINTCONST) ++DEF_POINTER_TYPE (BT_ULONGLONGCONSTPTR, B_VX, BT_ULONGLONGCONST) ++DEF_POINTER_TYPE (BT_LONGLONGCONSTPTR, B_VX, BT_LONGLONGCONST) ++DEF_POINTER_TYPE (BT_SHORTPTR, B_VX, BT_SHORT) 
++DEF_POINTER_TYPE (BT_USHORTPTR, B_VX, BT_USHORT) ++DEF_POINTER_TYPE (BT_INTPTR, B_VX, BT_INT) ++DEF_POINTER_TYPE (BT_INTCONSTPTR, B_VX, BT_INTCONST) ++DEF_POINTER_TYPE (BT_LONGLONGPTR, B_VX, BT_LONGLONG) ++DEF_POINTER_TYPE (BT_ULONGLONGPTR, B_VX, BT_ULONGLONG) ++DEF_POINTER_TYPE (BT_DBLPTR, B_VX, BT_DBL) ++DEF_POINTER_TYPE (BT_VOIDCONSTPTR, B_VX, BT_VOIDCONST) ++DEF_POINTER_TYPE (BT_USHORTCONSTPTR, B_VX, BT_USHORTCONST) ++DEF_POINTER_TYPE (BT_SHORTCONSTPTR, B_VX, BT_SHORTCONST) ++DEF_POINTER_TYPE (BT_UCHARCONSTPTR, B_VX, BT_UCHARCONST) ++DEF_POINTER_TYPE (BT_FLTCONSTPTR, B_VX, BT_FLTCONST) ++DEF_POINTER_TYPE (BT_SCHARCONSTPTR, B_VX, BT_SCHARCONST) ++DEF_POINTER_TYPE (BT_UINTPTR, B_VX, BT_UINT) ++DEF_DISTINCT_TYPE (BT_BLONGLONG, B_VX, BT_ULONGLONG) ++DEF_DISTINCT_TYPE (BT_BINT, B_VX, BT_UINT) ++DEF_DISTINCT_TYPE (BT_BSHORT, B_VX, BT_USHORT) ++DEF_DISTINCT_TYPE (BT_BCHAR, B_VX, BT_UCHAR) ++DEF_OPAQUE_VECTOR_TYPE (BT_OV2DI, B_VX, BT_LONGLONG, 2) ++DEF_OPAQUE_VECTOR_TYPE (BT_BV16QI, B_VX, BT_BCHAR, 16) ++DEF_OPAQUE_VECTOR_TYPE (BT_OV4SI, B_VX, BT_INT, 4) ++DEF_OPAQUE_VECTOR_TYPE (BT_OUV4SI, B_VX, BT_UINT, 4) ++DEF_OPAQUE_VECTOR_TYPE (BT_BV4SI, B_VX, BT_BINT, 4) ++DEF_OPAQUE_VECTOR_TYPE (BT_BV2DI, B_VX, BT_BLONGLONG, 2) ++DEF_OPAQUE_VECTOR_TYPE (BT_BV8HI, B_VX, BT_BSHORT, 8) ++DEF_FN_TYPE_1 (BT_FN_INT, B_HTM, BT_INT) ++DEF_FN_TYPE_1 (BT_FN_UINT, 0, BT_UINT) ++DEF_FN_TYPE_2 (BT_FN_INT_INT, B_VX, BT_INT, BT_INT) ++DEF_FN_TYPE_2 (BT_FN_INT_VOIDPTR, B_HTM, BT_INT, BT_VOIDPTR) ++DEF_FN_TYPE_2 (BT_FN_OV4SI_INT, B_VX, BT_OV4SI, BT_INT) ++DEF_FN_TYPE_2 (BT_FN_OV4SI_INTCONSTPTR, B_VX, BT_OV4SI, BT_INTCONSTPTR) ++DEF_FN_TYPE_2 (BT_FN_OV4SI_OV4SI, B_VX, BT_OV4SI, BT_OV4SI) ++DEF_FN_TYPE_2 (BT_FN_UV16QI_UCHAR, B_VX, BT_UV16QI, BT_UCHAR) ++DEF_FN_TYPE_2 (BT_FN_UV16QI_UCHARCONSTPTR, B_VX, BT_UV16QI, BT_UCHARCONSTPTR) ++DEF_FN_TYPE_2 (BT_FN_UV16QI_USHORT, B_VX, BT_UV16QI, BT_USHORT) ++DEF_FN_TYPE_2 (BT_FN_UV16QI_UV16QI, B_VX, BT_UV16QI, BT_UV16QI) ++DEF_FN_TYPE_2 (BT_FN_UV2DI_ULONGLONG, B_VX, BT_UV2DI, BT_ULONGLONG) ++DEF_FN_TYPE_2 (BT_FN_UV2DI_ULONGLONGCONSTPTR, B_VX, BT_UV2DI, BT_ULONGLONGCONSTPTR) ++DEF_FN_TYPE_2 (BT_FN_UV2DI_USHORT, B_VX, BT_UV2DI, BT_USHORT) ++DEF_FN_TYPE_2 (BT_FN_UV2DI_UV2DI, B_VX, BT_UV2DI, BT_UV2DI) ++DEF_FN_TYPE_2 (BT_FN_UV2DI_UV4SI, B_VX, BT_UV2DI, BT_UV4SI) ++DEF_FN_TYPE_2 (BT_FN_UV4SI_UINT, B_VX, BT_UV4SI, BT_UINT) ++DEF_FN_TYPE_2 (BT_FN_UV4SI_UINTCONSTPTR, B_VX, BT_UV4SI, BT_UINTCONSTPTR) ++DEF_FN_TYPE_2 (BT_FN_UV4SI_USHORT, B_VX, BT_UV4SI, BT_USHORT) ++DEF_FN_TYPE_2 (BT_FN_UV4SI_UV4SI, B_VX, BT_UV4SI, BT_UV4SI) ++DEF_FN_TYPE_2 (BT_FN_UV4SI_UV8HI, B_VX, BT_UV4SI, BT_UV8HI) ++DEF_FN_TYPE_2 (BT_FN_UV8HI_USHORT, B_VX, BT_UV8HI, BT_USHORT) ++DEF_FN_TYPE_2 (BT_FN_UV8HI_USHORTCONSTPTR, B_VX, BT_UV8HI, BT_USHORTCONSTPTR) ++DEF_FN_TYPE_2 (BT_FN_UV8HI_UV16QI, B_VX, BT_UV8HI, BT_UV16QI) ++DEF_FN_TYPE_2 (BT_FN_UV8HI_UV8HI, B_VX, BT_UV8HI, BT_UV8HI) ++DEF_FN_TYPE_2 (BT_FN_V16QI_SCHAR, B_VX, BT_V16QI, BT_SCHAR) ++DEF_FN_TYPE_2 (BT_FN_V16QI_UCHAR, B_VX, BT_V16QI, BT_UCHAR) ++DEF_FN_TYPE_2 (BT_FN_V16QI_V16QI, B_VX, BT_V16QI, BT_V16QI) ++DEF_FN_TYPE_2 (BT_FN_V2DF_DBL, B_VX, BT_V2DF, BT_DBL) ++DEF_FN_TYPE_2 (BT_FN_V2DF_FLTCONSTPTR, B_VX, BT_V2DF, BT_FLTCONSTPTR) ++DEF_FN_TYPE_2 (BT_FN_V2DF_V2DF, B_VX, BT_V2DF, BT_V2DF) ++DEF_FN_TYPE_2 (BT_FN_V2DI_SHORT, B_VX, BT_V2DI, BT_SHORT) ++DEF_FN_TYPE_2 (BT_FN_V2DI_V16QI, B_VX, BT_V2DI, BT_V16QI) ++DEF_FN_TYPE_2 (BT_FN_V2DI_V2DI, B_VX, BT_V2DI, BT_V2DI) ++DEF_FN_TYPE_2 (BT_FN_V2DI_V4SI, B_VX, BT_V2DI, BT_V4SI) ++DEF_FN_TYPE_2 (BT_FN_V2DI_V8HI, B_VX, 
BT_V2DI, BT_V8HI) ++DEF_FN_TYPE_2 (BT_FN_V4SI_SHORT, B_VX, BT_V4SI, BT_SHORT) ++DEF_FN_TYPE_2 (BT_FN_V4SI_V4SI, B_VX, BT_V4SI, BT_V4SI) ++DEF_FN_TYPE_2 (BT_FN_V4SI_V8HI, B_VX, BT_V4SI, BT_V8HI) ++DEF_FN_TYPE_2 (BT_FN_V8HI_SHORT, B_VX, BT_V8HI, BT_SHORT) ++DEF_FN_TYPE_2 (BT_FN_V8HI_V16QI, B_VX, BT_V8HI, BT_V16QI) ++DEF_FN_TYPE_2 (BT_FN_V8HI_V8HI, B_VX, BT_V8HI, BT_V8HI) ++DEF_FN_TYPE_2 (BT_FN_VOID_INT, B_HTM, BT_VOID, BT_INT) ++DEF_FN_TYPE_2 (BT_FN_VOID_UINT, 0, BT_VOID, BT_UINT) ++DEF_FN_TYPE_3 (BT_FN_DBL_V2DF_INT, B_VX, BT_DBL, BT_V2DF, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_INT_OV4SI_INT, B_VX, BT_INT, BT_OV4SI, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_INT_OV4SI_OV4SI, B_VX, BT_INT, BT_OV4SI, BT_OV4SI) ++DEF_FN_TYPE_3 (BT_FN_INT_UV16QI_UV16QI, B_VX, BT_INT, BT_UV16QI, BT_UV16QI) ++DEF_FN_TYPE_3 (BT_FN_INT_UV2DI_UV2DI, B_VX, BT_INT, BT_UV2DI, BT_UV2DI) ++DEF_FN_TYPE_3 (BT_FN_INT_UV4SI_UV4SI, B_VX, BT_INT, BT_UV4SI, BT_UV4SI) ++DEF_FN_TYPE_3 (BT_FN_INT_UV8HI_UV8HI, B_VX, BT_INT, BT_UV8HI, BT_UV8HI) ++DEF_FN_TYPE_3 (BT_FN_INT_V16QI_V16QI, B_VX, BT_INT, BT_V16QI, BT_V16QI) ++DEF_FN_TYPE_3 (BT_FN_INT_V2DF_V2DF, B_VX, BT_INT, BT_V2DF, BT_V2DF) ++DEF_FN_TYPE_3 (BT_FN_INT_V2DI_V2DI, B_VX, BT_INT, BT_V2DI, BT_V2DI) ++DEF_FN_TYPE_3 (BT_FN_INT_V4SI_V4SI, B_VX, BT_INT, BT_V4SI, BT_V4SI) ++DEF_FN_TYPE_3 (BT_FN_INT_V8HI_V8HI, B_VX, BT_INT, BT_V8HI, BT_V8HI) ++DEF_FN_TYPE_3 (BT_FN_INT_VOIDPTR_INT, B_HTM, BT_INT, BT_VOIDPTR, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_OV2DI_LONGLONG_LONGLONG, B_VX, BT_OV2DI, BT_LONGLONG, BT_LONGLONG) ++DEF_FN_TYPE_3 (BT_FN_OV4SI_INTCONSTPTR_INT, B_VX, BT_OV4SI, BT_INTCONSTPTR, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_OV4SI_INTCONSTPTR_UINT, B_VX, BT_OV4SI, BT_INTCONSTPTR, BT_UINT) ++DEF_FN_TYPE_3 (BT_FN_OV4SI_INT_INT, B_VX, BT_OV4SI, BT_INT, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_OV4SI_OV4SI_INTPTR, B_VX, BT_OV4SI, BT_OV4SI, BT_INTPTR) ++DEF_FN_TYPE_3 (BT_FN_OV4SI_OV4SI_OV4SI, B_VX, BT_OV4SI, BT_OV4SI, BT_OV4SI) ++DEF_FN_TYPE_3 (BT_FN_OV4SI_OV4SI_UCHAR, B_VX, BT_OV4SI, BT_OV4SI, BT_UCHAR) ++DEF_FN_TYPE_3 (BT_FN_OV4SI_OV4SI_ULONG, B_VX, BT_OV4SI, BT_OV4SI, BT_ULONG) ++DEF_FN_TYPE_3 (BT_FN_UCHAR_UV16QI_INT, B_VX, BT_UCHAR, BT_UV16QI, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_UINT_UV4SI_INT, B_VX, BT_UINT, BT_UV4SI, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_UINT_VOIDCONSTPTR_INT, B_VX, BT_UINT, BT_VOIDCONSTPTR, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_ULONGLONG_UV2DI_INT, B_VX, BT_ULONGLONG, BT_UV2DI, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_USHORT_UV8HI_INT, B_VX, BT_USHORT, BT_UV8HI, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_UV16QI_UCHARCONSTPTR_USHORT, B_VX, BT_UV16QI, BT_UCHARCONSTPTR, BT_USHORT) ++DEF_FN_TYPE_3 (BT_FN_UV16QI_UCHAR_INT, B_VX, BT_UV16QI, BT_UCHAR, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_UV16QI_UCHAR_UCHAR, B_VX, BT_UV16QI, BT_UCHAR, BT_UCHAR) ++DEF_FN_TYPE_3 (BT_FN_UV16QI_UV16QI_INTPTR, B_VX, BT_UV16QI, BT_UV16QI, BT_INTPTR) ++DEF_FN_TYPE_3 (BT_FN_UV16QI_UV16QI_UCHAR, B_VX, BT_UV16QI, BT_UV16QI, BT_UCHAR) ++DEF_FN_TYPE_3 (BT_FN_UV16QI_UV16QI_UINT, B_VX, BT_UV16QI, BT_UV16QI, BT_UINT) ++DEF_FN_TYPE_3 (BT_FN_UV16QI_UV16QI_UV16QI, B_VX, BT_UV16QI, BT_UV16QI, BT_UV16QI) ++DEF_FN_TYPE_3 (BT_FN_UV16QI_UV2DI_UV2DI, B_VX, BT_UV16QI, BT_UV2DI, BT_UV2DI) ++DEF_FN_TYPE_3 (BT_FN_UV16QI_UV4SI_UV4SI, B_VX, BT_UV16QI, BT_UV4SI, BT_UV4SI) ++DEF_FN_TYPE_3 (BT_FN_UV16QI_UV8HI_UV8HI, B_VX, BT_UV16QI, BT_UV8HI, BT_UV8HI) ++DEF_FN_TYPE_3 (BT_FN_UV2DI_UCHAR_UCHAR, B_VX, BT_UV2DI, BT_UCHAR, BT_UCHAR) ++DEF_FN_TYPE_3 (BT_FN_UV2DI_ULONGLONG_INT, B_VX, BT_UV2DI, BT_ULONGLONG, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_UV2DI_UV2DI_UCHAR, B_VX, BT_UV2DI, BT_UV2DI, BT_UCHAR) ++DEF_FN_TYPE_3 
(BT_FN_UV2DI_UV2DI_UINT, B_VX, BT_UV2DI, BT_UV2DI, BT_UINT) ++DEF_FN_TYPE_3 (BT_FN_UV2DI_UV2DI_UV2DI, B_VX, BT_UV2DI, BT_UV2DI, BT_UV2DI) ++DEF_FN_TYPE_3 (BT_FN_UV2DI_UV4SI_UV4SI, B_VX, BT_UV2DI, BT_UV4SI, BT_UV4SI) ++DEF_FN_TYPE_3 (BT_FN_UV2DI_UV8HI_UV8HI, B_VX, BT_UV2DI, BT_UV8HI, BT_UV8HI) ++DEF_FN_TYPE_3 (BT_FN_UV2DI_V2DF_INT, B_VX, BT_UV2DI, BT_V2DF, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_UV4SI_UCHAR_UCHAR, B_VX, BT_UV4SI, BT_UCHAR, BT_UCHAR) ++DEF_FN_TYPE_3 (BT_FN_UV4SI_UINT_INT, B_VX, BT_UV4SI, BT_UINT, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_UV4SI_UV16QI_UV16QI, B_VX, BT_UV4SI, BT_UV16QI, BT_UV16QI) ++DEF_FN_TYPE_3 (BT_FN_UV4SI_UV2DI_UV2DI, B_VX, BT_UV4SI, BT_UV2DI, BT_UV2DI) ++DEF_FN_TYPE_3 (BT_FN_UV4SI_UV4SI_INTPTR, B_VX, BT_UV4SI, BT_UV4SI, BT_INTPTR) ++DEF_FN_TYPE_3 (BT_FN_UV4SI_UV4SI_UCHAR, B_VX, BT_UV4SI, BT_UV4SI, BT_UCHAR) ++DEF_FN_TYPE_3 (BT_FN_UV4SI_UV4SI_UINT, B_VX, BT_UV4SI, BT_UV4SI, BT_UINT) ++DEF_FN_TYPE_3 (BT_FN_UV4SI_UV4SI_UV4SI, B_VX, BT_UV4SI, BT_UV4SI, BT_UV4SI) ++DEF_FN_TYPE_3 (BT_FN_UV4SI_UV8HI_UV8HI, B_VX, BT_UV4SI, BT_UV8HI, BT_UV8HI) ++DEF_FN_TYPE_3 (BT_FN_UV8HI_UCHAR_UCHAR, B_VX, BT_UV8HI, BT_UCHAR, BT_UCHAR) ++DEF_FN_TYPE_3 (BT_FN_UV8HI_USHORT_INT, B_VX, BT_UV8HI, BT_USHORT, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_UV8HI_UV16QI_UV16QI, B_VX, BT_UV8HI, BT_UV16QI, BT_UV16QI) ++DEF_FN_TYPE_3 (BT_FN_UV8HI_UV4SI_UV4SI, B_VX, BT_UV8HI, BT_UV4SI, BT_UV4SI) ++DEF_FN_TYPE_3 (BT_FN_UV8HI_UV8HI_INTPTR, B_VX, BT_UV8HI, BT_UV8HI, BT_INTPTR) ++DEF_FN_TYPE_3 (BT_FN_UV8HI_UV8HI_UCHAR, B_VX, BT_UV8HI, BT_UV8HI, BT_UCHAR) ++DEF_FN_TYPE_3 (BT_FN_UV8HI_UV8HI_UINT, B_VX, BT_UV8HI, BT_UV8HI, BT_UINT) ++DEF_FN_TYPE_3 (BT_FN_UV8HI_UV8HI_UV8HI, B_VX, BT_UV8HI, BT_UV8HI, BT_UV8HI) ++DEF_FN_TYPE_3 (BT_FN_V16QI_BV16QI_V16QI, B_VX, BT_V16QI, BT_BV16QI, BT_V16QI) ++DEF_FN_TYPE_3 (BT_FN_V16QI_UINT_VOIDCONSTPTR, B_VX, BT_V16QI, BT_UINT, BT_VOIDCONSTPTR) ++DEF_FN_TYPE_3 (BT_FN_V16QI_UV16QI_UV16QI, B_VX, BT_V16QI, BT_UV16QI, BT_UV16QI) ++DEF_FN_TYPE_3 (BT_FN_V16QI_V16QI_V16QI, B_VX, BT_V16QI, BT_V16QI, BT_V16QI) ++DEF_FN_TYPE_3 (BT_FN_V16QI_V8HI_V8HI, B_VX, BT_V16QI, BT_V8HI, BT_V8HI) ++DEF_FN_TYPE_3 (BT_FN_V2DF_DBL_INT, B_VX, BT_V2DF, BT_DBL, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_V2DF_UV2DI_INT, B_VX, BT_V2DF, BT_UV2DI, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_V2DF_UV4SI_INT, B_VX, BT_V2DF, BT_UV4SI, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_V2DF_V2DF_V2DF, B_VX, BT_V2DF, BT_V2DF, BT_V2DF) ++DEF_FN_TYPE_3 (BT_FN_V2DF_V2DI_INT, B_VX, BT_V2DF, BT_V2DI, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_V2DI_BV2DI_V2DI, B_VX, BT_V2DI, BT_BV2DI, BT_V2DI) ++DEF_FN_TYPE_3 (BT_FN_V2DI_UV2DI_UV2DI, B_VX, BT_V2DI, BT_UV2DI, BT_UV2DI) ++DEF_FN_TYPE_3 (BT_FN_V2DI_V2DF_INT, B_VX, BT_V2DI, BT_V2DF, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_V2DI_V2DF_V2DF, B_VX, BT_V2DI, BT_V2DF, BT_V2DF) ++DEF_FN_TYPE_3 (BT_FN_V2DI_V2DI_V2DI, B_VX, BT_V2DI, BT_V2DI, BT_V2DI) ++DEF_FN_TYPE_3 (BT_FN_V2DI_V4SI_V4SI, B_VX, BT_V2DI, BT_V4SI, BT_V4SI) ++DEF_FN_TYPE_3 (BT_FN_V4SI_BV4SI_V4SI, B_VX, BT_V4SI, BT_BV4SI, BT_V4SI) ++DEF_FN_TYPE_3 (BT_FN_V4SI_INT_VOIDPTR, B_VX, BT_V4SI, BT_INT, BT_VOIDPTR) ++DEF_FN_TYPE_3 (BT_FN_V4SI_UV4SI_UV4SI, B_VX, BT_V4SI, BT_UV4SI, BT_UV4SI) ++DEF_FN_TYPE_3 (BT_FN_V4SI_V2DI_V2DI, B_VX, BT_V4SI, BT_V2DI, BT_V2DI) ++DEF_FN_TYPE_3 (BT_FN_V4SI_V4SI_V4SI, B_VX, BT_V4SI, BT_V4SI, BT_V4SI) ++DEF_FN_TYPE_3 (BT_FN_V4SI_V8HI_V8HI, B_VX, BT_V4SI, BT_V8HI, BT_V8HI) ++DEF_FN_TYPE_3 (BT_FN_V8HI_BV8HI_V8HI, B_VX, BT_V8HI, BT_BV8HI, BT_V8HI) ++DEF_FN_TYPE_3 (BT_FN_V8HI_UV8HI_UV8HI, B_VX, BT_V8HI, BT_UV8HI, BT_UV8HI) ++DEF_FN_TYPE_3 (BT_FN_V8HI_V16QI_V16QI, B_VX, BT_V8HI, BT_V16QI, BT_V16QI) 
++DEF_FN_TYPE_3 (BT_FN_V8HI_V4SI_V4SI, B_VX, BT_V8HI, BT_V4SI, BT_V4SI) ++DEF_FN_TYPE_3 (BT_FN_V8HI_V8HI_V8HI, B_VX, BT_V8HI, BT_V8HI, BT_V8HI) ++DEF_FN_TYPE_3 (BT_FN_VOID_UINT64PTR_UINT64, B_HTM, BT_VOID, BT_UINT64PTR, BT_UINT64) ++DEF_FN_TYPE_3 (BT_FN_VOID_V2DF_FLTPTR, B_VX, BT_VOID, BT_V2DF, BT_FLTPTR) ++DEF_FN_TYPE_4 (BT_FN_INT_OV4SI_OV4SI_INTPTR, B_VX, BT_INT, BT_OV4SI, BT_OV4SI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_OV4SI_INT_OV4SI_INT, B_VX, BT_OV4SI, BT_INT, BT_OV4SI, BT_INT) ++DEF_FN_TYPE_4 (BT_FN_OV4SI_OV4SI_OV4SI_INT, B_VX, BT_OV4SI, BT_OV4SI, BT_OV4SI, BT_INT) ++DEF_FN_TYPE_4 (BT_FN_OV4SI_OV4SI_OV4SI_INTPTR, B_VX, BT_OV4SI, BT_OV4SI, BT_OV4SI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_OV4SI_OV4SI_OV4SI_OV4SI, B_VX, BT_OV4SI, BT_OV4SI, BT_OV4SI, BT_OV4SI) ++DEF_FN_TYPE_4 (BT_FN_OV4SI_OV4SI_OV4SI_UCHAR, B_VX, BT_OV4SI, BT_OV4SI, BT_OV4SI, BT_UCHAR) ++DEF_FN_TYPE_4 (BT_FN_OV4SI_OV4SI_OV4SI_ULONGLONG, B_VX, BT_OV4SI, BT_OV4SI, BT_OV4SI, BT_ULONGLONG) ++DEF_FN_TYPE_4 (BT_FN_UV16QI_UV16QI_UCHAR_INT, B_VX, BT_UV16QI, BT_UV16QI, BT_UCHAR, BT_INT) ++DEF_FN_TYPE_4 (BT_FN_UV16QI_UV16QI_UV16QI_INT, B_VX, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INT) ++DEF_FN_TYPE_4 (BT_FN_UV16QI_UV16QI_UV16QI_INTPTR, B_VX, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_UV16QI_UV16QI_UV16QI_UV16QI, B_VX, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI) ++DEF_FN_TYPE_4 (BT_FN_UV16QI_UV2DI_UV2DI_UV16QI, B_VX, BT_UV16QI, BT_UV2DI, BT_UV2DI, BT_UV16QI) ++DEF_FN_TYPE_4 (BT_FN_UV16QI_UV8HI_UV8HI_INTPTR, B_VX, BT_UV16QI, BT_UV8HI, BT_UV8HI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_UV2DI_UV2DI_ULONGLONG_INT, B_VX, BT_UV2DI, BT_UV2DI, BT_ULONGLONG, BT_INT) ++DEF_FN_TYPE_4 (BT_FN_UV2DI_UV2DI_UV2DI_INT, B_VX, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_INT) ++DEF_FN_TYPE_4 (BT_FN_UV2DI_UV4SI_UV4SI_UV2DI, B_VX, BT_UV2DI, BT_UV4SI, BT_UV4SI, BT_UV2DI) ++DEF_FN_TYPE_4 (BT_FN_UV4SI_UV2DI_UV2DI_INTPTR, B_VX, BT_UV4SI, BT_UV2DI, BT_UV2DI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_UV4SI_UV4SI_UINT_INT, B_VX, BT_UV4SI, BT_UV4SI, BT_UINT, BT_INT) ++DEF_FN_TYPE_4 (BT_FN_UV4SI_UV4SI_UV4SI_INT, B_VX, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INT) ++DEF_FN_TYPE_4 (BT_FN_UV4SI_UV4SI_UV4SI_INTPTR, B_VX, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_UV4SI_UV4SI_UV4SI_UV4SI, B_VX, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI) ++DEF_FN_TYPE_4 (BT_FN_UV4SI_UV8HI_UV8HI_UV4SI, B_VX, BT_UV4SI, BT_UV8HI, BT_UV8HI, BT_UV4SI) ++DEF_FN_TYPE_4 (BT_FN_UV8HI_UV16QI_UV16QI_UV8HI, B_VX, BT_UV8HI, BT_UV16QI, BT_UV16QI, BT_UV8HI) ++DEF_FN_TYPE_4 (BT_FN_UV8HI_UV4SI_UV4SI_INTPTR, B_VX, BT_UV8HI, BT_UV4SI, BT_UV4SI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_UV8HI_UV8HI_USHORT_INT, B_VX, BT_UV8HI, BT_UV8HI, BT_USHORT, BT_INT) ++DEF_FN_TYPE_4 (BT_FN_UV8HI_UV8HI_UV8HI_INT, B_VX, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INT) ++DEF_FN_TYPE_4 (BT_FN_UV8HI_UV8HI_UV8HI_INTPTR, B_VX, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_UV8HI_UV8HI_UV8HI_UV8HI, B_VX, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI) ++DEF_FN_TYPE_4 (BT_FN_V16QI_UV16QI_UV16QI_INTPTR, B_VX, BT_V16QI, BT_UV16QI, BT_UV16QI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V16QI_V16QI_V16QI_INTPTR, B_VX, BT_V16QI, BT_V16QI, BT_V16QI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V16QI_V16QI_V16QI_V16QI, B_VX, BT_V16QI, BT_V16QI, BT_V16QI, BT_V16QI) ++DEF_FN_TYPE_4 (BT_FN_V16QI_V8HI_V8HI_INTPTR, B_VX, BT_V16QI, BT_V8HI, BT_V8HI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V2DF_V2DF_DBL_INT, B_VX, BT_V2DF, BT_V2DF, BT_DBL, BT_INT) ++DEF_FN_TYPE_4 (BT_FN_V2DF_V2DF_UCHAR_UCHAR, B_VX, BT_V2DF, BT_V2DF, BT_UCHAR, BT_UCHAR) ++DEF_FN_TYPE_4 (BT_FN_V2DF_V2DF_V2DF_V2DF, 
B_VX, BT_V2DF, BT_V2DF, BT_V2DF, BT_V2DF) ++DEF_FN_TYPE_4 (BT_FN_V2DI_UV2DI_UV2DI_INTPTR, B_VX, BT_V2DI, BT_UV2DI, BT_UV2DI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V2DI_V2DF_INT_INTPTR, B_VX, BT_V2DI, BT_V2DF, BT_INT, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V2DI_V2DF_V2DF_INTPTR, B_VX, BT_V2DI, BT_V2DF, BT_V2DF, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V2DI_V2DI_V2DI_INTPTR, B_VX, BT_V2DI, BT_V2DI, BT_V2DI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V2DI_V4SI_V4SI_V2DI, B_VX, BT_V2DI, BT_V4SI, BT_V4SI, BT_V2DI) ++DEF_FN_TYPE_4 (BT_FN_V4SI_UV4SI_UV4SI_INTPTR, B_VX, BT_V4SI, BT_UV4SI, BT_UV4SI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V4SI_V2DI_V2DI_INTPTR, B_VX, BT_V4SI, BT_V2DI, BT_V2DI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V4SI_V4SI_V4SI_INTPTR, B_VX, BT_V4SI, BT_V4SI, BT_V4SI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V4SI_V4SI_V4SI_V4SI, B_VX, BT_V4SI, BT_V4SI, BT_V4SI, BT_V4SI) ++DEF_FN_TYPE_4 (BT_FN_V4SI_V8HI_V8HI_V4SI, B_VX, BT_V4SI, BT_V8HI, BT_V8HI, BT_V4SI) ++DEF_FN_TYPE_4 (BT_FN_V8HI_UV8HI_UV8HI_INTPTR, B_VX, BT_V8HI, BT_UV8HI, BT_UV8HI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V8HI_V16QI_V16QI_V8HI, B_VX, BT_V8HI, BT_V16QI, BT_V16QI, BT_V8HI) ++DEF_FN_TYPE_4 (BT_FN_V8HI_V4SI_V4SI_INTPTR, B_VX, BT_V8HI, BT_V4SI, BT_V4SI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V8HI_V8HI_V8HI_INTPTR, B_VX, BT_V8HI, BT_V8HI, BT_V8HI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V8HI_V8HI_V8HI_V8HI, B_VX, BT_V8HI, BT_V8HI, BT_V8HI, BT_V8HI) ++DEF_FN_TYPE_4 (BT_FN_VOID_OV4SI_INT_VOIDPTR, B_VX, BT_VOID, BT_OV4SI, BT_INT, BT_VOIDPTR) ++DEF_FN_TYPE_4 (BT_FN_VOID_OV4SI_VOIDPTR_UINT, B_VX, BT_VOID, BT_OV4SI, BT_VOIDPTR, BT_UINT) ++DEF_FN_TYPE_4 (BT_FN_VOID_V16QI_UINT_VOIDPTR, B_VX, BT_VOID, BT_V16QI, BT_UINT, BT_VOIDPTR) ++DEF_FN_TYPE_5 (BT_FN_OV4SI_OV4SI_OUV4SI_INTCONSTPTR_UCHAR, B_VX, BT_OV4SI, BT_OV4SI, BT_OUV4SI, BT_INTCONSTPTR, BT_UCHAR) ++DEF_FN_TYPE_5 (BT_FN_OV4SI_OV4SI_OV4SI_OV4SI_INTPTR, B_VX, BT_OV4SI, BT_OV4SI, BT_OV4SI, BT_OV4SI, BT_INTPTR) ++DEF_FN_TYPE_5 (BT_FN_UV16QI_UV16QI_UV16QI_INT_INTPTR, B_VX, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INT, BT_INTPTR) ++DEF_FN_TYPE_5 (BT_FN_UV16QI_UV16QI_UV16QI_UV16QI_INT, B_VX, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INT) ++DEF_FN_TYPE_5 (BT_FN_UV2DI_UV2DI_UV2DI_ULONGLONGCONSTPTR_UCHAR, B_VX, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_ULONGLONGCONSTPTR, BT_UCHAR) ++DEF_FN_TYPE_5 (BT_FN_UV2DI_UV2DI_UV2DI_UV2DI_INT, B_VX, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_INT) ++DEF_FN_TYPE_5 (BT_FN_UV4SI_UV4SI_UV4SI_INT_INTPTR, B_VX, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INT, BT_INTPTR) ++DEF_FN_TYPE_5 (BT_FN_UV4SI_UV4SI_UV4SI_UINTCONSTPTR_UCHAR, B_VX, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UINTCONSTPTR, BT_UCHAR) ++DEF_FN_TYPE_5 (BT_FN_UV4SI_UV4SI_UV4SI_UV4SI_INT, B_VX, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INT) ++DEF_FN_TYPE_5 (BT_FN_UV8HI_UV8HI_UV8HI_INT_INTPTR, B_VX, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INT, BT_INTPTR) ++DEF_FN_TYPE_5 (BT_FN_UV8HI_UV8HI_UV8HI_UV8HI_INT, B_VX, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INT) ++DEF_FN_TYPE_5 (BT_FN_VOID_UV2DI_UV2DI_ULONGLONGPTR_ULONGLONG, B_VX, BT_VOID, BT_UV2DI, BT_UV2DI, BT_ULONGLONGPTR, BT_ULONGLONG) ++DEF_FN_TYPE_5 (BT_FN_VOID_UV4SI_UV4SI_UINTPTR_ULONGLONG, B_VX, BT_VOID, BT_UV4SI, BT_UV4SI, BT_UINTPTR, BT_ULONGLONG) ++DEF_FN_TYPE_5 (BT_FN_VOID_V4SI_V4SI_INTPTR_ULONGLONG, B_VX, BT_VOID, BT_V4SI, BT_V4SI, BT_INTPTR, BT_ULONGLONG) ++DEF_FN_TYPE_6 (BT_FN_UV16QI_UV16QI_UV16QI_UV16QI_INT_INTPTR, B_VX, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INT, BT_INTPTR) ++DEF_FN_TYPE_6 (BT_FN_UV4SI_UV4SI_UV4SI_UV4SI_INT_INTPTR, B_VX, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INT, BT_INTPTR) 
++DEF_FN_TYPE_6 (BT_FN_UV8HI_UV8HI_UV8HI_UV8HI_INT_INTPTR, B_VX, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INT, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI, BT_BV16QI, BT_BV16QI) ++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_BV16QI, BT_BV16QI, BT_BV16QI, BT_BV16QI) ++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_BV16QI_BV16QI, BT_BV16QI, BT_BV16QI, BT_BV16QI, BT_BV16QI) ++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_BV16QI_INTPTR, BT_BV16QI, BT_BV16QI, BT_BV16QI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_BV16QI_UV16QI, BT_BV16QI, BT_BV16QI, BT_BV16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_INTPTR, BT_BV16QI, BT_BV16QI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_UCHAR, BT_BV16QI, BT_BV16QI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_UV16QI, BT_BV16QI, BT_BV16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_UV4SI, BT_BV16QI, BT_BV16QI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_UV8HI, BT_BV16QI, BT_BV16QI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_BV16QI_BV8HI_BV8HI, BT_BV16QI, BT_BV8HI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_BV16QI_UV16QI_UV16QI, BT_BV16QI, BT_UV16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_BV16QI_UV16QI_UV16QI_INTPTR, BT_BV16QI, BT_UV16QI, BT_UV16QI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV16QI_UV16QI_UV16QI_UV16QI, BT_BV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_BV16QI_UV16QI_UV16QI_UV16QI_INTPTR, BT_BV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV16QI_V16QI_V16QI, BT_BV16QI, BT_V16QI, BT_V16QI) ++DEF_OV_TYPE (BT_OV_BV16QI_V16QI_V16QI_INTPTR, BT_BV16QI, BT_V16QI, BT_V16QI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI, BT_BV2DI, BT_BV2DI, BT_BV2DI) ++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI_BV2DI, BT_BV2DI, BT_BV2DI, BT_BV2DI, BT_BV2DI) ++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI_INT, BT_BV2DI, BT_BV2DI, BT_BV2DI, BT_INT) ++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI_UV16QI, BT_BV2DI, BT_BV2DI, BT_BV2DI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI_UV2DI, BT_BV2DI, BT_BV2DI, BT_BV2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_UCHAR, BT_BV2DI, BT_BV2DI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_UV16QI, BT_BV2DI, BT_BV2DI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_UV2DI_ULONGLONGCONSTPTR_UCHAR, BT_BV2DI, BT_BV2DI, BT_UV2DI, BT_ULONGLONGCONSTPTR, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_UV4SI, BT_BV2DI, BT_BV2DI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_UV8HI, BT_BV2DI, BT_BV2DI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_BV2DI_BV4SI, BT_BV2DI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_BV2DI_UV2DI_UV2DI, BT_BV2DI, BT_UV2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_BV2DI_V2DF_V2DF, BT_BV2DI, BT_V2DF, BT_V2DF) ++DEF_OV_TYPE (BT_OV_BV2DI_V2DI_V2DI, BT_BV2DI, BT_V2DI, BT_V2DI) ++DEF_OV_TYPE (BT_OV_BV4SI_BV2DI_BV2DI, BT_BV4SI, BT_BV2DI, BT_BV2DI) ++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI, BT_BV4SI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_BV4SI, BT_BV4SI, BT_BV4SI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_BV4SI_BV4SI, BT_BV4SI, BT_BV4SI, BT_BV4SI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_BV4SI_INTPTR, BT_BV4SI, BT_BV4SI, BT_BV4SI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_BV4SI_UV16QI, BT_BV4SI, BT_BV4SI, BT_BV4SI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_BV4SI_UV4SI, BT_BV4SI, BT_BV4SI, BT_BV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_INTPTR, BT_BV4SI, BT_BV4SI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_UCHAR, BT_BV4SI, BT_BV4SI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_UV16QI, BT_BV4SI, BT_BV4SI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_UV4SI, BT_BV4SI, BT_BV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_UV4SI_UINTCONSTPTR_UCHAR, BT_BV4SI, BT_BV4SI, BT_UV4SI, BT_UINTCONSTPTR, BT_UCHAR) 
++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_UV8HI, BT_BV4SI, BT_BV4SI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_BV4SI_BV8HI, BT_BV4SI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_BV4SI_UV4SI_UV4SI, BT_BV4SI, BT_UV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_BV4SI_UV4SI_UV4SI_INTPTR, BT_BV4SI, BT_UV4SI, BT_UV4SI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV4SI_UV4SI_UV4SI_UV4SI, BT_BV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_BV4SI_UV4SI_UV4SI_UV4SI_INTPTR, BT_BV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV4SI_V4SI_V4SI, BT_BV4SI, BT_V4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_BV4SI_V4SI_V4SI_INTPTR, BT_BV4SI, BT_V4SI, BT_V4SI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV8HI_BV16QI, BT_BV8HI, BT_BV16QI) ++DEF_OV_TYPE (BT_OV_BV8HI_BV4SI_BV4SI, BT_BV8HI, BT_BV4SI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI, BT_BV8HI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_BV8HI, BT_BV8HI, BT_BV8HI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_BV8HI_BV8HI, BT_BV8HI, BT_BV8HI, BT_BV8HI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_BV8HI_INTPTR, BT_BV8HI, BT_BV8HI, BT_BV8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_BV8HI_UV16QI, BT_BV8HI, BT_BV8HI, BT_BV8HI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_BV8HI_UV8HI, BT_BV8HI, BT_BV8HI, BT_BV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_INTPTR, BT_BV8HI, BT_BV8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_UCHAR, BT_BV8HI, BT_BV8HI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_UV16QI, BT_BV8HI, BT_BV8HI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_UV4SI, BT_BV8HI, BT_BV8HI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_UV8HI, BT_BV8HI, BT_BV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_BV8HI_UV8HI_UV8HI, BT_BV8HI, BT_UV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_BV8HI_UV8HI_UV8HI_INTPTR, BT_BV8HI, BT_UV8HI, BT_UV8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV8HI_UV8HI_UV8HI_UV8HI, BT_BV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_BV8HI_UV8HI_UV8HI_UV8HI_INTPTR, BT_BV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV8HI_V8HI_V8HI, BT_BV8HI, BT_V8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_BV8HI_V8HI_V8HI_INTPTR, BT_BV8HI, BT_V8HI, BT_V8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_DBL_V2DF_INT, BT_DBL, BT_V2DF, BT_INT) ++DEF_OV_TYPE (BT_OV_INT_BV16QI_BV16QI, BT_INT, BT_BV16QI, BT_BV16QI) ++DEF_OV_TYPE (BT_OV_INT_BV16QI_UV16QI, BT_INT, BT_BV16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_INT_BV16QI_V16QI, BT_INT, BT_BV16QI, BT_V16QI) ++DEF_OV_TYPE (BT_OV_INT_BV2DI_BV2DI, BT_INT, BT_BV2DI, BT_BV2DI) ++DEF_OV_TYPE (BT_OV_INT_BV2DI_UV2DI, BT_INT, BT_BV2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_INT_BV2DI_V2DI, BT_INT, BT_BV2DI, BT_V2DI) ++DEF_OV_TYPE (BT_OV_INT_BV4SI_BV4SI, BT_INT, BT_BV4SI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_INT_BV4SI_UV4SI, BT_INT, BT_BV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_INT_BV4SI_V4SI, BT_INT, BT_BV4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_INT_BV8HI_BV8HI, BT_INT, BT_BV8HI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_INT_BV8HI_UV8HI, BT_INT, BT_BV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_INT_BV8HI_V8HI, BT_INT, BT_BV8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_INT_UV16QI_BV16QI, BT_INT, BT_UV16QI, BT_BV16QI) ++DEF_OV_TYPE (BT_OV_INT_UV16QI_UV16QI, BT_INT, BT_UV16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_INT_UV2DI_BV2DI, BT_INT, BT_UV2DI, BT_BV2DI) ++DEF_OV_TYPE (BT_OV_INT_UV2DI_UV2DI, BT_INT, BT_UV2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_INT_UV4SI_BV4SI, BT_INT, BT_UV4SI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_INT_UV4SI_UV4SI, BT_INT, BT_UV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_INT_UV8HI_BV8HI, BT_INT, BT_UV8HI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_INT_UV8HI_UV8HI, BT_INT, BT_UV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_INT_V16QI_BV16QI, BT_INT, BT_V16QI, BT_BV16QI) ++DEF_OV_TYPE 
(BT_OV_INT_V16QI_UV16QI, BT_INT, BT_V16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_INT_V16QI_V16QI, BT_INT, BT_V16QI, BT_V16QI) ++DEF_OV_TYPE (BT_OV_INT_V2DF_UV2DI, BT_INT, BT_V2DF, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_INT_V2DF_V2DF, BT_INT, BT_V2DF, BT_V2DF) ++DEF_OV_TYPE (BT_OV_INT_V2DI_BV2DI, BT_INT, BT_V2DI, BT_BV2DI) ++DEF_OV_TYPE (BT_OV_INT_V2DI_UV2DI, BT_INT, BT_V2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_INT_V2DI_V2DI, BT_INT, BT_V2DI, BT_V2DI) ++DEF_OV_TYPE (BT_OV_INT_V4SI_BV4SI, BT_INT, BT_V4SI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_INT_V4SI_INT, BT_INT, BT_V4SI, BT_INT) ++DEF_OV_TYPE (BT_OV_INT_V4SI_UV4SI, BT_INT, BT_V4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_INT_V4SI_V4SI, BT_INT, BT_V4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_INT_V8HI_BV8HI, BT_INT, BT_V8HI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_INT_V8HI_UV8HI, BT_INT, BT_V8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_INT_V8HI_V8HI, BT_INT, BT_V8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_LONGLONG_V2DI_INT, BT_LONGLONG, BT_V2DI, BT_INT) ++DEF_OV_TYPE (BT_OV_SCHAR_V16QI_INT, BT_SCHAR, BT_V16QI, BT_INT) ++DEF_OV_TYPE (BT_OV_SHORT_V8HI_INT, BT_SHORT, BT_V8HI, BT_INT) ++DEF_OV_TYPE (BT_OV_UCHAR_BV16QI_INT, BT_UCHAR, BT_BV16QI, BT_INT) ++DEF_OV_TYPE (BT_OV_UCHAR_UV16QI_INT, BT_UCHAR, BT_UV16QI, BT_INT) ++DEF_OV_TYPE (BT_OV_UINT_BV4SI_INT, BT_UINT, BT_BV4SI, BT_INT) ++DEF_OV_TYPE (BT_OV_UINT_UV4SI_INT, BT_UINT, BT_UV4SI, BT_INT) ++DEF_OV_TYPE (BT_OV_ULONGLONG_BV2DI_INT, BT_ULONGLONG, BT_BV2DI, BT_INT) ++DEF_OV_TYPE (BT_OV_ULONGLONG_UV2DI_INT, BT_ULONGLONG, BT_UV2DI, BT_INT) ++DEF_OV_TYPE (BT_OV_USHORT_BV8HI_INT, BT_USHORT, BT_BV8HI, BT_INT) ++DEF_OV_TYPE (BT_OV_USHORT_UV8HI_INT, BT_USHORT, BT_UV8HI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV16QI_BV16QI_BV16QI, BT_UV16QI, BT_BV16QI, BT_BV16QI) ++DEF_OV_TYPE (BT_OV_UV16QI_BV16QI_BV16QI_INTPTR, BT_UV16QI, BT_BV16QI, BT_BV16QI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV16QI_BV16QI_UV16QI, BT_UV16QI, BT_BV16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV16QI_LONG_UCHARPTR, BT_UV16QI, BT_LONG, BT_UCHARPTR) ++DEF_OV_TYPE (BT_OV_UV16QI_UCHAR, BT_UV16QI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_UV16QI_UCHARCONSTPTR, BT_UV16QI, BT_UCHARCONSTPTR) ++DEF_OV_TYPE (BT_OV_UV16QI_UCHARCONSTPTR_UINT, BT_UV16QI, BT_UCHARCONSTPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_UV16QI_UCHARCONSTPTR_USHORT, BT_UV16QI, BT_UCHARCONSTPTR, BT_USHORT) ++DEF_OV_TYPE (BT_OV_UV16QI_UCHAR_BV16QI_INT, BT_UV16QI, BT_UCHAR, BT_BV16QI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV16QI_UCHAR_INT, BT_UV16QI, BT_UCHAR, BT_INT) ++DEF_OV_TYPE (BT_OV_UV16QI_UCHAR_UV16QI_INT, BT_UV16QI, BT_UCHAR, BT_UV16QI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI, BT_UV16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_BV16QI, BT_UV16QI, BT_UV16QI, BT_BV16QI) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_INTPTR, BT_UV16QI, BT_UV16QI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UCHAR, BT_UV16QI, BT_UV16QI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_ULONG, BT_UV16QI, BT_UV16QI, BT_ULONG) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV16QI_BV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_BV16QI) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV16QI_INT, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV16QI_INTPTR, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV16QI_UCHAR, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV16QI_ULONGLONG, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV16QI_UV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV16QI_UV16QI_INTPTR, BT_UV16QI, 
BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV4SI, BT_UV16QI, BT_UV16QI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV8HI, BT_UV16QI, BT_UV16QI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_V16QI, BT_UV16QI, BT_UV16QI, BT_V16QI) ++DEF_OV_TYPE (BT_OV_UV16QI_UV2DI_UV2DI, BT_UV16QI, BT_UV2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_UV16QI_UV4SI_UV4SI, BT_UV16QI, BT_UV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_UV16QI_UV8HI_UV8HI, BT_UV16QI, BT_UV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_UV16QI_UV8HI_UV8HI_INTPTR, BT_UV16QI, BT_UV8HI, BT_UV8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV16QI_V16QI, BT_UV16QI, BT_V16QI) ++DEF_OV_TYPE (BT_OV_UV16QI_V8HI_V8HI, BT_UV16QI, BT_V8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_UV2DI_BV2DI_UV2DI, BT_UV2DI, BT_BV2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_UV2DI_LONG_ULONGLONGPTR, BT_UV2DI, BT_LONG, BT_ULONGLONGPTR) ++DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONG, BT_UV2DI, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONGCONSTPTR, BT_UV2DI, BT_ULONGLONGCONSTPTR) ++DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONGCONSTPTR_UINT, BT_UV2DI, BT_ULONGLONGCONSTPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONGCONSTPTR_USHORT, BT_UV2DI, BT_ULONGLONGCONSTPTR, BT_USHORT) ++DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONG_BV2DI_INT, BT_UV2DI, BT_ULONGLONG, BT_BV2DI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONG_INT, BT_UV2DI, BT_ULONGLONG, BT_INT) ++DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONG_ULONGLONG, BT_UV2DI, BT_ULONGLONG, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONG_UV2DI_INT, BT_UV2DI, BT_ULONGLONG, BT_UV2DI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI, BT_UV2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_BV2DI, BT_UV2DI, BT_UV2DI, BT_BV2DI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UCHAR, BT_UV2DI, BT_UV2DI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_ULONG, BT_UV2DI, BT_UV2DI, BT_ULONG) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV16QI, BT_UV2DI, BT_UV2DI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV2DI, BT_UV2DI, BT_UV2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV2DI_BV2DI, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_BV2DI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV2DI_INT, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV2DI_UCHAR, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV2DI_ULONGLONG, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV2DI_ULONGLONGCONSTPTR_UCHAR, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_ULONGLONGCONSTPTR, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV2DI_UV16QI, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV2DI_UV2DI, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV4SI, BT_UV2DI, BT_UV2DI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV8HI, BT_UV2DI, BT_UV2DI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_V2DI, BT_UV2DI, BT_UV2DI, BT_V2DI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV4SI, BT_UV2DI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV4SI_UV4SI, BT_UV2DI, BT_UV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV4SI_UV4SI_UV2DI, BT_UV2DI, BT_UV4SI, BT_UV4SI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV8HI_UV8HI, BT_UV2DI, BT_UV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_UV2DI_V2DI, BT_UV2DI, BT_V2DI) ++DEF_OV_TYPE (BT_OV_UV4SI_BV4SI_BV4SI, BT_UV4SI, BT_BV4SI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_UV4SI_BV4SI_BV4SI_INTPTR, BT_UV4SI, BT_BV4SI, BT_BV4SI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV4SI_BV4SI_UV4SI, BT_UV4SI, BT_BV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_UV4SI_LONG_UINTPTR, BT_UV4SI, BT_LONG, BT_UINTPTR) ++DEF_OV_TYPE (BT_OV_UV4SI_UINT, BT_UV4SI, BT_UINT) ++DEF_OV_TYPE (BT_OV_UV4SI_UINTCONSTPTR, BT_UV4SI, BT_UINTCONSTPTR) ++DEF_OV_TYPE 
(BT_OV_UV4SI_UINTCONSTPTR_UINT, BT_UV4SI, BT_UINTCONSTPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_UV4SI_UINTCONSTPTR_USHORT, BT_UV4SI, BT_UINTCONSTPTR, BT_USHORT) ++DEF_OV_TYPE (BT_OV_UV4SI_UINT_BV4SI_INT, BT_UV4SI, BT_UINT, BT_BV4SI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV4SI_UINT_INT, BT_UV4SI, BT_UINT, BT_INT) ++DEF_OV_TYPE (BT_OV_UV4SI_UINT_UV4SI_INT, BT_UV4SI, BT_UINT, BT_UV4SI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV4SI_UV16QI_UV16QI, BT_UV4SI, BT_UV16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV2DI_UV2DI, BT_UV4SI, BT_UV2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV2DI_UV2DI_INTPTR, BT_UV4SI, BT_UV2DI, BT_UV2DI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI, BT_UV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_BV4SI, BT_UV4SI, BT_UV4SI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_INTPTR, BT_UV4SI, BT_UV4SI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UCHAR, BT_UV4SI, BT_UV4SI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_ULONG, BT_UV4SI, BT_UV4SI, BT_ULONG) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV16QI, BT_UV4SI, BT_UV4SI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_BV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_INT, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_INTPTR, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_UCHAR, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_UINTCONSTPTR_UCHAR, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UINTCONSTPTR, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_ULONGLONG, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_UV16QI, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_UV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_UV4SI_INTPTR, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV8HI, BT_UV4SI, BT_UV4SI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_V4SI, BT_UV4SI, BT_UV4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV8HI, BT_UV4SI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV8HI_UV8HI, BT_UV4SI, BT_UV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV8HI_UV8HI_UV4SI, BT_UV4SI, BT_UV8HI, BT_UV8HI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_UV4SI_V2DI_V2DI, BT_UV4SI, BT_V2DI, BT_V2DI) ++DEF_OV_TYPE (BT_OV_UV4SI_V4SI, BT_UV4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_UV8HI_BV8HI_BV8HI, BT_UV8HI, BT_BV8HI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_UV8HI_BV8HI_BV8HI_INTPTR, BT_UV8HI, BT_BV8HI, BT_BV8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV8HI_BV8HI_UV8HI, BT_UV8HI, BT_BV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_UV8HI_LONG_USHORTPTR, BT_UV8HI, BT_LONG, BT_USHORTPTR) ++DEF_OV_TYPE (BT_OV_UV8HI_USHORT, BT_UV8HI, BT_USHORT) ++DEF_OV_TYPE (BT_OV_UV8HI_USHORTCONSTPTR, BT_UV8HI, BT_USHORTCONSTPTR) ++DEF_OV_TYPE (BT_OV_UV8HI_USHORTCONSTPTR_UINT, BT_UV8HI, BT_USHORTCONSTPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_UV8HI_USHORTCONSTPTR_USHORT, BT_UV8HI, BT_USHORTCONSTPTR, BT_USHORT) ++DEF_OV_TYPE (BT_OV_UV8HI_USHORT_BV8HI_INT, BT_UV8HI, BT_USHORT, BT_BV8HI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV8HI_USHORT_INT, BT_UV8HI, BT_USHORT, BT_INT) ++DEF_OV_TYPE (BT_OV_UV8HI_USHORT_UV8HI_INT, BT_UV8HI, BT_USHORT, BT_UV8HI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV8HI_UV16QI, BT_UV8HI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV8HI_UV16QI_UV16QI, BT_UV8HI, BT_UV16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV8HI_UV16QI_UV16QI_UV8HI, BT_UV8HI, BT_UV16QI, BT_UV16QI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_UV8HI_UV4SI_UV4SI, BT_UV8HI, BT_UV4SI, BT_UV4SI) ++DEF_OV_TYPE 
(BT_OV_UV8HI_UV4SI_UV4SI_INTPTR, BT_UV8HI, BT_UV4SI, BT_UV4SI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI, BT_UV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_BV8HI, BT_UV8HI, BT_UV8HI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_INTPTR, BT_UV8HI, BT_UV8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UCHAR, BT_UV8HI, BT_UV8HI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_ULONG, BT_UV8HI, BT_UV8HI, BT_ULONG) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV16QI, BT_UV8HI, BT_UV8HI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV4SI, BT_UV8HI, BT_UV8HI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI_BV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI_INT, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI_INTPTR, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI_UCHAR, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI_ULONGLONG, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI_UV16QI, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI_UV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI_UV8HI_INTPTR, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_V8HI, BT_UV8HI, BT_UV8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_UV8HI_V4SI_V4SI, BT_UV8HI, BT_V4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_UV8HI_V8HI, BT_UV8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_V16QI_BV16QI_V16QI, BT_V16QI, BT_BV16QI, BT_V16QI) ++DEF_OV_TYPE (BT_OV_V16QI_LONG_SCHARPTR, BT_V16QI, BT_LONG, BT_SCHARPTR) ++DEF_OV_TYPE (BT_OV_V16QI_SCHAR, BT_V16QI, BT_SCHAR) ++DEF_OV_TYPE (BT_OV_V16QI_SCHARCONSTPTR, BT_V16QI, BT_SCHARCONSTPTR) ++DEF_OV_TYPE (BT_OV_V16QI_SCHARCONSTPTR_UINT, BT_V16QI, BT_SCHARCONSTPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_V16QI_SCHARCONSTPTR_USHORT, BT_V16QI, BT_SCHARCONSTPTR, BT_USHORT) ++DEF_OV_TYPE (BT_OV_V16QI_SCHAR_INT, BT_V16QI, BT_SCHAR, BT_INT) ++DEF_OV_TYPE (BT_OV_V16QI_SCHAR_V16QI_INT, BT_V16QI, BT_SCHAR, BT_V16QI, BT_INT) ++DEF_OV_TYPE (BT_OV_V16QI_UV16QI_V16QI_V16QI, BT_V16QI, BT_UV16QI, BT_V16QI, BT_V16QI) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI, BT_V16QI, BT_V16QI) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_BV16QI, BT_V16QI, BT_V16QI, BT_BV16QI) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_INTPTR, BT_V16QI, BT_V16QI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_UCHAR, BT_V16QI, BT_V16QI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_ULONG, BT_V16QI, BT_V16QI, BT_ULONG) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_UV16QI, BT_V16QI, BT_V16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_UV16QI_UCHAR, BT_V16QI, BT_V16QI, BT_UV16QI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_UV16QI_UV16QI, BT_V16QI, BT_V16QI, BT_UV16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_UV4SI, BT_V16QI, BT_V16QI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_UV8HI, BT_V16QI, BT_V16QI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_V16QI, BT_V16QI, BT_V16QI, BT_V16QI) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_V16QI_BV16QI, BT_V16QI, BT_V16QI, BT_V16QI, BT_BV16QI) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_V16QI_INT, BT_V16QI, BT_V16QI, BT_V16QI, BT_INT) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_V16QI_INTPTR, BT_V16QI, BT_V16QI, BT_V16QI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_V16QI_ULONGLONG, BT_V16QI, BT_V16QI, BT_V16QI, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_V16QI_UV16QI, BT_V16QI, BT_V16QI, BT_V16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_V16QI_V16QI, BT_V16QI, BT_V16QI, BT_V16QI, BT_V16QI) ++DEF_OV_TYPE 
(BT_OV_V16QI_V8HI_V8HI, BT_V16QI, BT_V8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_V16QI_V8HI_V8HI_INTPTR, BT_V16QI, BT_V8HI, BT_V8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_V2DF_BV2DI_V2DF, BT_V2DF, BT_BV2DI, BT_V2DF) ++DEF_OV_TYPE (BT_OV_V2DF_DBL, BT_V2DF, BT_DBL) ++DEF_OV_TYPE (BT_OV_V2DF_DBLCONSTPTR, BT_V2DF, BT_DBLCONSTPTR) ++DEF_OV_TYPE (BT_OV_V2DF_DBLCONSTPTR_UINT, BT_V2DF, BT_DBLCONSTPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_V2DF_DBLCONSTPTR_USHORT, BT_V2DF, BT_DBLCONSTPTR, BT_USHORT) ++DEF_OV_TYPE (BT_OV_V2DF_DBL_INT, BT_V2DF, BT_DBL, BT_INT) ++DEF_OV_TYPE (BT_OV_V2DF_DBL_V2DF_INT, BT_V2DF, BT_DBL, BT_V2DF, BT_INT) ++DEF_OV_TYPE (BT_OV_V2DF_LONG_DBLPTR, BT_V2DF, BT_LONG, BT_DBLPTR) ++DEF_OV_TYPE (BT_OV_V2DF_UV2DI_INT, BT_V2DF, BT_UV2DI, BT_INT) ++DEF_OV_TYPE (BT_OV_V2DF_V2DF, BT_V2DF, BT_V2DF) ++DEF_OV_TYPE (BT_OV_V2DF_V2DF_BV2DI, BT_V2DF, BT_V2DF, BT_BV2DI) ++DEF_OV_TYPE (BT_OV_V2DF_V2DF_UCHAR, BT_V2DF, BT_V2DF, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_V2DF_V2DF_UV2DI, BT_V2DF, BT_V2DF, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_V2DF_V2DF_UV2DI_DBLCONSTPTR_UCHAR, BT_V2DF, BT_V2DF, BT_UV2DI, BT_DBLCONSTPTR, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_V2DF_V2DF_V2DF, BT_V2DF, BT_V2DF, BT_V2DF) ++DEF_OV_TYPE (BT_OV_V2DF_V2DF_V2DF_BV2DI, BT_V2DF, BT_V2DF, BT_V2DF, BT_BV2DI) ++DEF_OV_TYPE (BT_OV_V2DF_V2DF_V2DF_INT, BT_V2DF, BT_V2DF, BT_V2DF, BT_INT) ++DEF_OV_TYPE (BT_OV_V2DF_V2DF_V2DF_ULONGLONG, BT_V2DF, BT_V2DF, BT_V2DF, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_V2DF_V2DF_V2DF_UV16QI, BT_V2DF, BT_V2DF, BT_V2DF, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_V2DF_V2DF_V2DF_UV2DI, BT_V2DF, BT_V2DF, BT_V2DF, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_V2DF_V2DF_V2DI, BT_V2DF, BT_V2DF, BT_V2DI) ++DEF_OV_TYPE (BT_OV_V2DF_V2DI_INT, BT_V2DF, BT_V2DI, BT_INT) ++DEF_OV_TYPE (BT_OV_V2DI_BV2DI_V2DI, BT_V2DI, BT_BV2DI, BT_V2DI) ++DEF_OV_TYPE (BT_OV_V2DI_LONGLONG, BT_V2DI, BT_LONGLONG) ++DEF_OV_TYPE (BT_OV_V2DI_LONGLONGCONSTPTR, BT_V2DI, BT_LONGLONGCONSTPTR) ++DEF_OV_TYPE (BT_OV_V2DI_LONGLONGCONSTPTR_UINT, BT_V2DI, BT_LONGLONGCONSTPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_V2DI_LONGLONGCONSTPTR_USHORT, BT_V2DI, BT_LONGLONGCONSTPTR, BT_USHORT) ++DEF_OV_TYPE (BT_OV_V2DI_LONGLONG_INT, BT_V2DI, BT_LONGLONG, BT_INT) ++DEF_OV_TYPE (BT_OV_V2DI_LONGLONG_LONGLONG, BT_V2DI, BT_LONGLONG, BT_LONGLONG) ++DEF_OV_TYPE (BT_OV_V2DI_LONGLONG_V2DI_INT, BT_V2DI, BT_LONGLONG, BT_V2DI, BT_INT) ++DEF_OV_TYPE (BT_OV_V2DI_LONG_LONGLONGPTR, BT_V2DI, BT_LONG, BT_LONGLONGPTR) ++DEF_OV_TYPE (BT_OV_V2DI_V16QI, BT_V2DI, BT_V16QI) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI, BT_V2DI, BT_V2DI) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_BV2DI, BT_V2DI, BT_V2DI, BT_BV2DI) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_UCHAR, BT_V2DI, BT_V2DI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_ULONG, BT_V2DI, BT_V2DI, BT_ULONG) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_UV16QI, BT_V2DI, BT_V2DI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_UV2DI, BT_V2DI, BT_V2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_UV2DI_LONGLONGCONSTPTR_UCHAR, BT_V2DI, BT_V2DI, BT_UV2DI, BT_LONGLONGCONSTPTR, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_UV2DI_UCHAR, BT_V2DI, BT_V2DI, BT_UV2DI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_UV4SI, BT_V2DI, BT_V2DI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_UV8HI, BT_V2DI, BT_V2DI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_V2DI, BT_V2DI, BT_V2DI, BT_V2DI) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_V2DI_BV2DI, BT_V2DI, BT_V2DI, BT_V2DI, BT_BV2DI) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_V2DI_INT, BT_V2DI, BT_V2DI, BT_V2DI, BT_INT) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_V2DI_ULONGLONG, BT_V2DI, BT_V2DI, BT_V2DI, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_V2DI_UV16QI, BT_V2DI, BT_V2DI, BT_V2DI, BT_UV16QI) 
++DEF_OV_TYPE (BT_OV_V2DI_V2DI_V2DI_UV2DI, BT_V2DI, BT_V2DI, BT_V2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_V2DI_V4SI, BT_V2DI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_V2DI_V4SI_V4SI, BT_V2DI, BT_V4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_V2DI_V4SI_V4SI_V2DI, BT_V2DI, BT_V4SI, BT_V4SI, BT_V2DI) ++DEF_OV_TYPE (BT_OV_V2DI_V8HI, BT_V2DI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_V4SI_BV4SI_V4SI, BT_V4SI, BT_BV4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_V4SI_INT, BT_V4SI, BT_INT) ++DEF_OV_TYPE (BT_OV_V4SI_INTCONSTPTR, BT_V4SI, BT_INTCONSTPTR) ++DEF_OV_TYPE (BT_OV_V4SI_INTCONSTPTR_UINT, BT_V4SI, BT_INTCONSTPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_V4SI_INTCONSTPTR_USHORT, BT_V4SI, BT_INTCONSTPTR, BT_USHORT) ++DEF_OV_TYPE (BT_OV_V4SI_INT_INT, BT_V4SI, BT_INT, BT_INT) ++DEF_OV_TYPE (BT_OV_V4SI_INT_V4SI_INT, BT_V4SI, BT_INT, BT_V4SI, BT_INT) ++DEF_OV_TYPE (BT_OV_V4SI_LONG_INTPTR, BT_V4SI, BT_LONG, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_V4SI_UV4SI_V4SI_V4SI, BT_V4SI, BT_UV4SI, BT_V4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_V4SI_V2DI_V2DI, BT_V4SI, BT_V2DI, BT_V2DI) ++DEF_OV_TYPE (BT_OV_V4SI_V2DI_V2DI_INTPTR, BT_V4SI, BT_V2DI, BT_V2DI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI, BT_V4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_BV4SI, BT_V4SI, BT_V4SI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_INTPTR, BT_V4SI, BT_V4SI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_UCHAR, BT_V4SI, BT_V4SI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_ULONG, BT_V4SI, BT_V4SI, BT_ULONG) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_UV16QI, BT_V4SI, BT_V4SI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_UV4SI, BT_V4SI, BT_V4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_UV4SI_INTCONSTPTR_UCHAR, BT_V4SI, BT_V4SI, BT_UV4SI, BT_INTCONSTPTR, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_UV4SI_UCHAR, BT_V4SI, BT_V4SI, BT_UV4SI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_UV4SI_UV4SI, BT_V4SI, BT_V4SI, BT_UV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_UV8HI, BT_V4SI, BT_V4SI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_V4SI, BT_V4SI, BT_V4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_V4SI_BV4SI, BT_V4SI, BT_V4SI, BT_V4SI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_V4SI_INT, BT_V4SI, BT_V4SI, BT_V4SI, BT_INT) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_V4SI_INTPTR, BT_V4SI, BT_V4SI, BT_V4SI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_V4SI_ULONGLONG, BT_V4SI, BT_V4SI, BT_V4SI, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_V4SI_UV16QI, BT_V4SI, BT_V4SI, BT_V4SI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_V4SI_UV4SI, BT_V4SI, BT_V4SI, BT_V4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_V4SI_V4SI, BT_V4SI, BT_V4SI, BT_V4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_V4SI_V8HI, BT_V4SI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_V4SI_V8HI_V8HI, BT_V4SI, BT_V8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_V4SI_V8HI_V8HI_V4SI, BT_V4SI, BT_V8HI, BT_V8HI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_V8HI_BV8HI_V8HI, BT_V8HI, BT_BV8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_V8HI_LONG_SHORTPTR, BT_V8HI, BT_LONG, BT_SHORTPTR) ++DEF_OV_TYPE (BT_OV_V8HI_SHORT, BT_V8HI, BT_SHORT) ++DEF_OV_TYPE (BT_OV_V8HI_SHORTCONSTPTR, BT_V8HI, BT_SHORTCONSTPTR) ++DEF_OV_TYPE (BT_OV_V8HI_SHORTCONSTPTR_UINT, BT_V8HI, BT_SHORTCONSTPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_V8HI_SHORTCONSTPTR_USHORT, BT_V8HI, BT_SHORTCONSTPTR, BT_USHORT) ++DEF_OV_TYPE (BT_OV_V8HI_SHORT_INT, BT_V8HI, BT_SHORT, BT_INT) ++DEF_OV_TYPE (BT_OV_V8HI_SHORT_V8HI_INT, BT_V8HI, BT_SHORT, BT_V8HI, BT_INT) ++DEF_OV_TYPE (BT_OV_V8HI_UV8HI_V8HI_V8HI, BT_V8HI, BT_UV8HI, BT_V8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_V8HI_V16QI, BT_V8HI, BT_V16QI) ++DEF_OV_TYPE (BT_OV_V8HI_V16QI_V16QI, BT_V8HI, BT_V16QI, BT_V16QI) ++DEF_OV_TYPE (BT_OV_V8HI_V16QI_V16QI_V8HI, BT_V8HI, BT_V16QI, BT_V16QI, BT_V8HI) 
++DEF_OV_TYPE (BT_OV_V8HI_V4SI_V4SI, BT_V8HI, BT_V4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_V8HI_V4SI_V4SI_INTPTR, BT_V8HI, BT_V4SI, BT_V4SI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI, BT_V8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_BV8HI, BT_V8HI, BT_V8HI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_INTPTR, BT_V8HI, BT_V8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_UCHAR, BT_V8HI, BT_V8HI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_ULONG, BT_V8HI, BT_V8HI, BT_ULONG) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_UV16QI, BT_V8HI, BT_V8HI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_UV4SI, BT_V8HI, BT_V8HI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_UV8HI, BT_V8HI, BT_V8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_UV8HI_UCHAR, BT_V8HI, BT_V8HI, BT_UV8HI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_UV8HI_UV8HI, BT_V8HI, BT_V8HI, BT_UV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_V8HI, BT_V8HI, BT_V8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_V8HI_BV8HI, BT_V8HI, BT_V8HI, BT_V8HI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_V8HI_INT, BT_V8HI, BT_V8HI, BT_V8HI, BT_INT) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_V8HI_INTPTR, BT_V8HI, BT_V8HI, BT_V8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_V8HI_ULONGLONG, BT_V8HI, BT_V8HI, BT_V8HI, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_V8HI_UV16QI, BT_V8HI, BT_V8HI, BT_V8HI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_V8HI_UV8HI, BT_V8HI, BT_V8HI, BT_V8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_V8HI_V8HI, BT_V8HI, BT_V8HI, BT_V8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_VOID_BV2DI_UV2DI_ULONGLONGPTR_ULONGLONG, BT_VOID, BT_BV2DI, BT_UV2DI, BT_ULONGLONGPTR, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_VOID_BV4SI_UV4SI_UINTPTR_ULONGLONG, BT_VOID, BT_BV4SI, BT_UV4SI, BT_UINTPTR, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_VOID_UV16QI_LONG_UCHARPTR, BT_VOID, BT_UV16QI, BT_LONG, BT_UCHARPTR) ++DEF_OV_TYPE (BT_OV_VOID_UV16QI_UCHARPTR_UINT, BT_VOID, BT_UV16QI, BT_UCHARPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_VOID_UV2DI_LONG_ULONGLONGPTR, BT_VOID, BT_UV2DI, BT_LONG, BT_ULONGLONGPTR) ++DEF_OV_TYPE (BT_OV_VOID_UV2DI_ULONGLONGPTR_UINT, BT_VOID, BT_UV2DI, BT_ULONGLONGPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_VOID_UV2DI_UV2DI_ULONGLONGPTR_ULONGLONG, BT_VOID, BT_UV2DI, BT_UV2DI, BT_ULONGLONGPTR, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_VOID_UV4SI_LONG_UINTPTR, BT_VOID, BT_UV4SI, BT_LONG, BT_UINTPTR) ++DEF_OV_TYPE (BT_OV_VOID_UV4SI_UINTPTR_UINT, BT_VOID, BT_UV4SI, BT_UINTPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_VOID_UV4SI_UV4SI_UINTPTR_ULONGLONG, BT_VOID, BT_UV4SI, BT_UV4SI, BT_UINTPTR, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_VOID_UV8HI_LONG_USHORTPTR, BT_VOID, BT_UV8HI, BT_LONG, BT_USHORTPTR) ++DEF_OV_TYPE (BT_OV_VOID_UV8HI_USHORTPTR_UINT, BT_VOID, BT_UV8HI, BT_USHORTPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_VOID_V16QI_LONG_SCHARPTR, BT_VOID, BT_V16QI, BT_LONG, BT_SCHARPTR) ++DEF_OV_TYPE (BT_OV_VOID_V16QI_SCHARPTR_UINT, BT_VOID, BT_V16QI, BT_SCHARPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_VOID_V2DF_DBLPTR_UINT, BT_VOID, BT_V2DF, BT_DBLPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_VOID_V2DF_LONG_DBLPTR, BT_VOID, BT_V2DF, BT_LONG, BT_DBLPTR) ++DEF_OV_TYPE (BT_OV_VOID_V2DF_UV2DI_DBLPTR_ULONGLONG, BT_VOID, BT_V2DF, BT_UV2DI, BT_DBLPTR, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_VOID_V2DI_LONGLONGPTR_UINT, BT_VOID, BT_V2DI, BT_LONGLONGPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_VOID_V2DI_LONG_LONGLONGPTR, BT_VOID, BT_V2DI, BT_LONG, BT_LONGLONGPTR) ++DEF_OV_TYPE (BT_OV_VOID_V2DI_UV2DI_LONGLONGPTR_ULONGLONG, BT_VOID, BT_V2DI, BT_UV2DI, BT_LONGLONGPTR, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_VOID_V4SI_INTPTR_UINT, BT_VOID, BT_V4SI, BT_INTPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_VOID_V4SI_LONG_INTPTR, BT_VOID, BT_V4SI, BT_LONG, 
BT_INTPTR) ++DEF_OV_TYPE (BT_OV_VOID_V4SI_UV4SI_INTPTR_ULONGLONG, BT_VOID, BT_V4SI, BT_UV4SI, BT_INTPTR, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_VOID_V8HI_LONG_SHORTPTR, BT_VOID, BT_V8HI, BT_LONG, BT_SHORTPTR) ++DEF_OV_TYPE (BT_OV_VOID_V8HI_SHORTPTR_UINT, BT_VOID, BT_V8HI, BT_SHORTPTR, BT_UINT) +--- gcc/config/s390/s390.c 2015-06-18 16:33:04.000000000 +0200 ++++ gcc/config/s390/s390.c 2016-05-11 19:11:44.333028400 +0200 +@@ -52,6 +52,10 @@ along with GCC; see the file COPYING3. + #include "params.h" + #include "cfgloop.h" + #include "opts.h" ++#include "intl.h" ++#include "plugin-api.h" ++#include "cgraph.h" ++#include "tm-constrs.h" + + /* Define the specific costs for a given cpu. */ + +@@ -288,6 +292,19 @@ extern int reload_completed; + + /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */ + static rtx last_scheduled_insn; ++#define MAX_SCHED_UNITS 3 ++static int last_scheduled_unit_distance[MAX_SCHED_UNITS]; ++ ++/* The maximum score added for an instruction whose unit hasn't been ++ in use for MAX_SCHED_MIX_DISTANCE steps. Increase this value to ++ give instruction mix scheduling more priority over instruction ++ grouping. */ ++#define MAX_SCHED_MIX_SCORE 8 ++ ++/* The maximum distance up to which individual scores will be ++ calculated. Everything beyond this gives MAX_SCHED_MIX_SCORE. ++ Increase this with the OOO window size of the machine. */ ++#define MAX_SCHED_MIX_DISTANCE 100 + + /* Structure used to hold the components of a S/390 memory + address. A legitimate address on S/390 is of the general +@@ -387,6 +404,7 @@ struct GTY(()) machine_function + /* Number of GPRs and FPRs used for argument passing. */ + #define GP_ARG_NUM_REG 5 + #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2) ++#define VEC_ARG_NUM_REG 8 + + /* A couple of shortcuts. */ + #define CONST_OK_FOR_J(x) \ +@@ -407,6 +425,539 @@ struct GTY(()) machine_function + bytes on a z10 (or higher) CPU. */ + #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048) + ++ ++/* Indicate which ABI has been used for passing vector args. ++ 0 - no vector type arguments have been passed where the ABI is relevant ++ 1 - the old ABI has been used ++ 2 - a vector type argument has been passed either in a vector register ++ or on the stack by value */ ++static int s390_vector_abi = 0; ++ ++/* Set the vector ABI marker if TYPE is subject to the vector ABI ++ switch. The vector ABI affects only vector data types. There are ++ two aspects of the vector ABI relevant here: ++ ++ 1. vectors >= 16 bytes have an alignment of 8 bytes with the new ++ ABI and natural alignment with the old. ++ ++ 2. vectors <= 16 bytes are passed in VRs or by value on the stack ++ with the new ABI but by reference on the stack with the old. ++ ++ If ARG_P is true TYPE is used for a function argument or return ++ value. The ABI marker then is set for all vector data types. If ++ ARG_P is false only type 1 vectors are being checked.
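++ ++ For illustration (the typedef name is arbitrary), a 16 byte vector type such as ++ ++ typedef int v4si __attribute__ ((vector_size (16))); ++ ++ is subject to both aspects: as an argument it is passed in a VR or by value under the new ABI but by reference under the old one, and its alignment as a struct member differs between the two.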
*/ ++ ++static void ++s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p) ++{ ++ static htab_t visited_types_hash ++ = htab_create (37, htab_hash_pointer, htab_eq_pointer, free); ++ void **slot; ++ ++ if (s390_vector_abi) ++ return; ++ ++ if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK) ++ return; ++ ++ slot = htab_find_slot (visited_types_hash, type, INSERT); ++ if (*slot) ++ return; ++ ++ *slot = CONST_CAST_TREE (type); ++ ++ if (TREE_CODE (type) == VECTOR_TYPE) ++ { ++ int type_size = int_size_in_bytes (type); ++ ++ /* Outside arguments only the alignment is changing and this ++ only happens for vector types >= 16 bytes. */ ++ if (!arg_p && type_size < 16) ++ return; ++ ++ /* In arguments vector types > 16 are passed as before (GCC ++ never enforced the bigger alignment for arguments which was ++ required by the old vector ABI). However, it might still be ++ ABI relevant due to the changed alignment if it is a struct ++ member. */ ++ if (arg_p && type_size > 16 && !in_struct_p) ++ return; ++ ++ s390_vector_abi = TARGET_VX_ABI ? 2 : 1; ++ } ++ else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE) ++ { ++ /* ARRAY_TYPE: Since with neither of the ABIs we have more than ++ natural alignment there will never be ABI dependent padding ++ in an array type. That's why we do not set in_struct_p to ++ true here. */ ++ s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p); ++ } ++ else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE) ++ { ++ tree arg_chain; ++ ++ /* Check the return type. */ ++ s390_check_type_for_vector_abi (TREE_TYPE (type), true, false); ++ ++ for (arg_chain = TYPE_ARG_TYPES (type); ++ arg_chain; ++ arg_chain = TREE_CHAIN (arg_chain)) ++ s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false); ++ } ++ else if (RECORD_OR_UNION_TYPE_P (type)) ++ { ++ tree field; ++ ++ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) != FIELD_DECL) ++ continue; ++ ++ s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true); ++ } ++ } ++} ++ ++ ++/* System z builtins. */ ++ ++#include "s390-builtins.h" ++ ++const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] = ++ { ++#undef B_DEF ++#undef OB_DEF ++#undef OB_DEF_VAR ++#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS, ++#define OB_DEF(...) ++#define OB_DEF_VAR(...) ++#include "s390-builtins.def" ++ 0 ++ }; ++ ++const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] = ++ { ++#undef B_DEF ++#undef OB_DEF ++#undef OB_DEF_VAR ++#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS, ++#define OB_DEF(...) ++#define OB_DEF_VAR(...) ++#include "s390-builtins.def" ++ 0 ++ }; ++ ++const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] = ++ { ++#undef B_DEF ++#undef OB_DEF ++#undef OB_DEF_VAR ++#define B_DEF(...) ++#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS, ++#define OB_DEF_VAR(...) ++#include "s390-builtins.def" ++ 0 ++ }; ++ ++const unsigned int ++opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] = ++ { ++#undef B_DEF ++#undef OB_DEF ++#undef OB_DEF_VAR ++#define B_DEF(...) ++#define OB_DEF(...) 
++#define OB_DEF_VAR(NAME, PATTERN, FLAGS, FNTYPE) FLAGS, ++#include "s390-builtins.def" ++ 0 ++ }; ++ ++tree s390_builtin_types[BT_MAX]; ++tree s390_builtin_fn_types[BT_FN_MAX]; ++tree s390_builtin_decls[S390_BUILTIN_MAX + ++ S390_OVERLOADED_BUILTIN_MAX + ++ S390_OVERLOADED_BUILTIN_VAR_MAX]; ++ ++static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = { ++#undef B_DEF ++#undef OB_DEF ++#undef OB_DEF_VAR ++#define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN, ++#define OB_DEF(...) ++#define OB_DEF_VAR(...) ++ ++#include "s390-builtins.def" ++ CODE_FOR_nothing ++}; ++ ++static void ++s390_init_builtins (void) ++{ ++ /* These definitions are being used in s390-builtins.def. */ ++ tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"), ++ NULL, NULL); ++ tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL); ++ tree c_uint64_type_node; ++ unsigned int bflags_mask = (BFLAGS_MASK_INIT); ++ ++ bflags_mask |= (TARGET_VX) ? B_VX : 0; ++ bflags_mask |= (TARGET_HTM) ? B_HTM : 0; ++ ++ /* The uint64_type_node from tree.c is not compatible to the C99 ++ uint64_t data type. What we want is c_uint64_type_node from ++ c-common.c. But since backend code is not supposed to interface ++ with the frontend we recreate it here. */ ++ if (TARGET_64BIT) ++ c_uint64_type_node = long_unsigned_type_node; ++ else ++ c_uint64_type_node = long_long_unsigned_type_node; ++ ++#undef DEF_TYPE ++#define DEF_TYPE(INDEX, BFLAGS, NODE, CONST_P) \ ++ if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \ ++ s390_builtin_types[INDEX] = (!CONST_P) ? \ ++ (NODE) : build_type_variant ((NODE), 1, 0); ++ ++#undef DEF_POINTER_TYPE ++#define DEF_POINTER_TYPE(INDEX, BFLAGS, INDEX_BASE) \ ++ if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \ ++ s390_builtin_types[INDEX] = \ ++ build_pointer_type (s390_builtin_types[INDEX_BASE]); ++ ++#undef DEF_DISTINCT_TYPE ++#define DEF_DISTINCT_TYPE(INDEX, BFLAGS, INDEX_BASE) \ ++ if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \ ++ s390_builtin_types[INDEX] = \ ++ build_distinct_type_copy (s390_builtin_types[INDEX_BASE]); ++ ++#undef DEF_VECTOR_TYPE ++#define DEF_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS) \ ++ if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \ ++ s390_builtin_types[INDEX] = \ ++ build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS); ++ ++#undef DEF_OPAQUE_VECTOR_TYPE ++#define DEF_OPAQUE_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS) \ ++ if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \ ++ s390_builtin_types[INDEX] = \ ++ build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS); ++ ++#undef DEF_FN_TYPE ++#define DEF_FN_TYPE(INDEX, BFLAGS, args...) \ ++ if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \ ++ s390_builtin_fn_types[INDEX] = \ ++ build_function_type_list (args, NULL_TREE); ++#undef DEF_OV_TYPE ++#define DEF_OV_TYPE(...) 
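++ ++/* All of the tables above are filled via the classic X-macro technique: each consumer redefines the DEF_* macros to extract one column and then re-includes the .def file. A minimal, self-contained sketch of the idiom (hypothetical names): ++ ++ #define TYPE_LIST T(BT_INT) T(BT_UINT) ++ #define T(n) n, ++ enum bt_index { TYPE_LIST BT_MAX }; ++ #undef T ++ ++ Here s390-builtin-types.def plays the role of TYPE_LIST. */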
++#include "s390-builtin-types.def" ++ ++#undef B_DEF ++#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \ ++ if (((BFLAGS) & ~bflags_mask) == 0) \ ++ s390_builtin_decls[S390_BUILTIN_##NAME] = \ ++ add_builtin_function ("__builtin_" #NAME, \ ++ s390_builtin_fn_types[FNTYPE], \ ++ S390_BUILTIN_##NAME, \ ++ BUILT_IN_MD, \ ++ NULL, \ ++ ATTRS); ++#undef OB_DEF ++#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \ ++ if (((BFLAGS) & ~bflags_mask) == 0) \ ++ s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \ ++ add_builtin_function ("__builtin_" #NAME, \ ++ s390_builtin_fn_types[FNTYPE], \ ++ S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \ ++ BUILT_IN_MD, \ ++ NULL, \ ++ 0); ++#undef OB_DEF_VAR ++#define OB_DEF_VAR(...) ++#include "s390-builtins.def" ++ ++} ++ ++/* Return true if ARG is appropriate as argument number ARGNUM of ++ builtin DECL. The operand flags from s390-builtins.def have to ++ passed as OP_FLAGS. */ ++bool ++s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl) ++{ ++ if (O_UIMM_P (op_flags)) ++ { ++ int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 }; ++ int bitwidth = bitwidths[op_flags - O_U1]; ++ ++ if (!host_integerp (arg, 1) ++ || ((unsigned HOST_WIDE_INT) tree_low_cst (arg, 1) ++ > ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1)) ++ { ++ error("constant argument %d for builtin %qF is out of range (0.." ++ HOST_WIDE_INT_PRINT_UNSIGNED ")", ++ argnum, decl, ++ ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1); ++ return false; ++ } ++ } ++ ++ if (O_SIMM_P (op_flags)) ++ { ++ int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 }; ++ int bitwidth = bitwidths[op_flags - O_S2]; ++ ++ if (!host_integerp (arg, 0) ++ || tree_low_cst (arg, 0) < -((HOST_WIDE_INT)1 << (bitwidth - 1)) ++ || (tree_low_cst (arg, 0) ++ > (((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1))) ++ { ++ error("constant argument %d for builtin %qF is out of range (" ++ HOST_WIDE_INT_PRINT_DEC ".." ++ HOST_WIDE_INT_PRINT_DEC ")", ++ argnum, decl, ++ -((HOST_WIDE_INT)1 << (bitwidth - 1)), ++ ((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1); ++ return false; ++ } ++ } ++ return true; ++} ++ ++/* Expand an expression EXP that calls a built-in function, ++ with result going to TARGET if that's convenient ++ (and in mode MODE if that's convenient). ++ SUBTARGET may be used as the target for computing one of EXP's operands. ++ IGNORE is nonzero if the value is to be ignored. */ ++ ++static rtx ++s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, ++ enum machine_mode mode ATTRIBUTE_UNUSED, ++ int ignore ATTRIBUTE_UNUSED) ++{ ++#define MAX_ARGS 5 ++ ++ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); ++ unsigned int fcode = DECL_FUNCTION_CODE (fndecl); ++ enum insn_code icode; ++ rtx op[MAX_ARGS], pat; ++ int arity; ++ bool nonvoid; ++ tree arg; ++ call_expr_arg_iterator iter; ++ unsigned int all_op_flags = opflags_for_builtin (fcode); ++ enum machine_mode last_vec_mode = VOIDmode; ++ ++ if (TARGET_DEBUG_ARG) ++ { ++ fprintf (stderr, ++ "s390_expand_builtin, code = %4d, %s\n", ++ (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl))); ++ } ++ ++ if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET ++ && fcode < S390_ALL_BUILTIN_MAX) ++ { ++ gcc_unreachable (); ++ } ++ else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET) ++ { ++ icode = code_for_builtin[fcode]; ++ /* Set a flag in the machine specific cfun part in order to support ++ saving/restoring of FPRs. 
*/ ++ if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry) ++ cfun->machine->tbegin_p = true; ++ } ++ else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET) ++ { ++ error ("Unresolved overloaded builtin"); ++ return const0_rtx; ++ } ++ else ++ internal_error ("bad builtin fcode"); ++ ++ if (icode == 0) ++ internal_error ("bad builtin icode"); ++ ++ nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; ++ ++ if (nonvoid) ++ { ++ enum machine_mode tmode = insn_data[icode].operand[0].mode; ++ if (!target ++ || GET_MODE (target) != tmode ++ || !(*insn_data[icode].operand[0].predicate) (target, tmode)) ++ target = gen_reg_rtx (tmode); ++ ++ /* There are builtins (e.g. vec_promote) with no vector ++ arguments but an element selector. So we have to also look ++ at the vector return type when emitting the modulo ++ operation. */ ++ if (VECTOR_MODE_P (insn_data[icode].operand[0].mode)) ++ last_vec_mode = insn_data[icode].operand[0].mode; ++ } ++ ++ arity = 0; ++ FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) ++ { ++ const struct insn_operand_data *insn_op; ++ unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1); ++ ++ all_op_flags = all_op_flags >> O_SHIFT; ++ ++ if (arg == error_mark_node) ++ return NULL_RTX; ++ if (arity >= MAX_ARGS) ++ return NULL_RTX; ++ ++ if (O_IMM_P (op_flags) ++ && TREE_CODE (arg) != INTEGER_CST) ++ { ++ error ("constant value required for builtin %qF argument %d", ++ fndecl, arity + 1); ++ return const0_rtx; ++ } ++ ++ if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl)) ++ return const0_rtx; ++ ++ insn_op = &insn_data[icode].operand[arity + nonvoid]; ++ op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL); ++ ++ /* expand_expr truncates constants to the target mode only if it ++ is "convenient". However, our checks below rely on this ++ being done. */ ++ if (CONST_INT_P (op[arity]) ++ && SCALAR_INT_MODE_P (insn_op->mode) ++ && GET_MODE (op[arity]) != insn_op->mode) ++ op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]), ++ insn_op->mode)); ++ ++ /* Wrap the expanded RTX for pointer types into a MEM expr with ++ the proper mode. This allows us to use e.g. (match_operand ++ "memory_operand"..) in the insn patterns instead of (mem ++ (match_operand "address_operand")). This is helpful for ++ patterns not just accepting MEMs. */ ++ if (POINTER_TYPE_P (TREE_TYPE (arg)) ++ && insn_op->predicate != address_operand) ++ op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]); ++ ++ /* Expand the modulo operation required on element selectors. */ ++ if (op_flags == O_ELEM) ++ { ++ gcc_assert (last_vec_mode != VOIDmode); ++ op[arity] = simplify_expand_binop (SImode, code_to_optab (AND), ++ op[arity], ++ GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1), ++ NULL_RTX, 1, OPTAB_DIRECT); ++ } ++ ++ /* Record the vector mode used for an element selector. This assumes: ++ 1. There is no builtin with two different vector modes and an element selector ++ 2. The element selector comes after the vector type it is referring to. ++ This is currently true for all the builtins but FIXME we ++ should better check for that.
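++ ++ Note the effect of the modulo expansion above: for a V16QI operand an element selector of e.g. 17 is reduced to 17 & (16 - 1) = 1, i.e. selectors wrap around the vector length.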
*/ ++ if (VECTOR_MODE_P (insn_op->mode)) ++ last_vec_mode = insn_op->mode; ++ ++ if (insn_op->predicate (op[arity], insn_op->mode)) ++ { ++ arity++; ++ continue; ++ } ++ ++ if (MEM_P (op[arity]) ++ && insn_op->predicate == memory_operand ++ && (GET_MODE (XEXP (op[arity], 0)) == Pmode ++ || GET_MODE (XEXP (op[arity], 0)) == VOIDmode)) ++ { ++ op[arity] = replace_equiv_address (op[arity], ++ copy_to_mode_reg (Pmode, ++ XEXP (op[arity], 0))); ++ } ++ else if (GET_MODE (op[arity]) == insn_op->mode ++ || GET_MODE (op[arity]) == VOIDmode ++ || (insn_op->predicate == address_operand ++ && GET_MODE (op[arity]) == Pmode)) ++ { ++ /* An address_operand usually has VOIDmode in the expander ++ so we cannot use this. */ ++ enum machine_mode target_mode = ++ (insn_op->predicate == address_operand ++ ? Pmode : insn_op->mode); ++ op[arity] = copy_to_mode_reg (target_mode, op[arity]); ++ } ++ ++ if (!insn_op->predicate (op[arity], insn_op->mode)) ++ { ++ error ("Invalid argument %d for builtin %qF", arity + 1, fndecl); ++ return const0_rtx; ++ } ++ arity++; ++ } ++ ++ if (last_vec_mode != VOIDmode && !TARGET_VX) ++ { ++ error ("Vector type builtin %qF is not supported without -mvx " ++ "(default with -march=z13).", ++ fndecl); ++ return const0_rtx; ++ } ++ ++ switch (arity) ++ { ++ case 0: ++ pat = GEN_FCN (icode) (target); ++ break; ++ case 1: ++ if (nonvoid) ++ pat = GEN_FCN (icode) (target, op[0]); ++ else ++ pat = GEN_FCN (icode) (op[0]); ++ break; ++ case 2: ++ if (nonvoid) ++ pat = GEN_FCN (icode) (target, op[0], op[1]); ++ else ++ pat = GEN_FCN (icode) (op[0], op[1]); ++ break; ++ case 3: ++ if (nonvoid) ++ pat = GEN_FCN (icode) (target, op[0], op[1], op[2]); ++ else ++ pat = GEN_FCN (icode) (op[0], op[1], op[2]); ++ break; ++ case 4: ++ if (nonvoid) ++ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]); ++ else ++ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]); ++ break; ++ case 5: ++ if (nonvoid) ++ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]); ++ else ++ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]); ++ break; ++ case 6: ++ if (nonvoid) ++ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]); ++ else ++ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ if (!pat) ++ return NULL_RTX; ++ emit_insn (pat); ++ ++ if (nonvoid) ++ return target; ++ else ++ return const0_rtx; ++} ++ ++ + static const int s390_hotpatch_hw_max = 1000000; + static int s390_hotpatch_hw_before_label = 0; + static int s390_hotpatch_hw_after_label = 0; +@@ -458,9 +1009,43 @@ s390_handle_hotpatch_attribute (tree *no + return NULL_TREE; + } + ++/* Expand the s390_vector_bool type attribute. 
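++ It is meant for vector typedefs, roughly like this (a sketch; the real typedefs live in the zvector support headers): ++ ++ typedef unsigned int bv4si ++ __attribute__ ((vector_size (16), s390_vector_bool)); ++ ++ which remaps the type to the matching boolean vector type (here BT_BV4SI, following the mode switch below).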
*/ ++ ++static tree ++s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED, ++ tree args ATTRIBUTE_UNUSED, ++ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) ++{ ++ tree type = *node, result = NULL_TREE; ++ enum machine_mode mode; ++ ++ while (POINTER_TYPE_P (type) ++ || TREE_CODE (type) == FUNCTION_TYPE ++ || TREE_CODE (type) == METHOD_TYPE ++ || TREE_CODE (type) == ARRAY_TYPE) ++ type = TREE_TYPE (type); ++ ++ mode = TYPE_MODE (type); ++ switch (mode) ++ { ++ case DImode: case V2DImode: result = s390_builtin_types[BT_BV2DI]; break; ++ case SImode: case V4SImode: result = s390_builtin_types[BT_BV4SI]; break; ++ case HImode: case V8HImode: result = s390_builtin_types[BT_BV8HI]; break; ++ case QImode: case V16QImode: result = s390_builtin_types[BT_BV16QI]; ++ default: break; ++ } ++ ++ *no_add_attrs = true; /* No need to hang on to the attribute. */ ++ ++ if (result) ++ *node = lang_hooks.types.reconstruct_complex_type (*node, result); ++ ++ return NULL_TREE; ++} ++ + static const struct attribute_spec s390_attribute_table[] = { +- { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false +- }, ++ { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false }, ++ { "s390_vector_bool", 0, 0, false, true, false, s390_handle_vectorbool_attribute, true }, + /* End element. */ + { NULL, 0, 0, false, false, false, NULL, false } + }; +@@ -524,6 +1109,35 @@ s390_scalar_mode_supported_p (enum machi + return default_scalar_mode_supported_p (mode); + } + ++/* Return true if the back end supports vector mode MODE. */ ++static bool ++s390_vector_mode_supported_p (enum machine_mode mode) ++{ ++ enum machine_mode inner; ++ ++ if (!VECTOR_MODE_P (mode) ++ || !TARGET_VX ++ || GET_MODE_SIZE (mode) > 16) ++ return false; ++ ++ inner = GET_MODE_INNER (mode); ++ ++ switch (inner) ++ { ++ case QImode: ++ case HImode: ++ case SImode: ++ case DImode: ++ case TImode: ++ case SFmode: ++ case DFmode: ++ case TFmode: ++ return true; ++ default: ++ return false; ++ } ++} ++ + /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */ + + void +@@ -595,6 +1209,11 @@ s390_match_ccmode_set (rtx set, enum mac + case CCT1mode: + case CCT2mode: + case CCT3mode: ++ case CCVEQmode: ++ case CCVHmode: ++ case CCVHUmode: ++ case CCVFHmode: ++ case CCVFHEmode: + if (req_mode != set_mode) + return 0; + break; +@@ -695,6 +1314,29 @@ s390_tm_ccmode (rtx op1, rtx op2, bool m + enum machine_mode + s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1) + { ++ if (TARGET_VX ++ && register_operand (op0, DFmode) ++ && register_operand (op1, DFmode)) ++ { ++ /* LT, LE, UNGT, UNGE require swapping OP0 and OP1. Either ++ s390_emit_compare or s390_canonicalize_comparison will take ++ care of it. */ ++ switch (code) ++ { ++ case EQ: ++ case NE: ++ return CCVEQmode; ++ case GT: ++ case UNLE: ++ return CCVFHmode; ++ case GE: ++ case UNLT: ++ return CCVFHEmode; ++ default: ++ ; ++ } ++ } ++ + switch (code) + { + case EQ: +@@ -972,8 +1614,73 @@ s390_canonicalize_comparison (int *code, + rtx tem = *op0; *op0 = *op1; *op1 = tem; + *code = (int)swap_condition ((enum rtx_code)*code); + } ++ ++ /* Using the scalar variants of vector instructions for 64 bit FP ++ comparisons might require swapping the operands. 
*/ ++ if (TARGET_VX ++ && register_operand (*op0, DFmode) ++ && register_operand (*op1, DFmode) ++ && (*code == LT || *code == LE || *code == UNGT || *code == UNGE)) ++ { ++ rtx tmp; ++ ++ switch (*code) ++ { ++ case LT: *code = GT; break; ++ case LE: *code = GE; break; ++ case UNGT: *code = UNLE; break; ++ case UNGE: *code = UNLT; break; ++ default: ; ++ } ++ tmp = *op0; *op0 = *op1; *op1 = tmp; ++ } ++} ++ ++/* Helper function for s390_emit_compare. If possible emit a 64 bit ++ FP compare using the single element variant of vector instructions. ++ Replace CODE with the comparison code to be used in the CC reg ++ compare and return the condition code register RTX in CC. */ ++ ++static bool ++s390_expand_vec_compare_scalar (enum rtx_code *code, rtx cmp1, rtx cmp2, ++ rtx *cc) ++{ ++ enum machine_mode cmp_mode; ++ bool swap_p = false; ++ ++ switch (*code) ++ { ++ case EQ: cmp_mode = CCVEQmode; break; ++ case NE: cmp_mode = CCVEQmode; break; ++ case GT: cmp_mode = CCVFHmode; break; ++ case GE: cmp_mode = CCVFHEmode; break; ++ case UNLE: cmp_mode = CCVFHmode; break; ++ case UNLT: cmp_mode = CCVFHEmode; break; ++ case LT: cmp_mode = CCVFHmode; *code = GT; swap_p = true; break; ++ case LE: cmp_mode = CCVFHEmode; *code = GE; swap_p = true; break; ++ case UNGE: cmp_mode = CCVFHmode; *code = UNLE; swap_p = true; break; ++ case UNGT: cmp_mode = CCVFHEmode; *code = UNLT; swap_p = true; break; ++ default: return false; ++ } ++ ++ if (swap_p) ++ { ++ rtx tmp = cmp2; ++ cmp2 = cmp1; ++ cmp1 = tmp; ++ } ++ *cc = gen_rtx_REG (cmp_mode, CC_REGNUM); ++ emit_insn (gen_rtx_PARALLEL (VOIDmode, ++ gen_rtvec (2, ++ gen_rtx_SET (VOIDmode, *cc, ++ gen_rtx_COMPARE (cmp_mode, cmp1, ++ cmp2)), ++ gen_rtx_CLOBBER (VOIDmode, ++ gen_rtx_SCRATCH (V2DImode))))); ++ return true; + } + ++ + /* Emit a compare instruction suitable to implement the comparison + OP0 CODE OP1. Return the correct condition RTL to be placed in + the IF_THEN_ELSE of the conditional branch testing the result. */ +@@ -984,10 +1691,18 @@ s390_emit_compare (enum rtx_code code, r + enum machine_mode mode = s390_select_ccmode (code, op0, op1); + rtx cc; + +- /* Do not output a redundant compare instruction if a compare_and_swap +- pattern already computed the result and the machine modes are compatible. */ +- if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC) +- { ++ if (TARGET_VX ++ && register_operand (op0, DFmode) ++ && register_operand (op1, DFmode) ++ && s390_expand_vec_compare_scalar (&code, op0, op1, &cc)) ++ { ++ /* Work has been done by s390_expand_vec_compare_scalar already. */ ++ } ++ else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC) ++ { ++ /* Do not output a redundant compare instruction if a ++ compare_and_swap pattern already computed the result and the ++ machine modes are compatible. */ + gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode) + == GET_MODE (op0)); + cc = op0; +@@ -1222,6 +1937,93 @@ s390_branch_condition_mask (rtx code) + } + break; + ++ /* Vector comparison modes. */ ++ ++ case CCVEQmode: ++ switch (GET_CODE (code)) ++ { ++ case EQ: return CC0; ++ case NE: return CC3; ++ default: return -1; ++ } ++ ++ case CCVEQANYmode: ++ switch (GET_CODE (code)) ++ { ++ case EQ: return CC0 | CC1; ++ case NE: return CC3 | CC1; ++ default: return -1; ++ } ++ ++ /* Integer vector compare modes. 
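++ ++ As in the modes above, CC0 means the comparison held for all elements and CC3 that it held for none; the *ANY variants additionally accept CC1, which covers the mixed case.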
*/ ++ ++ case CCVHmode: ++ switch (GET_CODE (code)) ++ { ++ case GT: return CC0; ++ case LE: return CC3; ++ default: return -1; ++ } ++ ++ case CCVHANYmode: ++ switch (GET_CODE (code)) ++ { ++ case GT: return CC0 | CC1; ++ case LE: return CC3 | CC1; ++ default: return -1; ++ } ++ ++ case CCVHUmode: ++ switch (GET_CODE (code)) ++ { ++ case GTU: return CC0; ++ case LEU: return CC3; ++ default: return -1; ++ } ++ ++ case CCVHUANYmode: ++ switch (GET_CODE (code)) ++ { ++ case GTU: return CC0 | CC1; ++ case LEU: return CC3 | CC1; ++ default: return -1; ++ } ++ ++ /* FP vector compare modes. */ ++ ++ case CCVFHmode: ++ switch (GET_CODE (code)) ++ { ++ case GT: return CC0; ++ case UNLE: return CC3; ++ default: return -1; ++ } ++ ++ case CCVFHANYmode: ++ switch (GET_CODE (code)) ++ { ++ case GT: return CC0 | CC1; ++ case UNLE: return CC3 | CC1; ++ default: return -1; ++ } ++ ++ case CCVFHEmode: ++ switch (GET_CODE (code)) ++ { ++ case GE: return CC0; ++ case UNLT: return CC3; ++ default: return -1; ++ } ++ ++ case CCVFHEANYmode: ++ switch (GET_CODE (code)) ++ { ++ case GE: return CC0 | CC1; ++ case UNLT: return CC3 | CC1; ++ default: return -1; ++ } ++ ++ + case CCRAWmode: + switch (GET_CODE (code)) + { +@@ -1421,6 +2223,9 @@ s390_contiguous_bitmask_p (unsigned HOST + /* Calculate a mask for all bits beyond the contiguous bits. */ + mask = (-1LL & ~(((1ULL << (tmp_length + tmp_pos - 1)) << 1) - 1)); + ++ if ((unsigned)size < sizeof (HOST_WIDE_INT) * BITS_PER_UNIT) ++ mask &= ((unsigned HOST_WIDE_INT) 1 << size) - 1; ++ + if (mask & in) + return false; + +@@ -1436,6 +2241,128 @@ s390_contiguous_bitmask_p (unsigned HOST + return true; + } + ++/* Return true if OP is a constant vector with the same constant in ++ all its elements. */ ++ ++bool ++s390_const_vec_duplicate_p (rtx op) ++{ ++ if (!VECTOR_MODE_P (GET_MODE (op)) ++ || GET_CODE (op) != CONST_VECTOR ++ || !CONST_INT_P (XVECEXP (op, 0, 0))) ++ return false; ++ ++ if (GET_MODE_NUNITS (GET_MODE (op)) > 1) ++ { ++ int i; ++ ++ for (i = 1; i < GET_MODE_NUNITS (GET_MODE (op)); ++i) ++ if (!rtx_equal_p (XVECEXP (op, 0, i), XVECEXP (op, 0, 0))) ++ return false; ++ } ++ return true; ++} ++ ++/* Return true if OP contains the same contiguous bitfield in *all* ++ its elements. START and END can be used to obtain the start and ++ end position of the bitfield. ++ ++ START/STOP give the position of the first/last bit of the bitfield ++ counting from the lowest order bit starting with zero. In order to ++ use these values for S/390 instructions this has to be converted to ++ "bits big endian" style. */ ++ ++bool ++s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end) ++{ ++ unsigned HOST_WIDE_INT mask; ++ int length, size; ++ ++ if (!VECTOR_MODE_P (GET_MODE (op)) ++ || GET_CODE (op) != CONST_VECTOR ++ || !CONST_INT_P (XVECEXP (op, 0, 0))) ++ return false; ++ ++ if (GET_MODE_NUNITS (GET_MODE (op)) > 1) ++ { ++ int i; ++ ++ for (i = 1; i < GET_MODE_NUNITS (GET_MODE (op)); ++i) ++ if (!rtx_equal_p (XVECEXP (op, 0, i), XVECEXP (op, 0, 0))) ++ return false; ++ } ++ ++ size = GET_MODE_UNIT_BITSIZE (GET_MODE (op)); ++ ++ /* We cannot deal with V1TI/V1TF. This would require a vgmq. */ ++ if (size > 64) ++ return false; ++ ++ mask = UINTVAL (XVECEXP (op, 0, 0)); ++ if (s390_contiguous_bitmask_p (mask, size, start, ++ end != NULL ? &length : NULL)) ++ { ++ if (end != NULL) ++ *end = *start + length - 1; ++ return true; ++ } ++ /* 0xff00000f style immediates can be covered by swapping start and ++ end indices in vgm. 
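++ For example, with 32 bit elements and mask 0xff00000f: ~mask is 0x00fffff0, a contiguous field in bits 4..23, so the code below reports start = 24 and end = 3, the wrap-around range vgm can encode.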
*/ ++ if (s390_contiguous_bitmask_p (~mask, size, start, ++ end != NULL ? &length : NULL)) ++ { ++ if (end != NULL) ++ *end = *start - 1; ++ if (start != NULL) ++ *start = *start + length; ++ return true; ++ } ++ return false; ++} ++ ++/* Return true if C consists only of byte chunks being either 0 or ++ 0xff. If MASK is !=NULL a byte mask is generated which is ++ appropriate for the vector generate byte mask instruction. */ ++ ++bool ++s390_bytemask_vector_p (rtx op, unsigned *mask) ++{ ++ int i; ++ unsigned tmp_mask = 0; ++ int nunit, unit_size; ++ ++ if (!VECTOR_MODE_P (GET_MODE (op)) ++ || GET_CODE (op) != CONST_VECTOR ++ || !CONST_INT_P (XVECEXP (op, 0, 0))) ++ return false; ++ ++ nunit = GET_MODE_NUNITS (GET_MODE (op)); ++ unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op)); ++ ++ for (i = 0; i < nunit; i++) ++ { ++ unsigned HOST_WIDE_INT c; ++ int j; ++ ++ if (!CONST_INT_P (XVECEXP (op, 0, i))) ++ return false; ++ ++ c = UINTVAL (XVECEXP (op, 0, i)); ++ for (j = 0; j < unit_size; j++) ++ { ++ if ((c & 0xff) != 0 && (c & 0xff) != 0xff) ++ return false; ++ tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j); ++ c = c >> BITS_PER_UNIT; ++ } ++ } ++ ++ if (mask != NULL) ++ *mask = tmp_mask; ++ ++ return true; ++} ++ + /* Check whether a rotate of ROTL followed by an AND of CONTIG is + equivalent to a shift followed by the AND. In particular, CONTIG + should not overlap the (rotated) bit 0/bit 63 gap. Negative values +@@ -1461,8 +2388,8 @@ s390_extzv_shift_ok (int bitsize, int ro + bool + s390_split_ok_p (rtx dst, rtx src, enum machine_mode mode, int first_subword) + { +- /* Floating point registers cannot be split. */ +- if (FP_REG_P (src) || FP_REG_P (dst)) ++ /* Floating point and vector registers cannot be split. */ ++ if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst)) + return false; + + /* We don't need to split if operands are directly accessible. */ +@@ -1752,6 +2679,22 @@ s390_option_override (void) + if (!(target_flags_explicit & MASK_OPT_HTM) && TARGET_CPU_HTM && TARGET_ZARCH) + target_flags |= MASK_OPT_HTM; + ++ if (target_flags_explicit & MASK_OPT_VX) ++ { ++ if (TARGET_OPT_VX) ++ { ++ if (!TARGET_CPU_VX) ++ error ("hardware vector support not available on %s", ++ s390_arch_string); ++ if (TARGET_SOFT_FLOAT) ++ error ("hardware vector support not available with -msoft-float"); ++ } ++ } ++ else if (TARGET_CPU_VX) ++ /* Enable vector support if available and not explicitly disabled ++ by user. E.g. with -m31 -march=z13 -mzarch */ ++ target_flags |= MASK_OPT_VX; ++ + if (TARGET_HARD_DFP && !TARGET_DFP) + { + if (target_flags_explicit & MASK_HARD_DFP) +@@ -1791,6 +2734,7 @@ s390_option_override (void) + s390_cost = &z196_cost; + break; + case PROCESSOR_2827_ZEC12: ++ case PROCESSOR_2964_Z13: + s390_cost = &zEC12_cost; + break; + default: +@@ -1818,7 +2762,8 @@ s390_option_override (void) + + if (s390_tune == PROCESSOR_2097_Z10 + || s390_tune == PROCESSOR_2817_Z196 +- || s390_tune == PROCESSOR_2827_ZEC12) ++ || s390_tune == PROCESSOR_2827_ZEC12 ++ || s390_tune == PROCESSOR_2964_Z13) + { + maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100, + global_options.x_param_values, +@@ -1882,16 +2827,20 @@ s390_option_override (void) + /* Map for smallest class containing reg regno. 
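+ In the new table below, entries 38..53 presumably correspond to the sixteen new vector registers v16-v31; v0-v15 overlap the floating point registers at 16..31, which is why those slots stay FP_REGS.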
*/ + + const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] = +-{ GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, +- ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, +- ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, +- ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, +- FP_REGS, FP_REGS, FP_REGS, FP_REGS, +- FP_REGS, FP_REGS, FP_REGS, FP_REGS, +- FP_REGS, FP_REGS, FP_REGS, FP_REGS, +- FP_REGS, FP_REGS, FP_REGS, FP_REGS, +- ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, +- ACCESS_REGS, ACCESS_REGS ++{ GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */ ++ ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */ ++ ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */ ++ ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */ ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */ ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */ ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */ ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */ ++ ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */ ++ ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */ ++ VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */ ++ VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */ ++ VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */ ++ VEC_REGS, VEC_REGS /* 52 */ + }; + + /* Return attribute type of insn. */ +@@ -2933,6 +3882,19 @@ legitimate_pic_operand_p (rtx op) + static bool + s390_legitimate_constant_p (enum machine_mode mode, rtx op) + { ++ if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR) ++ { ++ if (GET_MODE_SIZE (mode) != 16) ++ return 0; ++ ++ if (!satisfies_constraint_j00 (op) ++ && !satisfies_constraint_jm1 (op) ++ && !satisfies_constraint_jKK (op) ++ && !satisfies_constraint_jxx (op) ++ && !satisfies_constraint_jyy (op)) ++ return 0; ++ } ++ + /* Accept all non-symbolic constants. */ + if (!SYMBOLIC_CONST (op)) + return 1; +@@ -2969,6 +3931,7 @@ s390_cannot_force_const_mem (enum machin + { + case CONST_INT: + case CONST_DOUBLE: ++ case CONST_VECTOR: + /* Accept all non-symbolic constants. */ + return false; + +@@ -3101,6 +4064,25 @@ legitimate_reload_fp_constant_p (rtx op) + return false; + } + ++/* Returns true if the constant value OP is a legitimate vector operand ++ during and after reload. ++ This function accepts all constants which can be loaded directly ++ into a VR. */ ++ ++static bool ++legitimate_reload_vector_constant_p (rtx op) ++{ ++ if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16 ++ && (satisfies_constraint_j00 (op) ++ || satisfies_constraint_jm1 (op) ++ || satisfies_constraint_jKK (op) ++ || satisfies_constraint_jxx (op) ++ || satisfies_constraint_jyy (op))) ++ return true; ++ ++ return false; ++} ++ + /* Given an rtx OP being reloaded into a reg required to be in class RCLASS, + return the class of reg to actually use. */ + +@@ -3111,6 +4093,7 @@ s390_preferred_reload_class (rtx op, reg + { + /* Constants we cannot reload into general registers + must be forced into the literal pool. */ ++ case CONST_VECTOR: + case CONST_DOUBLE: + case CONST_INT: + if (reg_class_subset_p (GENERAL_REGS, rclass) +@@ -3122,6 +4105,10 @@ s390_preferred_reload_class (rtx op, reg + else if (reg_class_subset_p (FP_REGS, rclass) + && legitimate_reload_fp_constant_p (op)) + return FP_REGS; ++ else if (reg_class_subset_p (VEC_REGS, rclass) ++ && legitimate_reload_vector_constant_p (op)) ++ return VEC_REGS; ++ + return NO_REGS; + + /* If a symbolic constant or a PLUS is reloaded, +@@ -3245,6 +4232,7 @@ s390_reload_symref_address (rtx reg, rtx + /* Reload might have pulled a constant out of the literal pool. + Force it back in.
*/ + if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE ++ || GET_CODE (mem) == CONST_VECTOR + || GET_CODE (mem) == CONST) + mem = force_const_mem (GET_MODE (reg), mem); + +@@ -3284,6 +4272,30 @@ s390_secondary_reload (bool in_p, rtx x, + if (reg_classes_intersect_p (CC_REGS, rclass)) + return GENERAL_REGS; + ++ if (TARGET_VX) ++ { ++ /* The vst/vl vector move instructions allow only for short ++ displacements. */ ++ if (MEM_P (x) ++ && GET_CODE (XEXP (x, 0)) == PLUS ++ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT ++ && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1))) ++ && reg_class_subset_p (rclass, VEC_REGS) ++ && (!reg_class_subset_p (rclass, FP_REGS) ++ || (GET_MODE_SIZE (mode) > 8 ++ && s390_class_max_nregs (FP_REGS, mode) == 1))) ++ { ++ if (in_p) ++ sri->icode = (TARGET_64BIT ? ++ CODE_FOR_reloaddi_la_in : ++ CODE_FOR_reloadsi_la_in); ++ else ++ sri->icode = (TARGET_64BIT ? ++ CODE_FOR_reloaddi_la_out : ++ CODE_FOR_reloadsi_la_out); ++ } ++ } ++ + if (TARGET_Z10) + { + HOST_WIDE_INT offset; +@@ -3299,17 +4311,15 @@ s390_secondary_reload (bool in_p, rtx x, + sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10 + : CODE_FOR_reloadsi_larl_odd_addend_z10); + +- /* On z10 we need a scratch register when moving QI, TI or floating +- point mode values from or to a memory location with a SYMBOL_REF +- or if the symref addend of a SI or DI move is not aligned to the +- width of the access. */ ++ /* Handle all the (mem (symref)) accesses we cannot use the z10 ++ instructions for. */ + if (MEM_P (x) + && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL) +- && (mode == QImode || mode == TImode || FLOAT_MODE_P (mode) +- || (!TARGET_ZARCH && mode == DImode) +- || ((mode == HImode || mode == SImode || mode == DImode) +- && (!s390_check_symref_alignment (XEXP (x, 0), +- GET_MODE_SIZE (mode)))))) ++ && (mode == QImode ++ || !reg_class_subset_p (rclass, GENERAL_REGS) ++ || GET_MODE_SIZE (mode) > UNITS_PER_WORD ++ || !s390_check_symref_alignment (XEXP (x, 0), ++ GET_MODE_SIZE (mode)))) + { + #define __SECONDARY_RELOAD_CASE(M,m) \ + case M##mode: \ +@@ -3334,7 +4344,27 @@ s390_secondary_reload (bool in_p, rtx x, + __SECONDARY_RELOAD_CASE (SD, sd); + __SECONDARY_RELOAD_CASE (DD, dd); + __SECONDARY_RELOAD_CASE (TD, td); +- ++ __SECONDARY_RELOAD_CASE (V1QI, v1qi); ++ __SECONDARY_RELOAD_CASE (V2QI, v2qi); ++ __SECONDARY_RELOAD_CASE (V4QI, v4qi); ++ __SECONDARY_RELOAD_CASE (V8QI, v8qi); ++ __SECONDARY_RELOAD_CASE (V16QI, v16qi); ++ __SECONDARY_RELOAD_CASE (V1HI, v1hi); ++ __SECONDARY_RELOAD_CASE (V2HI, v2hi); ++ __SECONDARY_RELOAD_CASE (V4HI, v4hi); ++ __SECONDARY_RELOAD_CASE (V8HI, v8hi); ++ __SECONDARY_RELOAD_CASE (V1SI, v1si); ++ __SECONDARY_RELOAD_CASE (V2SI, v2si); ++ __SECONDARY_RELOAD_CASE (V4SI, v4si); ++ __SECONDARY_RELOAD_CASE (V1DI, v1di); ++ __SECONDARY_RELOAD_CASE (V2DI, v2di); ++ __SECONDARY_RELOAD_CASE (V1TI, v1ti); ++ __SECONDARY_RELOAD_CASE (V1SF, v1sf); ++ __SECONDARY_RELOAD_CASE (V2SF, v2sf); ++ __SECONDARY_RELOAD_CASE (V4SF, v4sf); ++ __SECONDARY_RELOAD_CASE (V1DF, v1df); ++ __SECONDARY_RELOAD_CASE (V2DF, v2df); ++ __SECONDARY_RELOAD_CASE (V1TF, v1tf); + default: + gcc_unreachable (); + } +@@ -3371,12 +4401,12 @@ s390_secondary_reload (bool in_p, rtx x, + { + if (in_p) + sri->icode = (TARGET_64BIT ? +- CODE_FOR_reloaddi_nonoffmem_in : +- CODE_FOR_reloadsi_nonoffmem_in); ++ CODE_FOR_reloaddi_la_in : ++ CODE_FOR_reloadsi_la_in); + else + sri->icode = (TARGET_64BIT ? 
+- CODE_FOR_reloaddi_nonoffmem_out : +- CODE_FOR_reloadsi_nonoffmem_out); ++ CODE_FOR_reloaddi_la_out : ++ CODE_FOR_reloadsi_la_out); + } + } + +@@ -4610,6 +5640,256 @@ s390_expand_cmpmem (rtx target, rtx op0, + return true; + } + ++/* Emit a conditional jump to LABEL for condition code mask MASK using ++ comparison operator COMPARISON. Return the emitted jump insn. */ ++ ++static rtx ++s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label) ++{ ++ rtx temp; ++ ++ gcc_assert (comparison == EQ || comparison == NE); ++ gcc_assert (mask > 0 && mask < 15); ++ ++ temp = gen_rtx_fmt_ee (comparison, VOIDmode, ++ gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask)); ++ temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp, ++ gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx); ++ temp = gen_rtx_SET (VOIDmode, pc_rtx, temp); ++ return emit_jump_insn (temp); ++} ++ ++/* Emit the instructions to implement strlen of STRING and store the ++ result in TARGET. The string has the known ALIGNMENT. This ++ version uses vector instructions and is therefore not appropriate ++ for targets prior to z13. */ ++ ++void ++s390_expand_vec_strlen (rtx target, rtx string, rtx alignment) ++{ ++ int very_unlikely = REG_BR_PROB_BASE / 100 - 1; ++ int very_likely = REG_BR_PROB_BASE - 1; ++ rtx highest_index_to_load_reg = gen_reg_rtx (Pmode); ++ rtx str_reg = gen_reg_rtx (V16QImode); ++ rtx str_addr_base_reg = gen_reg_rtx (Pmode); ++ rtx str_idx_reg = gen_reg_rtx (Pmode); ++ rtx result_reg = gen_reg_rtx (V16QImode); ++ rtx is_aligned_label = gen_label_rtx (); ++ rtx into_loop_label = NULL_RTX; ++ rtx loop_start_label = gen_label_rtx (); ++ rtx temp; ++ rtx len = gen_reg_rtx (QImode); ++ rtx cond; ++ ++ s390_load_address (str_addr_base_reg, XEXP (string, 0)); ++ emit_move_insn (str_idx_reg, const0_rtx); ++ ++ if (INTVAL (alignment) < 16) ++ { ++ /* Check whether the address happens to be aligned properly and, ++ if so, jump directly to the aligned loop. */ ++ emit_cmp_and_jump_insns (gen_rtx_AND (Pmode, ++ str_addr_base_reg, GEN_INT (15)), ++ const0_rtx, EQ, NULL_RTX, ++ Pmode, 1, is_aligned_label); ++ ++ temp = gen_reg_rtx (Pmode); ++ temp = expand_binop (Pmode, and_optab, str_addr_base_reg, ++ GEN_INT (15), temp, 1, OPTAB_DIRECT); ++ gcc_assert (REG_P (temp)); ++ highest_index_to_load_reg = ++ expand_binop (Pmode, sub_optab, GEN_INT (15), temp, ++ highest_index_to_load_reg, 1, OPTAB_DIRECT); ++ gcc_assert (REG_P (highest_index_to_load_reg)); ++ emit_insn (gen_vllv16qi (str_reg, ++ convert_to_mode (SImode, highest_index_to_load_reg, 1), ++ gen_rtx_MEM (BLKmode, str_addr_base_reg))); ++ ++ into_loop_label = gen_label_rtx (); ++ s390_emit_jump (into_loop_label, NULL_RTX); ++ emit_barrier (); ++ } ++ ++ emit_label (is_aligned_label); ++ LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1; ++ ++ /* Reaching this point we are only performing 16 byte aligned ++ loads. */ ++ emit_move_insn (highest_index_to_load_reg, GEN_INT (15)); ++ ++ emit_label (loop_start_label); ++ LABEL_NUSES (loop_start_label) = 1; ++ ++ /* Load 16 bytes of the string into VR. */ ++ emit_move_insn (str_reg, ++ gen_rtx_MEM (V16QImode, ++ gen_rtx_PLUS (Pmode, str_idx_reg, ++ str_addr_base_reg))); ++ if (into_loop_label != NULL_RTX) ++ { ++ emit_label (into_loop_label); ++ LABEL_NUSES (into_loop_label) = 1; ++ } ++ ++ /* Increment string index by 16 bytes.
*/ ++ expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16), ++ str_idx_reg, 1, OPTAB_DIRECT); ++ ++ emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg, ++ GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS))); ++ ++ add_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label), ++ REG_BR_PROB, GEN_INT (very_likely)); ++ emit_insn (gen_vec_extractv16qi (len, result_reg, GEN_INT (7))); ++ ++ /* If the string pointer wasn't aligned we have loaded less than 16 ++ bytes and the remaining bytes got filled with zeros (by vll). ++ Now we have to check whether the resulting index lies within the ++ bytes actually part of the string. */ ++ ++ cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1), ++ highest_index_to_load_reg); ++ s390_load_address (highest_index_to_load_reg, ++ gen_rtx_PLUS (Pmode, highest_index_to_load_reg, ++ const1_rtx)); ++ if (TARGET_64BIT) ++ emit_insn (gen_movdicc (str_idx_reg, cond, ++ highest_index_to_load_reg, str_idx_reg)); ++ else ++ emit_insn (gen_movsicc (str_idx_reg, cond, ++ highest_index_to_load_reg, str_idx_reg)); ++ ++ add_reg_note (s390_emit_jump (is_aligned_label, cond), REG_BR_PROB, ++ GEN_INT (very_unlikely)); ++ ++ expand_binop (Pmode, add_optab, str_idx_reg, ++ GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT); ++ /* FIXME: len is already zero extended - so avoid the llgcr emitted ++ here. */ ++ temp = expand_binop (Pmode, add_optab, str_idx_reg, ++ convert_to_mode (Pmode, len, 1), ++ target, 1, OPTAB_DIRECT); ++ if (temp != target) ++ emit_move_insn (target, temp); ++} ++ ++void ++s390_expand_vec_movstr (rtx result, rtx dst, rtx src) ++{ ++ int very_unlikely = REG_BR_PROB_BASE / 100 - 1; ++ rtx temp = gen_reg_rtx (Pmode); ++ rtx src_addr = XEXP (src, 0); ++ rtx dst_addr = XEXP (dst, 0); ++ rtx src_addr_reg = gen_reg_rtx (Pmode); ++ rtx dst_addr_reg = gen_reg_rtx (Pmode); ++ rtx offset = gen_reg_rtx (Pmode); ++ rtx vsrc = gen_reg_rtx (V16QImode); ++ rtx vpos = gen_reg_rtx (V16QImode); ++ rtx loadlen = gen_reg_rtx (SImode); ++ rtx gpos_qi = gen_reg_rtx(QImode); ++ rtx gpos = gen_reg_rtx (SImode); ++ rtx done_label = gen_label_rtx (); ++ rtx loop_label = gen_label_rtx (); ++ rtx exit_label = gen_label_rtx (); ++ rtx full_label = gen_label_rtx (); ++ ++ /* Perform a quick check for a string ending within the first up to 16 ++ bytes and exit early if successful. */ ++ ++ emit_insn (gen_vlbb (vsrc, src, GEN_INT (6))); ++ emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6))); ++ emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc)); ++ emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7))); ++ emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0)); ++ /* gpos is the byte index if a zero was found and 16 otherwise. ++ So if it is lower than the loaded bytes we have a hit. */ ++ emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1, ++ full_label); ++ emit_insn (gen_vstlv16qi (vsrc, gpos, dst)); ++ ++ force_expand_binop (Pmode, add_optab, dst_addr, gpos, result, ++ 1, OPTAB_DIRECT); ++ emit_jump (exit_label); ++ emit_barrier (); ++ ++ emit_label (full_label); ++ LABEL_NUSES (full_label) = 1; ++ ++ /* Calculate `offset' so that src + offset points to the last byte ++ before 16 byte alignment. */ ++ ++ /* temp = src_addr & 0xf */ ++ force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp, ++ 1, OPTAB_DIRECT); ++ ++ /* offset = 0xf - temp */ ++ emit_move_insn (offset, GEN_INT (15)); ++ force_expand_binop (Pmode, sub_optab, offset, temp, offset, ++ 1, OPTAB_DIRECT); ++ ++ /* Store `offset' bytes in the destination string.
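++ (For example, if src_addr = 0x1009 then temp = 9 and offset = 6; src + offset = 0x100f is the last byte before the 16 byte boundary, and after the increment below the loop resumes at the aligned address 0x1010.)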
The quick check ++ has loaded at least `offset' bytes into vsrc. */ ++ ++ emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst)); ++ ++ /* Advance to the next byte to be loaded. */ ++ force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset, ++ 1, OPTAB_DIRECT); ++ ++ /* Make sure the addresses are single regs which can be used as a ++ base. */ ++ emit_move_insn (src_addr_reg, src_addr); ++ emit_move_insn (dst_addr_reg, dst_addr); ++ ++ /* MAIN LOOP */ ++ ++ emit_label (loop_label); ++ LABEL_NUSES (loop_label) = 1; ++ ++ emit_move_insn (vsrc, ++ gen_rtx_MEM (V16QImode, ++ gen_rtx_PLUS (Pmode, src_addr_reg, offset))); ++ ++ emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc, ++ GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS))); ++ add_reg_note (s390_emit_ccraw_jump (8, EQ, done_label), ++ REG_BR_PROB, GEN_INT (very_unlikely)); ++ ++ emit_move_insn (gen_rtx_MEM (V16QImode, ++ gen_rtx_PLUS (Pmode, dst_addr_reg, offset)), ++ vsrc); ++ /* offset += 16 */ ++ force_expand_binop (Pmode, add_optab, offset, GEN_INT (16), ++ offset, 1, OPTAB_DIRECT); ++ ++ emit_jump (loop_label); ++ emit_barrier (); ++ ++ /* REGULAR EXIT */ ++ ++ /* We are done. Add the offset of the zero character to the dst_addr ++ pointer to get the result. */ ++ ++ emit_label (done_label); ++ LABEL_NUSES (done_label) = 1; ++ ++ force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg, ++ 1, OPTAB_DIRECT); ++ ++ emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7))); ++ emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0)); ++ ++ emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg))); ++ ++ force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result, ++ 1, OPTAB_DIRECT); ++ ++ /* EARLY EXIT */ ++ ++ emit_label (exit_label); ++ LABEL_NUSES (exit_label) = 1; ++} ++ + + /* Expand conditional increment or decrement using alc/slb instructions. + Should generate code setting DST to either SRC or SRC + INCREMENT, +@@ -4964,6 +6244,304 @@ s390_expand_mask_and_shift (rtx val, enu + NULL_RTX, 1, OPTAB_DIRECT); + } + ++/* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store ++ the result in TARGET. 
*/ ++ ++void ++s390_expand_vec_compare (rtx target, enum rtx_code cond, ++ rtx cmp_op1, rtx cmp_op2) ++{ ++ enum machine_mode mode = GET_MODE (target); ++ bool neg_p = false, swap_p = false; ++ rtx tmp; ++ ++ if (GET_MODE (cmp_op1) == V2DFmode) ++ { ++ switch (cond) ++ { ++ /* NE a != b -> !(a == b) */ ++ case NE: cond = EQ; neg_p = true; break; ++ /* UNGT a u> b -> !(b >= a) */ ++ case UNGT: cond = GE; neg_p = true; swap_p = true; break; ++ /* UNGE a u>= b -> !(b > a) */ ++ case UNGE: cond = GT; neg_p = true; swap_p = true; break; ++ /* LE: a <= b -> b >= a */ ++ case LE: cond = GE; swap_p = true; break; ++ /* UNLE: a u<= b -> !(a > b) */ ++ case UNLE: cond = GT; neg_p = true; break; ++ /* LT: a < b -> b > a */ ++ case LT: cond = GT; swap_p = true; break; ++ /* UNLT: a u< b -> !(a >= b) */ ++ case UNLT: cond = GE; neg_p = true; break; ++ case UNEQ: ++ emit_insn (gen_vec_cmpuneqv2df (target, cmp_op1, cmp_op2)); ++ return; ++ case LTGT: ++ emit_insn (gen_vec_cmpltgtv2df (target, cmp_op1, cmp_op2)); ++ return; ++ case ORDERED: ++ emit_insn (gen_vec_orderedv2df (target, cmp_op1, cmp_op2)); ++ return; ++ case UNORDERED: ++ emit_insn (gen_vec_unorderedv2df (target, cmp_op1, cmp_op2)); ++ return; ++ default: break; ++ } ++ } ++ else ++ { ++ switch (cond) ++ { ++ /* NE: a != b -> !(a == b) */ ++ case NE: cond = EQ; neg_p = true; break; ++ /* GE: a >= b -> !(b > a) */ ++ case GE: cond = GT; neg_p = true; swap_p = true; break; ++ /* GEU: a >= b -> !(b > a) */ ++ case GEU: cond = GTU; neg_p = true; swap_p = true; break; ++ /* LE: a <= b -> !(a > b) */ ++ case LE: cond = GT; neg_p = true; break; ++ /* LEU: a <= b -> !(a > b) */ ++ case LEU: cond = GTU; neg_p = true; break; ++ /* LT: a < b -> b > a */ ++ case LT: cond = GT; swap_p = true; break; ++ /* LTU: a < b -> b > a */ ++ case LTU: cond = GTU; swap_p = true; break; ++ default: break; ++ } ++ } ++ ++ if (swap_p) ++ { ++ tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp; ++ } ++ ++ emit_insn (gen_rtx_SET (VOIDmode, ++ target, gen_rtx_fmt_ee (cond, ++ mode, ++ cmp_op1, cmp_op2))); ++ if (neg_p) ++ emit_insn (gen_rtx_SET (VOIDmode, target, gen_rtx_NOT (mode, target))); ++} ++ ++/* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into ++ TARGET if either all (ALL_P is true) or any (ALL_P is false) of the ++ elements in CMP1 and CMP2 fulfill the comparison. 
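++ This is the expansion behind the vec_all_* / vec_any_* style predicates; e.g. an expansion with ALL_P true and CODE == EQ corresponds to vec_all_eq.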
*/ ++void ++s390_expand_vec_compare_cc (rtx target, enum rtx_code code, ++ rtx cmp1, rtx cmp2, bool all_p) ++{ ++ enum rtx_code new_code = code; ++ enum machine_mode cmp_mode, full_cmp_mode, scratch_mode; ++ rtx tmp_reg = gen_reg_rtx (SImode); ++ bool swap_p = false; ++ ++ if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT) ++ { ++ switch (code) ++ { ++ case EQ: cmp_mode = CCVEQmode; break; ++ case NE: cmp_mode = CCVEQmode; break; ++ case GT: cmp_mode = CCVHmode; break; ++ case GE: cmp_mode = CCVHmode; new_code = LE; swap_p = true; break; ++ case LT: cmp_mode = CCVHmode; new_code = GT; swap_p = true; break; ++ case LE: cmp_mode = CCVHmode; new_code = LE; break; ++ case GTU: cmp_mode = CCVHUmode; break; ++ case GEU: cmp_mode = CCVHUmode; new_code = LEU; swap_p = true; break; ++ case LTU: cmp_mode = CCVHUmode; new_code = GTU; swap_p = true; break; ++ case LEU: cmp_mode = CCVHUmode; new_code = LEU; break; ++ default: gcc_unreachable (); ++ } ++ scratch_mode = GET_MODE (cmp1); ++ } ++ else if (GET_MODE (cmp1) == V2DFmode) ++ { ++ switch (code) ++ { ++ case EQ: cmp_mode = CCVEQmode; break; ++ case NE: cmp_mode = CCVEQmode; break; ++ case GT: cmp_mode = CCVFHmode; break; ++ case GE: cmp_mode = CCVFHEmode; break; ++ case UNLE: cmp_mode = CCVFHmode; break; ++ case UNLT: cmp_mode = CCVFHEmode; break; ++ case LT: cmp_mode = CCVFHmode; new_code = GT; swap_p = true; break; ++ case LE: cmp_mode = CCVFHEmode; new_code = GE; swap_p = true; break; ++ default: gcc_unreachable (); ++ } ++ scratch_mode = V2DImode; ++ } ++ else ++ gcc_unreachable (); ++ ++ if (!all_p) ++ switch (cmp_mode) ++ { ++ case CCVEQmode: full_cmp_mode = CCVEQANYmode; break; ++ case CCVHmode: full_cmp_mode = CCVHANYmode; break; ++ case CCVHUmode: full_cmp_mode = CCVHUANYmode; break; ++ case CCVFHmode: full_cmp_mode = CCVFHANYmode; break; ++ case CCVFHEmode: full_cmp_mode = CCVFHEANYmode; break; ++ default: gcc_unreachable (); ++ } ++ else ++ /* The modes without ANY match the ALL modes. */ ++ full_cmp_mode = cmp_mode; ++ ++ if (swap_p) ++ { ++ rtx tmp = cmp2; ++ cmp2 = cmp1; ++ cmp1 = tmp; ++ } ++ ++ emit_insn (gen_rtx_PARALLEL (VOIDmode, ++ gen_rtvec (2, gen_rtx_SET ( ++ VOIDmode, ++ gen_rtx_REG (cmp_mode, CC_REGNUM), ++ gen_rtx_COMPARE (cmp_mode, cmp1, cmp2)), ++ gen_rtx_CLOBBER (VOIDmode, ++ gen_rtx_SCRATCH (scratch_mode))))); ++ emit_move_insn (target, const0_rtx); ++ emit_move_insn (tmp_reg, const1_rtx); ++ ++ emit_move_insn (target, ++ gen_rtx_IF_THEN_ELSE (SImode, ++ gen_rtx_fmt_ee (new_code, VOIDmode, ++ gen_rtx_REG (full_cmp_mode, CC_REGNUM), ++ const0_rtx), ++ target, tmp_reg)); ++} ++ ++/* Generate a vector comparison expression loading either elements of ++ THEN or ELS into TARGET depending on the comparison COND of CMP_OP1 ++ and CMP_OP2. */ ++ ++void ++s390_expand_vcond (rtx target, rtx then, rtx els, ++ enum rtx_code cond, rtx cmp_op1, rtx cmp_op2) ++{ ++ rtx tmp; ++ enum machine_mode result_mode; ++ rtx result_target; ++ ++ /* We always use an integral type vector to hold the comparison ++ result. */ ++ result_mode = GET_MODE (cmp_op1) == V2DFmode ? V2DImode : GET_MODE (cmp_op1); ++ result_target = gen_reg_rtx (result_mode); ++ ++ /* Alternatively this could be done by reload by lowering the cmp* ++ predicates. But it appears to be better for scheduling etc. to ++ have that in early. 
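++
++     Sketch of the overall expansion (names as in this function):
++     s390_expand_vec_compare leaves a -1/0 element mask in
++     result_target.  If THEN and ELS are exactly -1 and 0 that mask
++     already is the result; otherwise a vector select against the
++     mask is emitted further down.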
*/ ++ if (!REG_P (cmp_op1)) ++ cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1); ++ ++ if (!REG_P (cmp_op2)) ++ cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2); ++ ++ s390_expand_vec_compare (result_target, cond, ++ cmp_op1, cmp_op2); ++ ++ /* If the results are supposed to be either -1 or 0 we are done ++ since this is what our compare instructions generate anyway. */ ++ if (all_ones_operand (then, GET_MODE (then)) ++ && const0_operand (els, GET_MODE (els))) ++ { ++ emit_move_insn (target, gen_rtx_SUBREG (GET_MODE (target), ++ result_target, 0)); ++ return; ++ } ++ ++ /* Otherwise we will do a vsel afterwards. */ ++ /* This gets triggered e.g. ++ with gcc.c-torture/compile/pr53410-1.c */ ++ if (!REG_P (then)) ++ then = force_reg (GET_MODE (target), then); ++ ++ if (!REG_P (els)) ++ els = force_reg (GET_MODE (target), els); ++ ++ tmp = gen_rtx_fmt_ee (EQ, VOIDmode, ++ result_target, ++ CONST0_RTX (result_mode)); ++ ++ /* We compared the result against zero above so we have to swap then ++ and els here. */ ++ tmp = gen_rtx_IF_THEN_ELSE (GET_MODE (target), tmp, els, then); ++ ++ gcc_assert (GET_MODE (target) == GET_MODE (then)); ++ emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); ++} ++ ++/* Emit the RTX necessary to initialize the vector TARGET with values ++ in VALS. */ ++void ++s390_expand_vec_init (rtx target, rtx vals) ++{ ++ enum machine_mode mode = GET_MODE (target); ++ enum machine_mode inner_mode = GET_MODE_INNER (mode); ++ int n_elts = GET_MODE_NUNITS (mode); ++ bool all_same = true, all_regs = true, all_const_int = true; ++ rtx x; ++ int i; ++ ++ for (i = 0; i < n_elts; ++i) ++ { ++ x = XVECEXP (vals, 0, i); ++ ++ if (!CONST_INT_P (x)) ++ all_const_int = false; ++ ++ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) ++ all_same = false; ++ ++ if (!REG_P (x)) ++ all_regs = false; ++ } ++ ++ /* Use vector gen mask or vector gen byte mask if possible. */ ++ if (all_same && all_const_int ++ && (XVECEXP (vals, 0, 0) == const0_rtx ++ || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0), ++ NULL, NULL) ++ || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL))) ++ { ++ emit_insn (gen_rtx_SET (VOIDmode, target, ++ gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)))); ++ return; ++ } ++ ++ if (all_same) ++ { ++ emit_insn (gen_rtx_SET (VOIDmode, target, ++ gen_rtx_VEC_DUPLICATE (mode, ++ XVECEXP (vals, 0, 0)))); ++ return; ++ } ++ ++ if (all_regs && REG_P (target) && n_elts == 2 && inner_mode == DImode) ++ { ++ /* Use vector load pair. */ ++ emit_insn (gen_rtx_SET (VOIDmode, target, ++ gen_rtx_VEC_CONCAT (mode, ++ XVECEXP (vals, 0, 0), ++ XVECEXP (vals, 0, 1)))); ++ return; ++ } ++ ++ /* We are about to set the vector elements one by one. Zero out the ++ full register first in order to help the data flow framework to ++ detect it as full VR set. */ ++ emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode))); ++ ++ /* Unfortunately the vec_init expander is not allowed to fail. So ++ we have to implement the fallback ourselves. */ ++ for (i = 0; i < n_elts; i++) ++ emit_insn (gen_rtx_SET (VOIDmode, target, ++ gen_rtx_UNSPEC (mode, ++ gen_rtvec (3, XVECEXP (vals, 0, i), ++ GEN_INT (i), target), ++ UNSPEC_VEC_SET))); ++} ++ + /* Structure to hold the initial parameters for a compare_and_swap operation + in HImode and QImode. */ + +@@ -5259,12 +6837,37 @@ s390_output_dwarf_dtprel (FILE *file, in + fputs ("@DTPOFF", file); + } + ++/* Return the proper mode for REGNO being represented in the dwarf ++ unwind table. 
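++
++   Only the leftmost 64 bits of a vector register survive a function
++   call (the rightmost half is clobbered, see below), so a register
++   tracked in a mode wider than 8 bytes must not be described with
++   that full mode.  Illustrative effect: a VR live in V16QImode is
++   recorded in the unwind table as a DImode save.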
*/ ++enum machine_mode ++s390_dwarf_frame_reg_mode (int regno) ++{ ++ enum machine_mode save_mode = default_dwarf_frame_reg_mode (regno); ++ ++ /* The rightmost 64 bits of vector registers are call-clobbered. */ ++ if (GET_MODE_SIZE (save_mode) > 8) ++ save_mode = DImode; ++ ++ return save_mode; ++} ++ + #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING + /* Implement TARGET_MANGLE_TYPE. */ + + static const char * + s390_mangle_type (const_tree type) + { ++ type = TYPE_MAIN_VARIANT (type); ++ ++ if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE ++ && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) ++ return NULL; ++ ++ if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc"; ++ if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools"; ++ if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli"; ++ if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll"; ++ + if (TYPE_MAIN_VARIANT (type) == long_double_type_node + && TARGET_LONG_DOUBLE_128) + return "g"; +@@ -5627,24 +7230,26 @@ print_operand_address (FILE *file, rtx a + 'J': print tls_load/tls_gdcall/tls_ldcall suffix + 'M': print the second word of a TImode operand. + 'N': print the second word of a DImode operand. +- 'O': print only the displacement of a memory reference. +- 'R': print only the base register of a memory reference. ++ 'O': print only the displacement of a memory reference or address. ++ 'R': print only the base register of a memory reference or address. + 'S': print S-type memory reference (base+displacement). + 'Y': print shift count operand. + + 'b': print integer X as if it's an unsigned byte. + 'c': print integer X as if it's an signed byte. +- 'e': "end" of DImode contiguous bitmask X. +- 'f': "end" of SImode contiguous bitmask X. ++ 'e': "end" contiguous bitmask X in either DImode or vector inner mode. ++ 'f': "end" contiguous bitmask X in SImode. + 'h': print integer X as if it's a signed halfword. + 'i': print the first nonzero HImode part of X. + 'j': print the first HImode part unequal to -1 of X. + 'k': print the first nonzero SImode part of X. + 'm': print the first SImode part unequal to -1 of X. + 'o': print integer X as if it's an unsigned 32bit word. +- 's': "start" of DImode contiguous bitmask X. +- 't': "start" of SImode contiguous bitmask X. ++ 's': "start" of contiguous bitmask X in either DImode or vector inner mode. ++ 't': CONST_INT: "start" of contiguous bitmask X in SImode. ++ CONST_VECTOR: Generate a bitmask for vgbm instruction. + 'x': print integer X as if it's an unsigned halfword. ++ 'v': print register number as vector register (v1 instead of f1). + */ + + void +@@ -5701,14 +7306,7 @@ print_operand (FILE *file, rtx x, int co + struct s390_address ad; + int ret; + +- if (!MEM_P (x)) +- { +- output_operand_lossage ("memory reference expected for " +- "'O' output modifier"); +- return; +- } +- +- ret = s390_decompose_address (XEXP (x, 0), &ad); ++ ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad); + + if (!ret + || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base))) +@@ -5730,14 +7328,7 @@ print_operand (FILE *file, rtx x, int co + struct s390_address ad; + int ret; + +- if (!MEM_P (x)) +- { +- output_operand_lossage ("memory reference expected for " +- "'R' output modifier"); +- return; +- } +- +- ret = s390_decompose_address (XEXP (x, 0), &ad); ++ ret = s390_decompose_address (MEM_P (x) ? 
XEXP (x, 0) : x, &ad); + + if (!ret + || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base))) +@@ -5815,7 +7406,17 @@ print_operand (FILE *file, rtx x, int co + switch (GET_CODE (x)) + { + case REG: +- fprintf (file, "%s", reg_names[REGNO (x)]); ++ /* Print FP regs as fx instead of vx when they are accessed ++ through non-vector mode. */ ++ if (code == 'v' ++ || VECTOR_NOFP_REG_P (x) ++ || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x))) ++ || (VECTOR_REG_P (x) ++ && (GET_MODE_SIZE (GET_MODE (x)) / ++ s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8)) ++ fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2); ++ else ++ fprintf (file, "%s", reg_names[REGNO (x)]); + break; + + case MEM: +@@ -5902,6 +7503,44 @@ print_operand (FILE *file, rtx x, int co + code); + } + break; ++ case CONST_VECTOR: ++ switch (code) ++ { ++ case 'h': ++ gcc_assert (s390_const_vec_duplicate_p (x)); ++ fprintf (file, HOST_WIDE_INT_PRINT_DEC, ++ ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000); ++ break; ++ case 'e': ++ case 's': ++ { ++ int start, stop, inner_len; ++ bool ok; ++ ++ inner_len = GET_MODE_UNIT_BITSIZE (GET_MODE (x)); ++ ok = s390_contiguous_bitmask_vector_p (x, &start, &stop); ++ gcc_assert (ok); ++ if (code == 's' || code == 't') ++ ival = inner_len - stop - 1; ++ else ++ ival = inner_len - start - 1; ++ fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival); ++ } ++ break; ++ case 't': ++ { ++ unsigned mask; ++ bool ok = s390_bytemask_vector_p (x, &mask); ++ gcc_assert (ok); ++ fprintf (file, "%u", mask); ++ } ++ break; ++ ++ default: ++ output_operand_lossage ("invalid constant vector for output " ++ "modifier '%c'", code); ++ } ++ break; + + default: + if (code == 0) +@@ -6051,7 +7690,8 @@ s390_adjust_priority (rtx insn ATTRIBUTE + && s390_tune != PROCESSOR_2094_Z9_109 + && s390_tune != PROCESSOR_2097_Z10 + && s390_tune != PROCESSOR_2817_Z196 +- && s390_tune != PROCESSOR_2827_ZEC12) ++ && s390_tune != PROCESSOR_2827_ZEC12 ++ && s390_tune != PROCESSOR_2964_Z13) + return priority; + + switch (s390_safe_attr_type (insn)) +@@ -6459,14 +8099,20 @@ replace_ltrel_base (rtx *x) + /* We keep a list of constants which we have to add to internal + constant tables in the middle of large functions. */ + +-#define NR_C_MODES 11 ++#define NR_C_MODES 32 + enum machine_mode constant_modes[NR_C_MODES] = + { + TFmode, TImode, TDmode, ++ V16QImode, V8HImode, V4SImode, V2DImode, V1TImode, ++ V4SFmode, V2DFmode, V1TFmode, + DFmode, DImode, DDmode, ++ V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode, + SFmode, SImode, SDmode, ++ V4QImode, V2HImode, V1SImode, V1SFmode, + HImode, +- QImode ++ V2QImode, V1HImode, ++ QImode, ++ V1QImode + }; + + struct constant +@@ -7490,6 +9136,23 @@ s390_output_pool_entry (rtx exp, enum ma + mark_symbol_refs_as_used (exp); + break; + ++ case MODE_VECTOR_INT: ++ case MODE_VECTOR_FLOAT: ++ { ++ int i; ++ enum machine_mode inner_mode; ++ gcc_assert (GET_CODE (exp) == CONST_VECTOR); ++ ++ inner_mode = GET_MODE_INNER (GET_MODE (exp)); ++ for (i = 0; i < XVECLEN (exp, 0); i++) ++ s390_output_pool_entry (XVECEXP (exp, 0, i), ++ inner_mode, ++ i == 0 ++ ? 
align ++ : GET_MODE_BITSIZE (inner_mode)); ++ } ++ break; ++ + default: + gcc_unreachable (); + } +@@ -8205,9 +9868,25 @@ s390_update_frame_layout (void) + bool + s390_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode) + { ++ if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno)) ++ return false; ++ + switch (REGNO_REG_CLASS (regno)) + { ++ case VEC_REGS: ++ return ((GET_MODE_CLASS (mode) == MODE_INT ++ && s390_class_max_nregs (VEC_REGS, mode) == 1) ++ || mode == DFmode ++ || s390_vector_mode_supported_p (mode)); ++ break; + case FP_REGS: ++ if (TARGET_VX ++ && ((GET_MODE_CLASS (mode) == MODE_INT ++ && s390_class_max_nregs (FP_REGS, mode) == 1) ++ || mode == DFmode ++ || s390_vector_mode_supported_p (mode))) ++ return true; ++ + if (REGNO_PAIR_OK (regno, mode)) + { + if (mode == SImode || mode == DImode) +@@ -8269,19 +9948,86 @@ s390_hard_regno_rename_ok (unsigned int + int + s390_class_max_nregs (enum reg_class rclass, enum machine_mode mode) + { ++ int reg_size; ++ bool reg_pair_required_p = false; ++ + switch (rclass) + { + case FP_REGS: ++ case VEC_REGS: ++ reg_size = TARGET_VX ? 16 : 8; ++ ++ /* TF and TD modes would fit into a VR but we put them into a ++ register pair since we do not have 128bit FP instructions on ++ full VRs. */ ++ if (TARGET_VX ++ && SCALAR_FLOAT_MODE_P (mode) ++ && GET_MODE_SIZE (mode) >= 16) ++ reg_pair_required_p = true; ++ ++ /* Even if complex types would fit into a single FPR/VR we force ++ them into a register pair to deal with the parts more easily. ++ (FIXME: What about complex ints?) */ + if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) +- return 2 * ((GET_MODE_SIZE (mode) / 2 + 8 - 1) / 8); +- else +- return (GET_MODE_SIZE (mode) + 8 - 1) / 8; ++ reg_pair_required_p = true; ++ break; + case ACCESS_REGS: +- return (GET_MODE_SIZE (mode) + 4 - 1) / 4; ++ reg_size = 4; ++ break; + default: ++ reg_size = UNITS_PER_WORD; + break; + } +- return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; ++ ++ if (reg_pair_required_p) ++ return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size); ++ ++ return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size; ++} ++ ++/* Return TRUE if changing mode from FROM to TO should not be allowed ++ for register class CLASS. */ ++ ++int ++s390_cannot_change_mode_class (enum machine_mode from_mode, ++ enum machine_mode to_mode, ++ enum reg_class rclass) ++{ ++ enum machine_mode small_mode; ++ enum machine_mode big_mode; ++ ++ if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode)) ++ return 0; ++ ++ if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode)) ++ { ++ small_mode = from_mode; ++ big_mode = to_mode; ++ } ++ else ++ { ++ small_mode = to_mode; ++ big_mode = from_mode; ++ } ++ ++ /* Values residing in VRs are little-endian style. All modes are ++ placed left-aligned in an VR. This means that we cannot allow ++ switching between modes with differing sizes. Also if the vector ++ facility is available we still place TFmode values in VR register ++ pairs, since the only instructions we have operating on TFmodes ++ only deal with register pairs. Therefore we have to allow DFmode ++ subregs of TFmodes to enable the TFmode splitters. */ ++ if (reg_classes_intersect_p (VEC_REGS, rclass) ++ && (GET_MODE_SIZE (small_mode) < 8 ++ || s390_class_max_nregs (VEC_REGS, big_mode) == 1)) ++ return 1; ++ ++ /* Likewise for access registers, since they have only half the ++ word size on 64-bit. 
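++
++     Illustrative consequences of the check above: V16QImode and
++     V4SImode subregs of each other stay allowed (equal size), DFmode
++     subregs of TFmode stay allowed so the TFmode splitters keep
++     working, while for instance SImode subregs of a DImode value in
++     a VR are rejected because values are left-aligned in the
++     register.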
*/ ++ if (reg_classes_intersect_p (ACCESS_REGS, rclass)) ++ return 1; ++ ++ return 0; + } + + /* Return true if register FROM can be eliminated via register TO. */ +@@ -9112,6 +10858,23 @@ s390_emit_epilogue (bool sibcall) + } + + ++/* The VX ABI differs for vararg functions. Therefore we need the ++ prototype of the callee to be available when passing vector type ++ values. */ ++static const char * ++s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val) ++{ ++ return ((TARGET_VX_ABI ++ && typelist == 0 ++ && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE ++ && (funcdecl == NULL_TREE ++ || (TREE_CODE (funcdecl) == FUNCTION_DECL ++ && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD))) ++ ? N_("Vector argument passed to unprototyped function") ++ : NULL); ++} ++ ++ + /* Return the size in bytes of a function argument of + type TYPE and/or mode MODE. At least one of TYPE or + MODE must be specified. */ +@@ -9126,8 +10889,57 @@ s390_function_arg_size (enum machine_mod + if (mode != BLKmode) + return GET_MODE_SIZE (mode); + +- /* If we have neither type nor mode, abort */ +- gcc_unreachable (); ++ /* If we have neither type nor mode, abort */ ++ gcc_unreachable (); ++} ++ ++/* Return true if a function argument of type TYPE and mode MODE ++ is to be passed in a vector register, if available. */ ++ ++bool ++s390_function_arg_vector (enum machine_mode mode, const_tree type) ++{ ++ if (!TARGET_VX_ABI) ++ return false; ++ ++ if (s390_function_arg_size (mode, type) > 16) ++ return false; ++ ++ /* No type info available for some library calls ... */ ++ if (!type) ++ return VECTOR_MODE_P (mode); ++ ++ /* The ABI says that record types with a single member are treated ++ just like that member would be. */ ++ while (TREE_CODE (type) == RECORD_TYPE) ++ { ++ tree field, single = NULL_TREE; ++ ++ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) != FIELD_DECL) ++ continue; ++ ++ if (single == NULL_TREE) ++ single = TREE_TYPE (field); ++ else ++ return false; ++ } ++ ++ if (single == NULL_TREE) ++ return false; ++ else ++ { ++ /* If the field declaration adds extra byte due to ++ e.g. padding this is not accepted as vector type. */ ++ if (int_size_in_bytes (single) <= 0 ++ || int_size_in_bytes (single) != int_size_in_bytes (type)) ++ return false; ++ type = single; ++ } ++ } ++ ++ return TREE_CODE (type) == VECTOR_TYPE; + } + + /* Return true if a function argument of type TYPE and mode MODE +@@ -9136,8 +10948,7 @@ s390_function_arg_size (enum machine_mod + static bool + s390_function_arg_float (enum machine_mode mode, const_tree type) + { +- int size = s390_function_arg_size (mode, type); +- if (size > 8) ++ if (s390_function_arg_size (mode, type) > 8) + return false; + + /* Soft-float changes the ABI: no floating-point registers are used. 
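++
++     For illustration: a float or double argument (at most 8 bytes,
++     checked above) is an FPR candidate; under soft-float no FPRs
++     are used, so such a value travels in general registers or on
++     the stack like any other argument.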
*/ +@@ -9220,20 +11031,24 @@ s390_pass_by_reference (cumulative_args_ + bool named ATTRIBUTE_UNUSED) + { + int size = s390_function_arg_size (mode, type); ++ ++ if (s390_function_arg_vector (mode, type)) ++ return false; ++ + if (size > 8) + return true; + + if (type) + { + if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0) +- return 1; ++ return true; + + if (TREE_CODE (type) == COMPLEX_TYPE + || TREE_CODE (type) == VECTOR_TYPE) +- return 1; ++ return true; + } + +- return 0; ++ return false; + } + + /* Update the data in CUM to advance over an argument of mode MODE and +@@ -9244,11 +11059,21 @@ s390_pass_by_reference (cumulative_args_ + + static void + s390_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode, +- const_tree type, bool named ATTRIBUTE_UNUSED) ++ const_tree type, bool named) + { + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + +- if (s390_function_arg_float (mode, type)) ++ if (s390_function_arg_vector (mode, type)) ++ { ++ /* We are called for unnamed vector stdarg arguments which are ++ passed on the stack. In this case this hook does not have to ++ do anything since stack arguments are tracked by common ++ code. */ ++ if (!named) ++ return; ++ cum->vrs += 1; ++ } ++ else if (s390_function_arg_float (mode, type)) + { + cum->fprs += 1; + } +@@ -9282,14 +11107,26 @@ s390_function_arg_advance (cumulative_ar + + static rtx + s390_function_arg (cumulative_args_t cum_v, enum machine_mode mode, +- const_tree type, bool named ATTRIBUTE_UNUSED) ++ const_tree type, bool named) + { + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + +- if (s390_function_arg_float (mode, type)) ++ if (!named) ++ s390_check_type_for_vector_abi (type, true, false); ++ ++ if (s390_function_arg_vector (mode, type)) ++ { ++ /* Vector arguments being part of the ellipsis are passed on the ++ stack. */ ++ if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG)) ++ return NULL_RTX; ++ ++ return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO); ++ } ++ else if (s390_function_arg_float (mode, type)) + { + if (cum->fprs + 1 > FP_ARG_NUM_REG) +- return 0; ++ return NULL_RTX; + else + return gen_rtx_REG (mode, cum->fprs + 16); + } +@@ -9299,7 +11136,7 @@ s390_function_arg (cumulative_args_t cum + int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG; + + if (cum->gprs + n_gprs > GP_ARG_NUM_REG) +- return 0; ++ return NULL_RTX; + else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG) + return gen_rtx_REG (mode, cum->gprs + 2); + else if (n_gprs == 2) +@@ -9342,6 +11179,12 @@ s390_return_in_memory (const_tree type, + || TREE_CODE (type) == REAL_TYPE) + return int_size_in_bytes (type) > 8; + ++ /* vector types which fit into a VR. */ ++ if (TARGET_VX_ABI ++ && TREE_CODE (type) == VECTOR_TYPE ++ && int_size_in_bytes (type) <= 16) ++ return false; ++ + /* Aggregates and similar constructs are always returned + in memory. */ + if (AGGREGATE_TYPE_P (type) +@@ -9384,6 +11227,12 @@ s390_function_and_libcall_value (enum ma + const_tree fntype_or_decl, + bool outgoing ATTRIBUTE_UNUSED) + { ++ /* For vector return types it is important to use the RET_TYPE ++ argument whenever available since the middle-end might have ++ changed the mode to a scalar mode. */ ++ bool vector_ret_type_p = ((ret_type && TREE_CODE (ret_type) == VECTOR_TYPE) ++ || (!ret_type && VECTOR_MODE_P (mode))); ++ + /* For normal functions perform the promotion as + promote_function_mode would do. 
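++
++     Register selection further down, for illustration: a vector
++     result of at most 16 bytes comes back in the first vector
++     argument register, a hard-float scalar in %f0 (register 16),
++     and everything else in general registers.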
*/ + if (ret_type) +@@ -9393,10 +11242,14 @@ s390_function_and_libcall_value (enum ma + fntype_or_decl, 1); + } + +- gcc_assert (GET_MODE_CLASS (mode) == MODE_INT || SCALAR_FLOAT_MODE_P (mode)); +- gcc_assert (GET_MODE_SIZE (mode) <= 8); +- +- if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode)) ++ gcc_assert (GET_MODE_CLASS (mode) == MODE_INT ++ || SCALAR_FLOAT_MODE_P (mode) ++ || (TARGET_VX_ABI && vector_ret_type_p)); ++ gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8)); ++ ++ if (TARGET_VX_ABI && vector_ret_type_p) ++ return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO); ++ else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode)) + return gen_rtx_REG (mode, 16); + else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG + || UNITS_PER_LONG == UNITS_PER_WORD) +@@ -9560,9 +11413,13 @@ s390_va_start (tree valist, rtx nextarg + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + } + +- /* Find the overflow area. */ ++ /* Find the overflow area. ++ FIXME: This currently is too pessimistic when the vector ABI is ++ enabled. In that case we *always* set up the overflow area ++ pointer. */ + if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG +- || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG) ++ || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG ++ || TARGET_VX_ABI) + { + t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx); + +@@ -9604,6 +11461,9 @@ s390_va_start (tree valist, rtx nextarg + ret = args.reg_save_area[args.gpr+8] + else + ret = *args.overflow_arg_area++; ++ } else if (vector value) { ++ ret = *args.overflow_arg_area; ++ args.overflow_arg_area += size / 8; + } else if (float value) { + if (args.fgpr < 2) + ret = args.reg_save_area[args.fpr+64] +@@ -9623,7 +11483,10 @@ s390_gimplify_va_arg (tree valist, tree + tree f_gpr, f_fpr, f_ovf, f_sav; + tree gpr, fpr, ovf, sav, reg, t, u; + int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg; +- tree lab_false, lab_over, addr; ++ tree lab_false, lab_over; ++ tree addr = create_tmp_var (ptr_type_node, "addr"); ++ bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within ++ a stack slot. */ + + f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node)); + f_fpr = DECL_CHAIN (f_gpr); +@@ -9642,6 +11505,8 @@ s390_gimplify_va_arg (tree valist, tree + + size = int_size_in_bytes (type); + ++ s390_check_type_for_vector_abi (type, true, false); ++ + if (pass_by_reference (NULL, TYPE_MODE (type), type, false)) + { + if (TARGET_DEBUG_ARG) +@@ -9662,6 +11527,23 @@ s390_gimplify_va_arg (tree valist, tree + sav_scale = UNITS_PER_LONG; + size = UNITS_PER_LONG; + max_reg = GP_ARG_NUM_REG - n_reg; ++ left_align_p = false; ++ } ++ else if (s390_function_arg_vector (TYPE_MODE (type), type)) ++ { ++ if (TARGET_DEBUG_ARG) ++ { ++ fprintf (stderr, "va_arg: vector type"); ++ debug_tree (type); ++ } ++ ++ indirect_p = 0; ++ reg = NULL_TREE; ++ n_reg = 0; ++ sav_ofs = 0; ++ sav_scale = 8; ++ max_reg = 0; ++ left_align_p = true; + } + else if (s390_function_arg_float (TYPE_MODE (type), type)) + { +@@ -9678,6 +11560,7 @@ s390_gimplify_va_arg (tree valist, tree + sav_ofs = 16 * UNITS_PER_LONG; + sav_scale = 8; + max_reg = FP_ARG_NUM_REG - n_reg; ++ left_align_p = false; + } + else + { +@@ -9702,53 +11585,74 @@ s390_gimplify_va_arg (tree valist, tree + + sav_scale = UNITS_PER_LONG; + max_reg = GP_ARG_NUM_REG - n_reg; ++ left_align_p = false; + } + + /* Pull the value out of the saved registers ... 
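++
++     (For vector arguments REG is NULL_TREE and N_REG is 0, so both
++     the register branch and the final register-count increment are
++     skipped below; such values are always fetched left-aligned from
++     the overflow area.)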
*/ + +- lab_false = create_artificial_label (UNKNOWN_LOCATION); +- lab_over = create_artificial_label (UNKNOWN_LOCATION); +- addr = create_tmp_var (ptr_type_node, "addr"); +- +- t = fold_convert (TREE_TYPE (reg), size_int (max_reg)); +- t = build2 (GT_EXPR, boolean_type_node, reg, t); +- u = build1 (GOTO_EXPR, void_type_node, lab_false); +- t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE); +- gimplify_and_add (t, pre_p); +- +- t = fold_build_pointer_plus_hwi (sav, sav_ofs); +- u = build2 (MULT_EXPR, TREE_TYPE (reg), reg, +- fold_convert (TREE_TYPE (reg), size_int (sav_scale))); +- t = fold_build_pointer_plus (t, u); ++ if (reg != NULL_TREE) ++ { ++ /* ++ if (reg > ((typeof (reg))max_reg)) ++ goto lab_false; + +- gimplify_assign (addr, t, pre_p); ++ addr = sav + sav_ofs + reg * save_scale; ++ ++ goto lab_over; ++ ++ lab_false: ++ */ + +- gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over)); ++ lab_false = create_artificial_label (UNKNOWN_LOCATION); ++ lab_over = create_artificial_label (UNKNOWN_LOCATION); + +- gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false)); ++ t = fold_convert (TREE_TYPE (reg), size_int (max_reg)); ++ t = build2 (GT_EXPR, boolean_type_node, reg, t); ++ u = build1 (GOTO_EXPR, void_type_node, lab_false); ++ t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE); ++ gimplify_and_add (t, pre_p); + ++ t = fold_build_pointer_plus_hwi (sav, sav_ofs); ++ u = build2 (MULT_EXPR, TREE_TYPE (reg), reg, ++ fold_convert (TREE_TYPE (reg), size_int (sav_scale))); ++ t = fold_build_pointer_plus (t, u); ++ ++ gimplify_assign (addr, t, pre_p); ++ ++ gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over)); ++ ++ gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false)); ++ } + + /* ... Otherwise out of the overflow area. */ + + t = ovf; +- if (size < UNITS_PER_LONG) ++ if (size < UNITS_PER_LONG && !left_align_p) + t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size); + + gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue); + + gimplify_assign (addr, t, pre_p); + +- t = fold_build_pointer_plus_hwi (t, size); ++ if (size < UNITS_PER_LONG && left_align_p) ++ t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG); ++ else ++ t = fold_build_pointer_plus_hwi (t, size); ++ + gimplify_assign (ovf, t, pre_p); + +- gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over)); ++ if (reg != NULL_TREE) ++ gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over)); + + + /* Increment register save count. */ + +- u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg, +- fold_convert (TREE_TYPE (reg), size_int (n_reg))); +- gimplify_and_add (u, pre_p); ++ if (n_reg > 0) ++ { ++ u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg, ++ fold_convert (TREE_TYPE (reg), size_int (n_reg))); ++ gimplify_and_add (u, pre_p); ++ } + + if (indirect_p) + { +@@ -9793,7 +11697,14 @@ s390_expand_tbegin (rtx dest, rtx tdb, r + } + + if (clobber_fprs_p) +- emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK), tdb)); ++ { ++ if (TARGET_VX) ++ emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK), ++ tdb)); ++ else ++ emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK), ++ tdb)); ++ } + else + emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK), + tdb)); +@@ -9831,210 +11742,6 @@ s390_expand_tbegin (rtx dest, rtx tdb, r + } + } + +-/* Builtins. 
*/ +- +-enum s390_builtin +-{ +- S390_BUILTIN_TBEGIN, +- S390_BUILTIN_TBEGIN_NOFLOAT, +- S390_BUILTIN_TBEGIN_RETRY, +- S390_BUILTIN_TBEGIN_RETRY_NOFLOAT, +- S390_BUILTIN_TBEGINC, +- S390_BUILTIN_TEND, +- S390_BUILTIN_TABORT, +- S390_BUILTIN_NON_TX_STORE, +- S390_BUILTIN_TX_NESTING_DEPTH, +- S390_BUILTIN_TX_ASSIST, +- +- S390_BUILTIN_max +-}; +- +-static enum insn_code const code_for_builtin[S390_BUILTIN_max] = { +- CODE_FOR_tbegin, +- CODE_FOR_tbegin_nofloat, +- CODE_FOR_tbegin_retry, +- CODE_FOR_tbegin_retry_nofloat, +- CODE_FOR_tbeginc, +- CODE_FOR_tend, +- CODE_FOR_tabort, +- CODE_FOR_ntstg, +- CODE_FOR_etnd, +- CODE_FOR_tx_assist +-}; +- +-static void +-s390_init_builtins (void) +-{ +- tree ftype, uint64_type; +- tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"), +- NULL, NULL); +- tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL); +- +- /* void foo (void) */ +- ftype = build_function_type_list (void_type_node, NULL_TREE); +- add_builtin_function ("__builtin_tbeginc", ftype, S390_BUILTIN_TBEGINC, +- BUILT_IN_MD, NULL, NULL_TREE); +- +- /* void foo (int) */ +- ftype = build_function_type_list (void_type_node, integer_type_node, +- NULL_TREE); +- add_builtin_function ("__builtin_tabort", ftype, +- S390_BUILTIN_TABORT, BUILT_IN_MD, NULL, noreturn_attr); +- add_builtin_function ("__builtin_tx_assist", ftype, +- S390_BUILTIN_TX_ASSIST, BUILT_IN_MD, NULL, NULL_TREE); +- +- /* int foo (void *) */ +- ftype = build_function_type_list (integer_type_node, ptr_type_node, NULL_TREE); +- add_builtin_function ("__builtin_tbegin", ftype, S390_BUILTIN_TBEGIN, +- BUILT_IN_MD, NULL, returns_twice_attr); +- add_builtin_function ("__builtin_tbegin_nofloat", ftype, +- S390_BUILTIN_TBEGIN_NOFLOAT, +- BUILT_IN_MD, NULL, returns_twice_attr); +- +- /* int foo (void *, int) */ +- ftype = build_function_type_list (integer_type_node, ptr_type_node, +- integer_type_node, NULL_TREE); +- add_builtin_function ("__builtin_tbegin_retry", ftype, +- S390_BUILTIN_TBEGIN_RETRY, +- BUILT_IN_MD, +- NULL, returns_twice_attr); +- add_builtin_function ("__builtin_tbegin_retry_nofloat", ftype, +- S390_BUILTIN_TBEGIN_RETRY_NOFLOAT, +- BUILT_IN_MD, +- NULL, returns_twice_attr); +- +- /* int foo (void) */ +- ftype = build_function_type_list (integer_type_node, NULL_TREE); +- add_builtin_function ("__builtin_tx_nesting_depth", ftype, +- S390_BUILTIN_TX_NESTING_DEPTH, +- BUILT_IN_MD, NULL, NULL_TREE); +- add_builtin_function ("__builtin_tend", ftype, +- S390_BUILTIN_TEND, BUILT_IN_MD, NULL, NULL_TREE); +- +- /* void foo (uint64_t *, uint64_t) */ +- if (TARGET_64BIT) +- uint64_type = long_unsigned_type_node; +- else +- uint64_type = long_long_unsigned_type_node; +- +- ftype = build_function_type_list (void_type_node, +- build_pointer_type (uint64_type), +- uint64_type, NULL_TREE); +- add_builtin_function ("__builtin_non_tx_store", ftype, +- S390_BUILTIN_NON_TX_STORE, +- BUILT_IN_MD, NULL, NULL_TREE); +-} +- +-/* Expand an expression EXP that calls a built-in function, +- with result going to TARGET if that's convenient +- (and in mode MODE if that's convenient). +- SUBTARGET may be used as the target for computing one of EXP's operands. +- IGNORE is nonzero if the value is to be ignored. 
*/ +- +-static rtx +-s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, +- enum machine_mode mode ATTRIBUTE_UNUSED, +- int ignore ATTRIBUTE_UNUSED) +-{ +-#define MAX_ARGS 2 +- +- tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); +- unsigned int fcode = DECL_FUNCTION_CODE (fndecl); +- enum insn_code icode; +- rtx op[MAX_ARGS], pat; +- int arity; +- bool nonvoid; +- tree arg; +- call_expr_arg_iterator iter; +- +- if (fcode >= S390_BUILTIN_max) +- internal_error ("bad builtin fcode"); +- icode = code_for_builtin[fcode]; +- if (icode == 0) +- internal_error ("bad builtin fcode"); +- +- if (!TARGET_HTM) +- error ("Transactional execution builtins not enabled (-mhtm)\n"); +- +- /* Set a flag in the machine specific cfun part in order to support +- saving/restoring of FPRs. */ +- if (fcode == S390_BUILTIN_TBEGIN || fcode == S390_BUILTIN_TBEGIN_RETRY) +- cfun->machine->tbegin_p = true; +- +- nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; +- +- arity = 0; +- FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) +- { +- const struct insn_operand_data *insn_op; +- +- if (arg == error_mark_node) +- return NULL_RTX; +- if (arity >= MAX_ARGS) +- return NULL_RTX; +- +- insn_op = &insn_data[icode].operand[arity + nonvoid]; +- +- op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL); +- +- if (!(*insn_op->predicate) (op[arity], insn_op->mode)) +- { +- if (insn_op->predicate == memory_operand) +- { +- /* Don't move a NULL pointer into a register. Otherwise +- we have to rely on combine being able to move it back +- in order to get an immediate 0 in the instruction. */ +- if (op[arity] != const0_rtx) +- op[arity] = copy_to_mode_reg (Pmode, op[arity]); +- op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]); +- } +- else +- op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]); +- } +- +- arity++; +- } +- +- if (nonvoid) +- { +- enum machine_mode tmode = insn_data[icode].operand[0].mode; +- if (!target +- || GET_MODE (target) != tmode +- || !(*insn_data[icode].operand[0].predicate) (target, tmode)) +- target = gen_reg_rtx (tmode); +- } +- +- switch (arity) +- { +- case 0: +- pat = GEN_FCN (icode) (target); +- break; +- case 1: +- if (nonvoid) +- pat = GEN_FCN (icode) (target, op[0]); +- else +- pat = GEN_FCN (icode) (op[0]); +- break; +- case 2: +- if (nonvoid) +- pat = GEN_FCN (icode) (target, op[0], op[1]); +- else +- pat = GEN_FCN (icode) (op[0], op[1]); +- break; +- default: +- gcc_unreachable (); +- } +- if (!pat) +- return NULL_RTX; +- emit_insn (pat); +- +- if (nonvoid) +- return target; +- else +- return const0_rtx; +-} +- +- + /* Output assembly code for the trampoline template to + stdio stream FILE. + +@@ -10496,15 +12203,18 @@ s390_call_saved_register_used (tree call + mode = TYPE_MODE (type); + gcc_assert (mode); + ++ /* We assume that in the target function all parameters are ++ named. This only has an impact on vector argument register ++ usage none of which is call-saved. */ + if (pass_by_reference (&cum_v, mode, type, true)) + { + mode = Pmode; + type = build_pointer_type (type); + } + +- parm_rtx = s390_function_arg (cum, mode, type, 0); ++ parm_rtx = s390_function_arg (cum, mode, type, true); + +- s390_function_arg_advance (cum, mode, type, 0); ++ s390_function_arg_advance (cum, mode, type, true); + + if (!parm_rtx) + continue; +@@ -10711,6 +12421,13 @@ s390_conditional_register_usage (void) + for (i = 16; i < 32; i++) + call_used_regs[i] = fixed_regs[i] = 1; + } ++ ++ /* Disable v16 - v31 for non-vector target. 
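++
++     v0-v15 overlap the floating point registers f0-f15 and remain
++     usable; only the 16 registers that are new with the vector
++     facility have to be masked out here.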
*/ ++ if (!TARGET_VX) ++ { ++ for (i = VR16_REGNUM; i <= VR31_REGNUM; i++) ++ fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1; ++ } + } + + /* Corresponding function to eh_return expander. */ +@@ -11232,7 +12949,8 @@ s390_reorg (void) + /* Walk over the insns and do some >=z10 specific changes. */ + if (s390_tune == PROCESSOR_2097_Z10 + || s390_tune == PROCESSOR_2817_Z196 +- || s390_tune == PROCESSOR_2827_ZEC12) ++ || s390_tune == PROCESSOR_2827_ZEC12 ++ || s390_tune == PROCESSOR_2964_Z13) + { + rtx insn; + bool insn_added_p = false; +@@ -11392,27 +13110,66 @@ s390_z10_prevent_earlyload_conflicts (rt + + static int s390_sched_state; + +-#define S390_OOO_SCHED_STATE_NORMAL 3 +-#define S390_OOO_SCHED_STATE_CRACKED 4 ++#define S390_SCHED_STATE_NORMAL 3 ++#define S390_SCHED_STATE_CRACKED 4 + +-#define S390_OOO_SCHED_ATTR_MASK_CRACKED 0x1 +-#define S390_OOO_SCHED_ATTR_MASK_EXPANDED 0x2 +-#define S390_OOO_SCHED_ATTR_MASK_ENDGROUP 0x4 +-#define S390_OOO_SCHED_ATTR_MASK_GROUPALONE 0x8 ++#define S390_SCHED_ATTR_MASK_CRACKED 0x1 ++#define S390_SCHED_ATTR_MASK_EXPANDED 0x2 ++#define S390_SCHED_ATTR_MASK_ENDGROUP 0x4 ++#define S390_SCHED_ATTR_MASK_GROUPALONE 0x8 + + static unsigned int + s390_get_sched_attrmask (rtx insn) + { + unsigned int mask = 0; + +- if (get_attr_ooo_cracked (insn)) +- mask |= S390_OOO_SCHED_ATTR_MASK_CRACKED; +- if (get_attr_ooo_expanded (insn)) +- mask |= S390_OOO_SCHED_ATTR_MASK_EXPANDED; +- if (get_attr_ooo_endgroup (insn)) +- mask |= S390_OOO_SCHED_ATTR_MASK_ENDGROUP; +- if (get_attr_ooo_groupalone (insn)) +- mask |= S390_OOO_SCHED_ATTR_MASK_GROUPALONE; ++ switch (s390_tune) ++ { ++ case PROCESSOR_2827_ZEC12: ++ if (get_attr_zEC12_cracked (insn)) ++ mask |= S390_SCHED_ATTR_MASK_CRACKED; ++ if (get_attr_zEC12_expanded (insn)) ++ mask |= S390_SCHED_ATTR_MASK_EXPANDED; ++ if (get_attr_zEC12_endgroup (insn)) ++ mask |= S390_SCHED_ATTR_MASK_ENDGROUP; ++ if (get_attr_zEC12_groupalone (insn)) ++ mask |= S390_SCHED_ATTR_MASK_GROUPALONE; ++ break; ++ case PROCESSOR_2964_Z13: ++ if (get_attr_z13_cracked (insn)) ++ mask |= S390_SCHED_ATTR_MASK_CRACKED; ++ if (get_attr_z13_expanded (insn)) ++ mask |= S390_SCHED_ATTR_MASK_EXPANDED; ++ if (get_attr_z13_endgroup (insn)) ++ mask |= S390_SCHED_ATTR_MASK_ENDGROUP; ++ if (get_attr_z13_groupalone (insn)) ++ mask |= S390_SCHED_ATTR_MASK_GROUPALONE; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ return mask; ++} ++ ++static unsigned int ++s390_get_unit_mask (rtx insn, int *units) ++{ ++ unsigned int mask = 0; ++ ++ switch (s390_tune) ++ { ++ case PROCESSOR_2964_Z13: ++ *units = 3; ++ if (get_attr_z13_unit_lsu (insn)) ++ mask |= 1 << 0; ++ if (get_attr_z13_unit_fxu (insn)) ++ mask |= 1 << 1; ++ if (get_attr_z13_unit_vfu (insn)) ++ mask |= 1 << 2; ++ break; ++ default: ++ gcc_unreachable (); ++ } + return mask; + } + +@@ -11430,48 +13187,66 @@ s390_sched_score (rtx insn) + case 0: + /* Try to put insns into the first slot which would otherwise + break a group. */ +- if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0 +- || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0) ++ if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0 ++ || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0) + score += 5; +- if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0) ++ if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0) + score += 10; + case 1: + /* Prefer not cracked insns while trying to put together a + group. 
*/ +- if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0 +- && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0 +- && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0) ++ if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0 ++ && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0 ++ && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0) + score += 10; +- if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) == 0) ++ if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0) + score += 5; + break; + case 2: + /* Prefer not cracked insns while trying to put together a + group. */ +- if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0 +- && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0 +- && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0) ++ if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0 ++ && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0 ++ && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0) + score += 10; + /* Prefer endgroup insns in the last slot. */ +- if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0) ++ if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0) + score += 10; + break; +- case S390_OOO_SCHED_STATE_NORMAL: ++ case S390_SCHED_STATE_NORMAL: + /* Prefer not cracked insns if the last was not cracked. */ +- if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0 +- && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0) ++ if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0 ++ && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0) + score += 5; +- if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0) ++ if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0) + score += 10; + break; +- case S390_OOO_SCHED_STATE_CRACKED: ++ case S390_SCHED_STATE_CRACKED: + /* Try to keep cracked insns together to prevent them from + interrupting groups. */ +- if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0 +- || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0) ++ if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0 ++ || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0) + score += 5; + break; + } ++ ++ if (s390_tune == PROCESSOR_2964_Z13) ++ { ++ int units, i; ++ unsigned unit_mask, m = 1; ++ ++ unit_mask = s390_get_unit_mask (insn, &units); ++ gcc_assert (units <= MAX_SCHED_UNITS); ++ ++ /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long ++ ago the last insn of this unit type got scheduled. This is ++ supposed to help providing a proper instruction mix to the ++ CPU. 
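++
++	 Worked example with hypothetical limits MAX_SCHED_MIX_SCORE 8
++	 and MAX_SCHED_MIX_DISTANCE 100 (the real values are defined
++	 elsewhere in this file): a unit last used 100 or more insns
++	 ago contributes the full 8 to the score, one used by the
++	 previous insn contributes 0, so insns exercising "cold" units
++	 are preferred.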
*/ ++ for (i = 0; i < units; i++, m <<= 1) ++ if (m & unit_mask) ++ score += (last_scheduled_unit_distance[i] * MAX_SCHED_MIX_SCORE / ++ MAX_SCHED_MIX_DISTANCE); ++ } + return score; + } + +@@ -11487,7 +13262,8 @@ s390_sched_reorder (FILE *file, int verb + if (reload_completed && *nreadyp > 1) + s390_z10_prevent_earlyload_conflicts (ready, nreadyp); + +- if (s390_tune == PROCESSOR_2827_ZEC12 ++ if ((s390_tune == PROCESSOR_2827_ZEC12 ++ || s390_tune == PROCESSOR_2964_Z13) + && reload_completed + && *nreadyp > 1) + { +@@ -11526,12 +13302,12 @@ s390_sched_reorder (FILE *file, int verb + + if (verbose > 5) + fprintf (file, +- "move insn %d to the top of list\n", ++ ";;\t\tBACKEND: move insn %d to the top of list\n", + INSN_UID (ready[last_index])); + } + else if (verbose > 5) + fprintf (file, +- "best insn %d already on top\n", ++ ";;\t\tBACKEND: best insn %d already on top\n", + INSN_UID (ready[last_index])); + } + +@@ -11542,16 +13318,35 @@ s390_sched_reorder (FILE *file, int verb + + for (i = last_index; i >= 0; i--) + { +- if (recog_memoized (ready[i]) < 0) ++ unsigned int sched_mask; ++ rtx insn = ready[i]; ++ ++ if (recog_memoized (insn) < 0) + continue; +- fprintf (file, "insn %d score: %d: ", INSN_UID (ready[i]), +- s390_sched_score (ready[i])); +-#define PRINT_OOO_ATTR(ATTR) fprintf (file, "%s ", get_attr_##ATTR (ready[i]) ? #ATTR : "!" #ATTR); +- PRINT_OOO_ATTR (ooo_cracked); +- PRINT_OOO_ATTR (ooo_expanded); +- PRINT_OOO_ATTR (ooo_endgroup); +- PRINT_OOO_ATTR (ooo_groupalone); +-#undef PRINT_OOO_ATTR ++ ++ sched_mask = s390_get_sched_attrmask (insn); ++ fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ", ++ INSN_UID (insn), ++ s390_sched_score (insn)); ++#define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\ ++ ((M) & sched_mask) ? #ATTR : ""); ++ PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked); ++ PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded); ++ PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup); ++ PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone); ++#undef PRINT_SCHED_ATTR ++ if (s390_tune == PROCESSOR_2964_Z13) ++ { ++ unsigned int unit_mask, m = 1; ++ int units, j; ++ ++ unit_mask = s390_get_unit_mask (insn, &units); ++ fprintf (file, "(units:"); ++ for (j = 0; j < units; j++, m <<= 1) ++ if (m & unit_mask) ++ fprintf (file, " u%d", j); ++ fprintf (file, ")"); ++ } + fprintf (file, "\n"); + } + } +@@ -11570,18 +13365,19 @@ s390_sched_variable_issue (FILE *file, i + { + last_scheduled_insn = insn; + +- if (s390_tune == PROCESSOR_2827_ZEC12 ++ if ((s390_tune == PROCESSOR_2827_ZEC12 ++ || s390_tune == PROCESSOR_2964_Z13) + && reload_completed + && recog_memoized (insn) >= 0) + { + unsigned int mask = s390_get_sched_attrmask (insn); + +- if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0 +- || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0) +- s390_sched_state = S390_OOO_SCHED_STATE_CRACKED; +- else if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0 +- || (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0) +- s390_sched_state = S390_OOO_SCHED_STATE_NORMAL; ++ if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0 ++ || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0) ++ s390_sched_state = S390_SCHED_STATE_CRACKED; ++ else if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0 ++ || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0) ++ s390_sched_state = S390_SCHED_STATE_NORMAL; + else + { + /* Only normal insns are left (mask == 0). 
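++	     (The plain states 0, 1 and 2 count the slot being filled
++	     in the current three-insn dispatch group; the CRACKED and
++	     NORMAL states record that the previous insn cracked or
++	     ended a group.)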
*/ +@@ -11590,30 +13386,73 @@ s390_sched_variable_issue (FILE *file, i + case 0: + case 1: + case 2: +- case S390_OOO_SCHED_STATE_NORMAL: +- if (s390_sched_state == S390_OOO_SCHED_STATE_NORMAL) ++ case S390_SCHED_STATE_NORMAL: ++ if (s390_sched_state == S390_SCHED_STATE_NORMAL) + s390_sched_state = 1; + else + s390_sched_state++; + + break; +- case S390_OOO_SCHED_STATE_CRACKED: +- s390_sched_state = S390_OOO_SCHED_STATE_NORMAL; ++ case S390_SCHED_STATE_CRACKED: ++ s390_sched_state = S390_SCHED_STATE_NORMAL; + break; + } + } ++ ++ if (s390_tune == PROCESSOR_2964_Z13) ++ { ++ int units, i; ++ unsigned unit_mask, m = 1; ++ ++ unit_mask = s390_get_unit_mask (insn, &units); ++ gcc_assert (units <= MAX_SCHED_UNITS); ++ ++ for (i = 0; i < units; i++, m <<= 1) ++ if (m & unit_mask) ++ last_scheduled_unit_distance[i] = 0; ++ else if (last_scheduled_unit_distance[i] < MAX_SCHED_MIX_DISTANCE) ++ last_scheduled_unit_distance[i]++; ++ } ++ + if (verbose > 5) + { +- fprintf (file, "insn %d: ", INSN_UID (insn)); +-#define PRINT_OOO_ATTR(ATTR) \ +- fprintf (file, "%s ", get_attr_##ATTR (insn) ? #ATTR : ""); +- PRINT_OOO_ATTR (ooo_cracked); +- PRINT_OOO_ATTR (ooo_expanded); +- PRINT_OOO_ATTR (ooo_endgroup); +- PRINT_OOO_ATTR (ooo_groupalone); +-#undef PRINT_OOO_ATTR +- fprintf (file, "\n"); +- fprintf (file, "sched state: %d\n", s390_sched_state); ++ unsigned int sched_mask; ++ ++ sched_mask = s390_get_sched_attrmask (insn); ++ ++ fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn)); ++#define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : ""); ++ PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked); ++ PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded); ++ PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup); ++ PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone); ++#undef PRINT_SCHED_ATTR ++ ++ if (s390_tune == PROCESSOR_2964_Z13) ++ { ++ unsigned int unit_mask, m = 1; ++ int units, j; ++ ++ unit_mask = s390_get_unit_mask (insn, &units); ++ fprintf (file, "(units:"); ++ for (j = 0; j < units; j++, m <<= 1) ++ if (m & unit_mask) ++ fprintf (file, " %d", j); ++ fprintf (file, ")"); ++ } ++ fprintf (file, " sched state: %d\n", s390_sched_state); ++ ++ if (s390_tune == PROCESSOR_2964_Z13) ++ { ++ int units, j; ++ ++ s390_get_unit_mask (insn, &units); ++ ++ fprintf (file, ";;\t\tBACKEND: units unused for: "); ++ for (j = 0; j < units; j++) ++ fprintf (file, "%d:%d ", j, last_scheduled_unit_distance[j]); ++ fprintf (file, "\n"); ++ } + } + } + +@@ -11630,6 +13469,7 @@ s390_sched_init (FILE *file ATTRIBUTE_UN + int max_ready ATTRIBUTE_UNUSED) + { + last_scheduled_insn = NULL_RTX; ++ memset (last_scheduled_unit_distance, 0, MAX_SCHED_UNITS * sizeof (int)); + s390_sched_state = 0; + } + +@@ -11663,7 +13503,8 @@ s390_loop_unroll_adjust (unsigned nunrol + + if (s390_tune != PROCESSOR_2097_Z10 + && s390_tune != PROCESSOR_2817_Z196 +- && s390_tune != PROCESSOR_2827_ZEC12) ++ && s390_tune != PROCESSOR_2827_ZEC12 ++ && s390_tune != PROCESSOR_2964_Z13) + return nunroll; + + /* Count the number of memory references within the loop body. */ +@@ -11691,6 +13532,84 @@ s390_loop_unroll_adjust (unsigned nunrol + } + } + ++/* Return the vector mode to be used for inner mode MODE when doing ++ vectorization. 
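++
++   Each case below simply picks the vector mode filling one 16-byte
++   vector register, for instance SImode -> V4SImode (four 4-byte
++   elements); the word_mode fallback keeps the vectorizer on scalar
++   code when the vector facility is absent.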
*/ ++static enum machine_mode ++s390_preferred_simd_mode (enum machine_mode mode) ++{ ++ if (TARGET_VX) ++ switch (mode) ++ { ++ case DFmode: ++ return V2DFmode; ++ case DImode: ++ return V2DImode; ++ case SImode: ++ return V4SImode; ++ case HImode: ++ return V8HImode; ++ case QImode: ++ return V16QImode; ++ default:; ++ } ++ return word_mode; ++} ++ ++/* Our hardware does not require vectors to be strictly aligned. */ ++static bool ++s390_support_vector_misalignment (enum machine_mode mode ATTRIBUTE_UNUSED, ++ const_tree type ATTRIBUTE_UNUSED, ++ int misalignment ATTRIBUTE_UNUSED, ++ bool is_packed ATTRIBUTE_UNUSED) ++{ ++ if (TARGET_VX) ++ return true; ++ ++ return default_builtin_support_vector_misalignment (mode, type, misalignment, ++ is_packed); ++} ++ ++/* The vector ABI requires vector types to be aligned on an 8 byte ++ boundary (our stack alignment). However, we allow this to be ++ overriden by the user, while this definitely breaks the ABI. */ ++static HOST_WIDE_INT ++s390_vector_alignment (const_tree type) ++{ ++ if (!TARGET_VX_ABI) ++ return default_vector_alignment (type); ++ ++ if (TYPE_USER_ALIGN (type)) ++ return TYPE_ALIGN (type); ++ ++ return MIN (64, tree_low_cst (TYPE_SIZE (type), 0)); ++} ++ ++/* Implement TARGET_ASM_FILE_END. */ ++static void ++s390_asm_file_end (void) ++{ ++#ifdef HAVE_AS_GNU_ATTRIBUTE ++ varpool_node *vnode; ++ cgraph_node *cnode; ++ ++ FOR_EACH_VARIABLE (vnode) ++ if (TREE_PUBLIC (vnode->symbol.decl)) ++ s390_check_type_for_vector_abi (TREE_TYPE (vnode->symbol.decl), ++ false, false); ++ ++ FOR_EACH_FUNCTION (cnode) ++ if (TREE_PUBLIC (cnode->symbol.decl)) ++ s390_check_type_for_vector_abi (TREE_TYPE (cnode->symbol.decl), ++ false, false); ++ ++ ++ if (s390_vector_abi != 0) ++ fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n", ++ s390_vector_abi); ++#endif ++ file_end_indicate_exec_stack (); ++} ++ + /* Initialize GCC target structure. 
*/ + + #undef TARGET_ASM_ALIGNED_HI_OP +@@ -11797,6 +13716,8 @@ s390_loop_unroll_adjust (unsigned nunrol + #define TARGET_FUNCTION_VALUE s390_function_value + #undef TARGET_LIBCALL_VALUE + #define TARGET_LIBCALL_VALUE s390_libcall_value ++#undef TARGET_STRICT_ARGUMENT_NAMING ++#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true + + #undef TARGET_FIXED_CONDITION_CODE_REGS + #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs +@@ -11812,6 +13733,9 @@ s390_loop_unroll_adjust (unsigned nunrol + #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel + #endif + ++#undef TARGET_DWARF_FRAME_REG_MODE ++#define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode ++ + #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING + #undef TARGET_MANGLE_TYPE + #define TARGET_MANGLE_TYPE s390_mangle_type +@@ -11820,6 +13744,9 @@ s390_loop_unroll_adjust (unsigned nunrol + #undef TARGET_SCALAR_MODE_SUPPORTED_P + #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p + ++#undef TARGET_VECTOR_MODE_SUPPORTED_P ++#define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p ++ + #undef TARGET_PREFERRED_RELOAD_CLASS + #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class + +@@ -11864,6 +13791,21 @@ s390_loop_unroll_adjust (unsigned nunrol + #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P + #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true + ++#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN ++#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn ++ ++#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE ++#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode ++ ++#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT ++#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment ++ ++#undef TARGET_VECTOR_ALIGNMENT ++#define TARGET_VECTOR_ALIGNMENT s390_vector_alignment ++ ++#undef TARGET_ASM_FILE_END ++#define TARGET_ASM_FILE_END s390_asm_file_end ++ + struct gcc_target targetm = TARGET_INITIALIZER; + + #include "gt-s390.h" +--- gcc/config/s390/s390-c.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/config/s390/s390-c.c 2016-05-11 19:20:42.792826040 +0200 +@@ -0,0 +1,903 @@ ++/* Language specific subroutines used for code generation on IBM S/390 ++ and zSeries ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ ++ Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com). ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . ++ ++ Based on gcc/config/rs6000/rs6000-c.c. ++ ++ In GCC terms this file belongs to the frontend. It will be ++ compiled with -DIN_GCC_FRONTEND. With that rtl.h cannot be ++ included anymore - a mechanism supposed to avoid adding frontend - ++ backend dependencies. 
*/ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "cpplib.h" ++#include "machmode.h" ++#include "vec.h" ++#include "double-int.h" ++#include "input.h" ++#include "alias.h" ++#include "symtab.h" ++#include "tree.h" ++#include "c-family/c-common.h" ++#include "c-family/c-pragma.h" ++#include "diagnostic-core.h" ++#include "tm_p.h" ++#include "target.h" ++#include "langhooks.h" ++#include "tree-pretty-print.h" ++#include "c/c-tree.h" ++ ++#include "s390-builtins.h" ++ ++static GTY(()) tree __vector_keyword; ++static GTY(()) tree vector_keyword; ++static GTY(()) tree __bool_keyword; ++static GTY(()) tree bool_keyword; ++static GTY(()) tree _Bool_keyword; ++ ++ ++/* Generate an array holding all the descriptions of variants of ++ overloaded builtins defined with OB_DEF_VAR in ++ s390-builtins.def. */ ++static enum s390_builtin_ov_type_index ++type_for_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] = ++ { ++#undef B_DEF ++#undef OB_DEF ++#undef OB_DEF_VAR ++#define B_DEF(...) ++#define OB_DEF(...) ++#define OB_DEF_VAR(NAME, PATTERN, FLAGS, FNTYPE) FNTYPE, ++#include "s390-builtins.def" ++ BT_OV_MAX ++ }; ++ ++ ++/* Generate an array indexed by an overloaded builtin index returning ++ the first index in desc_for_overloaded_builtin_var where the ++ variants for the builtin can be found. */ ++static enum s390_overloaded_builtin_vars ++desc_start_for_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] = ++ { ++#undef B_DEF ++#undef OB_DEF ++#undef OB_DEF_VAR ++#define B_DEF(...) ++#define OB_DEF(NAME, FIRST_VAR_NAME,...) \ ++ S390_OVERLOADED_BUILTIN_VAR_##FIRST_VAR_NAME, ++#define OB_DEF_VAR(...) ++ #include "s390-builtins.def" ++ S390_OVERLOADED_BUILTIN_VAR_MAX ++ }; ++ ++/* Generate an array indexed by an overloaded builtin index returning ++ the last index in desc_for_overloaded_builtin_var where the ++ variants for the builtin can be found. */ ++static enum s390_overloaded_builtin_vars ++desc_end_for_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] = ++ { ++#undef B_DEF ++#undef OB_DEF ++#undef OB_DEF_VAR ++#define B_DEF(...) ++#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME,...) \ ++ S390_OVERLOADED_BUILTIN_VAR_##LAST_VAR_NAME, ++#define OB_DEF_VAR(...) ++ #include "s390-builtins.def" ++ S390_OVERLOADED_BUILTIN_VAR_MAX ++ }; ++ ++static enum s390_builtin_type_index ++s390_builtin_ov_types[BT_OV_MAX][MAX_OV_OPERANDS] = ++ { ++#undef DEF_TYPE ++#undef DEF_POINTER_TYPE ++#undef DEF_DISTINCT_TYPE ++#undef DEF_VECTOR_TYPE ++#undef DEF_OPAQUE_VECTOR_TYPE ++#undef DEF_FN_TYPE ++#undef DEF_OV_TYPE ++#define DEF_TYPE(...) ++#define DEF_POINTER_TYPE(...) ++#define DEF_DISTINCT_TYPE(...) ++#define DEF_VECTOR_TYPE(...) ++#define DEF_OPAQUE_VECTOR_TYPE(...) ++#define DEF_FN_TYPE(...) ++#define DEF_OV_TYPE(INDEX, args...) { args }, ++#include "s390-builtin-types.def" ++ }; ++ ++static const enum s390_builtins ++bt_for_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX] = { ++#undef B_DEF ++#undef OB_DEF ++#undef OB_DEF_VAR ++#define B_DEF(...) ++#define OB_DEF(...) ++#define OB_DEF_VAR(NAME, BT, ...) S390_BUILTIN_##BT, ++ ++#include "s390-builtins.def" ++ }; ++ ++/* In addition to calling fold_convert for EXPR of type TYPE, also ++ call c_fully_fold to remove any C_MAYBE_CONST_EXPRs that could be ++ hiding there (PR47197). 
*/ ++tree ++fully_fold_convert (tree type, tree expr) ++{ ++ tree result = fold_convert (type, expr); ++ bool maybe_const = true; ++ ++ if (!c_dialect_cxx ()) ++ result = c_fully_fold (result, false, &maybe_const); ++ ++ return result; ++} ++ ++/* Unify the different variants to the same nodes in order to keep the ++ code working with it simple. */ ++static cpp_hashnode * ++s390_categorize_keyword (const cpp_token *tok) ++{ ++ if (tok->type == CPP_NAME) ++ { ++ cpp_hashnode *ident = tok->val.node.node; ++ ++ if (ident == C_CPP_HASHNODE (vector_keyword)) ++ return C_CPP_HASHNODE (__vector_keyword); ++ ++ if (ident == C_CPP_HASHNODE (bool_keyword)) ++ return C_CPP_HASHNODE (__bool_keyword); ++ ++ if (ident == C_CPP_HASHNODE (_Bool_keyword)) ++ return C_CPP_HASHNODE (__bool_keyword); ++ return ident; ++ } ++ ++ return 0; ++} ++ ++ ++/* Called to decide whether a conditional macro should be expanded. ++ Since we have exactly one such macro (i.e., 'vector'), we do not ++ need to examine the 'tok' parameter. */ ++ ++static cpp_hashnode * ++s390_macro_to_expand (cpp_reader *pfile, const cpp_token *tok) ++{ ++ cpp_hashnode *expand_this = tok->val.node.node; ++ cpp_hashnode *ident; ++ static bool expand_bool_p = false; ++ int idx = 0; ++ enum rid rid_code; ++ ++ /* The vector keyword is only expanded if the machine actually ++ provides hardware support. */ ++ if (!TARGET_ZVECTOR) ++ return NULL; ++ ++ ident = s390_categorize_keyword (tok); ++ ++ /* Triggered when we picked a different variant in ++ s390_categorize_keyword. */ ++ if (ident != expand_this) ++ expand_this = NULL; ++ ++ /* The vector keyword has been found already and we remembered to ++ expand the next bool. */ ++ if (expand_bool_p && ident == C_CPP_HASHNODE (__bool_keyword)) ++ { ++ expand_bool_p = false; ++ return ident; ++ } ++ ++ if (ident != C_CPP_HASHNODE (__vector_keyword)) ++ return expand_this; ++ ++ do ++ tok = cpp_peek_token (pfile, idx++); ++ while (tok->type == CPP_PADDING); ++ ident = s390_categorize_keyword (tok); ++ ++ if (!ident) ++ return expand_this; ++ ++ /* vector bool - remember to expand the next bool. */ ++ if (ident == C_CPP_HASHNODE (__bool_keyword)) ++ { ++ expand_bool_p = true; ++ return C_CPP_HASHNODE (__vector_keyword); ++ } ++ ++ /* The boost libraries have code with Iterator::vector vector in it. ++ If we allow the normal handling, this module will be called ++ recursively, and the vector will be skipped. */ ++ if (ident == C_CPP_HASHNODE (__vector_keyword)) ++ return expand_this; ++ ++ rid_code = (enum rid)(ident->rid_code); ++ ++ if (ident->type == NT_MACRO) ++ { ++ /* Now actually fetch the tokens we "peeked" before and do a ++ lookahead for the next. */ ++ do ++ (void) cpp_get_token (pfile); ++ while (--idx > 0); ++ do ++ tok = cpp_peek_token (pfile, idx++); ++ while (tok->type == CPP_PADDING); ++ ident = s390_categorize_keyword (tok); ++ ++ if (ident == C_CPP_HASHNODE (__bool_keyword)) ++ { ++ expand_bool_p = true; ++ return C_CPP_HASHNODE (__vector_keyword); ++ } ++ else if (ident) ++ rid_code = (enum rid)(ident->rid_code); ++ } ++ ++ /* vector keyword followed by type identifier: vector unsigned, ++ vector long, ... ++ Types consisting of more than one identifier are not supported by ++ zvector e.g. long long, long double, unsigned long int.
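++ For illustration (a sketch derived from the cases handled below, not an exhaustive list): ++ __vector unsigned a; - expanded, a single type keyword follows ++ __vector __bool char b; - expanded, and the following __bool expands too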
*/ ++ if (rid_code == RID_UNSIGNED || rid_code == RID_LONG ++ || rid_code == RID_SHORT || rid_code == RID_SIGNED ++ || rid_code == RID_INT || rid_code == RID_CHAR ++ || rid_code == RID_DOUBLE) ++ { ++ expand_this = C_CPP_HASHNODE (__vector_keyword); ++ /* If the next keyword is bool, it will need to be expanded as ++ well. */ ++ do ++ tok = cpp_peek_token (pfile, idx++); ++ while (tok->type == CPP_PADDING); ++ ident = s390_categorize_keyword (tok); ++ ++ /* __vector long __bool a; */ ++ if (ident == C_CPP_HASHNODE (__bool_keyword)) ++ expand_bool_p = true; ++ else ++ { ++ /* Triggered with: __vector long long __bool a; */ ++ do ++ tok = cpp_peek_token (pfile, idx++); ++ while (tok->type == CPP_PADDING); ++ ident = s390_categorize_keyword (tok); ++ ++ if (ident == C_CPP_HASHNODE (__bool_keyword)) ++ expand_bool_p = true; ++ } ++ } ++ ++ return expand_this; ++} ++ ++/* Define platform dependent macros. */ ++void ++s390_cpu_cpp_builtins (cpp_reader *pfile) ++{ ++ cpp_assert (pfile, "cpu=s390"); ++ cpp_assert (pfile, "machine=s390"); ++ cpp_define (pfile, "__s390__"); ++ if (TARGET_ZARCH) ++ cpp_define (pfile, "__zarch__"); ++ if (TARGET_64BIT) ++ cpp_define (pfile, "__s390x__"); ++ if (TARGET_LONG_DOUBLE_128) ++ cpp_define (pfile, "__LONG_DOUBLE_128__"); ++ if (TARGET_HTM) ++ cpp_define (pfile, "__HTM__"); ++ if (TARGET_ZVECTOR) ++ { ++ cpp_define (pfile, "__VEC__=10301"); ++ cpp_define (pfile, "__vector=__attribute__((vector_size(16)))"); ++ cpp_define (pfile, "__bool=__attribute__((s390_vector_bool)) unsigned"); ++ ++ if (!flag_iso) ++ { ++ cpp_define (pfile, "__VECTOR_KEYWORD_SUPPORTED__"); ++ cpp_define (pfile, "vector=vector"); ++ cpp_define (pfile, "bool=bool"); ++ ++ __vector_keyword = get_identifier ("__vector"); ++ C_CPP_HASHNODE (__vector_keyword)->flags |= NODE_CONDITIONAL; ++ ++ vector_keyword = get_identifier ("vector"); ++ C_CPP_HASHNODE (vector_keyword)->flags |= NODE_CONDITIONAL; ++ ++ __bool_keyword = get_identifier ("__bool"); ++ C_CPP_HASHNODE (__bool_keyword)->flags |= NODE_CONDITIONAL; ++ ++ bool_keyword = get_identifier ("bool"); ++ C_CPP_HASHNODE (bool_keyword)->flags |= NODE_CONDITIONAL; ++ ++ _Bool_keyword = get_identifier ("_Bool"); ++ C_CPP_HASHNODE (_Bool_keyword)->flags |= NODE_CONDITIONAL; ++ ++ /* Enable context-sensitive macros. */ ++ cpp_get_callbacks (pfile)->macro_to_expand = s390_macro_to_expand; ++ } ++ } ++} ++ ++/* Expand builtins which can directly be mapped to tree expressions. 
++ LOC - location information ++ FCODE - function code of the builtin ++ ARGLIST - values supposed to be passed as arguments ++ RETURN_TYPE - expected return type of the builtin */ ++static tree ++s390_expand_overloaded_builtin (location_t loc, ++ unsigned fcode, ++ vec<tree, va_gc> *arglist, ++ tree return_type) ++{ ++ switch (fcode) ++ { ++ case S390_OVERLOADED_BUILTIN_s390_vec_step: ++ if (TREE_CODE (TREE_TYPE ((*arglist)[0])) != VECTOR_TYPE) ++ { ++ error_at (loc, "Builtin vec_step can only be used on vector types."); ++ return error_mark_node; ++ } ++ return build_int_cst (NULL_TREE, ++ TYPE_VECTOR_SUBPARTS (TREE_TYPE ((*arglist)[0]))); ++ case S390_OVERLOADED_BUILTIN_s390_vec_xld2: ++ case S390_OVERLOADED_BUILTIN_s390_vec_xlw4: ++ return build2 (MEM_REF, return_type, ++ fold_build_pointer_plus ((*arglist)[1], (*arglist)[0]), ++ build_int_cst (TREE_TYPE ((*arglist)[1]), 0)); ++ case S390_OVERLOADED_BUILTIN_s390_vec_xstd2: ++ case S390_OVERLOADED_BUILTIN_s390_vec_xstw4: ++ return build2 (MODIFY_EXPR, TREE_TYPE((*arglist)[0]), ++ build1 (INDIRECT_REF, TREE_TYPE((*arglist)[0]), ++ fold_build_pointer_plus ((*arglist)[2], (*arglist)[1])), ++ (*arglist)[0]); ++ case S390_OVERLOADED_BUILTIN_s390_vec_load_pair: ++ { ++ vec<constructor_elt, va_gc> *v; ++ constructor_elt elt1 = { NULL_TREE , (*arglist)[0] }; ++ constructor_elt elt2 = { NULL_TREE , (*arglist)[1] }; ++ ++ vec_alloc (v, 2); ++ v->quick_push (elt1); ++ v->quick_push (elt2); ++ return build_constructor (return_type, v); ++ } ++ default: ++ gcc_unreachable (); ++ } ++} ++ ++/* invert result */ ++#define __VSTRING_FLAG_IN 8 ++/* result type */ ++#define __VSTRING_FLAG_RT 4 ++/* zero search */ ++#define __VSTRING_FLAG_ZS 2 ++/* set condition code */ ++#define __VSTRING_FLAG_CS 1 ++ ++/* Return the flags value to be used for string low-level builtins ++ when expanded from overloaded builtin OB_FCODE.
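++ ++ For example (a worked case derived from the switches below): for ++ S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_or_0_idx_cc the result is ++ __VSTRING_FLAG_IN | __VSTRING_FLAG_ZS | __VSTRING_FLAG_CS ++ = 8 + 2 + 1 = 11.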
*/ ++static unsigned int ++s390_get_vstring_flags (int ob_fcode) ++{ ++ unsigned int flags = 0; ++ ++ switch (ob_fcode) ++ { ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_or_0_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_or_0_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_cc: ++ flags |= __VSTRING_FLAG_IN; ++ break; ++ default: ++ break; ++ } ++ switch (ob_fcode) ++ { ++ ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_cc: ++ flags |= __VSTRING_FLAG_RT; ++ break; ++ default: ++ break; ++ } ++ switch (ob_fcode) ++ { ++ ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_or_0_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_or_0_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_or_0_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_or_0_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_or_0_idx_cc: ++ flags |= __VSTRING_FLAG_ZS; ++ break; ++ default: ++ break; ++ } ++ switch (ob_fcode) ++ { ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_cc: ++ flags |= __VSTRING_FLAG_CS; ++ break; ++ default: ++ break; ++ } ++ return flags; ++} ++#undef __VSTRING_FLAG_IN ++#undef __VSTRING_FLAG_RT ++#undef __VSTRING_FLAG_ZS ++#undef __VSTRING_FLAG_CS ++ ++/* For several overloaded builtins the argument lists do not match ++ exactly the signature of a low-level builtin. This function ++ adjusts the argument list ARGLIST for the overloaded builtin ++ OB_FCODE to the signature of the low-level builtin given by ++ DECL. */ ++static void ++s390_adjust_builtin_arglist (unsigned int ob_fcode, tree decl, ++ vec<tree, va_gc> **arglist) ++{ ++ tree arg_chain; ++ int src_arg_index, dest_arg_index; ++ vec<tree, va_gc> *folded_args = NULL; ++ ++ /* We at most add one more operand to the list.
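++ For the string builtins that extra operand is the flags immediate ++ computed by s390_get_vstring_flags above; e.g. (illustrative sketch) ++ the two user-level operands of vec_find_any_ne (a, b) become three ++ low-level operands with the flag word appended.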
*/ ++ vec_alloc (folded_args, (*arglist)->allocated () + 1); ++ for (arg_chain = TYPE_ARG_TYPES (TREE_TYPE (decl)), ++ src_arg_index = 0, dest_arg_index = 0; ++ !VOID_TYPE_P (TREE_VALUE (arg_chain)); ++ arg_chain = TREE_CHAIN (arg_chain), dest_arg_index++) ++ { ++ bool arg_assigned_p = false; ++ switch (ob_fcode) ++ { ++ /* For all these the low level builtin needs an additional flags parameter. */ ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_or_0_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_or_0_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_cc: ++ if (dest_arg_index == 2) ++ { ++ folded_args->quick_push (build_int_cst (integer_type_node, ++ s390_get_vstring_flags (ob_fcode))); ++ arg_assigned_p = true; ++ } ++ break; ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_or_0_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_or_0_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_cc: ++ if (dest_arg_index == 3) ++ { ++ folded_args->quick_push (build_int_cst (integer_type_node, ++ s390_get_vstring_flags (ob_fcode))); ++ arg_assigned_p = true; ++ } ++ break; ++ case S390_OVERLOADED_BUILTIN_s390_vec_sel: ++ case S390_OVERLOADED_BUILTIN_s390_vec_insert: ++ case S390_OVERLOADED_BUILTIN_s390_vec_load_len: ++ /* Swap the first two arguments. It is better to do it here ++ instead of the header file to avoid operand checking ++ throwing error messages for a weird operand index.
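++ E.g. the user-level call vec_insert (x, v, i) reaches the ++ low-level builtin with its first two operands swapped to (v, x, i) ++ (illustrative; the actual operand types follow DECL's TYPE_ARG_TYPES).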
*/ ++ if (dest_arg_index < 2) ++ { ++ folded_args->quick_push (fully_fold_convert (TREE_VALUE (arg_chain), ++ (**arglist)[1 - dest_arg_index])); ++ src_arg_index++; ++ arg_assigned_p = true; ++ } ++ break; ++ case S390_OVERLOADED_BUILTIN_s390_vec_store_len: ++ if (dest_arg_index == 1 || dest_arg_index == 2) ++ { ++ folded_args->quick_push (fully_fold_convert (TREE_VALUE (arg_chain), ++ (**arglist)[3 - dest_arg_index])); ++ src_arg_index++; ++ arg_assigned_p = true; ++ } ++ break; ++ ++ case S390_OVERLOADED_BUILTIN_s390_vec_load_bndry: ++ { ++ int code; ++ ++ if (dest_arg_index == 1) ++ { ++ switch (tree_low_cst ((**arglist)[src_arg_index], 1)) ++ { ++ case 64: code = 0; break; ++ case 128: code = 1; break; ++ case 256: code = 2; break; ++ case 512: code = 3; break; ++ case 1024: code = 4; break; ++ case 2048: code = 5; break; ++ case 4096: code = 6; break; ++ default: ++ error ("valid values for builtin %qF argument %d are 64, " ++ "128, 256, 512, 1024, 2048, and 4096", decl, ++ src_arg_index + 1); ++ return; ++ } ++ folded_args->quick_push (build_int_cst (integer_type_node, ++ code)); ++ src_arg_index++; ++ arg_assigned_p = true; ++ } ++ } ++ break; ++ case S390_OVERLOADED_BUILTIN_s390_vec_rl_mask: ++ /* Duplicate the first src arg. */ ++ if (dest_arg_index == 0) ++ { ++ folded_args->quick_push (fully_fold_convert (TREE_VALUE (arg_chain), ++ (**arglist)[src_arg_index])); ++ arg_assigned_p = true; ++ } ++ break; ++ default: ++ break; ++ } ++ if (!arg_assigned_p) ++ { ++ folded_args->quick_push (fully_fold_convert (TREE_VALUE (arg_chain), ++ (**arglist)[src_arg_index])); ++ src_arg_index++; ++ } ++ } ++ *arglist = folded_args; ++} ++ ++/* Check whether the arguments in ARGLIST match the function type ++ DEF_TYPE. Return the number of argument types which required ++ conversion/promotion in order to make it match. ++ 0 stands for a perfect match - all operand types match without changes ++ INT_MAX stands for a mismatch. */ ++static int ++s390_fn_types_compatible (enum s390_builtin_ov_type_index typeindex, ++ vec<tree, va_gc> *arglist) ++{ ++ unsigned int i; ++ int match_type = 0; ++ ++ for (i = 0; i < vec_safe_length (arglist); i++) ++ { ++ tree b_arg_type = s390_builtin_types[s390_builtin_ov_types[typeindex][i + 1]]; ++ tree in_arg = (*arglist)[i]; ++ tree in_type = TREE_TYPE (in_arg); ++ ++ if (TREE_CODE (b_arg_type) == VECTOR_TYPE) ++ { ++ /* Vector types have to match precisely. */ ++ if (b_arg_type != in_type ++ && TYPE_MAIN_VARIANT (b_arg_type) != TYPE_MAIN_VARIANT (in_type)) ++ goto mismatch; ++ } ++ ++ if (lang_hooks.types_compatible_p (in_type, b_arg_type)) ++ continue; ++ ++ if (lang_hooks.types_compatible_p ( ++ lang_hooks.types.type_promotes_to (in_type), ++ lang_hooks.types.type_promotes_to (b_arg_type))) ++ { ++ match_type++; ++ continue; ++ } ++ ++ /* In this stage the C++ frontend would go ahead trying to find ++ implicit conversion chains for the argument to match the ++ target type. We will mimic this here only for our limited ++ subset of argument types. */ ++ if (TREE_CODE (b_arg_type) == INTEGER_TYPE ++ && TREE_CODE (in_type) == INTEGER_TYPE) ++ { ++ match_type++; ++ continue; ++ } ++ ++ /* If the incoming pointer argument has more qualifiers than the ++ argument type it can still be an imperfect match.
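++ E.g. (illustrative) an argument of type 'int *' may still match a ++ parameter of type 'const int *'; like the integer conversions above ++ this counts as an imperfect match rather than a mismatch.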
*/ ++ if (POINTER_TYPE_P (b_arg_type) && POINTER_TYPE_P (in_type) ++ && !(TYPE_QUALS (TREE_TYPE (in_type)) ++ & ~TYPE_QUALS (TREE_TYPE (b_arg_type))) ++ && (TYPE_QUALS (TREE_TYPE (b_arg_type)) ++ & ~TYPE_QUALS (TREE_TYPE (in_type)))) ++ { ++ tree qual_in_type = ++ build_qualified_type (TREE_TYPE (in_type), ++ TYPE_QUALS (TREE_TYPE (b_arg_type))); ++ ++ if (lang_hooks.types_compatible_p (qual_in_type, ++ TREE_TYPE (b_arg_type))) ++ { ++ match_type++; ++ continue; ++ } ++ } ++ ++ mismatch: ++ if (TARGET_DEBUG_ARG) ++ fprintf (stderr, " mismatch in operand: %d\n", i + 1); ++ return INT_MAX; ++ } ++ ++ return match_type; ++} ++ ++/* Return the number of elements in the vector arguments of FNDECL in ++ case it matches for all vector arguments, -1 otherwise. */ ++static int ++s390_vec_n_elem (tree fndecl) ++{ ++ tree b_arg_chain; ++ int n_elem = -1; ++ ++ if (TREE_CODE (TREE_TYPE (TREE_TYPE (fndecl))) == VECTOR_TYPE) ++ n_elem = TYPE_VECTOR_SUBPARTS (TREE_TYPE (TREE_TYPE ((fndecl)))); ++ ++ for (b_arg_chain = TYPE_ARG_TYPES (TREE_TYPE (fndecl)); ++ !VOID_TYPE_P (TREE_VALUE (b_arg_chain)); ++ b_arg_chain = TREE_CHAIN (b_arg_chain)) ++ { ++ int tmp_n_elem; ++ if (TREE_CODE (TREE_VALUE (b_arg_chain)) != VECTOR_TYPE) ++ continue; ++ tmp_n_elem = TYPE_VECTOR_SUBPARTS (TREE_VALUE (b_arg_chain)); ++ if (n_elem != -1 && n_elem != tmp_n_elem) ++ return -1; ++ n_elem = tmp_n_elem; ++ } ++ return n_elem; ++} ++ ++ ++/* Return a tree expression for a call to the overloaded builtin ++ function OB_FNDECL at LOC with arguments PASSED_ARGLIST. */ ++tree ++s390_resolve_overloaded_builtin (location_t loc, ++ tree ob_fndecl, ++ void *passed_arglist) ++{ ++ vec<tree, va_gc> *arglist = static_cast<vec<tree, va_gc> *> (passed_arglist); ++ unsigned int in_args_num = vec_safe_length (arglist); ++ unsigned int ob_args_num = 0; ++ unsigned int ob_fcode = DECL_FUNCTION_CODE (ob_fndecl); ++ enum s390_overloaded_builtin_vars bindex; ++ unsigned int i; ++ int last_match_type = INT_MAX; ++ int last_match_index = -1; ++ unsigned int all_op_flags; ++ int num_matches = 0; ++ tree target_builtin_decl, b_arg_chain, return_type; ++ enum s390_builtin_ov_type_index last_match_fntype_index; ++ ++ if (TARGET_DEBUG_ARG) ++ fprintf (stderr, ++ "s390_resolve_overloaded_builtin, code = %4d, %s - %s overloaded\n", ++ (int)ob_fcode, IDENTIFIER_POINTER (DECL_NAME (ob_fndecl)), ++ ob_fcode < S390_BUILTIN_MAX ? "not" : ""); ++ ++ /* 0...S390_BUILTIN_MAX-1 is for non-overloaded builtins. */ ++ if (ob_fcode < S390_BUILTIN_MAX) ++ { ++ if (bflags_for_builtin(ob_fcode) & B_INT) ++ { ++ error_at (loc, ++ "Builtin %qF is for GCC internal use only.", ++ ob_fndecl); ++ return error_mark_node; ++ } ++ return NULL_TREE; ++ } ++ ++ ob_fcode -= S390_BUILTIN_MAX; ++ ++ for (b_arg_chain = TYPE_ARG_TYPES (TREE_TYPE (ob_fndecl)); ++ !VOID_TYPE_P (TREE_VALUE (b_arg_chain)); ++ b_arg_chain = TREE_CHAIN (b_arg_chain)) ++ ob_args_num++; ++ ++ if (ob_args_num != in_args_num) ++ { ++ error_at (loc, ++ "Mismatch in number of arguments for builtin %qF. " ++ "Expected: %d got %d", ob_fndecl, ++ ob_args_num, in_args_num); ++ return error_mark_node; ++ } ++ ++ for (i = 0; i < in_args_num; i++) ++ if ((*arglist)[i] == error_mark_node) ++ return error_mark_node; ++ ++ /* Overloaded builtins without any variants are directly expanded here.
*/ ++ if (desc_start_for_overloaded_builtin[ob_fcode] == ++ S390_OVERLOADED_BUILTIN_VAR_MAX) ++ return s390_expand_overloaded_builtin (loc, ob_fcode, arglist, NULL_TREE); ++ ++ for (bindex = desc_start_for_overloaded_builtin[ob_fcode]; ++ bindex <= desc_end_for_overloaded_builtin[ob_fcode]; ++ bindex = (enum s390_overloaded_builtin_vars)((int)bindex + 1)) ++ { ++ int match_type; ++ enum s390_builtin_ov_type_index type_index = ++ type_for_overloaded_builtin_var[bindex]; ++ ++ if (TARGET_DEBUG_ARG) ++ fprintf (stderr, "checking variant number: %d", (int)bindex); ++ ++ match_type = s390_fn_types_compatible (type_index, arglist); ++ ++ if (match_type == INT_MAX) ++ continue; ++ ++ if (TARGET_DEBUG_ARG) ++ fprintf (stderr, ++ " %s match score: %d\n", match_type == 0 ? "perfect" : "imperfect", ++ match_type); ++ ++ if (match_type < last_match_type) ++ { ++ num_matches = 1; ++ last_match_type = match_type; ++ last_match_fntype_index = type_index; ++ last_match_index = bindex; ++ } ++ else if (match_type == last_match_type) ++ num_matches++; ++ } ++ ++ if (last_match_type == INT_MAX) ++ { ++ error_at (loc, "invalid parameter combination for intrinsic %qs", ++ IDENTIFIER_POINTER (DECL_NAME (ob_fndecl))); ++ return error_mark_node; ++ } ++ else if (num_matches > 1) ++ { ++ error_at (loc, "ambiguous overload for intrinsic %qs", ++ IDENTIFIER_POINTER (DECL_NAME (ob_fndecl))); ++ return error_mark_node; ++ } ++ ++ if (bt_for_overloaded_builtin_var[last_match_index] == S390_BUILTIN_MAX) ++ target_builtin_decl = ob_fndecl; ++ else ++ target_builtin_decl = s390_builtin_decls[bt_for_overloaded_builtin_var[last_match_index]]; ++ ++ all_op_flags = opflags_overloaded_builtin_var[last_match_index]; ++ return_type = s390_builtin_types[s390_builtin_ov_types[last_match_fntype_index][0]]; ++ ++ /* Check for the operand flags in the overloaded builtin variant. */ ++ for (i = 0; i < ob_args_num; i++) ++ { ++ unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1); ++ tree arg = (*arglist)[i]; ++ tree type = s390_builtin_types[s390_builtin_ov_types[last_match_fntype_index][i + 1]]; ++ ++ all_op_flags = all_op_flags >> O_SHIFT; ++ ++ if (op_flags == O_ELEM) ++ { ++ int n_elem = s390_vec_n_elem (target_builtin_decl); ++ gcc_assert (n_elem > 0); ++ gcc_assert (type == integer_type_node); ++ (*arglist)[i] = build2 (BIT_AND_EXPR, integer_type_node, ++ fold_convert (integer_type_node, arg), ++ build_int_cst (NULL_TREE, n_elem - 1)); ++ } ++ ++ if (TREE_CODE (arg) != INTEGER_CST || !O_IMM_P (op_flags)) ++ continue; ++ ++ if ((TYPE_UNSIGNED (type) ++ && !int_fits_type_p (arg, c_common_unsigned_type (type))) ++ || (!TYPE_UNSIGNED (type) ++ && !int_fits_type_p (arg, c_common_signed_type (type)))) ++ { ++ error("constant argument %d for builtin %qF is out " ++ "of range for target type", ++ i + 1, target_builtin_decl); ++ return error_mark_node; ++ } ++ ++ if (TREE_CODE (arg) == INTEGER_CST ++ && !s390_const_operand_ok (arg, i + 1, op_flags, target_builtin_decl)) ++ return error_mark_node; ++ } ++ ++ /* Handle builtins we expand directly - without mapping it to a low ++ level builtin. 
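++ Currently these are vec_step, vec_xld2/vec_xlw4, vec_xstd2/vec_xstw4 ++ and vec_load_pair, cf. s390_expand_overloaded_builtin above.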
*/ ++ if (bt_for_overloaded_builtin_var[last_match_index] == S390_BUILTIN_MAX) ++ return s390_expand_overloaded_builtin (loc, ob_fcode, arglist, return_type); ++ ++ s390_adjust_builtin_arglist (ob_fcode, target_builtin_decl, &arglist); ++ ++ if (VOID_TYPE_P (return_type)) ++ return build_function_call_vec (loc, target_builtin_decl, ++ arglist, NULL); ++ else ++ return fully_fold_convert (return_type, ++ build_function_call_vec (loc, target_builtin_decl, ++ arglist, NULL)); ++} ++ ++/* This is used to define the REGISTER_TARGET_PRAGMAS macro in s390.h. */ ++void ++s390_register_target_pragmas (void) ++{ ++ targetm.resolve_overloaded_builtin = s390_resolve_overloaded_builtin; ++} +--- gcc/config/s390/s390.h 2016-05-11 14:46:08.219982746 +0200 ++++ gcc/config/s390/s390.h 2016-05-11 17:12:39.000000000 +0200 +@@ -35,7 +35,9 @@ enum processor_flags + PF_Z10 = 32, + PF_Z196 = 64, + PF_ZEC12 = 128, +- PF_TX = 256 ++ PF_TX = 256, ++ PF_Z13 = 512, ++ PF_VX = 1024 + }; + + /* This is necessary to avoid a warning about comparing different enum +@@ -64,6 +66,10 @@ enum processor_flags + (s390_arch_flags & PF_ZEC12) + #define TARGET_CPU_HTM \ + (s390_arch_flags & PF_TX) ++#define TARGET_CPU_Z13 \ ++ (s390_arch_flags & PF_Z13) ++#define TARGET_CPU_VX \ ++ (s390_arch_flags & PF_VX) + + /* These flags indicate that the generated code should run on a cpu + providing the respective hardware facility when run in +@@ -82,7 +88,15 @@ enum processor_flags + #define TARGET_ZEC12 \ + (TARGET_ZARCH && TARGET_CPU_ZEC12) + #define TARGET_HTM (TARGET_OPT_HTM) +- ++#define TARGET_Z13 \ ++ (TARGET_ZARCH && TARGET_CPU_Z13) ++#define TARGET_VX \ ++ (TARGET_ZARCH && TARGET_CPU_VX && TARGET_OPT_VX && TARGET_HARD_FLOAT) ++ ++/* Use the ABI introduced with IBM z13: ++ - pass vector arguments <= 16 bytes in VRs ++ - align *all* vector types to 8 bytes */ ++#define TARGET_VX_ABI TARGET_VX + + #define TARGET_AVOID_CMP_AND_BRANCH (s390_tune == PROCESSOR_2817_Z196) + +@@ -97,25 +111,11 @@ enum processor_flags + #define TARGET_TPF 0 + + /* Target CPU builtins. */ +-#define TARGET_CPU_CPP_BUILTINS() \ +- do \ +- { \ +- builtin_assert ("cpu=s390"); \ +- builtin_assert ("machine=s390"); \ +- builtin_define ("__s390__"); \ +- if (TARGET_ZARCH) \ +- builtin_define ("__zarch__"); \ +- if (TARGET_64BIT) \ +- builtin_define ("__s390x__"); \ +- if (TARGET_LONG_DOUBLE_128) \ +- builtin_define ("__LONG_DOUBLE_128__"); \ +- if (TARGET_HTM) \ +- builtin_define ("__HTM__"); \ +- } \ +- while (0) ++#define TARGET_CPU_CPP_BUILTINS() s390_cpu_cpp_builtins (pfile) + + #ifdef DEFAULT_TARGET_64BIT +-#define TARGET_DEFAULT (MASK_64BIT | MASK_ZARCH | MASK_HARD_DFP | MASK_OPT_HTM) ++#define TARGET_DEFAULT (MASK_64BIT | MASK_ZARCH | MASK_HARD_DFP \ ++ | MASK_OPT_HTM | MASK_OPT_VX) + #else + #define TARGET_DEFAULT 0 + #endif +@@ -184,6 +184,13 @@ enum processor_flags + + #define STACK_SIZE_MODE (Pmode) + ++/* Vector arguments are left-justified when placed on the stack during ++ parameter passing. */ ++#define FUNCTION_ARG_PADDING(MODE, TYPE) \ ++ (s390_function_arg_vector ((MODE), (TYPE)) \ ++ ? upward \ ++ : DEFAULT_FUNCTION_ARG_PADDING ((MODE), (TYPE))) ++ + #ifndef IN_LIBGCC2 + + /* Width of a word, in units (bytes). */ +@@ -289,9 +296,11 @@ enum processor_flags + Reg 35: Return address pointer + + Registers 36 and 37 are mapped to access registers +- 0 and 1, used to implement thread-local storage. */ ++ 0 and 1, used to implement thread-local storage. 
++ ++ Reg 38-53: Vector registers v16-v31 */ + +-#define FIRST_PSEUDO_REGISTER 38 ++#define FIRST_PSEUDO_REGISTER 54 + + /* Standard register usage. */ + #define GENERAL_REGNO_P(N) ((int)(N) >= 0 && (N) < 16) +@@ -300,6 +309,8 @@ enum processor_flags + #define CC_REGNO_P(N) ((N) == 33) + #define FRAME_REGNO_P(N) ((N) == 32 || (N) == 34 || (N) == 35) + #define ACCESS_REGNO_P(N) ((N) == 36 || (N) == 37) ++#define VECTOR_NOFP_REGNO_P(N) ((N) >= 38 && (N) <= 53) ++#define VECTOR_REGNO_P(N) (FP_REGNO_P (N) || VECTOR_NOFP_REGNO_P (N)) + + #define GENERAL_REG_P(X) (REG_P (X) && GENERAL_REGNO_P (REGNO (X))) + #define ADDR_REG_P(X) (REG_P (X) && ADDR_REGNO_P (REGNO (X))) +@@ -307,6 +318,8 @@ enum processor_flags + #define CC_REG_P(X) (REG_P (X) && CC_REGNO_P (REGNO (X))) + #define FRAME_REG_P(X) (REG_P (X) && FRAME_REGNO_P (REGNO (X))) + #define ACCESS_REG_P(X) (REG_P (X) && ACCESS_REGNO_P (REGNO (X))) ++#define VECTOR_NOFP_REG_P(X) (REG_P (X) && VECTOR_NOFP_REGNO_P (REGNO (X))) ++#define VECTOR_REG_P(X) (REG_P (X) && VECTOR_REGNO_P (REGNO (X))) + + /* Set up fixed registers and calling convention: + +@@ -321,7 +334,9 @@ enum processor_flags + + On 31-bit, FPRs 18-19 are call-clobbered; + on 64-bit, FPRs 24-31 are call-clobbered. +- The remaining FPRs are call-saved. */ ++ The remaining FPRs are call-saved. ++ ++ All non-FP vector registers are call-clobbered v16-v31. */ + + #define FIXED_REGISTERS \ + { 0, 0, 0, 0, \ +@@ -333,7 +348,11 @@ enum processor_flags + 0, 0, 0, 0, \ + 0, 0, 0, 0, \ + 1, 1, 1, 1, \ +- 1, 1 } ++ 1, 1, \ ++ 0, 0, 0, 0, \ ++ 0, 0, 0, 0, \ ++ 0, 0, 0, 0, \ ++ 0, 0, 0, 0 } + + #define CALL_USED_REGISTERS \ + { 1, 1, 1, 1, \ +@@ -345,26 +364,35 @@ enum processor_flags + 1, 1, 1, 1, \ + 1, 1, 1, 1, \ + 1, 1, 1, 1, \ +- 1, 1 } ++ 1, 1, \ ++ 1, 1, 1, 1, \ ++ 1, 1, 1, 1, \ ++ 1, 1, 1, 1, \ ++ 1, 1, 1, 1 } + + #define CALL_REALLY_USED_REGISTERS \ +-{ 1, 1, 1, 1, \ ++{ 1, 1, 1, 1, /* r0 - r15 */ \ + 1, 1, 0, 0, \ + 0, 0, 0, 0, \ + 0, 0, 0, 0, \ ++ 1, 1, 1, 1, /* f0 (16) - f15 (31) */ \ + 1, 1, 1, 1, \ + 1, 1, 1, 1, \ + 1, 1, 1, 1, \ +- 1, 1, 1, 1, \ ++ 1, 1, 1, 1, /* arg, cc, fp, ret addr */ \ ++ 0, 0, /* a0 (36), a1 (37) */ \ ++ 1, 1, 1, 1, /* v16 (38) - v23 (45) */ \ + 1, 1, 1, 1, \ +- 0, 0 } ++ 1, 1, 1, 1, /* v24 (46) - v31 (53) */ \ ++ 1, 1, 1, 1 } + + /* Preferred register allocation order. */ +-#define REG_ALLOC_ORDER \ +-{ 1, 2, 3, 4, 5, 0, 12, 11, 10, 9, 8, 7, 6, 14, 13, \ +- 16, 17, 18, 19, 20, 21, 22, 23, \ +- 24, 25, 26, 27, 28, 29, 30, 31, \ +- 15, 32, 33, 34, 35, 36, 37 } ++#define REG_ALLOC_ORDER \ ++ { 1, 2, 3, 4, 5, 0, 12, 11, 10, 9, 8, 7, 6, 14, 13, \ ++ 16, 17, 18, 19, 20, 21, 22, 23, \ ++ 24, 25, 26, 27, 28, 29, 30, 31, \ ++ 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, \ ++ 15, 32, 33, 34, 35, 36, 37 } + + + /* Fitting values into registers. */ +@@ -404,26 +432,22 @@ enum processor_flags + but conforms to the 31-bit ABI, GPRs can hold 8 bytes; + the ABI guarantees only that the lower 4 bytes are + saved across calls, however. 
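+ + Similarly, with TARGET_VX only the FPR half (the low 8 bytes) of a + call-saved register survives a call; any wider vector mode living in + one of these registers is therefore part-clobbered as well (see the + TARGET_VX clause below; e.g., an illustrative reading, a 16-byte + V2DImode value in REGNO 24, i.e. %f8 overlapping v8, on 64-bit).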
*/ +-#define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE) \ +- (!TARGET_64BIT && TARGET_ZARCH \ +- && GET_MODE_SIZE (MODE) > 4 \ +- && (((REGNO) >= 6 && (REGNO) <= 15) || (REGNO) == 32)) ++#define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE) \ ++ ((!TARGET_64BIT && TARGET_ZARCH \ ++ && GET_MODE_SIZE (MODE) > 4 \ ++ && (((REGNO) >= 6 && (REGNO) <= 15) || (REGNO) == 32)) \ ++ || (TARGET_VX \ ++ && GET_MODE_SIZE (MODE) > 8 \ ++ && (((TARGET_64BIT && (REGNO) >= 24 && (REGNO) <= 31)) \ ++ || (!TARGET_64BIT && ((REGNO) == 18 || (REGNO) == 19))))) + + /* Maximum number of registers to represent a value of mode MODE + in a register of class CLASS. */ + #define CLASS_MAX_NREGS(CLASS, MODE) \ + s390_class_max_nregs ((CLASS), (MODE)) + +-/* If a 4-byte value is loaded into a FPR, it is placed into the +- *upper* half of the register, not the lower. Therefore, we +- cannot use SUBREGs to switch between modes in FP registers. +- Likewise for access registers, since they have only half the +- word size on 64-bit. */ + #define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ +- (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \ +- ? ((reg_classes_intersect_p (FP_REGS, CLASS) \ +- && (GET_MODE_SIZE (FROM) < 8 || GET_MODE_SIZE (TO) < 8)) \ +- || reg_classes_intersect_p (ACCESS_REGS, CLASS)) : 0) ++ s390_cannot_change_mode_class ((FROM), (TO), (CLASS)) + + /* Register classes. */ + +@@ -451,6 +475,7 @@ enum reg_class + NO_REGS, CC_REGS, ADDR_REGS, GENERAL_REGS, ACCESS_REGS, + ADDR_CC_REGS, GENERAL_CC_REGS, + FP_REGS, ADDR_FP_REGS, GENERAL_FP_REGS, ++ VEC_REGS, ADDR_VEC_REGS, GENERAL_VEC_REGS, + ALL_REGS, LIM_REG_CLASSES + }; + #define N_REG_CLASSES (int) LIM_REG_CLASSES +@@ -458,11 +483,13 @@ enum reg_class + #define REG_CLASS_NAMES \ + { "NO_REGS", "CC_REGS", "ADDR_REGS", "GENERAL_REGS", "ACCESS_REGS", \ + "ADDR_CC_REGS", "GENERAL_CC_REGS", \ +- "FP_REGS", "ADDR_FP_REGS", "GENERAL_FP_REGS", "ALL_REGS" } ++ "FP_REGS", "ADDR_FP_REGS", "GENERAL_FP_REGS", \ ++ "VEC_REGS", "ADDR_VEC_REGS", "GENERAL_VEC_REGS", \ ++ "ALL_REGS" } + + /* Class -> register mapping. */ +-#define REG_CLASS_CONTENTS \ +-{ \ ++#define REG_CLASS_CONTENTS \ ++{ \ + { 0x00000000, 0x00000000 }, /* NO_REGS */ \ + { 0x00000000, 0x00000002 }, /* CC_REGS */ \ + { 0x0000fffe, 0x0000000d }, /* ADDR_REGS */ \ +@@ -473,7 +500,10 @@ enum reg_class + { 0xffff0000, 0x00000000 }, /* FP_REGS */ \ + { 0xfffffffe, 0x0000000d }, /* ADDR_FP_REGS */ \ + { 0xffffffff, 0x0000000d }, /* GENERAL_FP_REGS */ \ +- { 0xffffffff, 0x0000003f }, /* ALL_REGS */ \ ++ { 0xffff0000, 0x003fffc0 }, /* VEC_REGS */ \ ++ { 0xfffffffe, 0x003fffcd }, /* ADDR_VEC_REGS */ \ ++ { 0xffffffff, 0x003fffcd }, /* GENERAL_VEC_REGS */ \ ++ { 0xffffffff, 0x003fffff }, /* ALL_REGS */ \ + } + + /* In some case register allocation order is not enough for IRA to +@@ -504,14 +534,27 @@ extern const enum reg_class regclass_map + #define REGNO_OK_FOR_BASE_P(REGNO) REGNO_OK_FOR_INDEX_P (REGNO) + + +-/* We need secondary memory to move data between GPRs and FPRs. With +- DFP the ldgr lgdr instructions are available. But these +- instructions do not handle GPR pairs so it is not possible for 31 +- bit. */ +-#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \ +- ((CLASS1) != (CLASS2) \ +- && ((CLASS1) == FP_REGS || (CLASS2) == FP_REGS) \ +- && (!TARGET_DFP || !TARGET_64BIT || GET_MODE_SIZE (MODE) != 8)) ++/* We need secondary memory to move data between GPRs and FPRs. ++ ++ - With DFP the ldgr lgdr instructions are available. Due to the ++ different alignment we cannot use them for SFmode. 
For 31 bit a ++ 64 bit value in GPR would be a register pair so here we still ++ need to go via memory. ++ ++ - With z13 we can do the SF/SImode moves with vlgvf. Due to the ++ overlapping of FPRs and VRs we still disallow TF/TD modes to be ++ in full VRs so as before also on z13 we do these moves via ++ memory. ++ ++ FIXME: Should we try splitting it into two vlgvg's/vlvg's instead? */ ++#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \ ++ (((reg_classes_intersect_p (CLASS1, VEC_REGS) \ ++ && reg_classes_intersect_p (CLASS2, GENERAL_REGS)) \ ++ || (reg_classes_intersect_p (CLASS1, GENERAL_REGS) \ ++ && reg_classes_intersect_p (CLASS2, VEC_REGS))) \ ++ && (!TARGET_DFP || !TARGET_64BIT || GET_MODE_SIZE (MODE) != 8) \ ++ && (!TARGET_VX || (SCALAR_FLOAT_MODE_P (MODE) \ ++ && GET_MODE_SIZE (MODE) > 8))) + + /* Get_secondary_mem widens its argument to BITS_PER_WORD which loses on 64bit + because the movsi and movsf patterns don't handle r/f moves. */ +@@ -605,6 +648,11 @@ extern const enum reg_class regclass_map + /* Let the assembler generate debug line info. */ + #define DWARF2_ASM_LINE_DEBUG_INFO 1 + ++/* Define the dwarf register mapping. ++ v16-v31 -> 68-83 ++ rX -> X otherwise */ ++#define DBX_REGISTER_NUMBER(regno) \ ++ ((regno >= 38 && regno <= 53) ? regno + 30 : regno) + + /* Frame registers. */ + +@@ -652,21 +700,29 @@ typedef struct s390_arg_structure + { + int gprs; /* gpr so far */ + int fprs; /* fpr so far */ ++ int vrs; /* vr so far */ + } + CUMULATIVE_ARGS; + + #define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, NN, N_NAMED_ARGS) \ +- ((CUM).gprs=0, (CUM).fprs=0) ++ ((CUM).gprs=0, (CUM).fprs=0, (CUM).vrs=0) ++ ++#define FIRST_VEC_ARG_REGNO 46 ++#define LAST_VEC_ARG_REGNO 53 + + /* Arguments can be placed in general registers 2 to 6, or in floating + point registers 0 and 2 for 31 bit and fprs 0, 2, 4 and 6 for 64 + bit. */ +-#define FUNCTION_ARG_REGNO_P(N) (((N) >=2 && (N) <7) || \ +- (N) == 16 || (N) == 17 || (TARGET_64BIT && ((N) == 18 || (N) == 19))) ++#define FUNCTION_ARG_REGNO_P(N) \ ++ (((N) >=2 && (N) < 7) || (N) == 16 || (N) == 17 \ ++ || (TARGET_64BIT && ((N) == 18 || (N) == 19)) \ ++ || (TARGET_VX && ((N) >= FIRST_VEC_ARG_REGNO && (N) <= LAST_VEC_ARG_REGNO))) + + +-/* Only gpr 2 and fpr 0 are ever used as return registers. */ +-#define FUNCTION_VALUE_REGNO_P(N) ((N) == 2 || (N) == 16) ++/* Only gpr 2, fpr 0, and v24 are ever used as return registers. */ ++#define FUNCTION_VALUE_REGNO_P(N) \ ++ ((N) == 2 || (N) == 16 \ ++ || (TARGET_VX && (N) == FIRST_VEC_ARG_REGNO)) + + + /* Function entry and exit. */ +@@ -844,12 +900,20 @@ do { \ + /* How to refer to registers in assembler output. This sequence is + indexed by compiler's hard-register-number (see above). 
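+ + Note the FPR/VR overlap: hard regs 16-31 are printed with their %f + names here, while the ADDITIONAL_REGISTER_NAMES table below + additionally accepts the names v0-v15 for the very same registers; + the non-overlapping vector registers 38-53 print as %v16-%v31.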
*/ + #define REGISTER_NAMES \ +-{ "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", \ +- "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", \ +- "%f0", "%f2", "%f4", "%f6", "%f1", "%f3", "%f5", "%f7", \ +- "%f8", "%f10", "%f12", "%f14", "%f9", "%f11", "%f13", "%f15", \ +- "%ap", "%cc", "%fp", "%rp", "%a0", "%a1" \ +-} ++ { "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", \ ++ "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", \ ++ "%f0", "%f2", "%f4", "%f6", "%f1", "%f3", "%f5", "%f7", \ ++ "%f8", "%f10", "%f12", "%f14", "%f9", "%f11", "%f13", "%f15", \ ++ "%ap", "%cc", "%fp", "%rp", "%a0", "%a1", \ ++ "%v16", "%v18", "%v20", "%v22", "%v17", "%v19", "%v21", "%v23", \ ++ "%v24", "%v26", "%v28", "%v30", "%v25", "%v27", "%v29", "%v31" \ ++ } ++ ++#define ADDITIONAL_REGISTER_NAMES \ ++ { { "v0", 16 }, { "v2", 17 }, { "v4", 18 }, { "v6", 19 }, \ ++ { "v1", 20 }, { "v3", 21 }, { "v5", 22 }, { "v7", 23 }, \ ++ { "v8", 24 }, { "v10", 25 }, { "v12", 26 }, { "v14", 27 }, \ ++ { "v9", 28 }, { "v11", 29 }, { "v13", 30 }, { "v15", 31 } }; + + /* Print operand X (an rtx) in assembler syntax to file FILE. */ + #define PRINT_OPERAND(FILE, X, CODE) print_operand (FILE, X, CODE) +@@ -915,13 +979,31 @@ + #define SYMBOL_REF_NOT_NATURALLY_ALIGNED_P(X) \ + ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_NOT_NATURALLY_ALIGNED)) + ++/* Check whether integer displacement is in range for a short displacement. */ ++#define SHORT_DISP_IN_RANGE(d) ((d) >= 0 && (d) <= 4095) ++ + /* Check whether integer displacement is in range. */ + #define DISP_IN_RANGE(d) \ + (TARGET_LONG_DISPLACEMENT? ((d) >= -524288 && (d) <= 524287) \ +- : ((d) >= 0 && (d) <= 4095)) ++ : SHORT_DISP_IN_RANGE(d)) + + /* Reads can reuse write prefetches, used by tree-ssa-prefetch-loops.c. */ + #define READ_CAN_USE_WRITE_PREFETCH 1 + + extern const int processor_flags_table[]; +-#endif ++ ++/* The truth element value for vector comparisons. Our instructions ++ always generate -1 in that case. */ ++#define VECTOR_STORE_FLAG_VALUE(MODE) CONSTM1_RTX (GET_MODE_INNER (MODE)) ++ ++/* Target pragma. */ ++ ++/* resolve_overloaded_builtin cannot be defined the normal way since ++ it is defined in code which technically belongs to the ++ front-end. */ ++#define REGISTER_TARGET_PRAGMAS() \ ++ do { \ ++ s390_register_target_pragmas (); \ ++ } while (0) ++ ++#endif /* S390_H */ +--- gcc/config/s390/s390intrin.h 2013-08-14 13:55:12.000000000 +0200 ++++ gcc/config/s390/s390intrin.h 2016-05-11 17:12:39.000000000 +0200 +@@ -29,5 +29,8 @@ along with GCC; see the file COPYING3.
+ #include <htmintrin.h> + #endif + ++#ifdef __VEC__ ++#include <vecintrin.h> ++#endif + + #endif /* _S390INTRIN_H*/ +--- gcc/config/s390/s390.md 2015-06-18 16:33:04.000000000 +0200 ++++ gcc/config/s390/s390.md 2016-05-11 19:22:59.245881189 +0200 +@@ -125,7 +125,109 @@ + UNSPEC_FPINT_CEIL + UNSPEC_FPINT_NEARBYINT + UNSPEC_FPINT_RINT +- ]) ++ ++ UNSPEC_LCBB ++ ++ ; Vector ++ UNSPEC_VEC_SMULT_HI ++ UNSPEC_VEC_UMULT_HI ++ UNSPEC_VEC_SMULT_LO ++ UNSPEC_VEC_SMULT_EVEN ++ UNSPEC_VEC_UMULT_EVEN ++ UNSPEC_VEC_SMULT_ODD ++ UNSPEC_VEC_UMULT_ODD ++ ++ UNSPEC_VEC_VMAL ++ UNSPEC_VEC_VMAH ++ UNSPEC_VEC_VMALH ++ UNSPEC_VEC_VMAE ++ UNSPEC_VEC_VMALE ++ UNSPEC_VEC_VMAO ++ UNSPEC_VEC_VMALO ++ ++ UNSPEC_VEC_GATHER ++ UNSPEC_VEC_EXTRACT ++ UNSPEC_VEC_INSERT_AND_ZERO ++ UNSPEC_VEC_LOAD_BNDRY ++ UNSPEC_VEC_LOAD_LEN ++ UNSPEC_VEC_MERGEH ++ UNSPEC_VEC_MERGEL ++ UNSPEC_VEC_PACK ++ UNSPEC_VEC_PACK_SATURATE ++ UNSPEC_VEC_PACK_SATURATE_CC ++ UNSPEC_VEC_PACK_SATURATE_GENCC ++ UNSPEC_VEC_PACK_UNSIGNED_SATURATE ++ UNSPEC_VEC_PACK_UNSIGNED_SATURATE_CC ++ UNSPEC_VEC_PACK_UNSIGNED_SATURATE_GENCC ++ UNSPEC_VEC_PERM ++ UNSPEC_VEC_PERMI ++ UNSPEC_VEC_EXTEND ++ UNSPEC_VEC_STORE_LEN ++ UNSPEC_VEC_UNPACKH ++ UNSPEC_VEC_UNPACKH_L ++ UNSPEC_VEC_UNPACKL ++ UNSPEC_VEC_UNPACKL_L ++ UNSPEC_VEC_ADDC ++ UNSPEC_VEC_ADDC_U128 ++ UNSPEC_VEC_ADDE_U128 ++ UNSPEC_VEC_ADDEC_U128 ++ UNSPEC_VEC_AVG ++ UNSPEC_VEC_AVGU ++ UNSPEC_VEC_CHECKSUM ++ UNSPEC_VEC_GFMSUM ++ UNSPEC_VEC_GFMSUM_128 ++ UNSPEC_VEC_GFMSUM_ACCUM ++ UNSPEC_VEC_GFMSUM_ACCUM_128 ++ UNSPEC_VEC_SET ++ ++ UNSPEC_VEC_VSUMG ++ UNSPEC_VEC_VSUMQ ++ UNSPEC_VEC_VSUM ++ UNSPEC_VEC_RL_MASK ++ UNSPEC_VEC_SLL ++ UNSPEC_VEC_SLB ++ UNSPEC_VEC_SLDB ++ UNSPEC_VEC_SRAL ++ UNSPEC_VEC_SRAB ++ UNSPEC_VEC_SRL ++ UNSPEC_VEC_SRLB ++ ++ UNSPEC_VEC_SUB_U128 ++ UNSPEC_VEC_SUBC ++ UNSPEC_VEC_SUBC_U128 ++ UNSPEC_VEC_SUBE_U128 ++ UNSPEC_VEC_SUBEC_U128 ++ ++ UNSPEC_VEC_TEST_MASK ++ ++ UNSPEC_VEC_VFAE ++ UNSPEC_VEC_VFAECC ++ ++ UNSPEC_VEC_VFEE ++ UNSPEC_VEC_VFEECC ++ UNSPEC_VEC_VFENE ++ UNSPEC_VEC_VFENECC ++ ++ UNSPEC_VEC_VISTR ++ UNSPEC_VEC_VISTRCC ++ ++ UNSPEC_VEC_VSTRC ++ UNSPEC_VEC_VSTRCCC ++ ++ UNSPEC_VEC_VCDGB ++ UNSPEC_VEC_VCDLGB ++ ++ UNSPEC_VEC_VCGDB ++ UNSPEC_VEC_VCLGDB ++ ++ UNSPEC_VEC_VFIDB ++ ++ UNSPEC_VEC_VLDEB ++ UNSPEC_VEC_VLEDB ++ ++ UNSPEC_VEC_VFTCIDB ++ UNSPEC_VEC_VFTCIDBCC ++]) + + ;; + ;; UNSPEC_VOLATILE usage +@@ -167,6 +269,10 @@ + UNSPECV_ETND + UNSPECV_NTSTG + UNSPECV_PPA ++ ++ ; Set and get floating point control register ++ UNSPECV_SFPC ++ UNSPECV_EFPC + ]) + + ;; +@@ -198,6 +304,11 @@ + ; Floating point registers. + (FPR0_REGNUM 16) + (FPR2_REGNUM 18) ++ (VR0_REGNUM 16) ++ (VR16_REGNUM 38) ++ (VR23_REGNUM 45) ++ (VR24_REGNUM 46) ++ (VR31_REGNUM 53) + ]) + + ;; +@@ -228,7 +339,7 @@ + ;; Used to determine defaults for length and other attribute values. + + (define_attr "op_type" +- "NN,E,RR,RRE,RX,RS,RSI,RI,SI,S,SS,SSE,RXE,RSE,RIL,RIE,RXY,RSY,SIY,RRF,RRR,SIL,RRS,RIS" ++ "NN,E,RR,RRE,RX,RS,RSI,RI,SI,S,SS,SSE,RXE,RSE,RIL,RIE,RXY,RSY,SIY,RRF,RRR,SIL,RRS,RIS,VRI,VRR,VRS,VRV,VRX" + (const_string "NN")) + + ;; Instruction type attribute used for scheduling. +@@ -306,10 +417,11 @@ + ;; distinguish between g5 and g6, but there are differences between the two + ;; CPUs that could in theory be modeled.
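Taken together, the front-end pieces above (s390intrin.h, the conditional vector/__bool keywords, and the overloaded-builtin resolution in s390-c.c) enable user code along the lines of the following minimal sketch. Only the vector keyword and vec_step are taken from the hunks above; the function names and the element count of 4 for a 16-byte vector of ints are illustrative assumptions:

    #include <s390intrin.h>   /* pulls in vecintrin.h when __VEC__ is defined */

    /* 'vector' is expanded context-sensitively (see s390_macro_to_expand):
       followed by a single type keyword it becomes
       __attribute__ ((vector_size (16))).  */
    vector signed int
    add4 (vector signed int a, vector signed int b)
    {
      return a + b;           /* generic GCC vector arithmetic */
    }

    int
    elements (vector signed int x)
    {
      return vec_step (x);    /* folds to the element count, here 4 */
    }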
+ +-(define_attr "cpu" "g5,g6,z900,z990,z9_109,z9_ec,z10,z196,zEC12" ++(define_attr "cpu" "g5,g6,z900,z990,z9_109,z9_ec,z10,z196,zEC12,z13" + (const (symbol_ref "s390_tune_attr"))) + +-(define_attr "cpu_facility" "standard,ieee,zarch,longdisp,extimm,dfp,z10,z196,zEC12" ++(define_attr "cpu_facility" ++ "standard,ieee,zarch,longdisp,extimm,dfp,z10,z196,zEC12,vec" + (const_string "standard")) + + (define_attr "enabled" "" +@@ -346,6 +458,10 @@ + + (and (eq_attr "cpu_facility" "zEC12") + (match_test "TARGET_ZEC12")) ++ (const_int 1) ++ ++ (and (eq_attr "cpu_facility" "vec") ++ (match_test "TARGET_VX")) + (const_int 1)] + (const_int 0))) + +@@ -365,6 +481,9 @@ + ;; Pipeline description for zEC12 + (include "2827.md") + ++;; Pipeline description for z13 ++(include "2964.md") ++ + ;; Predicates + (include "predicates.md") + +@@ -376,12 +495,13 @@ + + ;; Iterators + ++(define_mode_iterator ALL [TI DI SI HI QI TF DF SF TD DD SD V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1SF V2SF V4SF V1TI V1DF V2DF V1TF]) ++ + ;; These mode iterators allow floating point patterns to be generated from the + ;; same template. + (define_mode_iterator FP_ALL [TF DF SF (TD "TARGET_HARD_DFP") (DD "TARGET_HARD_DFP") + (SD "TARGET_HARD_DFP")]) + (define_mode_iterator FP [TF DF SF (TD "TARGET_HARD_DFP") (DD "TARGET_HARD_DFP")]) +-(define_mode_iterator FPALL [TF DF SF TD DD SD]) + (define_mode_iterator BFP [TF DF SF]) + (define_mode_iterator DFP [TD DD]) + (define_mode_iterator DFP_ALL [TD DD SD]) +@@ -417,7 +537,6 @@ + ;; This mode iterator allows the integer patterns to be defined from the + ;; same template. + (define_mode_iterator INT [(DI "TARGET_ZARCH") SI HI QI]) +-(define_mode_iterator INTALL [TI DI SI HI QI]) + (define_mode_iterator DINT [(TI "TARGET_ZARCH") DI SI HI QI]) + + ;; This iterator allows some 'ashift' and 'lshiftrt' pattern to be defined from +@@ -476,6 +595,14 @@ + ;; first and the second operand match for bfp modes. + (define_mode_attr f0 [(TF "0") (DF "0") (SF "0") (TD "f") (DD "f") (DD "f")]) + ++;; This attribute is used to merge the scalar vector instructions into ++;; the FP patterns. For non-supported modes (all but DF) it expands ++;; to constraints which are supposed to be matched by an earlier ++;; variant. ++(define_mode_attr v0 [(TF "0") (DF "v") (SF "0") (TD "0") (DD "0") (DD "0") (TI "0") (DI "v") (SI "0")]) ++(define_mode_attr vf [(TF "f") (DF "v") (SF "f") (TD "f") (DD "f") (DD "f") (TI "f") (DI "v") (SI "f")]) ++(define_mode_attr vd [(TF "d") (DF "v") (SF "d") (TD "d") (DD "d") (DD "d") (TI "d") (DI "v") (SI "d")]) ++ + ;; This attribute is used in the operand list of the instruction to have an + ;; additional operand for the dfp instructions. + (define_mode_attr op1 [(TF "") (DF "") (SF "") +@@ -584,6 +711,19 @@ + ;; In place of GET_MODE_BITSIZE (mode) + (define_mode_attr bitsize [(DI "64") (SI "32") (HI "16") (QI "8")]) + ++ ++ ++; Condition code modes generated by vector fp comparisons. These will ++; be used also in single element mode. ++(define_mode_iterator VFCMP [CCVEQ CCVFH CCVFHE]) ++; Used with VFCMP to expand part of the mnemonic ++; For fp we have a mismatch: eq in the insn name - e in asm ++(define_mode_attr asm_fcmp [(CCVEQ "e") (CCVFH "h") (CCVFHE "he")]) ++(define_mode_attr insn_cmp [(CCVEQ "eq") (CCVH "h") (CCVHU "hl") (CCVFH "h") (CCVFHE "he")]) ++ ++ ++(include "vector.md") ++ + ;; + ;;- Compare instructions. 
+ ;; +@@ -1091,6 +1231,15 @@ + [(set_attr "op_type" "RRE,RXE") + (set_attr "type" "fsimp")]) + ++; wfcedbs, wfchdbs, wfchedbs ++(define_insn "*vec_cmpdf_cconly" ++ [(set (reg:VFCMP CC_REGNUM) ++ (compare:VFCMP (match_operand:DF 0 "register_operand" "v") ++ (match_operand:DF 1 "register_operand" "v"))) ++ (clobber (match_scratch:V2DI 2 "=v"))] ++ "TARGET_VX && TARGET_HARD_FLOAT" ++ "wfcdbs\t%v2,%v0,%v1" ++ [(set_attr "op_type" "VRR")]) + + ; Compare and Branch instructions + +@@ -1216,17 +1365,27 @@ + ; movti instruction pattern(s). + ; + ++; FIXME: More constants are possible by enabling jxx, jyy constraints ++; for TImode (use double-int for the calculations) + (define_insn "movti" +- [(set (match_operand:TI 0 "nonimmediate_operand" "=d,QS,d,o") +- (match_operand:TI 1 "general_operand" "QS,d,dPRT,d"))] ++ [(set (match_operand:TI 0 "nonimmediate_operand" "=d,QS,v, v, v,v,d, v,QR, d,o") ++ (match_operand:TI 1 "general_operand" "QS, d,v,j00,jm1,d,v,QR, v,dPRT,d"))] + "TARGET_ZARCH" + "@ + lmg\t%0,%N0,%S1 + stmg\t%1,%N1,%S0 ++ vlr\t%v0,%v1 ++ vzero\t%v0 ++ vone\t%v0 ++ vlvgp\t%v0,%1,%N1 ++ # ++ vl\t%v0,%1 ++ vst\t%v1,%0 + # + #" +- [(set_attr "op_type" "RSY,RSY,*,*") +- (set_attr "type" "lm,stm,*,*")]) ++ [(set_attr "op_type" "RSY,RSY,VRR,VRI,VRI,VRR,*,VRX,VRX,*,*") ++ (set_attr "type" "lm,stm,*,*,*,*,*,*,*,*,*") ++ (set_attr "cpu_facility" "*,*,vec,vec,vec,vec,vec,vec,vec,*,*")]) + + (define_split + [(set (match_operand:TI 0 "nonimmediate_operand" "") +@@ -1256,10 +1415,14 @@ + operands[5] = operand_subword (operands[1], 0, 0, TImode); + }) + ++; Use part of the TImode target reg to perform the address ++; calculation. If the TImode value is supposed to be copied into a VR ++; this splitter is not necessary. + (define_split + [(set (match_operand:TI 0 "register_operand" "") + (match_operand:TI 1 "memory_operand" ""))] + "TARGET_ZARCH && reload_completed ++ && !VECTOR_REG_P (operands[0]) + && !s_operand (operands[1], VOIDmode)" + [(set (match_dup 0) (match_dup 1))] + { +@@ -1270,6 +1433,25 @@ + }) + + ++; Split a VR -> GPR TImode move into 2 vector load GR from VR element. ++; For the higher order bits we do simply a DImode move while the ++; second part is done via vec extract. Both will end up as vlgvg. ++(define_split ++ [(set (match_operand:TI 0 "register_operand" "") ++ (match_operand:TI 1 "register_operand" ""))] ++ "TARGET_VX && reload_completed ++ && GENERAL_REG_P (operands[0]) ++ && VECTOR_REG_P (operands[1])" ++ [(set (match_dup 2) (match_dup 4)) ++ (set (match_dup 3) (unspec:DI [(match_dup 5) (const_int 1)] ++ UNSPEC_VEC_EXTRACT))] ++{ ++ operands[2] = operand_subword (operands[0], 0, 0, TImode); ++ operands[3] = operand_subword (operands[0], 1, 0, TImode); ++ operands[4] = gen_rtx_REG (DImode, REGNO (operands[1])); ++ operands[5] = gen_rtx_REG (V2DImode, REGNO (operands[1])); ++}) ++ + ; + ; Patterns used for secondary reloads + ; +@@ -1278,40 +1460,20 @@ + ; Unfortunately there is no such variant for QI, TI and FP mode moves. + ; These patterns are also used for unaligned SI and DI accesses. 
+ +-(define_expand "reload_tomem_z10" +- [(parallel [(match_operand:INTALL 0 "memory_operand" "") +- (match_operand:INTALL 1 "register_operand" "=d") +- (match_operand:P 2 "register_operand" "=&a")])] +- "TARGET_Z10" +-{ +- s390_reload_symref_address (operands[1], operands[0], operands[2], 1); +- DONE; +-}) +- +-(define_expand "reload_toreg_z10" +- [(parallel [(match_operand:INTALL 0 "register_operand" "=d") +- (match_operand:INTALL 1 "memory_operand" "") +- (match_operand:P 2 "register_operand" "=a")])] +- "TARGET_Z10" +-{ +- s390_reload_symref_address (operands[0], operands[1], operands[2], 0); +- DONE; +-}) +- +-(define_expand "reload_tomem_z10" +- [(parallel [(match_operand:FPALL 0 "memory_operand" "") +- (match_operand:FPALL 1 "register_operand" "=d") +- (match_operand:P 2 "register_operand" "=&a")])] ++(define_expand "reload_tomem_z10" ++ [(parallel [(match_operand:ALL 0 "memory_operand" "") ++ (match_operand:ALL 1 "register_operand" "=d") ++ (match_operand:P 2 "register_operand" "=&a")])] + "TARGET_Z10" + { + s390_reload_symref_address (operands[1], operands[0], operands[2], 1); + DONE; + }) + +-(define_expand "reload_toreg_z10" +- [(parallel [(match_operand:FPALL 0 "register_operand" "=d") +- (match_operand:FPALL 1 "memory_operand" "") +- (match_operand:P 2 "register_operand" "=a")])] ++(define_expand "reload_toreg_z10" ++ [(parallel [(match_operand:ALL 0 "register_operand" "=d") ++ (match_operand:ALL 1 "memory_operand" "") ++ (match_operand:P 2 "register_operand" "=a")])] + "TARGET_Z10" + { + s390_reload_symref_address (operands[0], operands[1], operands[2], 0); +@@ -1340,9 +1502,16 @@ + DONE; + }) + +-; Handles assessing a non-offsetable memory address ++; Not all the indirect memory access instructions support the full ++; format (long disp + index + base). So whenever a move from/to such ++; an address is required and the instruction cannot deal with it we do ++; a load address into a scratch register first and use this as the new ++; base register. 
++; This in particular is used for: ++; - non-offsetable memory accesses for multiword moves ++; - full vector reg moves with long displacements + +-(define_expand "reload_nonoffmem_in" ++(define_expand "reload_la_in" + [(parallel [(match_operand 0 "register_operand" "") + (match_operand 1 "" "") + (match_operand:P 2 "register_operand" "=&a")])] +@@ -1355,7 +1524,7 @@ + DONE; + }) + +-(define_expand "reload_nonoffmem_out" ++(define_expand "reload_la_out" + [(parallel [(match_operand 0 "" "") + (match_operand 1 "register_operand" "") + (match_operand:P 2 "register_operand" "=&a")])] +@@ -1408,11 +1577,9 @@ + + (define_insn "*movdi_64" + [(set (match_operand:DI 0 "nonimmediate_operand" +- "=d,d,d,d,d,d,d,d,f,d,d,d,d,d, +- RT,!*f,!*f,!*f,!R,!T,b,Q,d,t,Q,t") ++ "=d, d, d, d, d, d, d, d,f,d,d,d,d, d,RT,!*f,!*f,!*f,!R,!T,b,Q,d,t,Q,t,v,v,v,d, v,QR") + (match_operand:DI 1 "general_operand" +- "K,N0HD0,N1HD0,N2HD0,N3HD0,Os,N0SD0,N1SD0,d,f,L,b,d,RT, +- d,*f,R,T,*f,*f,d,K,t,d,t,Q"))] ++ " K,N0HD0,N1HD0,N2HD0,N3HD0,Os,N0SD0,N1SD0,d,f,L,b,d,RT, d, *f, R, T,*f,*f,d,K,t,d,t,Q,K,v,d,v,QR, v"))] + "TARGET_ZARCH" + "@ + lghi\t%0,%h1 +@@ -1440,15 +1607,21 @@ + # + # + stam\t%1,%N1,%S0 +- lam\t%0,%N0,%S1" ++ lam\t%0,%N0,%S1 ++ vleig\t%v0,%h1,0 ++ vlr\t%v0,%v1 ++ vlvgg\t%v0,%1,0 ++ vlgvg\t%0,%v1,0 ++ vleg\t%v0,%1,0 ++ vsteg\t%v1,%0,0" + [(set_attr "op_type" "RI,RI,RI,RI,RI,RIL,RIL,RIL,RRE,RRE,RXY,RIL,RRE,RXY, +- RXY,RR,RX,RXY,RX,RXY,RIL,SIL,*,*,RS,RS") ++ RXY,RR,RX,RXY,RX,RXY,RIL,SIL,*,*,RS,RS,VRI,VRR,VRS,VRS,VRX,VRX") + (set_attr "type" "*,*,*,*,*,*,*,*,floaddf,floaddf,la,larl,lr,load,store, +- floaddf,floaddf,floaddf,fstoredf,fstoredf,larl,*,*,*, +- *,*") ++ floaddf,floaddf,floaddf,fstoredf,fstoredf,larl,*,*,*,*, ++ *,*,*,*,*,*,*") + (set_attr "cpu_facility" "*,*,*,*,*,extimm,extimm,extimm,dfp,dfp,longdisp, + z10,*,*,*,*,*,longdisp,*,longdisp, +- z10,z10,*,*,*,*") ++ z10,z10,*,*,*,*,vec,vec,vec,vec,vec,vec") + (set_attr "z10prop" "z10_fwd_A1, + z10_fwd_E1, + z10_fwd_E1, +@@ -1474,7 +1647,7 @@ + *, + *, + *, +- *") ++ *,*,*,*,*,*,*") + ]) + + (define_split +@@ -1666,9 +1839,9 @@ + + (define_insn "*movsi_zarch" + [(set (match_operand:SI 0 "nonimmediate_operand" +- "=d,d,d,d,d,d,d,d,d,R,T,!*f,!*f,!*f,!R,!T,d,t,Q,b,Q,t") ++ "=d, d, d, d,d,d,d,d,d,R,T,!*f,!*f,!*f,!*f,!*f,!R,!T,d,t,Q,b,Q,t,v,v,v,d, v,QR") + (match_operand:SI 1 "general_operand" +- "K,N0HS0,N1HS0,Os,L,b,d,R,T,d,d,*f,R,T,*f,*f,t,d,t,d,K,Q"))] ++ " K,N0HS0,N1HS0,Os,L,b,d,R,T,d,d, *f, *f, R, R, T,*f,*f,t,d,t,d,K,Q,K,v,d,v,QR, v"))] + "TARGET_ZARCH" + "@ + lhi\t%0,%h1 +@@ -1682,7 +1855,9 @@ + ly\t%0,%1 + st\t%1,%0 + sty\t%1,%0 ++ lder\t%0,%1 + ler\t%0,%1 ++ lde\t%0,%1 + le\t%0,%1 + ley\t%0,%1 + ste\t%1,%0 +@@ -1692,9 +1867,15 @@ + stam\t%1,%1,%S0 + strl\t%1,%0 + mvhi\t%0,%1 +- lam\t%0,%0,%S1" ++ lam\t%0,%0,%S1 ++ vleif\t%v0,%h1,0 ++ vlr\t%v0,%v1 ++ vlvgf\t%v0,%1,0 ++ vlgvf\t%0,%v1,0 ++ vlef\t%v0,%1,0 ++ vstef\t%v1,%0,0" + [(set_attr "op_type" "RI,RI,RI,RIL,RXY,RIL,RR,RX,RXY,RX,RXY, +- RR,RX,RXY,RX,RXY,RRE,RRE,RS,RIL,SIL,RS") ++ RRE,RR,RXE,RX,RXY,RX,RXY,RRE,RRE,RS,RIL,SIL,RS,VRI,VRR,VRS,VRS,VRX,VRX") + (set_attr "type" "*, + *, + *, +@@ -1709,6 +1890,8 @@ + floadsf, + floadsf, + floadsf, ++ floadsf, ++ floadsf, + fstoresf, + fstoresf, + *, +@@ -1716,9 +1899,9 @@ + *, + larl, + *, +- *") ++ *,*,*,*,*,*,*") + (set_attr "cpu_facility" "*,*,*,extimm,longdisp,z10,*,*,longdisp,*,longdisp, +- *,*,longdisp,*,longdisp,*,*,*,z10,z10,*") ++ vec,*,vec,*,longdisp,*,longdisp,*,*,*,z10,z10,*,vec,vec,vec,vec,vec,vec") + (set_attr "z10prop" "z10_fwd_A1, + z10_fwd_E1, + 
z10_fwd_E1, +@@ -1735,42 +1918,38 @@ + *, + *, + *, ++ *, ++ *, + z10_super_E1, + z10_super, + *, + z10_rec, + z10_super, +- *")]) ++ *,*,*,*,*,*,*")]) + + (define_insn "*movsi_esa" +- [(set (match_operand:SI 0 "nonimmediate_operand" "=d,d,d,R,!*f,!*f,!R,d,t,Q,t") +- (match_operand:SI 1 "general_operand" "K,d,R,d,*f,R,*f,t,d,t,Q"))] ++ [(set (match_operand:SI 0 "nonimmediate_operand" "=d,d,d,R,!*f,!*f,!*f,!*f,!R,d,t,Q,t") ++ (match_operand:SI 1 "general_operand" "K,d,R,d, *f, *f, R, R,*f,t,d,t,Q"))] + "!TARGET_ZARCH" + "@ + lhi\t%0,%h1 + lr\t%0,%1 + l\t%0,%1 + st\t%1,%0 ++ lder\t%0,%1 + ler\t%0,%1 ++ lde\t%0,%1 + le\t%0,%1 + ste\t%1,%0 + ear\t%0,%1 + sar\t%0,%1 + stam\t%1,%1,%S0 + lam\t%0,%0,%S1" +- [(set_attr "op_type" "RI,RR,RX,RX,RR,RX,RX,RRE,RRE,RS,RS") +- (set_attr "type" "*,lr,load,store,floadsf,floadsf,fstoresf,*,*,*,*") +- (set_attr "z10prop" "z10_fwd_A1, +- z10_fr_E1, +- z10_fwd_A3, +- z10_rec, +- *, +- *, +- *, +- z10_super_E1, +- z10_super, +- *, +- *") ++ [(set_attr "op_type" "RI,RR,RX,RX,RRE,RR,RXE,RX,RX,RRE,RRE,RS,RS") ++ (set_attr "type" "*,lr,load,store,floadsf,floadsf,floadsf,floadsf,fstoresf,*,*,*,*") ++ (set_attr "z10prop" "z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_rec,*,*,*,*,*,z10_super_E1, ++ z10_super,*,*") ++ (set_attr "cpu_facility" "*,*,*,*,vec,*,vec,*,*,*,*,*,*") + ]) + + (define_peephole2 +@@ -1880,8 +2059,8 @@ + }) + + (define_insn "*movhi" +- [(set (match_operand:HI 0 "nonimmediate_operand" "=d,d,d,d,d,R,T,b,Q") +- (match_operand:HI 1 "general_operand" " d,n,R,T,b,d,d,d,K"))] ++ [(set (match_operand:HI 0 "nonimmediate_operand" "=d,d,d,d,d,R,T,b,Q,v,v,v,d, v,QR") ++ (match_operand:HI 1 "general_operand" " d,n,R,T,b,d,d,d,K,K,v,d,v,QR, v"))] + "" + "@ + lr\t%0,%1 +@@ -1892,10 +2071,16 @@ + sth\t%1,%0 + sthy\t%1,%0 + sthrl\t%1,%0 +- mvhhi\t%0,%1" +- [(set_attr "op_type" "RR,RI,RX,RXY,RIL,RX,RXY,RIL,SIL") +- (set_attr "type" "lr,*,*,*,larl,store,store,store,*") +- (set_attr "cpu_facility" "*,*,*,*,z10,*,*,z10,z10") ++ mvhhi\t%0,%1 ++ vleih\t%v0,%h1,0 ++ vlr\t%v0,%v1 ++ vlvgh\t%v0,%1,0 ++ vlgvh\t%0,%v1,0 ++ vleh\t%v0,%1,0 ++ vsteh\t%v1,%0,0" ++ [(set_attr "op_type" "RR,RI,RX,RXY,RIL,RX,RXY,RIL,SIL,VRI,VRR,VRS,VRS,VRX,VRX") ++ (set_attr "type" "lr,*,*,*,larl,store,store,store,*,*,*,*,*,*,*") ++ (set_attr "cpu_facility" "*,*,*,*,z10,*,*,z10,z10,vec,vec,vec,vec,vec,vec") + (set_attr "z10prop" "z10_fr_E1, + z10_fwd_A1, + z10_super_E1, +@@ -1904,7 +2089,7 @@ + z10_rec, + z10_rec, + z10_rec, +- z10_super")]) ++ z10_super,*,*,*,*,*,*")]) + + (define_peephole2 + [(set (match_operand:HI 0 "register_operand" "") +@@ -1939,8 +2124,8 @@ + }) + + (define_insn "*movqi" +- [(set (match_operand:QI 0 "nonimmediate_operand" "=d,d,d,d,R,T,Q,S,?Q") +- (match_operand:QI 1 "general_operand" " d,n,R,T,d,d,n,n,?Q"))] ++ [(set (match_operand:QI 0 "nonimmediate_operand" "=d,d,d,d,R,T,Q,S,?Q,v,v,v,d, v,QR") ++ (match_operand:QI 1 "general_operand" " d,n,R,T,d,d,n,n,?Q,K,v,d,v,QR, v"))] + "" + "@ + lr\t%0,%1 +@@ -1951,9 +2136,16 @@ + stcy\t%1,%0 + mvi\t%S0,%b1 + mviy\t%S0,%b1 +- #" +- [(set_attr "op_type" "RR,RI,RX,RXY,RX,RXY,SI,SIY,SS") +- (set_attr "type" "lr,*,*,*,store,store,store,store,*") ++ # ++ vleib\t%v0,%b1,0 ++ vlr\t%v0,%v1 ++ vlvgb\t%v0,%1,0 ++ vlgvb\t%0,%v1,0 ++ vleb\t%v0,%1,0 ++ vsteb\t%v1,%0,0" ++ [(set_attr "op_type" "RR,RI,RX,RXY,RX,RXY,SI,SIY,SS,VRI,VRR,VRS,VRS,VRX,VRX") ++ (set_attr "type" "lr,*,*,*,store,store,store,store,*,*,*,*,*,*,*") ++ (set_attr "cpu_facility" "*,*,*,*,*,*,*,*,*,vec,vec,vec,vec,vec,vec") + (set_attr "z10prop" "z10_fr_E1, + z10_fwd_A1, + z10_super_E1, +@@ 
-1962,7 +2154,7 @@ + z10_rec, + z10_super, + z10_super, +- *")]) ++ *,*,*,*,*,*,*")]) + + (define_peephole2 + [(set (match_operand:QI 0 "nonimmediate_operand" "") +@@ -2094,7 +2286,7 @@ + [(set (match_operand:TD_TF 0 "register_operand" "") + (match_operand:TD_TF 1 "memory_operand" ""))] + "TARGET_ZARCH && reload_completed +- && !FP_REG_P (operands[0]) ++ && GENERAL_REG_P (operands[0]) + && !s_operand (operands[1], VOIDmode)" + [(set (match_dup 0) (match_dup 1))] + { +@@ -2150,9 +2342,9 @@ + + (define_insn "*mov_64dfp" + [(set (match_operand:DD_DF 0 "nonimmediate_operand" +- "=f,f,f,d,f,f,R,T,d,d, d,RT") ++ "=f,f,f,d,f,f,R,T,d,d,d, d,b,RT,v,v,d,v,QR") + (match_operand:DD_DF 1 "general_operand" +- " G,f,d,f,R,T,f,f,G,d,RT, d"))] ++ " G,f,d,f,R,T,f,f,G,d,b,RT,d, d,v,d,v,QR,v"))] + "TARGET_DFP" + "@ + lzdr\t%0 +@@ -2165,17 +2357,24 @@ + stdy\t%1,%0 + lghi\t%0,0 + lgr\t%0,%1 ++ lgrl\t%0,%1 + lg\t%0,%1 +- stg\t%1,%0" +- [(set_attr "op_type" "RRE,RR,RRE,RRE,RX,RXY,RX,RXY,RI,RRE,RXY,RXY") ++ stgrl\t%1,%0 ++ stg\t%1,%0 ++ vlr\t%v0,%v1 ++ vlvgg\t%v0,%1,0 ++ vlgvg\t%0,%v1,0 ++ vleg\t%0,%1,0 ++ vsteg\t%1,%0,0" ++ [(set_attr "op_type" "RRE,RR,RRE,RRE,RX,RXY,RX,RXY,RI,RRE,RIL,RXY,RIL,RXY,VRR,VRS,VRS,VRX,VRX") + (set_attr "type" "fsimpdf,floaddf,floaddf,floaddf,floaddf,floaddf, +- fstoredf,fstoredf,*,lr,load,store") +- (set_attr "z10prop" "*,*,*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_rec") +- (set_attr "cpu_facility" "z196,*,*,*,*,*,*,*,*,*,*,*")]) ++ fstoredf,fstoredf,*,lr,load,load,store,store,*,*,*,load,store") ++ (set_attr "z10prop" "*,*,*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_fwd_A3,z10_rec,z10_rec,*,*,*,*,*") ++ (set_attr "cpu_facility" "z196,*,*,*,*,*,*,*,*,*,z10,*,z10,*,vec,vec,vec,vec,vec")]) + + (define_insn "*mov_64" +- [(set (match_operand:DD_DF 0 "nonimmediate_operand" "=f,f,f,f,R,T,d,d, d,RT") +- (match_operand:DD_DF 1 "general_operand" " G,f,R,T,f,f,G,d,RT, d"))] ++ [(set (match_operand:DD_DF 0 "nonimmediate_operand" "=f,f,f,f,R,T,d,d,d, d,b,RT,v,v,QR") ++ (match_operand:DD_DF 1 "general_operand" " G,f,R,T,f,f,G,d,b,RT,d, d,v,QR,v"))] + "TARGET_ZARCH" + "@ + lzdr\t%0 +@@ -2186,13 +2385,18 @@ + stdy\t%1,%0 + lghi\t%0,0 + lgr\t%0,%1 ++ lgrl\t%0,%1 + lg\t%0,%1 +- stg\t%1,%0" +- [(set_attr "op_type" "RRE,RR,RX,RXY,RX,RXY,RI,RRE,RXY,RXY") ++ stgrl\t%1,%0 ++ stg\t%1,%0 ++ vlr\t%v0,%v1 ++ vleg\t%v0,%1,0 ++ vsteg\t%v1,%0,0" ++ [(set_attr "op_type" "RRE,RR,RX,RXY,RX,RXY,RI,RRE,RIL,RXY,RIL,RXY,VRR,VRX,VRX") + (set_attr "type" "fsimpdf,fload,fload,fload, +- fstore,fstore,*,lr,load,store") +- (set_attr "z10prop" "*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_rec") +- (set_attr "cpu_facility" "z196,*,*,*,*,*,*,*,*,*")]) ++ fstore,fstore,*,lr,load,load,store,store,*,load,store") ++ (set_attr "z10prop" "*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_fwd_A3,z10_rec,z10_rec,*,*,*") ++ (set_attr "cpu_facility" "z196,*,*,*,*,*,*,*,z10,*,z10,*,vec,vec,vec")]) + + (define_insn "*mov_31" + [(set (match_operand:DD_DF 0 "nonimmediate_operand" +@@ -2265,28 +2469,38 @@ + + (define_insn "mov" + [(set (match_operand:SD_SF 0 "nonimmediate_operand" +- "=f,f,f,f,R,T,d,d,d,d,R,T") ++ "=f,f,f,f,f,f,R,T,d,d,d,d,d,b,R,T,v,v,v,d,v,QR") + (match_operand:SD_SF 1 "general_operand" +- " G,f,R,T,f,f,G,d,R,T,d,d"))] ++ " G,f,f,R,R,T,f,f,G,d,b,R,T,d,d,d,v,G,d,v,QR,v"))] + "" + "@ + lzer\t%0 ++ lder\t%0,%1 + ler\t%0,%1 ++ lde\t%0,%1 + le\t%0,%1 + ley\t%0,%1 + ste\t%1,%0 + stey\t%1,%0 + lhi\t%0,0 + lr\t%0,%1 ++ lrl\t%0,%1 + l\t%0,%1 + ly\t%0,%1 ++ strl\t%1,%0 + st\t%1,%0 +- sty\t%1,%0" +- [(set_attr "op_type" 
"RRE,RR,RX,RXY,RX,RXY,RI,RR,RX,RXY,RX,RXY") +- (set_attr "type" "fsimpsf,fload,fload,fload, +- fstore,fstore,*,lr,load,load,store,store") +- (set_attr "z10prop" "*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_fwd_A3,z10_rec,z10_rec") +- (set_attr "cpu_facility" "z196,*,*,*,*,*,*,*,*,*,*,*")]) ++ sty\t%1,%0 ++ vlr\t%v0,%v1 ++ vleif\t%v0,0 ++ vlvgf\t%v0,%1,0 ++ vlgvf\t%0,%v1,0 ++ vleg\t%0,%1,0 ++ vsteg\t%1,%0,0" ++ [(set_attr "op_type" "RRE,RRE,RR,RXE,RX,RXY,RX,RXY,RI,RR,RIL,RX,RXY,RIL,RX,RXY,VRR,VRI,VRS,VRS,VRX,VRX") ++ (set_attr "type" "fsimpsf,fsimpsf,fload,fload,fload,fload, ++ fstore,fstore,*,lr,load,load,load,store,store,store,*,*,*,*,load,store") ++ (set_attr "z10prop" "*,*,*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fr_E1,z10_fwd_A3,z10_fwd_A3,z10_rec,z10_rec,z10_rec,*,*,*,*,*,*") ++ (set_attr "cpu_facility" "z196,vec,*,vec,*,*,*,*,*,*,z10,*,*,z10,*,*,vec,vec,vec,vec,vec,vec")]) + + ; + ; movcc instruction pattern +@@ -2577,6 +2791,22 @@ + ; + + (define_expand "strlen" ++ [(match_operand:P 0 "register_operand" "") ; result ++ (match_operand:BLK 1 "memory_operand" "") ; input string ++ (match_operand:SI 2 "immediate_operand" "") ; search character ++ (match_operand:SI 3 "immediate_operand" "")] ; known alignment ++ "" ++{ ++ if (!TARGET_VX || operands[2] != const0_rtx) ++ emit_insn (gen_strlen_srst (operands[0], operands[1], ++ operands[2], operands[3])); ++ else ++ s390_expand_vec_strlen (operands[0], operands[1], operands[3]); ++ ++ DONE; ++}) ++ ++(define_expand "strlen_srst" + [(set (reg:SI 0) (match_operand:SI 2 "immediate_operand" "")) + (parallel + [(set (match_dup 4) +@@ -2674,8 +2904,16 @@ + (clobber (reg:CC CC_REGNUM))])] + "" + { +- rtx addr1 = gen_reg_rtx (Pmode); +- rtx addr2 = gen_reg_rtx (Pmode); ++ rtx addr1, addr2; ++ ++ if (TARGET_VX && optimize_function_for_speed_p (cfun)) ++ { ++ s390_expand_vec_movstr (operands[0], operands[1], operands[2]); ++ DONE; ++ } ++ ++ addr1 = gen_reg_rtx (Pmode); ++ addr2 = gen_reg_rtx (Pmode); + + emit_move_insn (addr1, force_operand (XEXP (operands[1], 0), NULL_RTX)); + emit_move_insn (addr2, force_operand (XEXP (operands[2], 0), NULL_RTX)); +@@ -2886,8 +3124,12 @@ + operands[2] = GEN_INT (S390_TDC_INFINITY); + }) + ++; This extracts CC into a GPR properly shifted. The actual IPM ++; instruction will be issued by reload. The constraint of operand 1 ++; forces reload to use a GPR. So reload will issue a movcc insn for ++; copying CC into a GPR first. + (define_insn_and_split "*cc_to_int" +- [(set (match_operand:SI 0 "register_operand" "=d") ++ [(set (match_operand:SI 0 "nonimmediate_operand" "=d") + (unspec:SI [(match_operand 1 "register_operand" "0")] + UNSPEC_CC_TO_INT))] + "operands != NULL" +@@ -4223,14 +4465,27 @@ + + ; fixuns_trunc(tf|df|sf|td|dd)(di|si)2 instruction patterns. 
+ ++(define_insn "*fixuns_truncdfdi2_z13" ++ [(set (match_operand:DI 0 "register_operand" "=d,v") ++ (unsigned_fix:DI (match_operand:DF 1 "register_operand" "f,v"))) ++ (unspec:DI [(match_operand:DI 2 "immediate_operand" "K,K")] UNSPEC_ROUND) ++ (clobber (reg:CC CC_REGNUM))] ++ "TARGET_VX && TARGET_HARD_FLOAT" ++ "@ ++ clgdbr\t%0,%h2,%1,0 ++ wclgdb\t%v0,%v1,0,%h2" ++ [(set_attr "op_type" "RRF,VRR") ++ (set_attr "type" "ftoi")]) ++ + ; clfebr, clfdbr, clfxbr, clgebr, clgdbr, clgxbr + ; clfdtr, clfxtr, clgdtr, clgxtr + (define_insn "*fixuns_trunc2_z196" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (unsigned_fix:GPR (match_operand:FP 1 "register_operand" "f"))) +- (unspec:GPR [(match_operand:GPR 2 "immediate_operand" "K")] UNSPEC_ROUND) ++ [(set (match_operand:GPR 0 "register_operand" "=d") ++ (unsigned_fix:GPR (match_operand:FP 1 "register_operand" "f"))) ++ (unspec:GPR [(match_operand:GPR 2 "immediate_operand" "K")] UNSPEC_ROUND) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_Z196" ++ "TARGET_Z196 && TARGET_HARD_FLOAT ++ && (!TARGET_VX || mode != DImode || mode != DFmode)" + "clr\t%0,%h2,%1,0" + [(set_attr "op_type" "RRF") + (set_attr "type" "ftoi")]) +@@ -4245,18 +4500,37 @@ + DONE; + }) + ++(define_insn "*fix_truncdfdi2_bfp_z13" ++ [(set (match_operand:DI 0 "register_operand" "=d,v") ++ (fix:DI (match_operand:DF 1 "register_operand" "f,v"))) ++ (unspec:DI [(match_operand:DI 2 "immediate_operand" "K,K")] UNSPEC_ROUND) ++ (clobber (reg:CC CC_REGNUM))] ++ "TARGET_VX && TARGET_HARD_FLOAT" ++ "@ ++ cgdbr\t%0,%h2,%1 ++ wcgdb\t%v0,%v1,0,%h2" ++ [(set_attr "op_type" "RRE,VRR") ++ (set_attr "type" "ftoi")]) ++ + ; cgxbr, cgdbr, cgebr, cfxbr, cfdbr, cfebr +-(define_insn "fix_trunc2_bfp" +- [(set (match_operand:GPR 0 "register_operand" "=d") +- (fix:GPR (match_operand:BFP 1 "register_operand" "f"))) +- (unspec:GPR [(match_operand:GPR 2 "immediate_operand" "K")] UNSPEC_ROUND) ++(define_insn "*fix_trunc2_bfp" ++ [(set (match_operand:GPR 0 "register_operand" "=d") ++ (fix:GPR (match_operand:BFP 1 "register_operand" "f"))) ++ (unspec:GPR [(match_operand:GPR 2 "immediate_operand" "K")] UNSPEC_ROUND) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_HARD_FLOAT" ++ "TARGET_HARD_FLOAT ++ && (!TARGET_VX || mode != DImode || mode != DFmode)" + "cbr\t%0,%h2,%1" + [(set_attr "op_type" "RRE") + (set_attr "type" "ftoi")]) + +- ++(define_expand "fix_trunc2_bfp" ++ [(parallel ++ [(set (match_operand:GPR 0 "register_operand" "=d") ++ (fix:GPR (match_operand:BFP 1 "register_operand" "f"))) ++ (unspec:GPR [(match_operand:GPR 2 "immediate_operand" "K")] UNSPEC_ROUND) ++ (clobber (reg:CC CC_REGNUM))])] ++ "TARGET_HARD_FLOAT") + ; + ; fix_trunc(td|dd)di2 instruction pattern(s). + ; +@@ -4303,12 +4577,15 @@ + + ; cxgbr, cdgbr, cegbr, cxgtr, cdgtr + (define_insn "floatdi2" +- [(set (match_operand:FP 0 "register_operand" "=f") +- (float:FP (match_operand:DI 1 "register_operand" "d")))] ++ [(set (match_operand:FP 0 "register_operand" "=f,") ++ (float:FP (match_operand:DI 1 "register_operand" "d,")))] + "TARGET_ZARCH && TARGET_HARD_FLOAT" +- "cgr\t%0,%1" +- [(set_attr "op_type" "RRE") +- (set_attr "type" "itof" )]) ++ "@ ++ cgr\t%0,%1 ++ wcdgb\t%v0,%v1,0,0" ++ [(set_attr "op_type" "RRE,VRR") ++ (set_attr "type" "itof" ) ++ (set_attr "cpu_facility" "*,vec")]) + + ; cxfbr, cdfbr, cefbr + (define_insn "floatsi2" +@@ -4332,27 +4609,47 @@ + ; floatuns(si|di)(tf|df|sf|td|dd)2 instruction pattern(s). 
+ ; + ++(define_insn "*floatunsdidf2_z13" ++ [(set (match_operand:DF 0 "register_operand" "=f,v") ++ (unsigned_float:DF (match_operand:DI 1 "register_operand" "d,v")))] ++ "TARGET_VX && TARGET_HARD_FLOAT" ++ "@ ++ cdlgbr\t%0,0,%1,0 ++ wcdlgb\t%v0,%v1,0,0" ++ [(set_attr "op_type" "RRE,VRR") ++ (set_attr "type" "itofdf")]) ++ + ; cxlgbr, cdlgbr, celgbr, cxlgtr, cdlgtr + ; cxlfbr, cdlfbr, celfbr, cxlftr, cdlftr +-(define_insn "floatuns2" +- [(set (match_operand:FP 0 "register_operand" "=f") +- (unsigned_float:FP (match_operand:GPR 1 "register_operand" "d")))] +- "TARGET_Z196 && TARGET_HARD_FLOAT" ++(define_insn "*floatuns2" ++ [(set (match_operand:FP 0 "register_operand" "=f") ++ (unsigned_float:FP (match_operand:GPR 1 "register_operand" "d")))] ++ "TARGET_Z196 && TARGET_HARD_FLOAT ++ && (!TARGET_VX || mode != DFmode || mode != DImode)" + "clr\t%0,0,%1,0" + [(set_attr "op_type" "RRE") +- (set_attr "type" "itof" )]) ++ (set_attr "type" "itof")]) ++ ++(define_expand "floatuns2" ++ [(set (match_operand:FP 0 "register_operand" "") ++ (unsigned_float:FP (match_operand:GPR 1 "register_operand" "")))] ++ "TARGET_Z196 && TARGET_HARD_FLOAT") + + ; + ; truncdfsf2 instruction pattern(s). + ; + + (define_insn "truncdfsf2" +- [(set (match_operand:SF 0 "register_operand" "=f") +- (float_truncate:SF (match_operand:DF 1 "register_operand" "f")))] ++ [(set (match_operand:SF 0 "register_operand" "=f,v") ++ (float_truncate:SF (match_operand:DF 1 "register_operand" "f,v")))] + "TARGET_HARD_FLOAT" +- "ledbr\t%0,%1" +- [(set_attr "op_type" "RRE") +- (set_attr "type" "ftruncdf")]) ++ "@ ++ ledbr\t%0,%1 ++ wledb\t%v0,%v1,0,0" ; IEEE inexact exception not suppressed ++ ; According to BFP rounding mode ++ [(set_attr "op_type" "RRE,VRR") ++ (set_attr "type" "ftruncdf") ++ (set_attr "cpu_facility" "*,vec")]) + + ; + ; trunctf(df|sf)2 instruction pattern(s). +@@ -4393,17 +4690,35 @@ + ; extend(sf|df)(df|tf)2 instruction pattern(s). + ; + ++(define_insn "*extendsfdf2_z13" ++ [(set (match_operand:DF 0 "register_operand" "=f,f,v") ++ (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "f,R,v")))] ++ "TARGET_VX && TARGET_HARD_FLOAT" ++ "@ ++ ldebr\t%0,%1 ++ ldeb\t%0,%1 ++ wldeb\t%v0,%v1" ++ [(set_attr "op_type" "RRE,RXE,VRR") ++ (set_attr "type" "fsimpdf, floaddf,fsimpdf")]) ++ + ; ldebr, ldeb, lxdbr, lxdb, lxebr, lxeb +-(define_insn "extend2" +- [(set (match_operand:BFP 0 "register_operand" "=f,f") ++(define_insn "*extend2" ++ [(set (match_operand:BFP 0 "register_operand" "=f,f") + (float_extend:BFP (match_operand:DSF 1 "nonimmediate_operand" "f,R")))] + "TARGET_HARD_FLOAT +- && GET_MODE_SIZE (mode) > GET_MODE_SIZE (mode)" ++ && GET_MODE_SIZE (mode) > GET_MODE_SIZE (mode) ++ && (!TARGET_VX || mode != DFmode || mode != SFmode)" + "@ + lbr\t%0,%1 + lb\t%0,%1" +- [(set_attr "op_type" "RRE,RXE") +- (set_attr "type" "fsimp, fload")]) ++ [(set_attr "op_type" "RRE,RXE") ++ (set_attr "type" "fsimp, fload")]) ++ ++(define_expand "extend2" ++ [(set (match_operand:BFP 0 "register_operand" "") ++ (float_extend:BFP (match_operand:DSF 1 "nonimmediate_operand" "")))] ++ "TARGET_HARD_FLOAT ++ && GET_MODE_SIZE (mode) > GET_MODE_SIZE (mode)") + + ; + ; extendddtd2 and extendsddd2 instruction pattern(s). +@@ -4616,10 +4931,29 @@ + ; addti3 instruction pattern(s). 
+ ; + +-(define_insn_and_split "addti3" +- [(set (match_operand:TI 0 "register_operand" "=&d") ++(define_expand "addti3" ++ [(parallel ++ [(set (match_operand:TI 0 "register_operand" "") ++ (plus:TI (match_operand:TI 1 "nonimmediate_operand" "") ++ (match_operand:TI 2 "general_operand" "") ) ) ++ (clobber (reg:CC CC_REGNUM))])] ++ "TARGET_ZARCH" ++{ ++ /* For z13 we have vaq which doesn't set CC. */ ++ if (TARGET_VX) ++ { ++ emit_insn (gen_rtx_SET (VOIDmode, operands[0], ++ gen_rtx_PLUS (TImode, ++ copy_to_mode_reg (TImode, operands[1]), ++ copy_to_mode_reg (TImode, operands[2])))); ++ DONE; ++ } ++}) ++ ++(define_insn_and_split "*addti3" ++ [(set (match_operand:TI 0 "register_operand" "=&d") + (plus:TI (match_operand:TI 1 "nonimmediate_operand" "%0") +- (match_operand:TI 2 "general_operand" "do") ) ) ++ (match_operand:TI 2 "general_operand" "do") ) ) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ZARCH" + "#" +@@ -4639,7 +4973,9 @@ + operands[5] = operand_subword (operands[2], 0, 0, TImode); + operands[6] = operand_subword (operands[0], 1, 0, TImode); + operands[7] = operand_subword (operands[1], 1, 0, TImode); +- operands[8] = operand_subword (operands[2], 1, 0, TImode);") ++ operands[8] = operand_subword (operands[2], 1, 0, TImode);" ++ [(set_attr "op_type" "*") ++ (set_attr "cpu_facility" "*")]) + + ; + ; adddi3 instruction pattern(s). +@@ -4976,17 +5312,20 @@ + ; + + ; axbr, adbr, aebr, axb, adb, aeb, adtr, axtr ++; FIXME: wfadb does not clobber cc + (define_insn "add3" +- [(set (match_operand:FP 0 "register_operand" "=f, f") +- (plus:FP (match_operand:FP 1 "nonimmediate_operand" "%,0") +- (match_operand:FP 2 "general_operand" " f,"))) ++ [(set (match_operand:FP 0 "register_operand" "=f, f,") ++ (plus:FP (match_operand:FP 1 "nonimmediate_operand" "%, 0,") ++ (match_operand:FP 2 "general_operand" "f,,"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_HARD_FLOAT" + "@ + ar\t%0,%2 +- ab\t%0,%2" +- [(set_attr "op_type" ",RXE") +- (set_attr "type" "fsimp")]) ++ ab\t%0,%2 ++ wfadb\t%v0,%v1,%v2" ++ [(set_attr "op_type" ",RXE,VRR") ++ (set_attr "type" "fsimp") ++ (set_attr "cpu_facility" "*,*,vec")]) + + ; axbr, adbr, aebr, axb, adb, aeb, adtr, axtr + (define_insn "*add3_cc" +@@ -5026,10 +5365,29 @@ + ; subti3 instruction pattern(s). + ; + +-(define_insn_and_split "subti3" +- [(set (match_operand:TI 0 "register_operand" "=&d") +- (minus:TI (match_operand:TI 1 "register_operand" "0") +- (match_operand:TI 2 "general_operand" "do") ) ) ++(define_expand "subti3" ++ [(parallel ++ [(set (match_operand:TI 0 "register_operand" "") ++ (minus:TI (match_operand:TI 1 "register_operand" "") ++ (match_operand:TI 2 "general_operand" "") ) ) ++ (clobber (reg:CC CC_REGNUM))])] ++ "TARGET_ZARCH" ++{ ++ /* For z13 we have vaq which doesn't set CC. 
*/ ++ if (TARGET_VX) ++ { ++ emit_insn (gen_rtx_SET (VOIDmode, operands[0], ++ gen_rtx_MINUS (TImode, ++ operands[1], ++ copy_to_mode_reg (TImode, operands[2])))); ++ DONE; ++ } ++}) ++ ++(define_insn_and_split "*subti3" ++ [(set (match_operand:TI 0 "register_operand" "=&d") ++ (minus:TI (match_operand:TI 1 "register_operand" "0") ++ (match_operand:TI 2 "general_operand" "do") ) ) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ZARCH" + "#" +@@ -5048,7 +5406,9 @@ + operands[5] = operand_subword (operands[2], 0, 0, TImode); + operands[6] = operand_subword (operands[0], 1, 0, TImode); + operands[7] = operand_subword (operands[1], 1, 0, TImode); +- operands[8] = operand_subword (operands[2], 1, 0, TImode);") ++ operands[8] = operand_subword (operands[2], 1, 0, TImode);" ++ [(set_attr "op_type" "*") ++ (set_attr "cpu_facility" "*")]) + + ; + ; subdi3 instruction pattern(s). +@@ -5327,16 +5687,18 @@ + + ; sxbr, sdbr, sebr, sdb, seb, sxtr, sdtr + (define_insn "sub3" +- [(set (match_operand:FP 0 "register_operand" "=f, f") +- (minus:FP (match_operand:FP 1 "register_operand" ",0") +- (match_operand:FP 2 "general_operand" "f,"))) ++ [(set (match_operand:FP 0 "register_operand" "=f, f,") ++ (minus:FP (match_operand:FP 1 "register_operand" ", 0,") ++ (match_operand:FP 2 "general_operand" "f,,"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_HARD_FLOAT" + "@ + sr\t%0,%2 +- sb\t%0,%2" +- [(set_attr "op_type" ",RXE") +- (set_attr "type" "fsimp")]) ++ sb\t%0,%2 ++ wfsdb\t%v0,%v1,%v2" ++ [(set_attr "op_type" ",RXE,VRR") ++ (set_attr "type" "fsimp") ++ (set_attr "cpu_facility" "*,*,vec")]) + + ; sxbr, sdbr, sebr, sdb, seb, sxtr, sdtr + (define_insn "*sub3_cc" +@@ -5742,41 +6104,47 @@ + + ; mxbr, mdbr, meebr, mxb, mxb, meeb, mdtr, mxtr + (define_insn "mul3" +- [(set (match_operand:FP 0 "register_operand" "=f,f") +- (mult:FP (match_operand:FP 1 "nonimmediate_operand" "%,0") +- (match_operand:FP 2 "general_operand" "f,")))] ++ [(set (match_operand:FP 0 "register_operand" "=f, f,") ++ (mult:FP (match_operand:FP 1 "nonimmediate_operand" "%, 0,") ++ (match_operand:FP 2 "general_operand" "f,,")))] + "TARGET_HARD_FLOAT" + "@ + mr\t%0,%2 +- mb\t%0,%2" +- [(set_attr "op_type" ",RXE") +- (set_attr "type" "fmul")]) ++ mb\t%0,%2 ++ wfmdb\t%v0,%v1,%v2" ++ [(set_attr "op_type" ",RXE,VRR") ++ (set_attr "type" "fmul") ++ (set_attr "cpu_facility" "*,*,vec")]) + + ; madbr, maebr, maxb, madb, maeb + (define_insn "fma4" +- [(set (match_operand:DSF 0 "register_operand" "=f,f") +- (fma:DSF (match_operand:DSF 1 "nonimmediate_operand" "%f,f") +- (match_operand:DSF 2 "nonimmediate_operand" "f,R") +- (match_operand:DSF 3 "register_operand" "0,0")))] ++ [(set (match_operand:DSF 0 "register_operand" "=f,f,") ++ (fma:DSF (match_operand:DSF 1 "nonimmediate_operand" "%f,f,") ++ (match_operand:DSF 2 "nonimmediate_operand" "f,R,") ++ (match_operand:DSF 3 "register_operand" "0,0,")))] + "TARGET_HARD_FLOAT" + "@ + mabr\t%0,%1,%2 +- mab\t%0,%1,%2" +- [(set_attr "op_type" "RRE,RXE") +- (set_attr "type" "fmadd")]) ++ mab\t%0,%1,%2 ++ wfmadb\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "RRE,RXE,VRR") ++ (set_attr "type" "fmadd") ++ (set_attr "cpu_facility" "*,*,vec")]) + + ; msxbr, msdbr, msebr, msxb, msdb, mseb + (define_insn "fms4" +- [(set (match_operand:DSF 0 "register_operand" "=f,f") +- (fma:DSF (match_operand:DSF 1 "nonimmediate_operand" "%f,f") +- (match_operand:DSF 2 "nonimmediate_operand" "f,R") +- (neg:DSF (match_operand:DSF 3 "register_operand" "0,0"))))] ++ [(set (match_operand:DSF 0 "register_operand" "=f,f,") ++ (fma:DSF (match_operand:DSF 1 
"nonimmediate_operand" "%f,f,") ++ (match_operand:DSF 2 "nonimmediate_operand" "f,R,") ++ (neg:DSF (match_operand:DSF 3 "register_operand" "0,0,"))))] + "TARGET_HARD_FLOAT" + "@ + msbr\t%0,%1,%2 +- msb\t%0,%1,%2" +- [(set_attr "op_type" "RRE,RXE") +- (set_attr "type" "fmadd")]) ++ msb\t%0,%1,%2 ++ wfmsdb\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "RRE,RXE,VRR") ++ (set_attr "type" "fmadd") ++ (set_attr "cpu_facility" "*,*,vec")]) + + ;; + ;;- Divide and modulo instructions. +@@ -6202,15 +6570,17 @@ + + ; dxbr, ddbr, debr, dxb, ddb, deb, ddtr, dxtr + (define_insn "div3" +- [(set (match_operand:FP 0 "register_operand" "=f,f") +- (div:FP (match_operand:FP 1 "register_operand" ",0") +- (match_operand:FP 2 "general_operand" "f,")))] ++ [(set (match_operand:FP 0 "register_operand" "=f, f,") ++ (div:FP (match_operand:FP 1 "register_operand" ", 0,") ++ (match_operand:FP 2 "general_operand" "f,,")))] + "TARGET_HARD_FLOAT" + "@ + dr\t%0,%2 +- db\t%0,%2" +- [(set_attr "op_type" ",RXE") +- (set_attr "type" "fdiv")]) ++ db\t%0,%2 ++ wfddb\t%v0,%v1,%v2" ++ [(set_attr "op_type" ",RXE,VRR") ++ (set_attr "type" "fdiv") ++ (set_attr "cpu_facility" "*,*,vec")]) + + + ;; +@@ -7356,14 +7726,18 @@ + (set_attr "type" "fsimp")]) + + ; lcxbr, lcdbr, lcebr ++; FIXME: wflcdb does not clobber cc + (define_insn "*neg2" +- [(set (match_operand:BFP 0 "register_operand" "=f") +- (neg:BFP (match_operand:BFP 1 "register_operand" "f"))) ++ [(set (match_operand:BFP 0 "register_operand" "=f,") ++ (neg:BFP (match_operand:BFP 1 "register_operand" "f,"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_HARD_FLOAT" +- "lcbr\t%0,%1" +- [(set_attr "op_type" "RRE") +- (set_attr "type" "fsimp")]) ++ "@ ++ lcbr\t%0,%1 ++ wflcdb\t%0,%1" ++ [(set_attr "op_type" "RRE,VRR") ++ (set_attr "cpu_facility" "*,vec") ++ (set_attr "type" "fsimp,*")]) + + + ;; +@@ -7474,14 +7848,18 @@ + (set_attr "type" "fsimp")]) + + ; lpxbr, lpdbr, lpebr ++; FIXME: wflpdb does not clobber cc + (define_insn "*abs2" +- [(set (match_operand:BFP 0 "register_operand" "=f") +- (abs:BFP (match_operand:BFP 1 "register_operand" "f"))) ++ [(set (match_operand:BFP 0 "register_operand" "=f,") ++ (abs:BFP (match_operand:BFP 1 "register_operand" "f,"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_HARD_FLOAT" +- "lpbr\t%0,%1" +- [(set_attr "op_type" "RRE") +- (set_attr "type" "fsimp")]) ++ "@ ++ lpbr\t%0,%1 ++ wflpdb\t%0,%1" ++ [(set_attr "op_type" "RRE,VRR") ++ (set_attr "cpu_facility" "*,vec") ++ (set_attr "type" "fsimp,*")]) + + + ;; +@@ -7585,14 +7963,18 @@ + (set_attr "type" "fsimp")]) + + ; lnxbr, lndbr, lnebr ++; FIXME: wflndb does not clobber cc + (define_insn "*negabs2" +- [(set (match_operand:BFP 0 "register_operand" "=f") +- (neg:BFP (abs:BFP (match_operand:BFP 1 "register_operand" "f")))) ++ [(set (match_operand:BFP 0 "register_operand" "=f,") ++ (neg:BFP (abs:BFP (match_operand:BFP 1 "register_operand" "f,")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_HARD_FLOAT" +- "lnbr\t%0,%1" +- [(set_attr "op_type" "RRE") +- (set_attr "type" "fsimp")]) ++ "@ ++ lnbr\t%0,%1 ++ wflndb\t%0,%1" ++ [(set_attr "op_type" "RRE,VRR") ++ (set_attr "cpu_facility" "*,vec") ++ (set_attr "type" "fsimp,*")]) + + ;; + ;;- Square root instructions. 
+@@ -7604,14 +7986,16 @@ + + ; sqxbr, sqdbr, sqebr, sqdb, sqeb + (define_insn "sqrt2" +- [(set (match_operand:BFP 0 "register_operand" "=f,f") +- (sqrt:BFP (match_operand:BFP 1 "general_operand" "f,")))] ++ [(set (match_operand:BFP 0 "register_operand" "=f, f,") ++ (sqrt:BFP (match_operand:BFP 1 "general_operand" "f,,")))] + "TARGET_HARD_FLOAT" + "@ + sqbr\t%0,%1 +- sqb\t%0,%1" +- [(set_attr "op_type" "RRE,RXE") +- (set_attr "type" "fsqrt")]) ++ sqb\t%0,%1 ++ wfsqdb\t%v0,%v1" ++ [(set_attr "op_type" "RRE,RXE,VRR") ++ (set_attr "type" "fsqrt") ++ (set_attr "cpu_facility" "*,*,vec")]) + + + ;; +@@ -10006,6 +10390,35 @@ + DONE; + }) + ++; Clobber VRs since they don't get restored ++(define_insn "tbegin_1_z13" ++ [(set (reg:CCRAW CC_REGNUM) ++ (unspec_volatile:CCRAW [(match_operand 0 "const_int_operand" "D")] ++ UNSPECV_TBEGIN)) ++ (set (match_operand:BLK 1 "memory_operand" "=Q") ++ (unspec_volatile:BLK [(match_dup 0)] UNSPECV_TBEGIN_TDB)) ++ (clobber (reg:TI 16)) (clobber (reg:TI 38)) ++ (clobber (reg:TI 17)) (clobber (reg:TI 39)) ++ (clobber (reg:TI 18)) (clobber (reg:TI 40)) ++ (clobber (reg:TI 19)) (clobber (reg:TI 41)) ++ (clobber (reg:TI 20)) (clobber (reg:TI 42)) ++ (clobber (reg:TI 21)) (clobber (reg:TI 43)) ++ (clobber (reg:TI 22)) (clobber (reg:TI 44)) ++ (clobber (reg:TI 23)) (clobber (reg:TI 45)) ++ (clobber (reg:TI 24)) (clobber (reg:TI 46)) ++ (clobber (reg:TI 25)) (clobber (reg:TI 47)) ++ (clobber (reg:TI 26)) (clobber (reg:TI 48)) ++ (clobber (reg:TI 27)) (clobber (reg:TI 49)) ++ (clobber (reg:TI 28)) (clobber (reg:TI 50)) ++ (clobber (reg:TI 29)) (clobber (reg:TI 51)) ++ (clobber (reg:TI 30)) (clobber (reg:TI 52)) ++ (clobber (reg:TI 31)) (clobber (reg:TI 53))] ++; CONST_OK_FOR_CONSTRAINT_P does not work with D constraint since D is ++; not supposed to be used for immediates (see genpreds.c). 
++ "TARGET_VX && INTVAL (operands[0]) >= 0 && INTVAL (operands[0]) <= 0xffff" ++ "tbegin\t%1,%x0" ++ [(set_attr "op_type" "SIL")]) ++ + (define_insn "tbegin_1" + [(set (reg:CCRAW CC_REGNUM) + (unspec_volatile:CCRAW [(match_operand 0 "const_int_operand" "D")] +@@ -10141,3 +10554,30 @@ + "TARGET_HTM && INTVAL (operands[2]) < 16" + "ppa\t%0,%1,%2" + [(set_attr "op_type" "RRF")]) ++ ++ ++; Set and get floating point control register ++ ++(define_insn "sfpc" ++ [(unspec_volatile [(match_operand:SI 0 "register_operand" "d")] ++ UNSPECV_SFPC)] ++ "TARGET_HARD_FLOAT" ++ "sfpc\t%0") ++ ++(define_insn "efpc" ++ [(set (match_operand:SI 0 "register_operand" "=d") ++ (unspec_volatile:SI [(const_int 0)] UNSPECV_EFPC))] ++ "TARGET_HARD_FLOAT" ++ "efpc\t%0") ++ ++ ++; Load count to block boundary ++ ++(define_insn "lcbb" ++ [(set (match_operand:SI 0 "register_operand" "=d") ++ (unspec:SI [(match_operand 1 "address_operand" "ZQZR") ++ (match_operand:SI 2 "immediate_operand" "C")] UNSPEC_LCBB)) ++ (clobber (reg:CC CC_REGNUM))] ++ "TARGET_Z13" ++ "lcbb\t%0,%a1,%b2" ++ [(set_attr "op_type" "VRX")]) +--- gcc/config/s390/s390-modes.def 2013-08-14 13:55:12.000000000 +0200 ++++ gcc/config/s390/s390-modes.def 2016-05-11 17:12:39.000000000 +0200 +@@ -84,6 +84,23 @@ Requested mode -> Destination + CCS, CCU, CCT, CCSR, CCUR -> CCZ + CCA -> CCAP, CCAN + ++Vector comparison modes ++ ++CCVEQ EQ - - NE (VCEQ) ++CCVEQANY EQ EQ - NE (VCEQ) ++ ++CCVH GT - - LE (VCH) ++CCVHANY GT GT - LE (VCH) ++CCVHU GTU - - LEU (VCHL) ++CCVHUANY GTU GTU - LEU (VCHL) ++ ++CCVFH GT - - UNLE (VFCH) ++CCVFHANY GT GT - UNLE (VFCH) ++CCVFHE GE - - UNLT (VFCHE) ++CCVFHEANY GE GE - UNLT (VFCHE) ++ ++ ++ + + *** Comments *** + +@@ -152,6 +169,15 @@ The compare and swap instructions sets t + operands were equal/unequal. The CCZ1 mode ensures the result can be + effectively placed into a register. + ++ ++CCV* ++ ++The variants with and without ANY are generated by the same ++instructions and therefore are holding the same information. However, ++when generating a condition code mask they require checking different ++bits of CC. In that case the variants without ANY represent the ++results for *all* elements. ++ + CCRAW + + The cc mode generated by a non-compare instruction. The condition +@@ -181,3 +207,38 @@ CC_MODE (CCT1); + CC_MODE (CCT2); + CC_MODE (CCT3); + CC_MODE (CCRAW); ++ ++CC_MODE (CCVEQ); ++CC_MODE (CCVEQANY); ++ ++CC_MODE (CCVH); ++CC_MODE (CCVHANY); ++CC_MODE (CCVHU); ++CC_MODE (CCVHUANY); ++ ++CC_MODE (CCVFH); ++CC_MODE (CCVFHANY); ++CC_MODE (CCVFHE); ++CC_MODE (CCVFHEANY); ++ ++ ++/* Vector modes. 
*/ ++ ++VECTOR_MODES (INT, 2); /* V2QI */ ++VECTOR_MODES (INT, 4); /* V4QI V2HI */ ++VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ ++VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */ ++ ++VECTOR_MODE (FLOAT, SF, 2); /* V2SF */ ++VECTOR_MODE (FLOAT, SF, 4); /* V4SF */ ++VECTOR_MODE (FLOAT, DF, 2); /* V2DF */ ++ ++VECTOR_MODE (INT, QI, 1); /* V1QI */ ++VECTOR_MODE (INT, HI, 1); /* V1HI */ ++VECTOR_MODE (INT, SI, 1); /* V1SI */ ++VECTOR_MODE (INT, DI, 1); /* V1DI */ ++VECTOR_MODE (INT, TI, 1); /* V1TI */ ++ ++VECTOR_MODE (FLOAT, SF, 1); /* V1SF */ ++VECTOR_MODE (FLOAT, DF, 1); /* V1DF */ ++VECTOR_MODE (FLOAT, TF, 1); /* V1TF */ +--- gcc/config/s390/s390.opt 2015-06-18 16:33:05.000000000 +0200 ++++ gcc/config/s390/s390.opt 2016-05-11 17:33:59.000000000 +0200 +@@ -76,6 +76,9 @@ Enum(processor_type) String(z196) Value( + EnumValue + Enum(processor_type) String(zEC12) Value(PROCESSOR_2827_ZEC12) + ++EnumValue ++Enum(processor_type) String(z13) Value(PROCESSOR_2964_Z13) ++ + mbackchain + Target Report Mask(BACKCHAIN) + Maintain backchain pointer +@@ -118,6 +121,10 @@ mhtm + Target Report Mask(OPT_HTM) + Use hardware transactional execution instructions + ++mvx ++Target Report Mask(OPT_VX) ++Use hardware vector facility instructions and enable the vector ABI ++ + mpacked-stack + Target Report Mask(PACKED_STACK) + Use packed stack layout +@@ -146,6 +153,11 @@ mmvcle + Target Report Mask(MVCLE) + mvcle use + ++mzvector ++Target Report Mask(ZVECTOR) ++Enable the z vector language extension providing the context-sensitive ++vector macro and enable the Altivec-style builtins in vecintrin.h ++ + mwarn-dynamicstack + Target RejectNegative Var(s390_warn_dynamicstack_p) + Warn if a function uses alloca or creates an array with dynamic size +--- gcc/config/s390/s390-opts.h 2013-01-21 16:11:50.000000000 +0100 ++++ gcc/config/s390/s390-opts.h 2016-05-11 15:53:24.000000000 +0200 +@@ -35,6 +35,7 @@ enum processor_type + PROCESSOR_2097_Z10, + PROCESSOR_2817_Z196, + PROCESSOR_2827_ZEC12, ++ PROCESSOR_2964_Z13, + PROCESSOR_max + }; + +--- gcc/config/s390/s390-protos.h 2014-01-14 16:37:04.000000000 +0100 ++++ gcc/config/s390/s390-protos.h 2016-05-11 19:28:17.220349132 +0200 +@@ -41,6 +41,9 @@ extern void s390_set_has_landing_pad_p ( + extern bool s390_hard_regno_mode_ok (unsigned int, enum machine_mode); + extern bool s390_hard_regno_rename_ok (unsigned int, unsigned int); + extern int s390_class_max_nregs (enum reg_class, enum machine_mode); ++extern int s390_cannot_change_mode_class (enum machine_mode, enum machine_mode, ++ enum reg_class); ++extern bool s390_function_arg_vector (enum machine_mode, const_tree); + + #ifdef RTX_CODE + extern int s390_extra_constraint_str (rtx, int, const char *); +@@ -49,6 +52,9 @@ extern int s390_const_double_ok_for_cons + extern int s390_single_part (rtx, enum machine_mode, enum machine_mode, int); + extern unsigned HOST_WIDE_INT s390_extract_part (rtx, enum machine_mode, int); + extern bool s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT, int, int *, int *); ++extern bool s390_const_vec_duplicate_p (rtx); ++extern bool s390_contiguous_bitmask_vector_p (rtx, int *, int *); ++extern bool s390_bytemask_vector_p (rtx, unsigned *); + extern bool s390_split_ok_p (rtx, rtx, enum machine_mode, int); + extern bool s390_overlap_p (rtx, rtx, HOST_WIDE_INT); + extern bool s390_offset_p (rtx, rtx, rtx); +@@ -81,6 +87,8 @@ extern void s390_load_address (rtx, rtx) + extern bool s390_expand_movmem (rtx, rtx, rtx); + extern void s390_expand_setmem (rtx, rtx, rtx); + extern bool 
s390_expand_cmpmem (rtx, rtx, rtx, rtx); ++extern void s390_expand_vec_strlen (rtx, rtx, rtx); ++extern void s390_expand_vec_movstr (rtx, rtx, rtx); + extern bool s390_expand_addcc (enum rtx_code, rtx, rtx, rtx, rtx, rtx); + extern bool s390_expand_insv (rtx, rtx, rtx, rtx); + extern void s390_expand_cs_hqi (enum machine_mode, rtx, rtx, rtx, +@@ -88,6 +96,10 @@ extern void s390_expand_cs_hqi (enum mac + extern void s390_expand_atomic (enum machine_mode, enum rtx_code, + rtx, rtx, rtx, bool); + extern void s390_expand_tbegin (rtx, rtx, rtx, bool); ++extern void s390_expand_vec_compare (rtx, enum rtx_code, rtx, rtx); ++extern void s390_expand_vec_compare_cc (rtx, enum rtx_code, rtx, rtx, bool); ++extern void s390_expand_vcond (rtx, rtx, rtx, enum rtx_code, rtx, rtx); ++extern void s390_expand_vec_init (rtx, rtx); + extern rtx s390_return_addr_rtx (int, rtx); + extern rtx s390_back_chain_rtx (void); + extern rtx s390_emit_call (rtx, rtx, rtx, rtx); +@@ -113,3 +125,10 @@ extern bool s390_extzv_shift_ok (int, in + extern void s390_asm_output_function_label (FILE *, const char *, tree); + + #endif /* RTX_CODE */ ++ ++/* s390-c.c routines */ ++extern void s390_cpu_cpp_builtins (struct cpp_reader *); ++extern void s390_register_target_pragmas (void); ++ ++/* Routines for s390-c.c */ ++extern bool s390_const_operand_ok (tree, int, int, tree); +--- gcc/config/s390/t-s390 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/config/s390/t-s390 2016-05-11 17:12:39.000000000 +0200 +@@ -0,0 +1,27 @@ ++# Copyright (C) 2015 Free Software Foundation, Inc. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3, or (at your option) ++# any later version. ++# ++# GCC is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# . ++ ++TM_H += $(srcdir)/config/s390/s390-builtins.def ++TM_H += $(srcdir)/config/s390/s390-builtin-types.def ++ ++s390-c.o: $(srcdir)/config/s390/s390-c.c \ ++ $(srcdir)/config/s390/s390-protos.h $(CONFIG_H) $(SYSTEM_H) coretypes.h \ ++ $(TM_H) $(TREE_H) $(TM_P_H) $(FLAGS_H) $(C_COMMON_H) $(GGC_H) \ ++ $(TARGET_H) $(TARGET_DEF_H) $(CPPLIB_H) $(C_PRAGMA_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/s390/s390-c.c +--- gcc/config/s390/vecintrin.h 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/config/s390/vecintrin.h 2016-05-11 18:10:53.000000000 +0200 +@@ -0,0 +1,277 @@ ++/* GNU compiler hardware transactional execution intrinsics ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com) ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. 
++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#ifndef _VECINTRIN_H ++#define _VECINTRIN_H ++ ++#ifdef __VEC__ ++ ++#define __VFTCI_ZERO 1<<11 ++#define __VFTCI_ZERO_N 1<<10 ++#define __VFTCI_NORMAL 1<<9 ++#define __VFTCI_NORMAL_N 1<<8 ++#define __VFTCI_SUBNORMAL 1<<7 ++#define __VFTCI_SUBNORMAL_N 1<<6 ++#define __VFTCI_INF 1<<5 ++#define __VFTCI_INF_N 1<<4 ++#define __VFTCI_QNAN 1<<3 ++#define __VFTCI_QNAN_N 1<<2 ++#define __VFTCI_SNAN 1<<1 ++#define __VFTCI_SNAN_N 1<<0 ++ ++/* This also accepts a type for its parameter, so it is not enough ++ to #define vec_step to __builtin_vec_step. */ ++#define vec_step(x) __builtin_vec_step (* (__typeof__ (x) *) 0) ++ ++static inline int ++__lcbb(const void *ptr, int bndry) ++{ ++ int code; ++ switch (bndry) ++ { ++ case 64: code = 0; break; ++ case 128: code = 1; break; ++ case 256: code = 2; break; ++ case 512: code = 3; break; ++ case 1024: code = 4; break; ++ case 2048: code = 5; break; ++ case 4096: code = 6; break; ++ default: return 0; ++ } ++ return __builtin_s390_lcbb (ptr, code); ++} ++ ++#define vec_all_nle(X, Y) vec_all_nge ((Y), (X)) ++#define vec_all_nlt(X, Y) vec_all_ngt ((Y), (X)) ++#define vec_any_nle(X, Y) vec_any_nge ((Y), (X)) ++#define vec_any_nlt(X, Y) vec_any_ngt ((Y), (X)) ++#define vec_genmask __builtin_s390_vgbm ++#define vec_genmasks_8 __builtin_s390_vgmb ++#define vec_genmasks_16 __builtin_s390_vgmh ++#define vec_genmasks_32 __builtin_s390_vgmf ++#define vec_genmasks_64 __builtin_s390_vgmg ++#define vec_splat_u8 __builtin_s390_vec_splat_u8 ++#define vec_splat_s8 __builtin_s390_vec_splat_s8 ++#define vec_splat_u16 __builtin_s390_vec_splat_u16 ++#define vec_splat_s16 __builtin_s390_vec_splat_s16 ++#define vec_splat_u32 __builtin_s390_vec_splat_u32 ++#define vec_splat_s32 __builtin_s390_vec_splat_s32 ++#define vec_splat_u64 __builtin_s390_vec_splat_u64 ++#define vec_splat_s64 __builtin_s390_vec_splat_s64 ++#define vec_add_u128 __builtin_s390_vaq ++#define vec_addc_u128 __builtin_s390_vaccq ++#define vec_adde_u128 __builtin_s390_vacq ++#define vec_addec_u128 __builtin_s390_vacccq ++#define vec_checksum __builtin_s390_vcksm ++#define vec_gfmsum_128 __builtin_s390_vgfmg ++#define vec_gfmsum_accum_128 __builtin_s390_vgfmag ++#define vec_sub_u128 __builtin_s390_vsq ++#define vec_subc_u128 __builtin_s390_vscbiq ++#define vec_sube_u128 __builtin_s390_vsbiq ++#define vec_subec_u128 __builtin_s390_vsbcbiq ++#define vec_ceil(X) __builtin_s390_vfidb((X), 4, 6) ++#define vec_roundp(X) __builtin_s390_vfidb((X), 4, 6) ++#define vec_floor(X) __builtin_s390_vfidb((X), 4, 7) ++#define vec_roundm(X) __builtin_s390_vfidb((X), 4, 7) ++#define vec_trunc(X) __builtin_s390_vfidb((X), 4, 5) ++#define vec_roundz(X) __builtin_s390_vfidb((X), 4, 5) ++#define vec_roundc(X) __builtin_s390_vfidb((X), 4, 0) ++#define vec_round(X) __builtin_s390_vfidb((X), 4, 4) ++#define vec_madd __builtin_s390_vfmadb ++#define vec_msub __builtin_s390_vfmsdb ++ ++static inline int ++vec_all_nan (__vector double a) ++{ ++ int cc; ++ __builtin_s390_vftcidb (a, ++ __VFTCI_QNAN ++ | __VFTCI_QNAN_N ++ | __VFTCI_SNAN ++ | __VFTCI_SNAN_N, &cc); ++ return cc == 0 ? 1 : 0; ++} ++ ++static inline int ++vec_all_numeric (__vector double a) ++{ ++ int cc; ++ __builtin_s390_vftcidb (a, ++ __VFTCI_NORMAL ++ | __VFTCI_NORMAL_N ++ | __VFTCI_SUBNORMAL ++ | __VFTCI_SUBNORMAL_N, &cc); ++ return cc == 0 ? 
1 : 0; ++} ++ ++static inline int ++vec_any_nan (__vector double a) ++{ ++ int cc; ++ __builtin_s390_vftcidb (a, ++ __VFTCI_QNAN ++ | __VFTCI_QNAN_N ++ | __VFTCI_SNAN ++ | __VFTCI_SNAN_N, &cc); ++ return cc != 3 ? 1 : 0; ++} ++ ++static inline int ++vec_any_numeric (__vector double a) ++{ ++ int cc; ++ __builtin_s390_vftcidb (a, ++ __VFTCI_NORMAL ++ | __VFTCI_NORMAL_N ++ | __VFTCI_SUBNORMAL ++ | __VFTCI_SUBNORMAL_N, &cc); ++ return cc != 3 ? 1 : 0; ++} ++#define vec_gather_element __builtin_s390_vec_gather_element ++#define vec_xld2 __builtin_s390_vec_xld2 ++#define vec_xlw4 __builtin_s390_vec_xlw4 ++#define vec_splats __builtin_s390_vec_splats ++#define vec_insert __builtin_s390_vec_insert ++#define vec_promote __builtin_s390_vec_promote ++#define vec_extract __builtin_s390_vec_extract ++#define vec_insert_and_zero __builtin_s390_vec_insert_and_zero ++#define vec_load_bndry __builtin_s390_vec_load_bndry ++#define vec_load_pair __builtin_s390_vec_load_pair ++#define vec_load_len __builtin_s390_vec_load_len ++#define vec_mergeh __builtin_s390_vec_mergeh ++#define vec_mergel __builtin_s390_vec_mergel ++#define vec_pack __builtin_s390_vec_pack ++#define vec_packs __builtin_s390_vec_packs ++#define vec_packs_cc __builtin_s390_vec_packs_cc ++#define vec_packsu __builtin_s390_vec_packsu ++#define vec_packsu_cc __builtin_s390_vec_packsu_cc ++#define vec_perm __builtin_s390_vec_perm ++#define vec_permi __builtin_s390_vec_permi ++#define vec_splat __builtin_s390_vec_splat ++#define vec_scatter_element __builtin_s390_vec_scatter_element ++#define vec_sel __builtin_s390_vec_sel ++#define vec_extend_s64 __builtin_s390_vec_extend_s64 ++#define vec_xstd2 __builtin_s390_vec_xstd2 ++#define vec_xstw4 __builtin_s390_vec_xstw4 ++#define vec_store_len __builtin_s390_vec_store_len ++#define vec_unpackh __builtin_s390_vec_unpackh ++#define vec_unpackl __builtin_s390_vec_unpackl ++#define vec_addc __builtin_s390_vec_addc ++#define vec_and __builtin_s390_vec_and ++#define vec_andc __builtin_s390_vec_andc ++#define vec_avg __builtin_s390_vec_avg ++#define vec_all_eq __builtin_s390_vec_all_eq ++#define vec_all_ne __builtin_s390_vec_all_ne ++#define vec_all_ge __builtin_s390_vec_all_ge ++#define vec_all_gt __builtin_s390_vec_all_gt ++#define vec_all_le __builtin_s390_vec_all_le ++#define vec_all_lt __builtin_s390_vec_all_lt ++#define vec_any_eq __builtin_s390_vec_any_eq ++#define vec_any_ne __builtin_s390_vec_any_ne ++#define vec_any_ge __builtin_s390_vec_any_ge ++#define vec_any_gt __builtin_s390_vec_any_gt ++#define vec_any_le __builtin_s390_vec_any_le ++#define vec_any_lt __builtin_s390_vec_any_lt ++#define vec_cmpeq __builtin_s390_vec_cmpeq ++#define vec_cmpge __builtin_s390_vec_cmpge ++#define vec_cmpgt __builtin_s390_vec_cmpgt ++#define vec_cmple __builtin_s390_vec_cmple ++#define vec_cmplt __builtin_s390_vec_cmplt ++#define vec_cntlz __builtin_s390_vec_cntlz ++#define vec_cnttz __builtin_s390_vec_cnttz ++#define vec_xor __builtin_s390_vec_xor ++#define vec_gfmsum __builtin_s390_vec_gfmsum ++#define vec_gfmsum_accum __builtin_s390_vec_gfmsum_accum ++#define vec_abs __builtin_s390_vec_abs ++#define vec_max __builtin_s390_vec_max ++#define vec_min __builtin_s390_vec_min ++#define vec_mladd __builtin_s390_vec_mladd ++#define vec_mhadd __builtin_s390_vec_mhadd ++#define vec_meadd __builtin_s390_vec_meadd ++#define vec_moadd __builtin_s390_vec_moadd ++#define vec_mulh __builtin_s390_vec_mulh ++#define vec_mule __builtin_s390_vec_mule ++#define vec_mulo __builtin_s390_vec_mulo ++#define vec_nor 
__builtin_s390_vec_nor ++#define vec_or __builtin_s390_vec_or ++#define vec_popcnt __builtin_s390_vec_popcnt ++#define vec_rl __builtin_s390_vec_rl ++#define vec_rli __builtin_s390_vec_rli ++#define vec_rl_mask __builtin_s390_vec_rl_mask ++#define vec_sll __builtin_s390_vec_sll ++#define vec_slb __builtin_s390_vec_slb ++#define vec_sld __builtin_s390_vec_sld ++#define vec_sldw __builtin_s390_vec_sldw ++#define vec_sral __builtin_s390_vec_sral ++#define vec_srab __builtin_s390_vec_srab ++#define vec_srl __builtin_s390_vec_srl ++#define vec_srb __builtin_s390_vec_srb ++#define vec_subc __builtin_s390_vec_subc ++#define vec_sum2 __builtin_s390_vec_sum2 ++#define vec_sum_u128 __builtin_s390_vec_sum_u128 ++#define vec_sum4 __builtin_s390_vec_sum4 ++#define vec_test_mask __builtin_s390_vec_test_mask ++#define vec_find_any_eq_idx __builtin_s390_vec_find_any_eq_idx ++#define vec_find_any_ne_idx __builtin_s390_vec_find_any_ne_idx ++#define vec_find_any_eq_or_0_idx __builtin_s390_vec_find_any_eq_or_0_idx ++#define vec_find_any_ne_or_0_idx __builtin_s390_vec_find_any_ne_or_0_idx ++#define vec_find_any_eq __builtin_s390_vec_find_any_eq ++#define vec_find_any_ne __builtin_s390_vec_find_any_ne ++#define vec_find_any_eq_idx_cc __builtin_s390_vec_find_any_eq_idx_cc ++#define vec_find_any_ne_idx_cc __builtin_s390_vec_find_any_ne_idx_cc ++#define vec_find_any_eq_or_0_idx_cc __builtin_s390_vec_find_any_eq_or_0_idx_cc ++#define vec_find_any_ne_or_0_idx_cc __builtin_s390_vec_find_any_ne_or_0_idx_cc ++#define vec_find_any_eq_cc __builtin_s390_vec_find_any_eq_cc ++#define vec_find_any_ne_cc __builtin_s390_vec_find_any_ne_cc ++#define vec_cmpeq_idx __builtin_s390_vec_cmpeq_idx ++#define vec_cmpeq_or_0_idx __builtin_s390_vec_cmpeq_or_0_idx ++#define vec_cmpeq_idx_cc __builtin_s390_vec_cmpeq_idx_cc ++#define vec_cmpeq_or_0_idx_cc __builtin_s390_vec_cmpeq_or_0_idx_cc ++#define vec_cmpne_idx __builtin_s390_vec_cmpne_idx ++#define vec_cmpne_or_0_idx __builtin_s390_vec_cmpne_or_0_idx ++#define vec_cmpne_idx_cc __builtin_s390_vec_cmpne_idx_cc ++#define vec_cmpne_or_0_idx_cc __builtin_s390_vec_cmpne_or_0_idx_cc ++#define vec_cp_until_zero __builtin_s390_vec_cp_until_zero ++#define vec_cp_until_zero_cc __builtin_s390_vec_cp_until_zero_cc ++#define vec_cmprg_idx __builtin_s390_vec_cmprg_idx ++#define vec_cmpnrg_idx __builtin_s390_vec_cmpnrg_idx ++#define vec_cmprg_or_0_idx __builtin_s390_vec_cmprg_or_0_idx ++#define vec_cmpnrg_or_0_idx __builtin_s390_vec_cmpnrg_or_0_idx ++#define vec_cmprg __builtin_s390_vec_cmprg ++#define vec_cmpnrg __builtin_s390_vec_cmpnrg ++#define vec_cmprg_idx_cc __builtin_s390_vec_cmprg_idx_cc ++#define vec_cmpnrg_idx_cc __builtin_s390_vec_cmpnrg_idx_cc ++#define vec_cmprg_or_0_idx_cc __builtin_s390_vec_cmprg_or_0_idx_cc ++#define vec_cmpnrg_or_0_idx_cc __builtin_s390_vec_cmpnrg_or_0_idx_cc ++#define vec_cmprg_cc __builtin_s390_vec_cmprg_cc ++#define vec_cmpnrg_cc __builtin_s390_vec_cmpnrg_cc ++#define vec_all_nge __builtin_s390_vec_all_nge ++#define vec_all_ngt __builtin_s390_vec_all_ngt ++#define vec_any_nge __builtin_s390_vec_any_nge ++#define vec_any_ngt __builtin_s390_vec_any_ngt ++#define vec_ctd __builtin_s390_vec_ctd ++#define vec_ctd_s64 __builtin_s390_vec_ctd_s64 ++#define vec_ctd_u64 __builtin_s390_vec_ctd_u64 ++#define vec_ctsl __builtin_s390_vec_ctsl ++#define vec_ctul __builtin_s390_vec_ctul ++#define vec_ld2f __builtin_s390_vec_ld2f ++#define vec_st2f __builtin_s390_vec_st2f ++#endif /* __VEC__ */ ++#endif /* _VECINTRIN_H */ +--- gcc/config/s390/vector.md 1970-01-01 
01:00:00.000000000 +0100 ++++ gcc/config/s390/vector.md 2016-05-11 18:11:04.000000000 +0200 +@@ -0,0 +1,1229 @@ ++;;- Instruction patterns for the System z vector facility ++;; Copyright (C) 2015 Free Software Foundation, Inc. ++;; Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com) ++ ++;; This file is part of GCC. ++ ++;; GCC is free software; you can redistribute it and/or modify it under ++;; the terms of the GNU General Public License as published by the Free ++;; Software Foundation; either version 3, or (at your option) any later ++;; version. ++ ++;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++;; WARRANTY; without even the implied warranty of MERCHANTABILITY or ++;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++;; for more details. ++ ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; . ++ ++; All vector modes supported in a vector register ++(define_mode_iterator V ++ [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1SF ++ V2SF V4SF V1DF V2DF]) ++(define_mode_iterator VT ++ [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1SF ++ V2SF V4SF V1DF V2DF V1TF V1TI TI]) ++ ++; All vector modes directly supported by the hardware having full vector reg size ++; V_HW2 is duplicate of V_HW for having two iterators expanding ++; independently e.g. vcond ++(define_mode_iterator V_HW [V16QI V8HI V4SI V2DI V2DF]) ++(define_mode_iterator V_HW2 [V16QI V8HI V4SI V2DI V2DF]) ++; Including TI for instructions that support it (va, vn, ...) ++(define_mode_iterator VT_HW [V16QI V8HI V4SI V2DI V2DF V1TI TI]) ++ ++; All full size integer vector modes supported in a vector register + TImode ++(define_mode_iterator VIT_HW [V16QI V8HI V4SI V2DI V1TI TI]) ++(define_mode_iterator VI_HW [V16QI V8HI V4SI V2DI]) ++(define_mode_iterator VI_HW_QHS [V16QI V8HI V4SI]) ++(define_mode_iterator VI_HW_HS [V8HI V4SI]) ++(define_mode_iterator VI_HW_QH [V16QI V8HI]) ++ ++; All integer vector modes supported in a vector register + TImode ++(define_mode_iterator VIT [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1TI TI]) ++(define_mode_iterator VI [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI]) ++(define_mode_iterator VI_QHS [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI]) ++ ++(define_mode_iterator V_8 [V1QI]) ++(define_mode_iterator V_16 [V2QI V1HI]) ++(define_mode_iterator V_32 [V4QI V2HI V1SI V1SF]) ++(define_mode_iterator V_64 [V8QI V4HI V2SI V2SF V1DI V1DF]) ++(define_mode_iterator V_128 [V16QI V8HI V4SI V4SF V2DI V2DF V1TI V1TF]) ++ ++; A blank for vector modes and a * for TImode. This is used to hide ++; the TImode expander name in case it is defined already. See addti3 ++; for an example. ++(define_mode_attr ti* [(V1QI "") (V2QI "") (V4QI "") (V8QI "") (V16QI "") ++ (V1HI "") (V2HI "") (V4HI "") (V8HI "") ++ (V1SI "") (V2SI "") (V4SI "") ++ (V1DI "") (V2DI "") ++ (V1TI "*") (TI "*")]) ++ ++; The element type of the vector. 
++(define_mode_attr non_vec[(V1QI "QI") (V2QI "QI") (V4QI "QI") (V8QI "QI") (V16QI "QI") ++ (V1HI "HI") (V2HI "HI") (V4HI "HI") (V8HI "HI") ++ (V1SI "SI") (V2SI "SI") (V4SI "SI") ++ (V1DI "DI") (V2DI "DI") ++ (V1TI "TI") ++ (V1SF "SF") (V2SF "SF") (V4SF "SF") ++ (V1DF "DF") (V2DF "DF") ++ (V1TF "TF")]) ++ ++; The instruction suffix ++(define_mode_attr bhfgq[(V1QI "b") (V2QI "b") (V4QI "b") (V8QI "b") (V16QI "b") ++ (V1HI "h") (V2HI "h") (V4HI "h") (V8HI "h") ++ (V1SI "f") (V2SI "f") (V4SI "f") ++ (V1DI "g") (V2DI "g") ++ (V1TI "q") (TI "q") ++ (V1SF "f") (V2SF "f") (V4SF "f") ++ (V1DF "g") (V2DF "g") ++ (V1TF "q")]) ++ ++; This is for vmalhw. It gets an 'w' attached to avoid confusion with ++; multiply and add logical high vmalh. ++(define_mode_attr w [(V1QI "") (V2QI "") (V4QI "") (V8QI "") (V16QI "") ++ (V1HI "w") (V2HI "w") (V4HI "w") (V8HI "w") ++ (V1SI "") (V2SI "") (V4SI "") ++ (V1DI "") (V2DI "")]) ++ ++; Resulting mode of a vector comparison. For floating point modes an ++; integer vector mode with the same element size is picked. ++(define_mode_attr tointvec [(V1QI "V1QI") (V2QI "V2QI") (V4QI "V4QI") (V8QI "V8QI") (V16QI "V16QI") ++ (V1HI "V1HI") (V2HI "V2HI") (V4HI "V4HI") (V8HI "V8HI") ++ (V1SI "V1SI") (V2SI "V2SI") (V4SI "V4SI") ++ (V1DI "V1DI") (V2DI "V2DI") ++ (V1TI "V1TI") ++ (V1SF "V1SI") (V2SF "V2SI") (V4SF "V4SI") ++ (V1DF "V1DI") (V2DF "V2DI") ++ (V1TF "V1TI")]) ++ ++; Vector with doubled element size. ++(define_mode_attr vec_double [(V1QI "V1HI") (V2QI "V1HI") (V4QI "V2HI") (V8QI "V4HI") (V16QI "V8HI") ++ (V1HI "V1SI") (V2HI "V1SI") (V4HI "V2SI") (V8HI "V4SI") ++ (V1SI "V1DI") (V2SI "V1DI") (V4SI "V2DI") ++ (V1DI "V1TI") (V2DI "V1TI") ++ (V1SF "V1DF") (V2SF "V1DF") (V4SF "V2DF")]) ++ ++; Vector with half the element size. ++(define_mode_attr vec_half [(V1HI "V2QI") (V2HI "V4QI") (V4HI "V8QI") (V8HI "V16QI") ++ (V1SI "V2HI") (V2SI "V4HI") (V4SI "V8HI") ++ (V1DI "V2SI") (V2DI "V4SI") ++ (V1TI "V2DI") ++ (V1DF "V2SF") (V2DF "V4SF") ++ (V1TF "V1DF")]) ++ ++; The comparisons not setting CC iterate over the rtx code. ++(define_code_iterator VFCMP_HW_OP [eq gt ge]) ++(define_code_attr asm_fcmp_op [(eq "e") (gt "h") (ge "he")]) ++ ++ ++ ++; Comparison operators on int and fp compares which are directly ++; supported by the HW. ++(define_code_iterator VICMP_HW_OP [eq gt gtu]) ++; For int insn_cmp_op can be used in the insn name as well as in the asm output. 
++(define_code_attr insn_cmp_op [(eq "eq") (gt "h") (gtu "hl") (ge "he")]) ++ ++; Flags for vector string instructions (vfae all 4, vfee only ZS and CS, vstrc all 4) ++(define_constants ++ [(VSTRING_FLAG_IN 8) ; invert result ++ (VSTRING_FLAG_RT 4) ; result type ++ (VSTRING_FLAG_ZS 2) ; zero search ++ (VSTRING_FLAG_CS 1)]) ; condition code set ++ ++(include "vx-builtins.md") ++ ++; Full HW vector size moves ++(define_insn "mov" ++ [(set (match_operand:V_128 0 "nonimmediate_operand" "=v, v,QR, v, v, v, v, v,v,d") ++ (match_operand:V_128 1 "general_operand" " v,QR, v,j00,jm1,jyy,jxx,jKK,d,v"))] ++ "TARGET_VX" ++ "@ ++ vlr\t%v0,%v1 ++ vl\t%v0,%1 ++ vst\t%v1,%0 ++ vzero\t%v0 ++ vone\t%v0 ++ vgbm\t%v0,%t1 ++ vgm\t%v0,%s1,%e1 ++ vrepi\t%v0,%h1 ++ vlvgp\t%v0,%1,%N1 ++ #" ++ [(set_attr "op_type" "VRR,VRX,VRX,VRI,VRI,VRI,VRI,VRI,VRR,*")]) ++ ++(define_split ++ [(set (match_operand:V_128 0 "register_operand" "") ++ (match_operand:V_128 1 "register_operand" ""))] ++ "TARGET_VX && GENERAL_REG_P (operands[0]) && VECTOR_REG_P (operands[1])" ++ [(set (match_dup 2) ++ (unspec:DI [(subreg:V2DI (match_dup 1) 0) ++ (const_int 0)] UNSPEC_VEC_EXTRACT)) ++ (set (match_dup 3) ++ (unspec:DI [(subreg:V2DI (match_dup 1) 0) ++ (const_int 1)] UNSPEC_VEC_EXTRACT))] ++{ ++ operands[2] = operand_subword (operands[0], 0, 0, mode); ++ operands[3] = operand_subword (operands[0], 1, 0, mode); ++}) ++ ++; Moves for smaller vector modes. ++ ++; In these patterns only the vlr, vone, and vzero instructions write ++; VR bytes outside the mode. This should be ok since we disallow ++; formerly bigger modes being accessed with smaller modes via ++; subreg. Note: The vone, vzero instructions could easily be replaced ++; with vlei which would only access the bytes belonging to the mode. ++; However, this would probably be slower. 
++ ++(define_insn "mov" ++ [(set (match_operand:V_8 0 "nonimmediate_operand" "=v,v,d, v,QR, v, v, v, v,d, Q, S, Q, S, d, d,d,d,d,R,T") ++ (match_operand:V_8 1 "general_operand" " v,d,v,QR, v,j00,jm1,jyy,jxx,d,j00,j00,jm1,jm1,j00,jm1,R,T,b,d,d"))] ++ "" ++ "@ ++ vlr\t%v0,%v1 ++ vlvgb\t%v0,%1,0 ++ vlgvb\t%0,%v1,0 ++ vleb\t%v0,%1,0 ++ vsteb\t%v1,%0,0 ++ vzero\t%v0 ++ vone\t%v0 ++ vgbm\t%v0,%t1 ++ vgm\t%v0,%s1,%e1 ++ lr\t%0,%1 ++ mvi\t%0,0 ++ mviy\t%0,0 ++ mvi\t%0,-1 ++ mviy\t%0,-1 ++ lhi\t%0,0 ++ lhi\t%0,-1 ++ lh\t%0,%1 ++ lhy\t%0,%1 ++ lhrl\t%0,%1 ++ stc\t%1,%0 ++ stcy\t%1,%0" ++ [(set_attr "op_type" "VRR,VRS,VRS,VRX,VRX,VRI,VRI,VRI,VRI,RR,SI,SIY,SI,SIY,RI,RI,RX,RXY,RIL,RX,RXY")]) ++ ++(define_insn "mov" ++ [(set (match_operand:V_16 0 "nonimmediate_operand" "=v,v,d, v,QR, v, v, v, v,d, Q, Q, d, d,d,d,d,R,T,b") ++ (match_operand:V_16 1 "general_operand" " v,d,v,QR, v,j00,jm1,jyy,jxx,d,j00,jm1,j00,jm1,R,T,b,d,d,d"))] ++ "" ++ "@ ++ vlr\t%v0,%v1 ++ vlvgh\t%v0,%1,0 ++ vlgvh\t%0,%v1,0 ++ vleh\t%v0,%1,0 ++ vsteh\t%v1,%0,0 ++ vzero\t%v0 ++ vone\t%v0 ++ vgbm\t%v0,%t1 ++ vgm\t%v0,%s1,%e1 ++ lr\t%0,%1 ++ mvhhi\t%0,0 ++ mvhhi\t%0,-1 ++ lhi\t%0,0 ++ lhi\t%0,-1 ++ lh\t%0,%1 ++ lhy\t%0,%1 ++ lhrl\t%0,%1 ++ sth\t%1,%0 ++ sthy\t%1,%0 ++ sthrl\t%1,%0" ++ [(set_attr "op_type" "VRR,VRS,VRS,VRX,VRX,VRI,VRI,VRI,VRI,RR,SIL,SIL,RI,RI,RX,RXY,RIL,RX,RXY,RIL")]) ++ ++(define_insn "mov" ++ [(set (match_operand:V_32 0 "nonimmediate_operand" "=f,f,f,R,T,v,v,d, v,QR, f, v, v, v, v, Q, Q, d, d,d,d,d,d,R,T,b") ++ (match_operand:V_32 1 "general_operand" " f,R,T,f,f,v,d,v,QR, v,j00,j00,jm1,jyy,jxx,j00,jm1,j00,jm1,b,d,R,T,d,d,d"))] ++ "TARGET_VX" ++ "@ ++ lder\t%v0,%v1 ++ lde\t%0,%1 ++ ley\t%0,%1 ++ ste\t%1,%0 ++ stey\t%1,%0 ++ vlr\t%v0,%v1 ++ vlvgf\t%v0,%1,0 ++ vlgvf\t%0,%v1,0 ++ vlef\t%v0,%1,0 ++ vstef\t%1,%0,0 ++ lzer\t%v0 ++ vzero\t%v0 ++ vone\t%v0 ++ vgbm\t%v0,%t1 ++ vgm\t%v0,%s1,%e1 ++ mvhi\t%0,0 ++ mvhi\t%0,-1 ++ lhi\t%0,0 ++ lhi\t%0,-1 ++ lrl\t%0,%1 ++ lr\t%0,%1 ++ l\t%0,%1 ++ ly\t%0,%1 ++ st\t%1,%0 ++ sty\t%1,%0 ++ strl\t%1,%0" ++ [(set_attr "op_type" "RRE,RXE,RXY,RX,RXY,VRR,VRS,VRS,VRX,VRX,RRE,VRI,VRI,VRI,VRI,SIL,SIL,RI,RI, ++ RIL,RR,RX,RXY,RX,RXY,RIL")]) ++ ++(define_insn "mov" ++ [(set (match_operand:V_64 0 "nonimmediate_operand" ++ "=f,f,f,R,T,v,v,d, v,QR, f, v, v, v, v, Q, Q, d, d,f,d,d,d, d,RT,b") ++ (match_operand:V_64 1 "general_operand" ++ " f,R,T,f,f,v,d,v,QR, v,j00,j00,jm1,jyy,jxx,j00,jm1,j00,jm1,d,f,b,d,RT, d,d"))] ++ "TARGET_ZARCH" ++ "@ ++ ldr\t%0,%1 ++ ld\t%0,%1 ++ ldy\t%0,%1 ++ std\t%1,%0 ++ stdy\t%1,%0 ++ vlr\t%v0,%v1 ++ vlvgg\t%v0,%1,0 ++ vlgvg\t%0,%v1,0 ++ vleg\t%v0,%1,0 ++ vsteg\t%v1,%0,0 ++ lzdr\t%0 ++ vzero\t%v0 ++ vone\t%v0 ++ vgbm\t%v0,%t1 ++ vgm\t%v0,%s1,%e1 ++ mvghi\t%0,0 ++ mvghi\t%0,-1 ++ lghi\t%0,0 ++ lghi\t%0,-1 ++ ldgr\t%0,%1 ++ lgdr\t%0,%1 ++ lgrl\t%0,%1 ++ lgr\t%0,%1 ++ lg\t%0,%1 ++ stg\t%1,%0 ++ stgrl\t%1,%0" ++ [(set_attr "op_type" "RRE,RX,RXY,RX,RXY,VRR,VRS,VRS,VRX,VRX,RRE,VRI,VRI,VRI,VRI, ++ SIL,SIL,RI,RI,RRE,RRE,RIL,RR,RXY,RXY,RIL")]) ++ ++ ++; vec_load_lanes? ++ ++; vec_store_lanes? ++ ++; FIXME: Support also vector mode operands for 1 ++; FIXME: A target memory operand seems to be useful otherwise we end ++; up with vl vlvgg vst. Shouldn't the middle-end be able to handle ++; that itself? 
++(define_insn "*vec_set" ++ [(set (match_operand:V 0 "register_operand" "=v, v,v") ++ (unspec:V [(match_operand: 1 "general_operand" "d,QR,K") ++ (match_operand:SI 2 "shift_count_or_setmem_operand" "Y, I,I") ++ (match_operand:V 3 "register_operand" "0, 0,0")] ++ UNSPEC_VEC_SET))] ++ "TARGET_VX" ++ "@ ++ vlvg\t%v0,%1,%Y2 ++ vle\t%v0,%1,%2 ++ vlei\t%v0,%1,%2" ++ [(set_attr "op_type" "VRS,VRX,VRI")]) ++ ++; vec_set is supposed to *modify* an existing vector so operand 0 is ++; duplicated as input operand. ++(define_expand "vec_set" ++ [(set (match_operand:V 0 "register_operand" "") ++ (unspec:V [(match_operand: 1 "general_operand" "") ++ (match_operand:SI 2 "shift_count_or_setmem_operand" "") ++ (match_dup 0)] ++ UNSPEC_VEC_SET))] ++ "TARGET_VX") ++ ++; FIXME: Support also vector mode operands for 0 ++; FIXME: This should be (vec_select ..) or something but it does only allow constant selectors :( ++; This is used via RTL standard name as well as for expanding the builtin ++(define_insn "vec_extract" ++ [(set (match_operand: 0 "nonimmediate_operand" "=d,QR") ++ (unspec: [(match_operand:V 1 "register_operand" " v, v") ++ (match_operand:SI 2 "shift_count_or_setmem_operand" " Y, I")] ++ UNSPEC_VEC_EXTRACT))] ++ "TARGET_VX" ++ "@ ++ vlgv\t%0,%v1,%Y2 ++ vste\t%v1,%0,%2" ++ [(set_attr "op_type" "VRS,VRX")]) ++ ++(define_expand "vec_init" ++ [(match_operand:V_HW 0 "register_operand" "") ++ (match_operand:V_HW 1 "nonmemory_operand" "")] ++ "TARGET_VX" ++{ ++ s390_expand_vec_init (operands[0], operands[1]); ++ DONE; ++}) ++ ++; Replicate from vector element ++(define_insn "*vec_splat" ++ [(set (match_operand:V_HW 0 "register_operand" "=v") ++ (vec_duplicate:V_HW ++ (vec_select: ++ (match_operand:V_HW 1 "register_operand" "v") ++ (parallel ++ [(match_operand:QI 2 "const_mask_operand" "C")]))))] ++ "TARGET_VX && UINTVAL (operands[2]) < GET_MODE_NUNITS (mode)" ++ "vrep\t%v0,%v1,%2" ++ [(set_attr "op_type" "VRI")]) ++ ++(define_insn "*vec_splats" ++ [(set (match_operand:V_HW 0 "register_operand" "=v,v,v,v") ++ (vec_duplicate:V_HW (match_operand: 1 "general_operand" "QR,K,v,d")))] ++ "TARGET_VX" ++ "@ ++ vlrep\t%v0,%1 ++ vrepi\t%v0,%h1 ++ vrep\t%v0,%v1,0 ++ #" ++ [(set_attr "op_type" "VRX,VRI,VRI,*")]) ++ ++; vec_splats is supposed to replicate op1 into all elements of op0 ++; This splitter first sets the rightmost element of op0 to op1 and ++; then does a vec_splat to replicate that element into all other ++; elements. 
++(define_split ++ [(set (match_operand:V_HW 0 "register_operand" "") ++ (vec_duplicate:V_HW (match_operand: 1 "register_operand" "")))] ++ "TARGET_VX && GENERAL_REG_P (operands[1])" ++ [(set (match_dup 0) ++ (unspec:V_HW [(match_dup 1) (match_dup 2) (match_dup 0)] UNSPEC_VEC_SET)) ++ (set (match_dup 0) ++ (vec_duplicate:V_HW ++ (vec_select: ++ (match_dup 0) (parallel [(match_dup 2)]))))] ++{ ++ operands[2] = GEN_INT (GET_MODE_NUNITS (mode) - 1); ++}) ++ ++(define_expand "vcond" ++ [(set (match_operand:V_HW 0 "register_operand" "") ++ (if_then_else:V_HW ++ (match_operator 3 "comparison_operator" ++ [(match_operand:V_HW2 4 "register_operand" "") ++ (match_operand:V_HW2 5 "register_operand" "")]) ++ (match_operand:V_HW 1 "nonmemory_operand" "") ++ (match_operand:V_HW 2 "nonmemory_operand" "")))] ++ "TARGET_VX && GET_MODE_NUNITS (mode) == GET_MODE_NUNITS (mode)" ++{ ++ s390_expand_vcond (operands[0], operands[1], operands[2], ++ GET_CODE (operands[3]), operands[4], operands[5]); ++ DONE; ++}) ++ ++(define_expand "vcondu" ++ [(set (match_operand:V_HW 0 "register_operand" "") ++ (if_then_else:V_HW ++ (match_operator 3 "comparison_operator" ++ [(match_operand:V_HW2 4 "register_operand" "") ++ (match_operand:V_HW2 5 "register_operand" "")]) ++ (match_operand:V_HW 1 "nonmemory_operand" "") ++ (match_operand:V_HW 2 "nonmemory_operand" "")))] ++ "TARGET_VX && GET_MODE_NUNITS (mode) == GET_MODE_NUNITS (mode)" ++{ ++ s390_expand_vcond (operands[0], operands[1], operands[2], ++ GET_CODE (operands[3]), operands[4], operands[5]); ++ DONE; ++}) ++ ++; We only have HW support for byte vectors. The middle-end is ++; supposed to lower the mode if required. ++(define_insn "vec_permv16qi" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") ++ (match_operand:V16QI 2 "register_operand" "v") ++ (match_operand:V16QI 3 "register_operand" "v")] ++ UNSPEC_VEC_PERM))] ++ "TARGET_VX" ++ "vperm\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++; vec_perm_const for V2DI using vpdi? ++ ++;; ++;; Vector integer arithmetic instructions ++;; ++ ++; vab, vah, vaf, vag, vaq ++ ++; We use nonimmediate_operand instead of register_operand since it is ++; better to have the reloads into VRs instead of splitting the ++; operation into two DImode ADDs. 
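For readers new to these expanders: s390_expand_vcond turns the conditional move into an element-wise compare that yields an all-ones or all-zeros mask per lane, followed by a bitwise select. An editorial sketch of the resulting semantics in plain C (V2DI greater-than case; the function name is invented):

#include <stdint.h>

/* dst[i] = (a[i] > b[i]) ? t[i] : f[i], the way the vector unit does it:
   the compare (e.g. vchg) yields all-ones or all-zeros per lane, and
   vsel then picks bits from t or f.  */
void
vcond_gt_v2di (int64_t dst[2], const int64_t a[2], const int64_t b[2],
               const int64_t t[2], const int64_t f[2])
{
  for (int i = 0; i < 2; i++)
    {
      int64_t mask = (a[i] > b[i]) ? -1 : 0;      /* vchg */
      dst[i] = (t[i] & mask) | (f[i] & ~mask);    /* vsel */
    }
}

(The integer add/sub patterns follow; as noted above, they deliberately use nonimmediate_operand so that reloads move the operands into VRs.)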
++(define_insn "add3" ++ [(set (match_operand:VIT 0 "nonimmediate_operand" "=v") ++ (plus:VIT (match_operand:VIT 1 "nonimmediate_operand" "%v") ++ (match_operand:VIT 2 "general_operand" "v")))] ++ "TARGET_VX" ++ "va\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vsb, vsh, vsf, vsg, vsq ++(define_insn "sub3" ++ [(set (match_operand:VIT 0 "nonimmediate_operand" "=v") ++ (minus:VIT (match_operand:VIT 1 "nonimmediate_operand" "v") ++ (match_operand:VIT 2 "general_operand" "v")))] ++ "TARGET_VX" ++ "vs\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vmlb, vmlhw, vmlf ++(define_insn "mul3" ++ [(set (match_operand:VI_QHS 0 "register_operand" "=v") ++ (mult:VI_QHS (match_operand:VI_QHS 1 "register_operand" "%v") ++ (match_operand:VI_QHS 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vml\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vlcb, vlch, vlcf, vlcg ++(define_insn "neg2" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (neg:VI (match_operand:VI 1 "register_operand" "v")))] ++ "TARGET_VX" ++ "vlc\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++; vlpb, vlph, vlpf, vlpg ++(define_insn "abs2" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (abs:VI (match_operand:VI 1 "register_operand" "v")))] ++ "TARGET_VX" ++ "vlp\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector sum across ++ ++; Sum across DImode parts of the 1st operand and add the rightmost ++; element of 2nd operand ++; vsumgh, vsumgf ++(define_insn "*vec_sum2" ++ [(set (match_operand:V2DI 0 "register_operand" "=v") ++ (unspec:V2DI [(match_operand:VI_HW_HS 1 "register_operand" "v") ++ (match_operand:VI_HW_HS 2 "register_operand" "v")] ++ UNSPEC_VEC_VSUMG))] ++ "TARGET_VX" ++ "vsumg\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vsumb, vsumh ++(define_insn "*vec_sum4" ++ [(set (match_operand:V4SI 0 "register_operand" "=v") ++ (unspec:V4SI [(match_operand:VI_HW_QH 1 "register_operand" "v") ++ (match_operand:VI_HW_QH 2 "register_operand" "v")] ++ UNSPEC_VEC_VSUM))] ++ "TARGET_VX" ++ "vsum\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++;; ++;; Vector bit instructions (int + fp) ++;; ++ ++; Vector and ++ ++(define_insn "and3" ++ [(set (match_operand:VT 0 "register_operand" "=v") ++ (and:VT (match_operand:VT 1 "register_operand" "%v") ++ (match_operand:VT 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vn\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector or ++ ++(define_insn "ior3" ++ [(set (match_operand:VT 0 "register_operand" "=v") ++ (ior:VT (match_operand:VT 1 "register_operand" "%v") ++ (match_operand:VT 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vo\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector xor ++ ++(define_insn "xor3" ++ [(set (match_operand:VT 0 "register_operand" "=v") ++ (xor:VT (match_operand:VT 1 "register_operand" "%v") ++ (match_operand:VT 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vx\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Bitwise inversion of a vector - used for vec_cmpne ++(define_insn "*not" ++ [(set (match_operand:VT 0 "register_operand" "=v") ++ (not:VT (match_operand:VT 1 "register_operand" "v")))] ++ "TARGET_VX" ++ "vnot\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++; Vector population count ++ ++(define_insn "popcountv16qi2" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")] ++ UNSPEC_POPCNT))] ++ "TARGET_VX" ++ "vpopct\t%v0,%v1,0" ++ [(set_attr "op_type" "VRR")]) ++ ++; vpopct only counts bits in byte elements. 
Bigger element sizes need ++; to be emulated. Word and doubleword elements can use the sum across ++; instructions. For halfword sized elements we do a shift of a copy ++; of the result, add it to the result and extend it to halfword ++; element size (unpack). ++ ++(define_expand "popcountv8hi2" ++ [(set (match_dup 2) ++ (unspec:V16QI [(subreg:V16QI (match_operand:V8HI 1 "register_operand" "v") 0)] ++ UNSPEC_POPCNT)) ++ ; Make a copy of the result ++ (set (match_dup 3) (match_dup 2)) ++ ; Generate the shift count operand in a VR (8->byte 7) ++ (set (match_dup 4) (match_dup 5)) ++ (set (match_dup 4) (unspec:V16QI [(const_int 8) ++ (const_int 7) ++ (match_dup 4)] UNSPEC_VEC_SET)) ++ ; Vector shift right logical by one byte ++ (set (match_dup 3) ++ (unspec:V16QI [(match_dup 3) (match_dup 4)] UNSPEC_VEC_SRLB)) ++ ; Add the shifted and the original result ++ (set (match_dup 2) ++ (plus:V16QI (match_dup 2) (match_dup 3))) ++ ; Generate mask for the odd numbered byte elements ++ (set (match_dup 3) ++ (const_vector:V16QI [(const_int 0) (const_int 255) ++ (const_int 0) (const_int 255) ++ (const_int 0) (const_int 255) ++ (const_int 0) (const_int 255) ++ (const_int 0) (const_int 255) ++ (const_int 0) (const_int 255) ++ (const_int 0) (const_int 255) ++ (const_int 0) (const_int 255)])) ++ ; Zero out the even indexed bytes ++ (set (match_operand:V8HI 0 "register_operand" "=v") ++ (and:V8HI (subreg:V8HI (match_dup 2) 0) ++ (subreg:V8HI (match_dup 3) 0))) ++] ++ "TARGET_VX" ++{ ++ operands[2] = gen_reg_rtx (V16QImode); ++ operands[3] = gen_reg_rtx (V16QImode); ++ operands[4] = gen_reg_rtx (V16QImode); ++ operands[5] = CONST0_RTX (V16QImode); ++}) ++ ++(define_expand "popcountv4si2" ++ [(set (match_dup 2) ++ (unspec:V16QI [(subreg:V16QI (match_operand:V4SI 1 "register_operand" "v") 0)] ++ UNSPEC_POPCNT)) ++ (set (match_operand:V4SI 0 "register_operand" "=v") ++ (unspec:V4SI [(match_dup 2) (match_dup 3)] ++ UNSPEC_VEC_VSUM))] ++ "TARGET_VX" ++{ ++ operands[2] = gen_reg_rtx (V16QImode); ++ operands[3] = force_reg (V16QImode, CONST0_RTX (V16QImode)); ++}) ++ ++(define_expand "popcountv2di2" ++ [(set (match_dup 2) ++ (unspec:V16QI [(subreg:V16QI (match_operand:V2DI 1 "register_operand" "v") 0)] ++ UNSPEC_POPCNT)) ++ (set (match_dup 3) ++ (unspec:V4SI [(match_dup 2) (match_dup 4)] ++ UNSPEC_VEC_VSUM)) ++ (set (match_operand:V2DI 0 "register_operand" "=v") ++ (unspec:V2DI [(match_dup 3) (match_dup 5)] ++ UNSPEC_VEC_VSUMG))] ++ "TARGET_VX" ++{ ++ operands[2] = gen_reg_rtx (V16QImode); ++ operands[3] = gen_reg_rtx (V4SImode); ++ operands[4] = force_reg (V16QImode, CONST0_RTX (V16QImode)); ++ operands[5] = force_reg (V4SImode, CONST0_RTX (V4SImode)); ++}) ++ ++; Count leading zeros ++(define_insn "clz2" ++ [(set (match_operand:V 0 "register_operand" "=v") ++ (clz:V (match_operand:V 1 "register_operand" "v")))] ++ "TARGET_VX" ++ "vclz\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++; Count trailing zeros ++(define_insn "ctz2" ++ [(set (match_operand:V 0 "register_operand" "=v") ++ (ctz:V (match_operand:V 1 "register_operand" "v")))] ++ "TARGET_VX" ++ "vctz\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector rotate instructions ++ ++; Each vector element rotated by a scalar ++; verllb, verllh, verllf, verllg ++(define_insn "rotl3" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (rotate:VI (match_operand:VI 1 "register_operand" "v") ++ (match_operand:SI 2 "shift_count_or_setmem_operand" "Y")))] ++ "TARGET_VX" ++ "verll\t%v0,%v1,%Y2" ++ [(set_attr "op_type" "VRS")]) ++ ++; Each vector element rotated 
by the corresponding vector element ++; verllvb, verllvh, verllvf, verllvg ++(define_insn "vrotl3" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (rotate:VI (match_operand:VI 1 "register_operand" "v") ++ (match_operand:VI 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "verllv\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Shift each element by scalar value ++ ++; veslb, veslh, veslf, veslg ++(define_insn "ashl3" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (ashift:VI (match_operand:VI 1 "register_operand" "v") ++ (match_operand:SI 2 "shift_count_or_setmem_operand" "Y")))] ++ "TARGET_VX" ++ "vesl\t%v0,%v1,%Y2" ++ [(set_attr "op_type" "VRS")]) ++ ++; vesrab, vesrah, vesraf, vesrag ++(define_insn "ashr3" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (ashiftrt:VI (match_operand:VI 1 "register_operand" "v") ++ (match_operand:SI 2 "shift_count_or_setmem_operand" "Y")))] ++ "TARGET_VX" ++ "vesra\t%v0,%v1,%Y2" ++ [(set_attr "op_type" "VRS")]) ++ ++; vesrlb, vesrlh, vesrlf, vesrlg ++(define_insn "lshr3" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (lshiftrt:VI (match_operand:VI 1 "register_operand" "v") ++ (match_operand:SI 2 "shift_count_or_setmem_operand" "Y")))] ++ "TARGET_VX" ++ "vesrl\t%v0,%v1,%Y2" ++ [(set_attr "op_type" "VRS")]) ++ ++ ++; Shift each element by corresponding vector element ++ ++; veslvb, veslvh, veslvf, veslvg ++(define_insn "vashl3" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (ashift:VI (match_operand:VI 1 "register_operand" "v") ++ (match_operand:VI 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "veslv\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vesravb, vesravh, vesravf, vesravg ++(define_insn "vashr3" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (ashiftrt:VI (match_operand:VI 1 "register_operand" "v") ++ (match_operand:VI 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vesrav\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vesrlvb, vesrlvh, vesrlvf, vesrlvg ++(define_insn "vlshr3" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (lshiftrt:VI (match_operand:VI 1 "register_operand" "v") ++ (match_operand:VI 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vesrlv\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; Vector shift right logical by byte ++ ++; Pattern used by e.g. 
popcount ++(define_insn "*vec_srb" ++ [(set (match_operand:V_HW 0 "register_operand" "=v") ++ (unspec:V_HW [(match_operand:V_HW 1 "register_operand" "v") ++ (match_operand: 2 "register_operand" "v")] ++ UNSPEC_VEC_SRLB))] ++ "TARGET_VX" ++ "vsrlb\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; vmnb, vmnh, vmnf, vmng ++(define_insn "smin3" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (smin:VI (match_operand:VI 1 "register_operand" "%v") ++ (match_operand:VI 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vmn\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vmxb, vmxh, vmxf, vmxg ++(define_insn "smax3" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (smax:VI (match_operand:VI 1 "register_operand" "%v") ++ (match_operand:VI 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vmx\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vmnlb, vmnlh, vmnlf, vmnlg ++(define_insn "umin3" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (umin:VI (match_operand:VI 1 "register_operand" "%v") ++ (match_operand:VI 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vmnl\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vmxlb, vmxlh, vmxlf, vmxlg ++(define_insn "umax3" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (umax:VI (match_operand:VI 1 "register_operand" "%v") ++ (match_operand:VI 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vmxl\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vmeb, vmeh, vmef ++(define_insn "vec_widen_smult_even_" ++ [(set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_operand:VI_QHS 1 "register_operand" "%v") ++ (match_operand:VI_QHS 2 "register_operand" "v")] ++ UNSPEC_VEC_SMULT_EVEN))] ++ "TARGET_VX" ++ "vme\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vmleb, vmleh, vmlef ++(define_insn "vec_widen_umult_even_" ++ [(set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_operand:VI_QHS 1 "register_operand" "%v") ++ (match_operand:VI_QHS 2 "register_operand" "v")] ++ UNSPEC_VEC_UMULT_EVEN))] ++ "TARGET_VX" ++ "vmle\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vmob, vmoh, vmof ++(define_insn "vec_widen_smult_odd_" ++ [(set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_operand:VI_QHS 1 "register_operand" "%v") ++ (match_operand:VI_QHS 2 "register_operand" "v")] ++ UNSPEC_VEC_SMULT_ODD))] ++ "TARGET_VX" ++ "vmo\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vmlob, vmloh, vmlof ++(define_insn "vec_widen_umult_odd_" ++ [(set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_operand:VI_QHS 1 "register_operand" "%v") ++ (match_operand:VI_QHS 2 "register_operand" "v")] ++ UNSPEC_VEC_UMULT_ODD))] ++ "TARGET_VX" ++ "vmlo\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vec_widen_umult_hi ++; vec_widen_umult_lo ++; vec_widen_smult_hi ++; vec_widen_smult_lo ++ ++; vec_widen_ushiftl_hi ++; vec_widen_ushiftl_lo ++; vec_widen_sshiftl_hi ++; vec_widen_sshiftl_lo ++ ++;; ++;; Vector floating point arithmetic instructions ++;; ++ ++(define_insn "addv2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (plus:V2DF (match_operand:V2DF 1 "register_operand" "%v") ++ (match_operand:V2DF 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vfadb\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "subv2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (minus:V2DF (match_operand:V2DF 1 "register_operand" "%v") ++ (match_operand:V2DF 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vfsdb\t%v0,%v1,%v2" ++ [(set_attr "op_type" 
"VRR")]) ++ ++(define_insn "mulv2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (mult:V2DF (match_operand:V2DF 1 "register_operand" "%v") ++ (match_operand:V2DF 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vfmdb\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "divv2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (div:V2DF (match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:V2DF 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vfddb\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "sqrtv2df2" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "v")))] ++ "TARGET_VX" ++ "vfsqdb\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "fmav2df4" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (fma:V2DF (match_operand:V2DF 1 "register_operand" "%v") ++ (match_operand:V2DF 2 "register_operand" "v") ++ (match_operand:V2DF 3 "register_operand" "v")))] ++ "TARGET_VX" ++ "vfmadb\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "fmsv2df4" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (fma:V2DF (match_operand:V2DF 1 "register_operand" "%v") ++ (match_operand:V2DF 2 "register_operand" "v") ++ (neg:V2DF (match_operand:V2DF 3 "register_operand" "v"))))] ++ "TARGET_VX" ++ "vfmsdb\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "negv2df2" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (neg:V2DF (match_operand:V2DF 1 "register_operand" "v")))] ++ "TARGET_VX" ++ "vflcdb\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "absv2df2" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (abs:V2DF (match_operand:V2DF 1 "register_operand" "v")))] ++ "TARGET_VX" ++ "vflpdb\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "*negabsv2df2" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (neg:V2DF (abs:V2DF (match_operand:V2DF 1 "register_operand" "v"))))] ++ "TARGET_VX" ++ "vflndb\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++; Emulate with compare + select ++(define_insn_and_split "smaxv2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (smax:V2DF (match_operand:V2DF 1 "register_operand" "%v") ++ (match_operand:V2DF 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "#" ++ "" ++ [(set (match_dup 3) ++ (gt:V2DI (match_dup 1) (match_dup 2))) ++ (set (match_dup 0) ++ (if_then_else:V2DF ++ (eq (match_dup 3) (match_dup 4)) ++ (match_dup 2) ++ (match_dup 1)))] ++{ ++ operands[3] = gen_reg_rtx (V2DImode); ++ operands[4] = CONST0_RTX (V2DImode); ++}) ++ ++; Emulate with compare + select ++(define_insn_and_split "sminv2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (smin:V2DF (match_operand:V2DF 1 "register_operand" "%v") ++ (match_operand:V2DF 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "#" ++ "" ++ [(set (match_dup 3) ++ (gt:V2DI (match_dup 1) (match_dup 2))) ++ (set (match_dup 0) ++ (if_then_else:V2DF ++ (eq (match_dup 3) (match_dup 4)) ++ (match_dup 1) ++ (match_dup 2)))] ++{ ++ operands[3] = gen_reg_rtx (V2DImode); ++ operands[4] = CONST0_RTX (V2DImode); ++}) ++ ++ ++;; ++;; Integer compares ++;; ++ ++(define_insn "*vec_cmp_nocc" ++ [(set (match_operand:VI 2 "register_operand" "=v") ++ (VICMP_HW_OP:VI (match_operand:VI 0 "register_operand" "v") ++ (match_operand:VI 1 "register_operand" "v")))] ++ "TARGET_VX" ++ "vc\t%v2,%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++;; ++;; Floating point compares ++;; ++ ++; EQ, GT, GE 
++(define_insn "*vec_cmpv2df_nocc" ++ [(set (match_operand:V2DI 0 "register_operand" "=v") ++ (VFCMP_HW_OP:V2DI (match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:V2DF 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vfcdb\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; Expanders for not directly supported comparisons ++ ++; UNEQ a u== b -> !(a > b | b > a) ++(define_expand "vec_cmpuneqv2df" ++ [(set (match_operand:V2DI 0 "register_operand" "=v") ++ (gt:V2DI (match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:V2DF 2 "register_operand" "v"))) ++ (set (match_dup 3) ++ (gt:V2DI (match_dup 2) (match_dup 1))) ++ (set (match_dup 0) (ior:V2DI (match_dup 0) (match_dup 3))) ++ (set (match_dup 0) (not:V2DI (match_dup 0)))] ++ "TARGET_VX" ++{ ++ operands[3] = gen_reg_rtx (V2DImode); ++}) ++ ++; LTGT a <> b -> a > b | b > a ++(define_expand "vec_cmpltgtv2df" ++ [(set (match_operand:V2DI 0 "register_operand" "=v") ++ (gt:V2DI (match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:V2DF 2 "register_operand" "v"))) ++ (set (match_dup 3) (gt:V2DI (match_dup 2) (match_dup 1))) ++ (set (match_dup 0) (ior:V2DI (match_dup 0) (match_dup 3)))] ++ "TARGET_VX" ++{ ++ operands[3] = gen_reg_rtx (V2DImode); ++}) ++ ++; ORDERED (a, b): a >= b | b > a ++(define_expand "vec_orderedv2df" ++ [(set (match_operand:V2DI 0 "register_operand" "=v") ++ (ge:V2DI (match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:V2DF 2 "register_operand" "v"))) ++ (set (match_dup 3) (gt:V2DI (match_dup 2) (match_dup 1))) ++ (set (match_dup 0) (ior:V2DI (match_dup 0) (match_dup 3)))] ++ "TARGET_VX" ++{ ++ operands[3] = gen_reg_rtx (V2DImode); ++}) ++ ++; UNORDERED (a, b): !ORDERED (a, b) ++(define_expand "vec_unorderedv2df" ++ [(set (match_operand:V2DI 0 "register_operand" "=v") ++ (ge:V2DI (match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:V2DF 2 "register_operand" "v"))) ++ (set (match_dup 3) (gt:V2DI (match_dup 2) (match_dup 1))) ++ (set (match_dup 0) (ior:V2DI (match_dup 0) (match_dup 3))) ++ (set (match_dup 0) (not:V2DI (match_dup 0)))] ++ "TARGET_VX" ++{ ++ operands[3] = gen_reg_rtx (V2DImode); ++}) ++ ++(define_insn "*vec_load_pairv2di" ++ [(set (match_operand:V2DI 0 "register_operand" "=v") ++ (vec_concat:V2DI (match_operand:DI 1 "register_operand" "d") ++ (match_operand:DI 2 "register_operand" "d")))] ++ "TARGET_VX" ++ "vlvgp\t%v0,%1,%2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "vllv16qi" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (unspec:V16QI [(match_operand:SI 1 "register_operand" "d") ++ (match_operand:BLK 2 "memory_operand" "Q")] ++ UNSPEC_VEC_LOAD_LEN))] ++ "TARGET_VX" ++ "vll\t%v0,%1,%2" ++ [(set_attr "op_type" "VRS")]) ++ ++; vfenebs, vfenehs, vfenefs ++; vfenezbs, vfenezhs, vfenezfs ++(define_insn "vec_vfenes" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (match_operand:QI 3 "const_mask_operand" "C")] ++ UNSPEC_VEC_VFENE)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) ++ (match_dup 2) ++ (match_dup 3)] ++ UNSPEC_VEC_VFENECC))] ++ "TARGET_VX" ++{ ++ unsigned HOST_WIDE_INT flags = INTVAL (operands[3]); ++ ++ gcc_assert (!(flags & ~(VSTRING_FLAG_ZS | VSTRING_FLAG_CS))); ++ flags &= ~VSTRING_FLAG_CS; ++ ++ if (flags == VSTRING_FLAG_ZS) ++ return "vfenezs\t%v0,%v1,%v2"; ++ return "vfenes\t%v0,%v1,%v2"; ++} ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector select ++ ++; The following 
splitters simplify vec_sel for constant 0 or -1 ++; selection sources. This is required to generate efficient code for ++; vcond. ++ ++; a = b == c; ++(define_split ++ [(set (match_operand:V 0 "register_operand" "") ++ (if_then_else:V ++ (eq (match_operand: 3 "register_operand" "") ++ (match_operand:V 4 "const0_operand" "")) ++ (match_operand:V 1 "const0_operand" "") ++ (match_operand:V 2 "all_ones_operand" "")))] ++ "TARGET_VX" ++ [(set (match_dup 0) (match_dup 3))] ++{ ++ PUT_MODE (operands[3], mode); ++}) ++ ++; a = ~(b == c) ++(define_split ++ [(set (match_operand:V 0 "register_operand" "") ++ (if_then_else:V ++ (eq (match_operand: 3 "register_operand" "") ++ (match_operand:V 4 "const0_operand" "")) ++ (match_operand:V 1 "all_ones_operand" "") ++ (match_operand:V 2 "const0_operand" "")))] ++ "TARGET_VX" ++ [(set (match_dup 0) (not:V (match_dup 3)))] ++{ ++ PUT_MODE (operands[3], mode); ++}) ++ ++; a = b != c ++(define_split ++ [(set (match_operand:V 0 "register_operand" "") ++ (if_then_else:V ++ (ne (match_operand: 3 "register_operand" "") ++ (match_operand:V 4 "const0_operand" "")) ++ (match_operand:V 1 "all_ones_operand" "") ++ (match_operand:V 2 "const0_operand" "")))] ++ "TARGET_VX" ++ [(set (match_dup 0) (match_dup 3))] ++{ ++ PUT_MODE (operands[3], mode); ++}) ++ ++; a = ~(b != c) ++(define_split ++ [(set (match_operand:V 0 "register_operand" "") ++ (if_then_else:V ++ (ne (match_operand: 3 "register_operand" "") ++ (match_operand:V 4 "const0_operand" "")) ++ (match_operand:V 1 "const0_operand" "") ++ (match_operand:V 2 "all_ones_operand" "")))] ++ "TARGET_VX" ++ [(set (match_dup 0) (not:V (match_dup 3)))] ++{ ++ PUT_MODE (operands[3], mode); ++}) ++ ++; op0 = op3 == 0 ? op1 : op2 ++(define_insn "*vec_sel0" ++ [(set (match_operand:V 0 "register_operand" "=v") ++ (if_then_else:V ++ (eq (match_operand: 3 "register_operand" "v") ++ (match_operand: 4 "const0_operand" "")) ++ (match_operand:V 1 "register_operand" "v") ++ (match_operand:V 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vsel\t%v0,%2,%1,%3" ++ [(set_attr "op_type" "VRR")]) ++ ++; op0 = !op3 == 0 ? op1 : op2 ++(define_insn "*vec_sel0" ++ [(set (match_operand:V 0 "register_operand" "=v") ++ (if_then_else:V ++ (eq (not: (match_operand: 3 "register_operand" "v")) ++ (match_operand: 4 "const0_operand" "")) ++ (match_operand:V 1 "register_operand" "v") ++ (match_operand:V 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vsel\t%v0,%1,%2,%3" ++ [(set_attr "op_type" "VRR")]) ++ ++; op0 = op3 == -1 ? op1 : op2 ++(define_insn "*vec_sel1" ++ [(set (match_operand:V 0 "register_operand" "=v") ++ (if_then_else:V ++ (eq (match_operand: 3 "register_operand" "v") ++ (match_operand: 4 "all_ones_operand" "")) ++ (match_operand:V 1 "register_operand" "v") ++ (match_operand:V 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vsel\t%v0,%1,%2,%3" ++ [(set_attr "op_type" "VRR")]) ++ ++; op0 = !op3 == -1 ? 
op1 : op2 ++(define_insn "*vec_sel1" ++ [(set (match_operand:V 0 "register_operand" "=v") ++ (if_then_else:V ++ (eq (not: (match_operand: 3 "register_operand" "v")) ++ (match_operand: 4 "all_ones_operand" "")) ++ (match_operand:V 1 "register_operand" "v") ++ (match_operand:V 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vsel\t%v0,%2,%1,%3" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++ ++; reduc_smin ++; reduc_smax ++; reduc_umin ++; reduc_umax ++ ++; vec_shl vrep + vsl ++; vec_shr ++ ++; vec_pack_trunc ++; vec_pack_ssat ++; vec_pack_usat ++; vec_pack_sfix_trunc ++; vec_pack_ufix_trunc ++; vec_unpacks_hi ++; vec_unpacks_low ++; vec_unpacku_hi ++; vec_unpacku_low ++; vec_unpacks_float_hi ++; vec_unpacks_float_lo ++; vec_unpacku_float_hi ++; vec_unpacku_float_lo +--- gcc/config/s390/vx-builtins.md 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/config/s390/vx-builtins.md 2016-05-11 19:46:05.504890170 +0200 +@@ -0,0 +1,2081 @@ ++;;- Instruction patterns for the System z vector facility builtins. ++;; Copyright (C) 2015 Free Software Foundation, Inc. ++;; Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com) ++ ++;; This file is part of GCC. ++ ++;; GCC is free software; you can redistribute it and/or modify it under ++;; the terms of the GNU General Public License as published by the Free ++;; Software Foundation; either version 3, or (at your option) any later ++;; version. ++ ++;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++;; WARRANTY; without even the implied warranty of MERCHANTABILITY or ++;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++;; for more details. ++ ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; . ++ ++; The patterns in this file are enabled with -mzvector ++ ++(define_mode_iterator V_HW_64 [V2DI V2DF]) ++(define_mode_iterator V_HW_32_64 [V4SI V2DI V2DF]) ++(define_mode_iterator VI_HW_SD [V4SI V2DI]) ++(define_mode_iterator V_HW_HSD [V8HI V4SI V2DI V2DF]) ++(define_mode_iterator VI_HW_HSD [V8HI V4SI V2DI]) ++ ++; The element type of the vector with floating point modes translated ++; to int modes of the same size. ++(define_mode_attr non_vec_int[(V1QI "QI") (V2QI "QI") (V4QI "QI") (V8QI "QI") (V16QI "QI") ++ (V1HI "HI") (V2HI "HI") (V4HI "HI") (V8HI "HI") ++ (V1SI "SI") (V2SI "SI") (V4SI "SI") ++ (V1DI "DI") (V2DI "DI") ++ (V1SF "SI") (V2SF "SI") (V4SF "SI") ++ (V1DF "DI") (V2DF "DI")]) ++ ++; Condition code modes generated by int comparisons ++(define_mode_iterator VICMP [CCVEQ CCVH CCVHU]) ++ ++; Comparisons supported by the vec_cmp* builtins ++(define_code_iterator intcmp [eq gt gtu ge geu lt ltu le leu]) ++(define_code_iterator fpcmp [eq gt ge lt le]) ++ ++; Comparisons supported by the vec_all/any* builtins ++(define_code_iterator intcmpcc [eq ne gt ge lt le gtu geu ltu leu]) ++(define_code_iterator fpcmpcc [eq ne gt ge unle unlt lt le]) ++ ++; Flags for vector string instructions (vfae all 4, vfee only ZS and CS, vstrc all 4) ++(define_constants ++ [(VSTRING_FLAG_IN 8) ; invert result ++ (VSTRING_FLAG_RT 4) ; result type ++ (VSTRING_FLAG_ZS 2) ; zero search ++ (VSTRING_FLAG_CS 1)]) ; condition code set ++ ++; Rounding modes as being used for e.g. 
VFI ++(define_constants ++ [(VEC_RND_CURRENT 0) ++ (VEC_RND_NEAREST_AWAY_FROM_ZERO 1) ++ (VEC_RND_SHORT_PREC 3) ++ (VEC_RND_NEAREST_TO_EVEN 4) ++ (VEC_RND_TO_ZERO 5) ++ (VEC_RND_TO_INF 6) ++ (VEC_RND_TO_MINF 7)]) ++ ++ ++; Vector gather element ++ ++(define_insn "vec_gather_element" ++ [(set (match_operand:V_HW_32_64 0 "register_operand" "=v") ++ (unspec:V_HW_32_64 [(match_operand:V_HW_32_64 1 "register_operand" "0") ++ (match_operand: 2 "register_operand" "v") ++ (match_operand:BLK 3 "memory_operand" "QR") ++ (match_operand:QI 4 "const_mask_operand" "C")] ++ UNSPEC_VEC_GATHER))] ++ "TARGET_VX && UINTVAL (operands[4]) < GET_MODE_NUNITS (mode)" ++ "vge\t%0,%O3(%v2,%R3),%b4" ++ [(set_attr "op_type" "VRV")]) ++ ++(define_expand "vec_genmask" ++ [(match_operand:VI_HW 0 "register_operand" "=v") ++ (match_operand:QI 1 "const_int_operand" "C") ++ (match_operand:QI 2 "const_int_operand" "C")] ++ "TARGET_VX" ++{ ++ int nunits = GET_MODE_NUNITS (mode); ++ int bitlen = GET_MODE_UNIT_BITSIZE (mode); ++ /* To bit little endian style. */ ++ int end = bitlen - 1 - INTVAL (operands[1]); ++ int start = bitlen - 1 - INTVAL (operands[2]); ++ rtx const_vec[16]; ++ int i; ++ unsigned HOST_WIDE_INT mask; ++ bool swapped_p = false; ++ ++ if (start > end) ++ { ++ i = start - 1; start = end + 1; end = i; ++ swapped_p = true; ++ } ++ if (end == 63) ++ mask = (unsigned HOST_WIDE_INT) -1; ++ else ++ mask = ((unsigned HOST_WIDE_INT) 1 << (end + 1)) - 1; ++ ++ mask &= ~(((unsigned HOST_WIDE_INT) 1 << start) - 1); ++ ++ if (swapped_p) ++ mask = ~mask; ++ ++ for (i = 0; i < nunits; i++) ++ const_vec[i] = GEN_INT (trunc_int_for_mode (mask, ++ GET_MODE_INNER (mode))); ++ ++ emit_insn (gen_rtx_SET (VOIDmode, operands[0], ++ gen_rtx_CONST_VECTOR (mode, ++ gen_rtvec_v (nunits, const_vec)))); ++ DONE; ++}) ++ ++(define_expand "vec_genbytemaskv16qi" ++ [(match_operand:V16QI 0 "register_operand" "") ++ (match_operand:HI 1 "const_int_operand" "")] ++ "TARGET_VX" ++{ ++ int i; ++ unsigned mask = 0x8000; ++ rtx const_vec[16]; ++ unsigned HOST_WIDE_INT byte_mask = INTVAL (operands[1]); ++ ++ for (i = 0; i < 16; i++) ++ { ++ if (mask & byte_mask) ++ const_vec[i] = constm1_rtx; ++ else ++ const_vec[i] = const0_rtx; ++ mask = mask >> 1; ++ } ++ emit_insn (gen_rtx_SET (VOIDmode, operands[0], ++ gen_rtx_CONST_VECTOR (V16QImode, ++ gen_rtvec_v (16, const_vec)))); ++ DONE; ++}) ++ ++(define_expand "vec_splats" ++ [(set (match_operand:V_HW 0 "register_operand" "") ++ (vec_duplicate:V_HW (match_operand: 1 "general_operand" "")))] ++ "TARGET_VX") ++ ++(define_expand "vec_insert" ++ [(set (match_operand:V_HW 0 "register_operand" "") ++ (unspec:V_HW [(match_operand: 2 "register_operand" "") ++ (match_operand:SI 3 "shift_count_or_setmem_operand" "") ++ (match_operand:V_HW 1 "register_operand" "")] ++ UNSPEC_VEC_SET))] ++ "TARGET_VX" ++ "") ++ ++; This is vec_set + modulo arithmetic on the element selector (op 2) ++(define_expand "vec_promote" ++ [(set (match_operand:V_HW 0 "register_operand" "") ++ (unspec:V_HW [(match_operand: 1 "register_operand" "") ++ (match_operand:SI 2 "shift_count_or_setmem_operand" "") ++ (match_dup 0)] ++ UNSPEC_VEC_SET))] ++ "TARGET_VX" ++ "") ++ ++; vec_extract is also an RTL standard name -> vector.md ++ ++(define_insn "vec_insert_and_zero" ++ [(set (match_operand:V_HW 0 "register_operand" "=v") ++ (unspec:V_HW [(match_operand: 1 "memory_operand" "QR")] ++ UNSPEC_VEC_INSERT_AND_ZERO))] ++ "TARGET_VX" ++ "vllez\t%v0,%1" ++ [(set_attr "op_type" "VRX")]) ++ ++(define_insn "vlbb" ++ [(set (match_operand:V16QI 0 
"register_operand" "=v") ++ (unspec:V16QI [(match_operand:BLK 1 "memory_operand" "QR") ++ (match_operand:QI 2 "const_mask_operand" "C")] ++ UNSPEC_VEC_LOAD_BNDRY))] ++ "TARGET_VX && UINTVAL (operands[2]) < 7" ++ "vlbb\t%v0,%1,%2" ++ [(set_attr "op_type" "VRX")]) ++ ++; FIXME: The following two patterns might using vec_merge. But what is ++; the canonical form: (vec_select (vec_merge op0 op1)) or (vec_merge ++; (vec_select op0) (vec_select op1) ++(define_insn "vec_mergeh" ++ [(set (match_operand:V_HW 0 "register_operand" "=v") ++ (unspec:V_HW [(match_operand:V_HW 1 "register_operand" "v") ++ (match_operand:V_HW 2 "register_operand" "v")] ++ UNSPEC_VEC_MERGEH))] ++ "TARGET_VX" ++ "vmrh\t%v0,%1,%2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "vec_mergel" ++ [(set (match_operand:V_HW 0 "register_operand" "=v") ++ (unspec:V_HW [(match_operand:V_HW 1 "register_operand" "v") ++ (match_operand:V_HW 2 "register_operand" "v")] ++ UNSPEC_VEC_MERGEL))] ++ "TARGET_VX" ++ "vmrl\t%v0,%1,%2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector pack ++ ++(define_insn "vec_pack" ++ [(set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_operand:VI_HW_HSD 1 "register_operand" "v") ++ (match_operand:VI_HW_HSD 2 "register_operand" "v")] ++ UNSPEC_VEC_PACK))] ++ "TARGET_VX" ++ "vpk\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector pack saturate ++ ++(define_insn "vec_packs" ++ [(set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_operand:VI_HW_HSD 1 "register_operand" "v") ++ (match_operand:VI_HW_HSD 2 "register_operand" "v")] ++ UNSPEC_VEC_PACK_SATURATE))] ++ "TARGET_VX" ++ "vpks\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; This is vec_packs_cc + loading cc into a caller specified memory location. ++(define_expand "vec_packs_cc" ++ [(parallel ++ [(set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_operand:VI_HW_HSD 1 "register_operand" "") ++ (match_operand:VI_HW_HSD 2 "register_operand" "")] ++ UNSPEC_VEC_PACK_SATURATE_GENCC)) ++ (set (match_operand: 0 "register_operand" "") ++ (unspec: [(match_dup 1) (match_dup 2)] ++ UNSPEC_VEC_PACK_SATURATE_CC))]) ++ (set (match_dup 4) ++ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT)) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (match_dup 4))] ++ "TARGET_VX" ++{ ++ operands[4] = gen_reg_rtx (SImode); ++}) ++ ++(define_insn "*vec_packs_cc" ++ [(set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_operand:VI_HW_HSD 1 "register_operand" "v") ++ (match_operand:VI_HW_HSD 2 "register_operand" "v")] ++ UNSPEC_VEC_PACK_SATURATE_GENCC)) ++ (set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_dup 1) (match_dup 2)] ++ UNSPEC_VEC_PACK_SATURATE_CC))] ++ "TARGET_VX" ++ "vpkss\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector pack logical saturate ++ ++(define_insn "vec_packsu" ++ [(set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_operand:VI_HW_HSD 1 "register_operand" "v") ++ (match_operand:VI_HW_HSD 2 "register_operand" "v")] ++ UNSPEC_VEC_PACK_UNSIGNED_SATURATE))] ++ "TARGET_VX" ++ "vpkls\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; Emulate saturate unsigned pack on signed operands. ++; Zero out negative elements and continue with the unsigned saturating pack. 
++(define_expand "vec_packsu_u" ++ [(set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_operand:VI_HW_HSD 1 "register_operand" "v") ++ (match_operand:VI_HW_HSD 2 "register_operand" "v")] ++ UNSPEC_VEC_PACK_UNSIGNED_SATURATE))] ++ "TARGET_VX" ++{ ++ rtx null_vec = CONST0_RTX(mode); ++ enum machine_mode half_mode; ++ switch (mode) ++ { ++ case V8HImode: half_mode = V16QImode; break; ++ case V4SImode: half_mode = V8HImode; break; ++ case V2DImode: half_mode = V4SImode; break; ++ default: gcc_unreachable (); ++ } ++ s390_expand_vcond (operands[1], operands[1], null_vec, ++ GE, operands[1], null_vec); ++ s390_expand_vcond (operands[2], operands[2], null_vec, ++ GE, operands[2], null_vec); ++ emit_insn (gen_rtx_SET (VOIDmode, operands[0], ++ gen_rtx_UNSPEC (half_mode, ++ gen_rtvec (2, operands[1], operands[2]), ++ UNSPEC_VEC_PACK_UNSIGNED_SATURATE))); ++ DONE; ++}) ++ ++; This is vec_packsu_cc + loading cc into a caller specified memory location. ++; FIXME: The reg to target mem copy should be issued by reload?! ++(define_expand "vec_packsu_cc" ++ [(parallel ++ [(set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_operand:VI_HW_HSD 1 "register_operand" "") ++ (match_operand:VI_HW_HSD 2 "register_operand" "")] ++ UNSPEC_VEC_PACK_UNSIGNED_SATURATE_GENCC)) ++ (set (match_operand: 0 "register_operand" "") ++ (unspec: [(match_dup 1) (match_dup 2)] ++ UNSPEC_VEC_PACK_UNSIGNED_SATURATE_CC))]) ++ (set (match_dup 4) ++ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT)) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (match_dup 4))] ++ "TARGET_VX" ++{ ++ operands[4] = gen_reg_rtx (SImode); ++}) ++ ++(define_insn "*vec_packsu_cc" ++ [(set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_operand:VI_HW_HSD 1 "register_operand" "v") ++ (match_operand:VI_HW_HSD 2 "register_operand" "v")] ++ UNSPEC_VEC_PACK_UNSIGNED_SATURATE_GENCC)) ++ (set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_dup 1) (match_dup 2)] ++ UNSPEC_VEC_PACK_UNSIGNED_SATURATE_CC))] ++ "TARGET_VX" ++ "vpklss\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector permute ++ ++; vec_perm is also RTL standard name, but we can only use it for V16QI ++ ++(define_insn "vec_zperm" ++ [(set (match_operand:V_HW_HSD 0 "register_operand" "=v") ++ (unspec:V_HW_HSD [(match_operand:V_HW_HSD 1 "register_operand" "v") ++ (match_operand:V_HW_HSD 2 "register_operand" "v") ++ (match_operand:V16QI 3 "register_operand" "v")] ++ UNSPEC_VEC_PERM))] ++ "TARGET_VX" ++ "vperm\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_expand "vec_permi" ++ [(set (match_operand:V_HW_64 0 "register_operand" "") ++ (unspec:V_HW_64 [(match_operand:V_HW_64 1 "register_operand" "") ++ (match_operand:V_HW_64 2 "register_operand" "") ++ (match_operand:QI 3 "const_mask_operand" "")] ++ UNSPEC_VEC_PERMI))] ++ "TARGET_VX" ++{ ++ HOST_WIDE_INT val = INTVAL (operands[3]); ++ operands[3] = GEN_INT ((val & 1) | (val & 2) << 1); ++}) ++ ++(define_insn "*vec_permi" ++ [(set (match_operand:V_HW_64 0 "register_operand" "=v") ++ (unspec:V_HW_64 [(match_operand:V_HW_64 1 "register_operand" "v") ++ (match_operand:V_HW_64 2 "register_operand" "v") ++ (match_operand:QI 3 "const_mask_operand" "C")] ++ UNSPEC_VEC_PERMI))] ++ "TARGET_VX && (UINTVAL (operands[3]) & 10) == 0" ++ "vpdi\t%v0,%v1,%v2,%b3" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector replicate ++ ++ ++; Replicate from vector element ++(define_expand "vec_splat" ++ [(set (match_operand:V_HW 0 "register_operand" "") ++ (vec_duplicate:V_HW (vec_select: ++ (match_operand:V_HW 1 
"register_operand" "") ++ (parallel ++ [(match_operand:QI 2 "const_mask_operand" "")]))))] ++ "TARGET_VX") ++ ++; Vector scatter element ++ ++; vscef, vsceg ++ ++; A 64 bit target adress generated from 32 bit elements ++(define_insn "vec_scatter_elementv4si_DI" ++ [(set (mem:SI ++ (plus:DI (zero_extend:DI ++ (unspec:SI [(match_operand:V4SI 1 "register_operand" "v") ++ (match_operand:QI 3 "const_mask_operand" "C")] ++ UNSPEC_VEC_EXTRACT)) ++ (match_operand:SI 2 "address_operand" "ZQ"))) ++ (unspec:SI [(match_operand:V4SI 0 "register_operand" "v") ++ (match_dup 3)] UNSPEC_VEC_EXTRACT))] ++ "TARGET_VX && TARGET_64BIT && UINTVAL (operands[3]) < 4" ++ "vscef\t%v0,%O2(%v1,%R2),%3" ++ [(set_attr "op_type" "VRV")]) ++ ++; A 31 bit target address is generated from 64 bit elements ++(define_insn "vec_scatter_element_SI" ++ [(set (mem: ++ (plus:SI (subreg:SI ++ (unspec: [(match_operand:V_HW_64 1 "register_operand" "v") ++ (match_operand:QI 3 "const_mask_operand" "C")] ++ UNSPEC_VEC_EXTRACT) 4) ++ (match_operand:SI 2 "address_operand" "ZQ"))) ++ (unspec: [(match_operand:V_HW_64 0 "register_operand" "v") ++ (match_dup 3)] UNSPEC_VEC_EXTRACT))] ++ "TARGET_VX && !TARGET_64BIT && UINTVAL (operands[3]) < GET_MODE_NUNITS (mode)" ++ "vsce\t%v0,%O2(%v1,%R2),%3" ++ [(set_attr "op_type" "VRV")]) ++ ++; Element size and target adress size is the same ++(define_insn "vec_scatter_element_" ++ [(set (mem: ++ (plus: (unspec: ++ [(match_operand: 1 "register_operand" "v") ++ (match_operand:QI 3 "const_mask_operand" "C")] ++ UNSPEC_VEC_EXTRACT) ++ (match_operand:DI 2 "address_operand" "ZQ"))) ++ (unspec: [(match_operand:V_HW_32_64 0 "register_operand" "v") ++ (match_dup 3)] UNSPEC_VEC_EXTRACT))] ++ "TARGET_VX && UINTVAL (operands[3]) < GET_MODE_NUNITS (mode)" ++ "vsce\t%v0,%O2(%v1,%R2),%3" ++ [(set_attr "op_type" "VRV")]) ++ ++; Depending on the address size we have to expand a different pattern. ++; This however cannot be represented in s390-builtins.def so we do the ++; multiplexing here in the expander. ++(define_expand "vec_scatter_element" ++ [(match_operand:V_HW_32_64 0 "register_operand" "") ++ (match_operand: 1 "register_operand" "") ++ (match_operand 2 "address_operand" "") ++ (match_operand:QI 3 "const_mask_operand" "")] ++ "TARGET_VX" ++{ ++ if (TARGET_64BIT) ++ { ++ PUT_MODE (operands[2], DImode); ++ emit_insn ( ++ gen_vec_scatter_element_DI (operands[0], operands[1], ++ operands[2], operands[3])); ++ } ++ else ++ { ++ PUT_MODE (operands[2], SImode); ++ emit_insn ( ++ gen_vec_scatter_element_SI (operands[0], operands[1], ++ operands[2], operands[3])); ++ } ++ DONE; ++}) ++ ++ ++; Vector select ++ ++; Operand 3 selects bits from either OP1 (0) or OP2 (1) ++ ++; Comparison operator should not matter as long as we always use the same ?! ++ ++; Operands 1 and 2 are swapped in order to match the altivec builtin. 
++; If operand 3 is a const_int bitmask this would be vec_merge
++(define_expand "vec_sel"
++  [(set (match_operand:V_HW 0 "register_operand" "")
++	(if_then_else:V_HW
++	 (eq (match_operand: 3 "register_operand" "")
++	     (match_dup 4))
++	 (match_operand:V_HW 2 "register_operand" "")
++	 (match_operand:V_HW 1 "register_operand" "")))]
++  "TARGET_VX"
++{
++  operands[4] = CONST0_RTX (mode);
++})
++
++
++; Vector sign extend to doubleword
++
++; Sign extend the rightmost vector element to the respective doubleword
++(define_insn "vec_extend"
++  [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v")]
++			  UNSPEC_VEC_EXTEND))]
++  "TARGET_VX"
++  "vseg\t%v0,%1"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector store with length
++
++; Store the bytes of vector OP0 to memory operand OP2; OP1 contains the
++; index of the highest byte of OP0 to be stored.
++(define_insn "vstl"
++  [(set (match_operand:BLK 2 "memory_operand" "=Q")
++	(unspec:BLK [(match_operand:V 0 "register_operand" "v")
++		     (match_operand:SI 1 "register_operand" "d")]
++		    UNSPEC_VEC_STORE_LEN))]
++  "TARGET_VX"
++  "vstl\t%v0,%1,%2"
++  [(set_attr "op_type" "VRS")])
++
++
++; Vector unpack high
++
++; vuphb, vuphh, vuphf
++(define_insn "vec_unpackh"
++  [(set (match_operand: 0 "register_operand" "=v")
++	(unspec: [(match_operand:VI_HW_QHS 1 "register_operand" "v")]
++		 UNSPEC_VEC_UNPACKH))]
++  "TARGET_VX"
++  "vuph\t%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++; vuplhb, vuplhh, vuplhf
++(define_insn "vec_unpackh_l"
++  [(set (match_operand: 0 "register_operand" "=v")
++	(unspec: [(match_operand:VI_HW_QHS 1 "register_operand" "v")]
++		 UNSPEC_VEC_UNPACKH_L))]
++  "TARGET_VX"
++  "vuplh\t%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector unpack low
++
++; vuplb, vuplhw, vuplf
++(define_insn "vec_unpackl"
++  [(set (match_operand: 0 "register_operand" "=v")
++	(unspec: [(match_operand:VI_HW_QHS 1 "register_operand" "v")]
++		 UNSPEC_VEC_UNPACKL))]
++  "TARGET_VX"
++  "vupl\t%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++; vupllb, vupllh, vupllf
++(define_insn "vec_unpackl_l"
++  [(set (match_operand: 0 "register_operand" "=v")
++	(unspec: [(match_operand:VI_HW_QHS 1 "register_operand" "v")]
++		 UNSPEC_VEC_UNPACKL_L))]
++  "TARGET_VX"
++  "vupll\t%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector add
++
++; vaq
++
++; The zvector builtins use V16QI operands.  So replace the modes in order
++; to map this to a TImode add.  We have to keep the V16QI mode
++; operands in the expander in order to allow some operand type
++; checking when expanding the builtin.
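An editorial sketch of what the V16QI to TImode mapping below means at the source level, assuming GCC's generic vector extension and a compiler providing unsigned __int128; the names are invented, and the byte-to-word punning shown reflects the big-endian s390 layout:

#include <string.h>

typedef unsigned char uv16qi __attribute__ ((vector_size (16)));

/* 128-bit add on byte-vector operands: a single vaq instruction.  */
uv16qi
add_u128 (uv16qi a, uv16qi b)
{
  unsigned __int128 x, y;
  memcpy (&x, &a, 16);
  memcpy (&y, &b, 16);
  x += y;
  memcpy (&a, &x, 16);
  return a;
}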
++(define_expand "vec_add_u128" ++ [(match_operand:V16QI 0 "register_operand" "") ++ (match_operand:V16QI 1 "register_operand" "") ++ (match_operand:V16QI 2 "register_operand" "")] ++ "TARGET_VX" ++{ ++ rtx op0 = gen_rtx_SUBREG (TImode, operands[0], 0); ++ rtx op1 = gen_rtx_SUBREG (TImode, operands[1], 0); ++ rtx op2 = gen_rtx_SUBREG (TImode, operands[2], 0); ++ ++ emit_insn (gen_rtx_SET (VOIDmode, op0, ++ gen_rtx_PLUS (TImode, op1, op2))); ++ DONE; ++}) ++ ++; Vector add compute carry ++ ++(define_insn "vec_addc" ++ [(set (match_operand:VI_HW 0 "register_operand" "=v") ++ (unspec:VI_HW [(match_operand:VI_HW 1 "register_operand" "%v") ++ (match_operand:VI_HW 2 "register_operand" "v")] ++ UNSPEC_VEC_ADDC))] ++ "TARGET_VX" ++ "vacc\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "vec_addc_u128" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%v") ++ (match_operand:V16QI 2 "register_operand" "v")] ++ UNSPEC_VEC_ADDC_U128))] ++ "TARGET_VX" ++ "vaccq\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector add with carry ++ ++(define_insn "vec_adde_u128" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%v") ++ (match_operand:V16QI 2 "register_operand" "v") ++ (match_operand:V16QI 3 "register_operand" "v")] ++ UNSPEC_VEC_ADDE_U128))] ++ "TARGET_VX" ++ "vacq\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector add with carry compute carry ++ ++(define_insn "vec_addec_u128" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%v") ++ (match_operand:V16QI 2 "register_operand" "v") ++ (match_operand:V16QI 3 "register_operand" "v")] ++ UNSPEC_VEC_ADDEC_U128))] ++ "TARGET_VX" ++ "vacccq\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector and ++ ++; The following two patterns allow mixed mode and's as required for the intrinsics. ++(define_insn "and_av2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (and:V2DF (subreg:V2DF (match_operand:V2DI 1 "register_operand" "v") 0) ++ (match_operand:V2DF 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vn\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "and_cv2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (and:V2DF (match_operand:V2DF 1 "register_operand" "v") ++ (subreg:V2DF (match_operand:V2DI 2 "register_operand" "v") 0)))] ++ "TARGET_VX" ++ "vn\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector and with complement ++ ++; vnc ++(define_insn "vec_andc3" ++ [(set (match_operand:VT_HW 0 "register_operand" "=v") ++ (and:VT_HW (not:VT_HW (match_operand:VT_HW 2 "register_operand" "v")) ++ (match_operand:VT_HW 1 "register_operand" "v")))] ++ "TARGET_VX" ++ "vnc\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; The following two patterns allow mixed mode and's as required for the intrinsics. 
++(define_insn "vec_andc_av2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (and:V2DF (not:V2DF (match_operand:V2DF 2 "register_operand" "v")) ++ (subreg:V2DF (match_operand:V2DI 1 "register_operand" "v") 0)))] ++ ++ "TARGET_VX" ++ "vnc\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "vec_andc_cv2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (and:V2DF (not:V2DF (subreg:V2DF (match_operand:V2DI 2 "register_operand" "v") 0)) ++ (match_operand:V2DF 1 "register_operand" "v")))] ++ "TARGET_VX" ++ "vnc\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector average ++ ++(define_insn "vec_avg" ++ [(set (match_operand:VI_HW 0 "register_operand" "=v") ++ (unspec:VI_HW [(match_operand:VI_HW 1 "register_operand" "%v") ++ (match_operand:VI_HW 2 "register_operand" "v")] ++ UNSPEC_VEC_AVG))] ++ "TARGET_VX" ++ "vavg\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; Vector average logical ++ ++(define_insn "vec_avgu" ++ [(set (match_operand:VI_HW 0 "register_operand" "=v") ++ (unspec:VI_HW [(match_operand:VI_HW 1 "register_operand" "%v") ++ (match_operand:VI_HW 2 "register_operand" "v")] ++ UNSPEC_VEC_AVGU))] ++ "TARGET_VX" ++ "vavgl\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector checksum ++ ++(define_insn "vec_checksum" ++ [(set (match_operand:V4SI 0 "register_operand" "=v") ++ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") ++ (match_operand:V4SI 2 "register_operand" "v")] ++ UNSPEC_VEC_CHECKSUM))] ++ "TARGET_VX" ++ "vcksm\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++;; ++;; Vector compare ++;; ++ ++; vec_all/any int compares ++ ++(define_expand "vec_all_" ++ [(match_operand:SI 0 "register_operand" "") ++ (intcmpcc (match_operand:VI_HW 1 "register_operand" "") ++ (match_operand:VI_HW 2 "register_operand" ""))] ++ "TARGET_VX" ++{ ++ s390_expand_vec_compare_cc (operands[0], ++ , ++ operands[1], ++ operands[2], ++ true); ++ DONE; ++}) ++ ++(define_expand "vec_any_" ++ [(match_operand:SI 0 "register_operand" "") ++ (intcmpcc (match_operand:VI_HW 1 "register_operand" "") ++ (match_operand:VI_HW 2 "register_operand" ""))] ++ "TARGET_VX" ++{ ++ s390_expand_vec_compare_cc (operands[0], ++ , ++ operands[1], ++ operands[2], ++ false); ++ DONE; ++}) ++ ++; vec_all/any fp compares ++ ++(define_expand "vec_all_v2df" ++ [(match_operand:SI 0 "register_operand" "") ++ (fpcmpcc (match_operand:V2DF 1 "register_operand" "") ++ (match_operand:V2DF 2 "register_operand" ""))] ++ "TARGET_VX" ++{ ++ s390_expand_vec_compare_cc (operands[0], ++ , ++ operands[1], ++ operands[2], ++ true); ++ DONE; ++}) ++ ++(define_expand "vec_any_v2df" ++ [(match_operand:SI 0 "register_operand" "") ++ (fpcmpcc (match_operand:V2DF 1 "register_operand" "") ++ (match_operand:V2DF 2 "register_operand" ""))] ++ "TARGET_VX" ++{ ++ s390_expand_vec_compare_cc (operands[0], ++ , ++ operands[1], ++ operands[2], ++ false); ++ DONE; ++}) ++ ++ ++; Compare without generating CC ++ ++(define_expand "vec_cmp" ++ [(set (match_operand:VI_HW 0 "register_operand" "=v") ++ (intcmp:VI_HW (match_operand:VI_HW 1 "register_operand" "v") ++ (match_operand:VI_HW 2 "register_operand" "v")))] ++ "TARGET_VX" ++{ ++ s390_expand_vec_compare (operands[0], , operands[1], operands[2]); ++ DONE; ++}) ++ ++(define_expand "vec_cmpv2df" ++ [(set (match_operand:V2DI 0 "register_operand" "=v") ++ (fpcmp:V2DI (match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:V2DF 2 "register_operand" "v")))] ++ "TARGET_VX" ++{ ++ s390_expand_vec_compare (operands[0], , operands[1], 
operands[2]); ++ DONE; ++}) ++ ++ ++; Vector count leading zeros ++ ++; vec_cntlz -> clz ++; vec_cnttz -> ctz ++ ++; Vector xor ++ ++; vec_xor -> xor ++ ++; The following two patterns allow mixed mode xor's as required for the intrinsics. ++(define_insn "xor_av2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (xor:V2DF (subreg:V2DF (match_operand:V2DI 1 "register_operand" "v") 0) ++ (match_operand:V2DF 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vx\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "xor_cv2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (xor:V2DF (match_operand:V2DF 1 "register_operand" "v") ++ (subreg:V2DF (match_operand:V2DI 2 "register_operand" "v") 0)))] ++ "TARGET_VX" ++ "vx\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector Galois field multiply sum ++ ++(define_insn "vec_gfmsum" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v")] ++ UNSPEC_VEC_GFMSUM))] ++ "TARGET_VX" ++ "vgfm\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "vec_gfmsum_128" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (unspec:V16QI [(match_operand:V2DI 1 "register_operand" "v") ++ (match_operand:V2DI 2 "register_operand" "v")] ++ UNSPEC_VEC_GFMSUM_128))] ++ "TARGET_VX" ++ "vgfmg\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "vec_gfmsum_accum" ++ [(set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_operand:VI_HW_QHS 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (match_operand: 3 "register_operand" "v")] ++ UNSPEC_VEC_GFMSUM_ACCUM))] ++ "TARGET_VX" ++ "vgfma\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "vec_gfmsum_accum_128" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (unspec:V16QI [(match_operand:V2DI 1 "register_operand" "v") ++ (match_operand:V2DI 2 "register_operand" "v") ++ (match_operand:V16QI 3 "register_operand" "v")] ++ UNSPEC_VEC_GFMSUM_ACCUM_128))] ++ "TARGET_VX" ++ "vgfmag\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; FIXME: vec_neg ? 
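The mixed-mode xor patterns above exist so that intrinsics code which reinterprets an FP vector as an integer vector for bit twiddling still maps to a single vx. An editorial sketch, assuming GCC's generic vector extension (names invented); incidentally, a sign-bit xor like this would also be one plausible answer to the vec_neg FIXME:

typedef double v2df __attribute__ ((vector_size (16)));
typedef unsigned long long v2du __attribute__ ((vector_size (16)));

/* Flip the sign bit of both lanes: one vx thanks to the patterns above.  */
v2df
negate_v2df (v2df x)
{
  v2du sign = { 0x8000000000000000ULL, 0x8000000000000000ULL };
  return (v2df) ((v2du) x ^ sign);
}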
++
++; Vector load positive: vec_abs -> abs
++; Vector maximum vec_max -> smax, logical vec_max -> umax
++; Vector minimum vec_min -> smin, logical vec_min -> umin
++
++
++; Vector multiply and add high
++
++; vec_mladd -> vec_vmal
++; vmalb, vmalh, vmalf
++(define_insn "vec_vmal"
++  [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "%v")
++			   (match_operand:VI_HW_QHS 2 "register_operand" "v")
++			   (match_operand:VI_HW_QHS 3 "register_operand" "v")]
++			  UNSPEC_VEC_VMAL))]
++  "TARGET_VX"
++  "vmal\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type" "VRR")])
++
++; vec_mhadd -> vec_vmah/vec_vmalh
++
++; vmahb, vmahh, vmahf
++(define_insn "vec_vmah"
++  [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "%v")
++			   (match_operand:VI_HW_QHS 2 "register_operand" "v")
++			   (match_operand:VI_HW_QHS 3 "register_operand" "v")]
++			  UNSPEC_VEC_VMAH))]
++  "TARGET_VX"
++  "vmah\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type" "VRR")])
++
++; vmalhb, vmalhh, vmalhf
++(define_insn "vec_vmalh"
++  [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "%v")
++			   (match_operand:VI_HW_QHS 2 "register_operand" "v")
++			   (match_operand:VI_HW_QHS 3 "register_operand" "v")]
++			  UNSPEC_VEC_VMALH))]
++  "TARGET_VX"
++  "vmalh\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type" "VRR")])
++
++; vec_meadd -> vec_vmae/vec_vmale
++
++; vmaeb, vmaeh, vmaef
++(define_insn "vec_vmae"
++  [(set (match_operand: 0 "register_operand" "=v")
++	(unspec: [(match_operand:VI_HW_QHS 1 "register_operand" "%v")
++		  (match_operand:VI_HW_QHS 2 "register_operand" "v")
++		  (match_operand: 3 "register_operand" "v")]
++		 UNSPEC_VEC_VMAE))]
++  "TARGET_VX"
++  "vmae\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type" "VRR")])
++
++; vmaleb, vmaleh, vmalef
++(define_insn "vec_vmale"
++  [(set (match_operand: 0 "register_operand" "=v")
++	(unspec: [(match_operand:VI_HW_QHS 1 "register_operand" "%v")
++		  (match_operand:VI_HW_QHS 2 "register_operand" "v")
++		  (match_operand: 3 "register_operand" "v")]
++		 UNSPEC_VEC_VMALE))]
++  "TARGET_VX"
++  "vmale\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type" "VRR")])
++
++; vec_moadd -> vec_vmao/vec_vmalo
++
++; vmaob, vmaoh, vmaof
++(define_insn "vec_vmao"
++  [(set (match_operand: 0 "register_operand" "=v")
++	(unspec: [(match_operand:VI_HW_QHS 1 "register_operand" "%v")
++		  (match_operand:VI_HW_QHS 2 "register_operand" "v")
++		  (match_operand: 3 "register_operand" "v")]
++		 UNSPEC_VEC_VMAO))]
++  "TARGET_VX"
++  "vmao\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type" "VRR")])
++
++; vmalob, vmaloh, vmalof
++(define_insn "vec_vmalo"
++  [(set (match_operand: 0 "register_operand" "=v")
++	(unspec: [(match_operand:VI_HW_QHS 1 "register_operand" "v")
++		  (match_operand:VI_HW_QHS 2 "register_operand" "v")
++		  (match_operand: 3 "register_operand" "v")]
++		 UNSPEC_VEC_VMALO))]
++  "TARGET_VX"
++  "vmalo\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector multiply high
++
++; vec_mulh -> vec_smulh/vec_umulh
++
++; vmhb, vmhh, vmhf
++(define_insn "vec_smulh"
++  [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "%v")
++			   (match_operand:VI_HW_QHS 2 "register_operand" "v")]
++			  UNSPEC_VEC_SMULT_HI))]
++  "TARGET_VX"
++  "vmh\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++; vmlhb, vmlhh, vmlhf
++(define_insn "vec_umulh"
++  [(set
(match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "%v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v")] ++ UNSPEC_VEC_UMULT_HI))] ++ "TARGET_VX" ++ "vmlh\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector multiply low ++ ++; vec_mule -> vec_widen_umult_even/vec_widen_smult_even ++; vec_mulo -> vec_widen_umult_odd/vec_widen_smult_odd ++ ++ ++; Vector nor ++ ++(define_insn "vec_nor3" ++ [(set (match_operand:VT_HW 0 "register_operand" "=v") ++ (not:VT_HW (ior:VT_HW (match_operand:VT_HW 1 "register_operand" "%v") ++ (match_operand:VT_HW 2 "register_operand" "v"))))] ++ "TARGET_VX" ++ "vno\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; The following two patterns allow mixed mode and's as required for the intrinsics. ++(define_insn "vec_nor_av2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (not:V2DF (ior:V2DF (subreg:V2DF (match_operand:V2DI 1 "register_operand" "v") 0) ++ (match_operand:V2DF 2 "register_operand" "v"))))] ++ "TARGET_VX" ++ "vno\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "vec_nor_cv2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (not:V2DF (ior:V2DF (match_operand:V2DF 1 "register_operand" "v") ++ (subreg:V2DF (match_operand:V2DI 2 "register_operand" "v") 0))))] ++ "TARGET_VX" ++ "vno\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector or ++ ++; The following two patterns allow mixed mode or's as required for the intrinsics. ++(define_insn "ior_av2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (ior:V2DF (subreg:V2DF (match_operand:V2DI 1 "register_operand" "v") 0) ++ (match_operand:V2DF 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vo\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "ior_cv2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (ior:V2DF (match_operand:V2DF 1 "register_operand" "v") ++ (subreg:V2DF (match_operand:V2DI 2 "register_operand" "v") 0)))] ++ "TARGET_VX" ++ "vo\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector population count vec_popcnt -> popcount ++; Vector element rotate left logical vec_rl -> vrotl, vec_rli -> rot ++ ++; Vector element rotate and insert under mask ++ ++; verimb, verimh, verimf, verimg ++(define_insn "verim" ++ [(set (match_operand:VI_HW 0 "register_operand" "=v") ++ (unspec:VI_HW [(match_operand:VI_HW 1 "register_operand" "0") ++ (match_operand:VI_HW 2 "register_operand" "v") ++ (match_operand:VI_HW 3 "register_operand" "v") ++ (match_operand:QI 4 "const_int_operand" "C")] ++ UNSPEC_VEC_RL_MASK))] ++ "TARGET_VX" ++ "verim\t%v0,%v2,%v3,%b4" ++ [(set_attr "op_type" "VRI")]) ++ ++ ++; Vector shift left ++ ++(define_insn "vec_sll" ++ [(set (match_operand:VI_HW 0 "register_operand" "=v") ++ (unspec:VI_HW [(match_operand:VI_HW 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v")] ++ UNSPEC_VEC_SLL))] ++ "TARGET_VX" ++ "vsl\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector shift left by byte ++ ++(define_insn "vec_slb" ++ [(set (match_operand:V_HW 0 "register_operand" "=v") ++ (unspec:V_HW [(match_operand:V_HW 1 "register_operand" "v") ++ (match_operand: 2 "register_operand" "v")] ++ UNSPEC_VEC_SLB))] ++ "TARGET_VX" ++ "vslb\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector shift left double by byte ++ ++(define_insn "vec_sld" ++ [(set (match_operand:V_HW 0 "register_operand" "=v") ++ (unspec:V_HW [(match_operand:V_HW 1 "register_operand" "v") ++ (match_operand:V_HW 
2 "register_operand" "v") ++ (match_operand:QI 3 "const_int_operand" "C")] ++ UNSPEC_VEC_SLDB))] ++ "TARGET_VX" ++ "vsldb\t%v0,%v1,%v2,%b3" ++ [(set_attr "op_type" "VRI")]) ++ ++(define_expand "vec_sldw" ++ [(set (match_operand:V_HW 0 "register_operand" "") ++ (unspec:V_HW [(match_operand:V_HW 1 "register_operand" "") ++ (match_operand:V_HW 2 "register_operand" "") ++ (match_operand:QI 3 "const_int_operand" "")] ++ UNSPEC_VEC_SLDB))] ++ "TARGET_VX" ++{ ++ operands[3] = GEN_INT (INTVAL (operands[3]) << 2); ++}) ++ ++; Vector shift right arithmetic ++ ++(define_insn "vec_sral" ++ [(set (match_operand:VI_HW 0 "register_operand" "=v") ++ (unspec:VI_HW [(match_operand:VI_HW 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v")] ++ UNSPEC_VEC_SRAL))] ++ "TARGET_VX" ++ "vsra\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector shift right arithmetic by byte ++ ++(define_insn "vec_srab" ++ [(set (match_operand:V_HW 0 "register_operand" "=v") ++ (unspec:V_HW [(match_operand:V_HW 1 "register_operand" "v") ++ (match_operand: 2 "register_operand" "v")] ++ UNSPEC_VEC_SRAB))] ++ "TARGET_VX" ++ "vsrab\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector shift right logical ++ ++(define_insn "vec_srl" ++ [(set (match_operand:VI_HW 0 "register_operand" "=v") ++ (unspec:VI_HW [(match_operand:VI_HW 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v")] ++ UNSPEC_VEC_SRL))] ++ "TARGET_VX" ++ "vsrl\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector shift right logical by byte ++ ++; Pattern definition in vector.md ++(define_expand "vec_srb" ++ [(set (match_operand:V_HW 0 "register_operand" "") ++ (unspec:V_HW [(match_operand:V_HW 1 "register_operand" "") ++ (match_operand: 2 "register_operand" "")] ++ UNSPEC_VEC_SRLB))] ++ "TARGET_VX") ++ ++ ++; Vector subtract ++ ++(define_insn "vec_sub_u128" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") ++ (match_operand:V16QI 2 "register_operand" "v")] ++ UNSPEC_VEC_SUB_U128))] ++ "TARGET_VX" ++ "vsq\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector subtract compute borrow indication ++ ++(define_insn "vec_subc" ++ [(set (match_operand:VI_HW 0 "register_operand" "=v") ++ (unspec:VI_HW [(match_operand:VI_HW 1 "register_operand" "v") ++ (match_operand:VI_HW 2 "register_operand" "v")] ++ UNSPEC_VEC_SUBC))] ++ "TARGET_VX" ++ "vscbi\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "vec_subc_u128" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") ++ (match_operand:V16QI 2 "register_operand" "v")] ++ UNSPEC_VEC_SUBC_U128))] ++ "TARGET_VX" ++ "vscbiq\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector subtract with borrow indication ++ ++(define_insn "vec_sube_u128" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") ++ (match_operand:V16QI 2 "register_operand" "v") ++ (match_operand:V16QI 3 "register_operand" "v")] ++ UNSPEC_VEC_SUBE_U128))] ++ "TARGET_VX" ++ "vsbiq\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector subtract with borrow compute and borrow indication ++ ++(define_insn "vec_subec_u128" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") ++ (match_operand:V16QI 2 "register_operand" "v") ++ (match_operand:V16QI 3 "register_operand" "v")] ++ 
UNSPEC_VEC_SUBEC_U128))] ++ "TARGET_VX" ++ "vsbcbiq\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector sum across ++ ++; Sum across DImode parts of the 1st operand and add the rightmost ++; element of 2nd operand ++; vsumgh, vsumgf ++(define_expand "vec_sum2<mode>" ++ [(set (match_operand:V2DI 0 "register_operand" "") ++ (unspec:V2DI [(match_operand:VI_HW_HS 1 "register_operand" "") ++ (match_operand:VI_HW_HS 2 "register_operand" "")] ++ UNSPEC_VEC_VSUMG))] ++ "TARGET_VX") ++ ++; vsumqf, vsumqg ++(define_insn "vec_sum_u128" ++ [(set (match_operand:V2DI 0 "register_operand" "=v") ++ (unspec:V2DI [(match_operand:VI_HW_SD 1 "register_operand" "v") ++ (match_operand:VI_HW_SD 2 "register_operand" "v")] ++ UNSPEC_VEC_VSUMQ))] ++ "TARGET_VX" ++ "vsumq<bhfgq>\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vsumb, vsumh ++(define_expand "vec_sum4<mode>" ++ [(set (match_operand:V4SI 0 "register_operand" "") ++ (unspec:V4SI [(match_operand:VI_HW_QH 1 "register_operand" "") ++ (match_operand:VI_HW_QH 2 "register_operand" "")] ++ UNSPEC_VEC_VSUM))] ++ "TARGET_VX") ++ ++ ++; Vector test under mask ++ ++(define_expand "vec_test_mask_int<mode>" ++ [(set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_operand:V_HW 1 "register_operand" "") ++ (match_operand:<tointvec> 2 "register_operand" "")] ++ UNSPEC_VEC_TEST_MASK)) ++ (set (match_operand:SI 0 "register_operand" "") ++ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX") ++ ++(define_insn "*vec_test_mask<mode>" ++ [(set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_operand:V_HW 0 "register_operand" "v") ++ (match_operand:<tointvec> 1 "register_operand" "v")] ++ UNSPEC_VEC_TEST_MASK))] ++ "TARGET_VX" ++ "vtm\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector find any element equal ++ ++; vfaeb, vfaeh, vfaef ++; vfaezb, vfaezh, vfaezf ++(define_insn "vfae<mode>" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (match_operand:QI 3 "const_mask_operand" "C")] ++ UNSPEC_VEC_VFAE))] ++ "TARGET_VX" ++{ ++ unsigned HOST_WIDE_INT flags = INTVAL (operands[3]); ++ ++ if (flags & VSTRING_FLAG_ZS) ++ { ++ flags &= ~VSTRING_FLAG_ZS; ++ operands[3] = GEN_INT (flags); ++ return "vfaez<bhfgq>\t%v0,%v1,%v2,%b3"; ++ } ++ return "vfae<bhfgq>\t%v0,%v1,%v2,%b3"; ++} ++[(set_attr "op_type" "VRR")]) ++ ++; vfaebs, vfaehs, vfaefs ++; vfaezbs, vfaezhs, vfaezfs ++(define_insn "*vfaes<mode>" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (match_operand:QI 3 "const_mask_operand" "C")] ++ UNSPEC_VEC_VFAE)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) ++ (match_dup 2) ++ (match_dup 3)] ++ UNSPEC_VEC_VFAECC))] ++ "TARGET_VX" ++{ ++ unsigned HOST_WIDE_INT flags = INTVAL (operands[3]); ++ ++ if (flags & VSTRING_FLAG_ZS) ++ { ++ flags &= ~VSTRING_FLAG_ZS; ++ operands[3] = GEN_INT (flags); ++ return "vfaez<bhfgq>s\t%v0,%v1,%v2,%b3"; ++ } ++ return "vfae<bhfgq>s\t%v0,%v1,%v2,%b3"; ++} ++ [(set_attr "op_type" "VRR")]) ++ ++(define_expand "vfaez<mode>" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "") ++ (match_operand:VI_HW_QHS 2 "register_operand" "") ++ (match_operand:QI 3 "const_mask_operand" "")] ++ UNSPEC_VEC_VFAE))] ++ "TARGET_VX" ++{ ++ operands[3] = GEN_INT (INTVAL (operands[3]) | VSTRING_FLAG_ZS); ++}) ++ ++(define_expand "vfaes<mode>" ++ [(parallel ++
[(set (match_operand:VI_HW_QHS 0 "register_operand" "") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "") ++ (match_operand:VI_HW_QHS 2 "register_operand" "") ++ (match_operand:QI 3 "const_mask_operand" "")] ++ UNSPEC_VEC_VFAE)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) ++ (match_dup 2) ++ (match_dup 3)] ++ UNSPEC_VEC_VFAECC))]) ++ (set (match_operand:SI 4 "memory_operand" "") ++ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX" ++{ ++ operands[3] = GEN_INT (INTVAL (operands[3]) | VSTRING_FLAG_CS); ++}) ++ ++(define_expand "vfaezs<mode>" ++ [(parallel ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "") ++ (match_operand:VI_HW_QHS 2 "register_operand" "") ++ (match_operand:SI 3 "const_mask_operand" "")] ++ UNSPEC_VEC_VFAE)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) ++ (match_dup 2) ++ (match_dup 3)] ++ UNSPEC_VEC_VFAECC))]) ++ (set (match_operand:SI 4 "memory_operand" "") ++ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX" ++{ ++ operands[3] = GEN_INT (INTVAL (operands[3]) | VSTRING_FLAG_CS | VSTRING_FLAG_ZS); ++}) ++ ++ ++; Vector find element equal ++ ++; vfeebs, vfeehs, vfeefs ++; vfeezbs, vfeezhs, vfeezfs ++(define_insn "*vfees<mode>" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (match_operand:QI 3 "const_mask_operand" "C")] ++ UNSPEC_VEC_VFEE)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) ++ (match_dup 2) ++ (match_dup 3)] ++ UNSPEC_VEC_VFEECC))] ++ "TARGET_VX" ++{ ++ unsigned HOST_WIDE_INT flags = INTVAL (operands[3]); ++ ++ gcc_assert (!(flags & ~(VSTRING_FLAG_ZS | VSTRING_FLAG_CS))); ++ flags &= ~VSTRING_FLAG_CS; ++ ++ if (flags == VSTRING_FLAG_ZS) ++ return "vfeez<bhfgq>s\t%v0,%v1,%v2"; ++ return "vfee<bhfgq>s\t%v0,%v1,%v2,%b3"; ++} ++ [(set_attr "op_type" "VRR")]) ++ ++; vfeeb, vfeeh, vfeef ++(define_insn "vfee<mode>" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "") ++ (match_operand:VI_HW_QHS 2 "register_operand" "") ++ (const_int 0)] ++ UNSPEC_VEC_VFEE))] ++ "TARGET_VX" ++ "vfee<bhfgq>\t%v0,%v1,%v2,0" ++ [(set_attr "op_type" "VRR")]) ++ ++; vfeezb, vfeezh, vfeezf ++(define_insn "vfeez<mode>" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "") ++ (match_operand:VI_HW_QHS 2 "register_operand" "") ++ (const_int VSTRING_FLAG_ZS)] ++ UNSPEC_VEC_VFEE))] ++ "TARGET_VX" ++ "vfeezs\t%v0,%v1,%v2,2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_expand "vfees<mode>" ++ [(parallel ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "") ++ (match_operand:VI_HW_QHS 2 "register_operand" "") ++ (const_int VSTRING_FLAG_CS)] ++ UNSPEC_VEC_VFEE)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) ++ (match_dup 2) ++ (const_int VSTRING_FLAG_CS)] ++ UNSPEC_VEC_VFEECC))]) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX") ++ ++(define_expand "vfeezs<mode>" ++ [(parallel ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "") ++ (match_operand:VI_HW_QHS 2 "register_operand" "") ++ (match_dup 4)] ++ UNSPEC_VEC_VFEE)) ++ (set
(reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) ++ (match_dup 2) ++ (match_dup 4)] ++ UNSPEC_VEC_VFEECC))]) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX" ++{ ++ operands[4] = GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS); ++}) ++ ++; Vector find element not equal ++ ++; vfeneb, vfeneh, vfenef ++(define_insn "vfene<mode>" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (const_int 0)] ++ UNSPEC_VEC_VFENE))] ++ "TARGET_VX" ++ "vfene<bhfgq>\t%v0,%v1,%v2,0" ++ [(set_attr "op_type" "VRR")]) ++ ++; vec_vfenes can be found in vector.md since it is used for strlen ++ ++; vfenezb, vfenezh, vfenezf ++(define_insn "vfenez<mode>" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "") ++ (match_operand:VI_HW_QHS 2 "register_operand" "") ++ (const_int VSTRING_FLAG_ZS)] ++ UNSPEC_VEC_VFENE))] ++ "TARGET_VX" ++ "vfenez<bhfgq>\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_expand "vfenes<mode>" ++ [(parallel ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "") ++ (match_operand:VI_HW_QHS 2 "register_operand" "") ++ (const_int VSTRING_FLAG_CS)] ++ UNSPEC_VEC_VFENE)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) ++ (match_dup 2) ++ (const_int VSTRING_FLAG_CS)] ++ UNSPEC_VEC_VFENECC))]) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX") ++ ++(define_expand "vfenezs<mode>" ++ [(parallel ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "") ++ (match_operand:VI_HW_QHS 2 "register_operand" "") ++ (match_dup 4)] ++ UNSPEC_VEC_VFENE)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) ++ (match_dup 2) ++ (match_dup 4)] ++ UNSPEC_VEC_VFENECC))]) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX" ++{ ++ operands[4] = GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS); ++}) ++ ++; Vector isolate string ++ ++; vistrb, vistrh, vistrf ++(define_insn "vistr<mode>" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v")] ++ UNSPEC_VEC_VISTR))] ++ "TARGET_VX" ++ "vistr<bhfgq>\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++; vistrbs, vistrhs, vistrfs ++(define_insn "*vistrs<mode>" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v")] ++ UNSPEC_VEC_VISTR)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1)] UNSPEC_VEC_VISTRCC))] ++ "TARGET_VX" ++ "vistr<bhfgq>s\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_expand "vistrs<mode>" ++ [(parallel ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "")] ++ UNSPEC_VEC_VISTR)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1)] ++ UNSPEC_VEC_VISTRCC))]) ++ (set (match_operand:SI 2 "memory_operand" "") ++ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX") ++ ++ ++; Vector compare range ++ ++; vstrcb, vstrch, vstrcf ++; vstrczb, vstrczh, vstrczf ++(define_insn "vstrc<mode>" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS
[(match_operand:VI_HW_QHS 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (match_operand:VI_HW_QHS 3 "register_operand" "v") ++ (match_operand:QI 4 "const_mask_operand" "C")] ++ UNSPEC_VEC_VSTRC))] ++ "TARGET_VX" ++{ ++ unsigned HOST_WIDE_INT flags = INTVAL (operands[4]); ++ ++ if (flags & VSTRING_FLAG_ZS) ++ { ++ flags &= ~VSTRING_FLAG_ZS; ++ operands[4] = GEN_INT (flags); ++ return "vstrcz<bhfgq>\t%v0,%v1,%v2,%v3,%b4"; ++ } ++ return "vstrc<bhfgq>\t%v0,%v1,%v2,%v3,%b4"; ++} ++[(set_attr "op_type" "VRR")]) ++ ++; vstrcbs, vstrchs, vstrcfs ++; vstrczbs, vstrczhs, vstrczfs ++(define_insn "*vstrcs<mode>" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (match_operand:VI_HW_QHS 3 "register_operand" "v") ++ (match_operand:QI 4 "const_mask_operand" "C")] ++ UNSPEC_VEC_VSTRC)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) ++ (match_dup 2) ++ (match_dup 3) ++ (match_dup 4)] ++ UNSPEC_VEC_VSTRCCC))] ++ "TARGET_VX" ++{ ++ unsigned HOST_WIDE_INT flags = INTVAL (operands[4]); ++ ++ if (flags & VSTRING_FLAG_ZS) ++ { ++ flags &= ~VSTRING_FLAG_ZS; ++ operands[4] = GEN_INT (flags); ++ return "vstrcz<bhfgq>s\t%v0,%v1,%v2,%v3,%b4"; ++ } ++ return "vstrc<bhfgq>s\t%v0,%v1,%v2,%v3,%b4"; ++} ++ [(set_attr "op_type" "VRR")]) ++ ++(define_expand "vstrcz<mode>" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "") ++ (match_operand:VI_HW_QHS 2 "register_operand" "") ++ (match_operand:VI_HW_QHS 3 "register_operand" "") ++ (match_operand:QI 4 "const_mask_operand" "")] ++ UNSPEC_VEC_VSTRC))] ++ "TARGET_VX" ++{ ++ operands[4] = GEN_INT (INTVAL (operands[4]) | VSTRING_FLAG_ZS); ++}) ++ ++(define_expand "vstrcs<mode>" ++ [(parallel ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "") ++ (match_operand:VI_HW_QHS 2 "register_operand" "") ++ (match_operand:VI_HW_QHS 3 "register_operand" "") ++ (match_operand:QI 4 "const_mask_operand" "")] ++ UNSPEC_VEC_VSTRC)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) ++ (match_dup 2) ++ (match_dup 3) ++ (match_dup 4)] ++ UNSPEC_VEC_VSTRCCC))]) ++ (set (match_operand:SI 5 "memory_operand" "") ++ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX" ++{ ++ operands[4] = GEN_INT (INTVAL (operands[4]) | VSTRING_FLAG_CS); ++}) ++ ++(define_expand "vstrczs<mode>" ++ [(parallel ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "") ++ (match_operand:VI_HW_QHS 2 "register_operand" "") ++ (match_operand:VI_HW_QHS 3 "register_operand" "") ++ (match_operand:QI 4 "const_mask_operand" "")] ++ UNSPEC_VEC_VSTRC)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) ++ (match_dup 2) ++ (match_dup 3) ++ (match_dup 4)] ++ UNSPEC_VEC_VSTRCCC))]) ++ (set (match_operand:SI 5 "memory_operand" "") ++ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX" ++{ ++ operands[4] = GEN_INT (INTVAL (operands[4]) | VSTRING_FLAG_CS | VSTRING_FLAG_ZS); ++}) ++ ++ ++; Signed V2DI -> V2DF conversion - inexact exception disabled ++(define_insn "vec_di_to_df_s64" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (unspec:V2DF [(match_operand:V2DI 1 "register_operand" "v") ++ (match_operand:QI 2 "const_mask_operand" "C")] ++ UNSPEC_VEC_VCDGB))] ++ "TARGET_VX && UINTVAL (operands[2]) != 2 &&
UINTVAL (operands[2]) <= 7" ++ "vcdgb\t%v0,%v1,4,%b2" ++ [(set_attr "op_type" "VRR")]) ++ ++; The result needs to be multiplied with 2**-op2 ++(define_expand "vec_ctd_s64" ++ [(set (match_operand:V2DF 0 "register_operand" "") ++ (unspec:V2DF [(match_operand:V2DI 1 "register_operand" "") ++ (const_int 0)] ; According to current BFP rounding mode ++ UNSPEC_VEC_VCDGB)) ++ (use (match_operand:QI 2 "const_int_operand" "")) ++ (set (match_dup 0) (mult:V2DF (match_dup 0) (match_dup 3)))] ++ "TARGET_VX" ++{ ++ REAL_VALUE_TYPE f; ++ rtx c; ++ ++ real_2expN (&f, -INTVAL (operands[2]), DFmode); ++ c = CONST_DOUBLE_FROM_REAL_VALUE (f, DFmode); ++ ++ operands[3] = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, c, c)); ++ operands[3] = force_reg (V2DFmode, operands[3]); ++}) ++ ++; Unsigned V2DI -> V2DF conversion - inexact exception disabled ++(define_insn "vec_di_to_df_u64" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (unspec:V2DF [(match_operand:V2DI 1 "register_operand" "v") ++ (match_operand:QI 2 "const_int_operand" "C")] ++ UNSPEC_VEC_VCDLGB))] ++ "TARGET_VX" ++ "vcdlgb\t%v0,%v1,4,%b2" ++ [(set_attr "op_type" "VRR")]) ++ ++; The result needs to be multiplied with 2**-op2 ++(define_expand "vec_ctd_u64" ++ [(set (match_operand:V2DF 0 "register_operand" "") ++ (unspec:V2DF [(match_operand:V2DI 1 "register_operand" "") ++ (const_int 0)] ; According to current BFP rounding mode ++ UNSPEC_VEC_VCDLGB)) ++ (use (match_operand:QI 2 "const_int_operand" "")) ++ (set (match_dup 0) (mult:V2DF (match_dup 0) (match_dup 3)))] ++ "TARGET_VX" ++{ ++ REAL_VALUE_TYPE f; ++ rtx c; ++ ++ real_2expN (&f, -INTVAL (operands[2]), DFmode); ++ c = CONST_DOUBLE_FROM_REAL_VALUE (f, DFmode); ++ ++ operands[3] = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, c, c)); ++ operands[3] = force_reg (V2DFmode, operands[3]); ++}) ++ ++ ++; Signed V2DF -> V2DI conversion - inexact exception disabled ++(define_insn "vec_df_to_di_s64" ++ [(set (match_operand:V2DI 0 "register_operand" "=v") ++ (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:QI 2 "const_int_operand" "C")] ++ UNSPEC_VEC_VCGDB))] ++ "TARGET_VX" ++ "vcgdb\t%v0,%v1,4,%b2" ++ [(set_attr "op_type" "VRR")]) ++ ++; The input needs to be multiplied with 2**op2 ++(define_expand "vec_ctsl" ++ [(use (match_operand:QI 2 "const_int_operand" "")) ++ (set (match_dup 4) (mult:V2DF (match_operand:V2DF 1 "register_operand" "") ++ (match_dup 3))) ++ (set (match_operand:V2DI 0 "register_operand" "") ++ (unspec:V2DI [(match_dup 4) (const_int 0)] ; According to current BFP rounding mode ++ UNSPEC_VEC_VCGDB))] ++ "TARGET_VX" ++{ ++ REAL_VALUE_TYPE f; ++ rtx c; ++ ++ real_2expN (&f, INTVAL (operands[2]), DFmode); ++ c = CONST_DOUBLE_FROM_REAL_VALUE (f, DFmode); ++ ++ operands[3] = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, c, c)); ++ operands[3] = force_reg (V2DFmode, operands[3]); ++ operands[4] = gen_reg_rtx (V2DFmode); ++}) ++ ++; Unsigned V2DF -> V2DI conversion - inexact exception disabled ++(define_insn "vec_df_to_di_u64" ++ [(set (match_operand:V2DI 0 "register_operand" "=v") ++ (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:QI 2 "const_mask_operand" "C")] ++ UNSPEC_VEC_VCLGDB))] ++ "TARGET_VX && UINTVAL (operands[2]) <= 7" ++ "vclgdb\t%v0,%v1,4,%b2" ++ [(set_attr "op_type" "VRR")]) ++ ++; The input needs to be multiplied with 2**op2 ++(define_expand "vec_ctul" ++ [(use (match_operand:QI 2 "const_int_operand" "")) ++ (set (match_dup 4) (mult:V2DF (match_operand:V2DF 1 "register_operand" "") ++ (match_dup 3))) ++ (set 
(match_operand:V2DI 0 "register_operand" "") ++ (unspec:V2DI [(match_dup 4) (const_int 0)] ; According to current BFP rounding mode ++ UNSPEC_VEC_VCLGDB))] ++ "TARGET_VX" ++{ ++ REAL_VALUE_TYPE f; ++ rtx c; ++ ++ real_2expN (&f, INTVAL (operands[2]), DFmode); ++ c = CONST_DOUBLE_FROM_REAL_VALUE (f, DFmode); ++ ++ operands[3] = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, c, c)); ++ operands[3] = force_reg (V2DFmode, operands[3]); ++ operands[4] = gen_reg_rtx (V2DFmode); ++}) ++ ++; Vector load fp integer - IEEE inexact exception is suppressed ++(define_insn "vfidb" ++ [(set (match_operand:V2DI 0 "register_operand" "=v") ++ (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:QI 2 "const_mask_operand" "C") ++ (match_operand:QI 3 "const_mask_operand" "C")] ++ UNSPEC_VEC_VFIDB))] ++ "TARGET_VX && !(UINTVAL (operands[2]) & 3) && UINTVAL (operands[3]) <= 7" ++ "vfidb\t%v0,%v1,%b2,%b3" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_expand "vec_ceil" ++ [(set (match_operand:V2DI 0 "register_operand" "") ++ (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "") ++ (const_int VEC_RND_TO_INF)] ++ UNSPEC_VEC_VFIDB))] ++ "TARGET_VX") ++ ++(define_expand "vec_floor" ++ [(set (match_operand:V2DI 0 "register_operand" "") ++ (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "") ++ (const_int VEC_RND_TO_MINF)] ++ UNSPEC_VEC_VFIDB))] ++ "TARGET_VX") ++ ++(define_expand "vec_trunc" ++ [(set (match_operand:V2DI 0 "register_operand" "") ++ (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "") ++ (const_int VEC_RND_TO_ZERO)] ++ UNSPEC_VEC_VFIDB))] ++ "TARGET_VX") ++ ++(define_expand "vec_roundc" ++ [(set (match_operand:V2DI 0 "register_operand" "") ++ (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "") ++ (const_int VEC_RND_CURRENT)] ++ UNSPEC_VEC_VFIDB))] ++ "TARGET_VX") ++ ++(define_expand "vec_round" ++ [(set (match_operand:V2DI 0 "register_operand" "") ++ (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "") ++ (const_int VEC_RND_NEAREST_TO_EVEN)] ++ UNSPEC_VEC_VFIDB))] ++ "TARGET_VX") ++ ++ ++; Vector load lengthened - V4SF -> V2DF ++ ++(define_insn "*vldeb" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (unspec:V2DF [(match_operand:V4SF 1 "register_operand" "v")] ++ UNSPEC_VEC_VLDEB))] ++ "TARGET_VX" ++ "vldeb\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_expand "vec_ld2f" ++ [; Initialize a vector to all zeroes. FIXME: This should not be ++ ; necessary since all elements of the vector will be set anyway. ++ ; This is just to make it explicit to the data flow framework. 
++ (set (match_dup 2) (match_dup 3)) ++ (set (match_dup 2) (unspec:V4SF [(match_operand:SF 1 "memory_operand" "") ++ (const_int 0) ++ (match_dup 2)] ++ UNSPEC_VEC_SET)) ++ (set (match_dup 2) (unspec:V4SF [(match_dup 4) ++ (const_int 2) ++ (match_dup 2)] ++ UNSPEC_VEC_SET)) ++ (set (match_operand:V2DF 0 "register_operand" "") ++ (unspec:V2DF [(match_dup 2)] UNSPEC_VEC_VLDEB))] ++ "TARGET_VX" ++{ ++ operands[2] = gen_reg_rtx (V4SFmode); ++ operands[3] = CONST0_RTX (V4SFmode); ++ operands[4] = adjust_address (operands[1], SFmode, 4); ++}) ++ ++ ++; Vector load rounded - V2DF -> V4SF ++ ++(define_insn "*vledb" ++ [(set (match_operand:V4SF 0 "register_operand" "=v") ++ (unspec:V4SF [(match_operand:V2DF 1 "register_operand" "v")] ++ UNSPEC_VEC_VLEDB))] ++ "TARGET_VX" ++ "vledb\t%v0,%v1,0,0" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_expand "vec_st2f" ++ [(set (match_dup 2) ++ (unspec:V4SF [(match_operand:V2DF 0 "register_operand" "")] ++ UNSPEC_VEC_VLEDB)) ++ (set (match_operand:SF 1 "memory_operand" "") ++ (unspec:SF [(match_dup 2) (const_int 0)] UNSPEC_VEC_EXTRACT)) ++ (set (match_dup 3) ++ (unspec:SF [(match_dup 2) (const_int 2)] UNSPEC_VEC_EXTRACT))] ++ "TARGET_VX" ++{ ++ operands[2] = gen_reg_rtx (V4SFmode); ++ operands[3] = adjust_address (operands[1], SFmode, 4); ++}) ++ ++ ++; Vector load negated fp ++ ++(define_expand "vec_nabs" ++ [(set (match_operand:V2DF 0 "register_operand" "") ++ (neg:V2DF (abs:V2DF (match_operand:V2DF 1 "register_operand" ""))))] ++ "TARGET_VX") ++ ++; Vector square root fp vec_sqrt -> sqrt rtx standard name ++ ++; Vector FP test data class immediate ++ ++(define_insn "*vftcidb" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:HI 2 "const_int_operand" "J")] ++ UNSPEC_VEC_VFTCIDB)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) (match_dup 2)] UNSPEC_VEC_VFTCIDBCC))] ++ "TARGET_VX && CONST_OK_FOR_CONSTRAINT_P (INTVAL (operands[2]), 'J', \"J\")" ++ "vftcidb\t%v0,%v1,%x2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "*vftcidb_cconly" ++ [(set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:HI 2 "const_int_operand" "J")] ++ UNSPEC_VEC_VFTCIDBCC)) ++ (clobber (match_scratch:V2DI 0 "=v"))] ++ "TARGET_VX && CONST_OK_FOR_CONSTRAINT_P (INTVAL (operands[2]), 'J', \"J\")" ++ "vftcidb\t%v0,%v1,%x2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_expand "vftcidb" ++ [(parallel ++ [(set (match_operand:V2DF 0 "register_operand" "") ++ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "") ++ (match_operand:HI 2 "const_int_operand" "")] ++ UNSPEC_VEC_VFTCIDB)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) (match_dup 2)] UNSPEC_VEC_VFTCIDBCC))]) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX && CONST_OK_FOR_CONSTRAINT_P (INTVAL (operands[2]), 'J', \"J\")") ++ ++;; ++;; Integer compares ++;; ++ ++; All comparisons which produce a CC need fully populated (VI_HW) ++; vector arguments. Otherwise the any/all CCs would be just bogus. 
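As a reader's aid before the CC-setting integer compare patterns that follow: these expanders back the zvector comparison predicates from vecintrin.h. The sketch below is illustrative only and not part of the patch; it assumes the vec_all_eq and vec_any_gt intrinsics behave as documented for -mzvector.

#include <vecintrin.h>

/* With -march=z13 -mzvector each predicate is expected to compile to a
   single CC-setting compare (a vceqfs/vchfs style instruction) followed
   by an extraction of the condition code into an int, i.e. the
   UNSPEC_CC_TO_INT step used by the expanders below.  */

int
all_equal (vector signed int a, vector signed int b)
{
  return vec_all_eq (a, b);   /* nonzero iff a[i] == b[i] for all i */
}

int
any_greater (vector signed int a, vector signed int b)
{
  return vec_any_gt (a, b);   /* nonzero iff a[i] > b[i] for some i */
}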
++ ++(define_insn "*vec_cmp_cconly" ++ [(set (reg:VICMP CC_REGNUM) ++ (compare:VICMP (match_operand:VI_HW 0 "register_operand" "v") ++ (match_operand:VI_HW 1 "register_operand" "v"))) ++ (clobber (match_scratch:VI_HW 2 "=v"))] ++ "TARGET_VX" ++ "vcs\t%v2,%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++; FIXME: The following 2x3 definitions should be merged into 2 with ++; VICMP like above but I could not find a way to set the comparison ++; operator (eq) depending on the mode CCVEQ (mode_iterator). Or the ++; other way around - setting the mode depending on the code ++; (code_iterator). ++(define_expand "vec_cmpeq_cc" ++ [(parallel ++ [(set (reg:CCVEQ CC_REGNUM) ++ (compare:CCVEQ (match_operand:VI_HW 1 "register_operand" "v") ++ (match_operand:VI_HW 2 "register_operand" "v"))) ++ (set (match_operand:VI_HW 0 "register_operand" "=v") ++ (eq:VI_HW (match_dup 1) (match_dup 2)))]) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (unspec:SI [(reg:CCVEQ CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX") ++ ++(define_expand "vec_cmph_cc" ++ [(parallel ++ [(set (reg:CCVH CC_REGNUM) ++ (compare:CCVH (match_operand:VI_HW 1 "register_operand" "v") ++ (match_operand:VI_HW 2 "register_operand" "v"))) ++ (set (match_operand:VI_HW 0 "register_operand" "=v") ++ (gt:VI_HW (match_dup 1) (match_dup 2)))]) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (unspec:SI [(reg:CCVH CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX") ++ ++(define_expand "vec_cmphl_cc" ++ [(parallel ++ [(set (reg:CCVHU CC_REGNUM) ++ (compare:CCVHU (match_operand:VI_HW 1 "register_operand" "v") ++ (match_operand:VI_HW 2 "register_operand" "v"))) ++ (set (match_operand:VI_HW 0 "register_operand" "=v") ++ (gtu:VI_HW (match_dup 1) (match_dup 2)))]) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (unspec:SI [(reg:CCVHU CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX") ++ ++ ++(define_insn "*vec_cmpeq_cc" ++ [(set (reg:CCVEQ CC_REGNUM) ++ (compare:CCVEQ (match_operand:VI_HW 0 "register_operand" "v") ++ (match_operand:VI_HW 1 "register_operand" "v"))) ++ (set (match_operand:VI_HW 2 "register_operand" "=v") ++ (eq:VI_HW (match_dup 0) (match_dup 1)))] ++ "TARGET_VX" ++ "vceqs\t%v2,%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "*vec_cmph_cc" ++ [(set (reg:CCVH CC_REGNUM) ++ (compare:CCVH (match_operand:VI_HW 0 "register_operand" "v") ++ (match_operand:VI_HW 1 "register_operand" "v"))) ++ (set (match_operand:VI_HW 2 "register_operand" "=v") ++ (gt:VI_HW (match_dup 0) (match_dup 1)))] ++ "TARGET_VX" ++ "vchs\t%v2,%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "*vec_cmphl_cc" ++ [(set (reg:CCVHU CC_REGNUM) ++ (compare:CCVHU (match_operand:VI_HW 0 "register_operand" "v") ++ (match_operand:VI_HW 1 "register_operand" "v"))) ++ (set (match_operand:VI_HW 2 "register_operand" "=v") ++ (gtu:VI_HW (match_dup 0) (match_dup 1)))] ++ "TARGET_VX" ++ "vchls\t%v2,%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++;; ++;; Floating point comparesg ++;; ++ ++(define_insn "*vec_cmpv2df_cconly" ++ [(set (reg:VFCMP CC_REGNUM) ++ (compare:VFCMP (match_operand:V2DF 0 "register_operand" "v") ++ (match_operand:V2DF 1 "register_operand" "v"))) ++ (clobber (match_scratch:V2DI 2 "=v"))] ++ "TARGET_VX" ++ "vfcdbs\t%v2,%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++; FIXME: Merge the following 2x3 patterns with VFCMP ++(define_expand "vec_cmpeqv2df_cc" ++ [(parallel ++ [(set (reg:CCVEQ CC_REGNUM) ++ (compare:CCVEQ (match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:V2DF 2 "register_operand" "v"))) ++ (set (match_operand:V2DI 0 
"register_operand" "=v") ++ (eq:V2DI (match_dup 1) (match_dup 2)))]) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (unspec:SI [(reg:CCVEQ CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX") ++ ++(define_expand "vec_cmphv2df_cc" ++ [(parallel ++ [(set (reg:CCVH CC_REGNUM) ++ (compare:CCVH (match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:V2DF 2 "register_operand" "v"))) ++ (set (match_operand:V2DI 0 "register_operand" "=v") ++ (gt:V2DI (match_dup 1) (match_dup 2)))]) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (unspec:SI [(reg:CCVH CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX") ++ ++(define_expand "vec_cmphev2df_cc" ++ [(parallel ++ [(set (reg:CCVFHE CC_REGNUM) ++ (compare:CCVFHE (match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:V2DF 2 "register_operand" "v"))) ++ (set (match_operand:V2DI 0 "register_operand" "=v") ++ (ge:V2DI (match_dup 1) (match_dup 2)))]) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (unspec:SI [(reg:CCVFHE CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX") ++ ++ ++(define_insn "*vec_cmpeqv2df_cc" ++ [(set (reg:CCVEQ CC_REGNUM) ++ (compare:CCVEQ (match_operand:V2DF 0 "register_operand" "v") ++ (match_operand:V2DF 1 "register_operand" "v"))) ++ (set (match_operand:V2DI 2 "register_operand" "=v") ++ (eq:V2DI (match_dup 0) (match_dup 1)))] ++ "TARGET_VX" ++ "vfcedbs\t%v2,%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "*vec_cmphv2df_cc" ++ [(set (reg:CCVH CC_REGNUM) ++ (compare:CCVH (match_operand:V2DF 0 "register_operand" "v") ++ (match_operand:V2DF 1 "register_operand" "v"))) ++ (set (match_operand:V2DI 2 "register_operand" "=v") ++ (gt:V2DI (match_dup 0) (match_dup 1)))] ++ "TARGET_VX" ++ "vfchdbs\t%v2,%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "*vec_cmphev2df_cc" ++ [(set (reg:CCVFHE CC_REGNUM) ++ (compare:CCVFHE (match_operand:V2DF 0 "register_operand" "v") ++ (match_operand:V2DF 1 "register_operand" "v"))) ++ (set (match_operand:V2DI 2 "register_operand" "=v") ++ (ge:V2DI (match_dup 0) (match_dup 1)))] ++ "TARGET_VX" ++ "vfchedbs\t%v2,%v0,%v1" ++ [(set_attr "op_type" "VRR")]) +--- gcc/config.gcc 2016-05-11 14:46:08.298981685 +0200 ++++ gcc/config.gcc 2016-05-11 17:17:32.000000000 +0200 +@@ -452,7 +452,7 @@ s390*-*-*) + cpu_type=s390 + need_64bit_hwint=yes + extra_options="${extra_options} fused-madd.opt" +- extra_headers="s390intrin.h htmintrin.h htmxlintrin.h" ++ extra_headers="s390intrin.h htmintrin.h htmxlintrin.h vecintrin.h" + ;; + # Note the 'l'; we need to be able to match e.g. "shle" or "shl". 
+ sh[123456789lbe]*-*-* | sh-*-*) +@@ -2249,27 +2249,35 @@ rx-*-elf*) + s390-*-linux*) + default_gnu_indirect_function=yes + tm_file="s390/s390.h dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h s390/linux.h" ++ c_target_objs="${c_target_objs} s390-c.o" ++ cxx_target_objs="${cxx_target_objs} s390-c.o" + if test x$enable_targets = xall; then + tmake_file="${tmake_file} s390/t-linux64" + fi ++ tmake_file="${tmake_file} s390/t-s390" + ;; + s390x-*-linux*) + default_gnu_indirect_function=yes + tm_file="s390/s390x.h s390/s390.h dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h s390/linux.h" + tm_p_file=s390/s390-protos.h ++ c_target_objs="${c_target_objs} s390-c.o" ++ cxx_target_objs="${cxx_target_objs} s390-c.o" + md_file=s390/s390.md + extra_modes=s390/s390-modes.def + out_file=s390/s390.c +- tmake_file="${tmake_file} s390/t-linux64" ++ tmake_file="${tmake_file} s390/t-linux64 s390/t-s390" + ;; + s390x-ibm-tpf*) +- tm_file="s390/s390x.h s390/s390.h dbxelf.h elfos.h s390/tpf.h" +- tm_p_file=s390/s390-protos.h +- md_file=s390/s390.md +- extra_modes=s390/s390-modes.def +- out_file=s390/s390.c +- thread_file='tpf' ++ tm_file="s390/s390x.h s390/s390.h dbxelf.h elfos.h s390/tpf.h" ++ tm_p_file=s390/s390-protos.h ++ c_target_objs="${c_target_objs} s390-c.o" ++ cxx_target_objs="${cxx_target_objs} s390-c.o" ++ md_file=s390/s390.md ++ extra_modes=s390/s390-modes.def ++ out_file=s390/s390.c ++ thread_file='tpf' + extra_options="${extra_options} s390/tpf.opt" ++ tmake_file="${tmake_file} s390/t-s390" + ;; + score-*-elf) + gas=yes +@@ -3603,7 +3611,7 @@ case "${target}" in + for which in arch tune; do + eval "val=\$with_$which" + case ${val} in +- "" | g5 | g6 | z900 | z990 | z9-109 | z9-ec | z10 | z196 | zEC12) ++ "" | g5 | g6 | z900 | z990 | z9-109 | z9-ec | z10 | z196 | zEC12 | z13) + # OK + ;; + *) +--- gcc/configure 2016-05-11 14:46:08.719976035 +0200 ++++ gcc/configure 2016-05-11 19:41:14.975813805 +0200 +@@ -26000,6 +26000,42 @@ $as_echo "#define HAVE_LD_PERSONALITY_RE + + fi + ;; ++ s390*-*-*) ++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for .gnu_attribute support" >&5 ++$as_echo_n "checking assembler for .gnu_attribute support... " >&6; } ++if test "${gcc_cv_as_s390_gnu_attribute+set}" = set; then : ++ $as_echo_n "(cached) " >&6 ++else ++ gcc_cv_as_s390_gnu_attribute=no ++ if test $in_tree_gas = yes; then ++ if test $gcc_cv_gas_vers -ge `expr \( \( 2 \* 1000 \) + 18 \) \* 1000 + 0` ++ then gcc_cv_as_s390_gnu_attribute=yes ++fi ++ elif test x$gcc_cv_as != x; then ++ $as_echo '.gnu_attribute 8,1' > conftest.s ++ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 ++ test $ac_status = 0; }; } ++ then ++ gcc_cv_as_s390_gnu_attribute=yes ++ else ++ echo "configure: failed program was" >&5 ++ cat conftest.s >&5 ++ fi ++ rm -f conftest.o conftest.s ++ fi ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_s390_gnu_attribute" >&5 ++$as_echo "$gcc_cv_as_s390_gnu_attribute" >&6; } ++if test $gcc_cv_as_s390_gnu_attribute = yes; then ++ ++$as_echo "#define HAVE_AS_GNU_ATTRIBUTE 1" >>confdefs.h ++ ++fi ++ ;; + esac + + # Mips and HP-UX need the GNU assembler. 
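For orientation between the two configure changes: the assembler probe above exists because the s390 backend records the vector ABI in object attribute 8, as the vec-abi-attr-* tests further down verify. As a hedged illustration (not part of the patch), compiling the function below with -march=z13 should make GCC tag the assembly with ".gnu_attribute 8, 2" (or "8, 1" under -mno-vx), which is exactly the directive the probe has to accept:

/* Passing and returning a 16-byte vector by value is ABI-relevant:
   with -mvx it travels in vector registers, without -mvx it does not,
   so the resulting object is marked with the ABI variant in use.  */
typedef double v2df __attribute__ ((vector_size (16)));

v2df
add (v2df a, v2df b)
{
  return a + b;
}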
+--- gcc/configure.ac 2015-06-18 16:32:50.000000000 +0200 ++++ gcc/configure.ac 2016-05-11 19:34:04.507631160 +0200 +@@ -4207,6 +4207,13 @@ EOF + pointers into PC-relative form.]) + fi + ;; ++ s390*-*-*) ++ gcc_GAS_CHECK_FEATURE([.gnu_attribute support], ++ gcc_cv_as_s390_gnu_attribute, [2,18,0],, ++ [.gnu_attribute 8,1],, ++ [AC_DEFINE(HAVE_AS_GNU_ATTRIBUTE, 1, ++ [Define if your assembler supports .gnu_attribute.])]) ++ ;; + esac + + # Mips and HP-UX need the GNU assembler. +--- gcc/doc/invoke.texi 2016-05-11 14:46:08.615977431 +0200 ++++ gcc/doc/invoke.texi 2016-05-11 19:27:23.065121001 +0200 +@@ -885,6 +885,7 @@ See RS/6000 and PowerPC Options. + -mbackchain -mno-backchain -mpacked-stack -mno-packed-stack @gol + -msmall-exec -mno-small-exec -mmvcle -mno-mvcle @gol + -m64 -m31 -mdebug -mno-debug -mesa -mzarch @gol ++-mhtm -mvx -mzvector @gol + -mtpf-trace -mno-tpf-trace -mfused-madd -mno-fused-madd @gol + -mwarn-framesize -mwarn-dynamicstack -mstack-size -mstack-guard @gol + -mhotpatch=@var{halfwords},@var{halfwords}} +@@ -18596,6 +18597,46 @@ When generating code compliant to the GN + the default is @option{-mesa}. When generating code compliant + to the GNU/Linux for zSeries ABI, the default is @option{-mzarch}. + ++@item -mhtm ++@itemx -mno-htm ++@opindex mhtm ++@opindex mno-htm ++The @option{-mhtm} option enables a set of builtins making use of ++instructions available with the transactional execution facility ++introduced with the IBM zEnterprise EC12 machine generation ++@ref{S/390 System z Built-in Functions}. ++@option{-mhtm} is enabled by default when using @option{-march=zEC12}. ++ ++@item -mvx ++@itemx -mno-vx ++@opindex mvx ++@opindex mno-vx ++When @option{-mvx} is specified, generate code using the instructions ++available with the vector extension facility introduced with the IBM ++z13 machine generation. ++This option changes the ABI for some vector type values with regard to ++alignment and calling conventions. In case vector type values are ++being used in an ABI-relevant context a GAS @samp{.gnu_attribute} ++command will be added to mark the resulting binary with the ABI used. ++@option{-mvx} is enabled by default when using @option{-march=z13}. ++ ++@item -mzvector ++@itemx -mno-zvector ++@opindex mzvector ++@opindex mno-zvector ++The @option{-mzvector} option enables vector language extensions and ++builtins using instructions available with the vector extension ++facility introduced with the IBM z13 machine generation. ++This option adds support for @samp{vector} to be used as a keyword to ++define vector type variables and arguments. @samp{vector} is only ++available when GNU extensions are enabled. It will not be expanded ++when requesting strict standard compliance e.g. with @option{-std=c99}. ++In addition to the GCC low-level builtins @option{-mzvector} enables ++a set of builtins added for compatibility with Altivec-style ++implementations like Power and Cell. In order to make use of these ++builtins the header file @file{vecintrin.h} needs to be included. ++@option{-mzvector} is disabled by default. ++ + @item -mmvcle + @itemx -mno-mvcle + @opindex mmvcle +@@ -18617,7 +18658,8 @@ The default is to not print debug inform + Generate code that runs on @var{cpu-type}, which is the name of a system + representing a certain processor type. Possible values for + @var{cpu-type} are @samp{g5}, @samp{g6}, @samp{z900}, @samp{z990}, +-@samp{z9-109}, @samp{z9-ec}, @samp{z10}, @samp{z196}, and @samp{zEC12}. 
++@samp{z9-109}, @samp{z9-ec}, @samp{z10}, @samp{z196}, @samp{zEC12}, ++and @samp{z13}. + When generating code using the instructions available on z/Architecture, + the default is @option{-march=z900}. Otherwise, the default is + @option{-march=g5}. +--- gcc/doc/tm.texi 2016-05-11 14:46:08.216982786 +0200 ++++ gcc/doc/tm.texi 2016-05-11 15:41:36.000000000 +0200 +@@ -8983,6 +8983,13 @@ register in Dwarf. Otherwise, this hook + If not defined, the default is to return @code{NULL_RTX}. + @end deftypefn + ++@deftypefn {Target Hook} {enum machine_mode} TARGET_DWARF_FRAME_REG_MODE (int @var{regno}) ++Given a register, this hook should return the mode which the ++corresponding Dwarf frame register should have. This is normally ++used to return a smaller mode than the raw mode to prevent call ++clobbered parts of a register altering the frame register size ++@end deftypefn ++ + @deftypefn {Target Hook} void TARGET_INIT_DWARF_REG_SIZES_EXTRA (tree @var{address}) + If some registers are represented in Dwarf-2 unwind information in + multiple pieces, define this hook to fill in information about the +--- gcc/doc/tm.texi.in 2016-05-11 14:46:08.213982826 +0200 ++++ gcc/doc/tm.texi.in 2016-05-11 15:41:36.000000000 +0200 +@@ -8854,6 +8854,8 @@ register in Dwarf. Otherwise, this hook + If not defined, the default is to return @code{NULL_RTX}. + @end deftypefn + ++@hook TARGET_DWARF_FRAME_REG_MODE ++ + @hook TARGET_INIT_DWARF_REG_SIZES_EXTRA + If some registers are represented in Dwarf-2 unwind information in + multiple pieces, define this hook to fill in information about the +--- gcc/dwarf2cfi.c 2013-01-21 16:10:46.000000000 +0100 ++++ gcc/dwarf2cfi.c 2016-05-11 15:41:36.000000000 +0200 +@@ -244,11 +244,9 @@ expand_builtin_init_dwarf_reg_sizes (tre + if (rnum < DWARF_FRAME_REGISTERS) + { + HOST_WIDE_INT offset = rnum * GET_MODE_SIZE (mode); +- enum machine_mode save_mode = reg_raw_mode[i]; + HOST_WIDE_INT size; ++ enum machine_mode save_mode = targetm.dwarf_frame_reg_mode (i); + +- if (HARD_REGNO_CALL_PART_CLOBBERED (i, save_mode)) +- save_mode = choose_hard_reg_mode (i, 1, true); + if (dnum == DWARF_FRAME_RETURN_COLUMN) + { + if (save_mode == VOIDmode) +--- gcc/genattrtab.c 2013-01-21 16:08:23.000000000 +0100 ++++ gcc/genattrtab.c 2016-05-11 17:32:29.000000000 +0200 +@@ -229,7 +229,7 @@ static int *insn_n_alternatives; + /* Stores, for each insn code, a bitmap that has bits on for each possible + alternative. */ + +-static int *insn_alternatives; ++static uint64_t *insn_alternatives; + + /* Used to simplify expressions. */ + +@@ -257,7 +257,7 @@ static char *attr_printf (unsi + ATTRIBUTE_PRINTF_2; + static rtx make_numeric_value (int); + static struct attr_desc *find_attr (const char **, int); +-static rtx mk_attr_alt (int); ++static rtx mk_attr_alt (uint64_t); + static char *next_comma_elt (const char **); + static rtx insert_right_side (enum rtx_code, rtx, rtx, int, int); + static rtx copy_boolean (rtx); +@@ -771,7 +771,7 @@ check_attr_test (rtx exp, int is_const, + if (attr == NULL) + { + if (! 
strcmp (XSTR (exp, 0), "alternative")) +- return mk_attr_alt (1 << atoi (XSTR (exp, 1))); ++ return mk_attr_alt (((uint64_t) 1) << atoi (XSTR (exp, 1))); + else + fatal ("unknown attribute `%s' in EQ_ATTR", XSTR (exp, 0)); + } +@@ -817,7 +817,7 @@ check_attr_test (rtx exp, int is_const, + + name_ptr = XSTR (exp, 1); + while ((p = next_comma_elt (&name_ptr)) != NULL) +- set |= 1 << atoi (p); ++ set |= ((uint64_t) 1) << atoi (p); + + return mk_attr_alt (set); + } +@@ -1292,7 +1292,7 @@ static struct attr_value * + get_attr_value (rtx value, struct attr_desc *attr, int insn_code) + { + struct attr_value *av; +- int num_alt = 0; ++ uint64_t num_alt = 0; + + value = make_canonical (attr, value); + if (compares_alternatives_p (value)) +@@ -1934,7 +1934,7 @@ insert_right_side (enum rtx_code code, r + This routine is passed an expression and either AND or IOR. It returns a + bitmask indicating which alternatives are mentioned within EXP. */ + +-static int ++static uint64_t + compute_alternative_mask (rtx exp, enum rtx_code code) + { + const char *string; +@@ -1965,15 +1965,15 @@ compute_alternative_mask (rtx exp, enum + return 0; + + if (string[1] == 0) +- return 1 << (string[0] - '0'); +- return 1 << atoi (string); ++ return ((uint64_t) 1) << (string[0] - '0'); ++ return ((uint64_t) 1) << atoi (string); + } + + /* Given I, a single-bit mask, return RTX to compare the `alternative' + attribute with the value represented by that bit. */ + + static rtx +-make_alternative_compare (int mask) ++make_alternative_compare (uint64_t mask) + { + return mk_attr_alt (mask); + } +@@ -2472,7 +2472,7 @@ attr_alt_complement (rtx s) + in E. */ + + static rtx +-mk_attr_alt (int e) ++mk_attr_alt (uint64_t e) + { + rtx result = rtx_alloc (EQ_ATTR_ALT); + +@@ -2499,7 +2499,7 @@ simplify_test_exp (rtx exp, int insn_cod + struct attr_value *av; + struct insn_ent *ie; + struct attr_value_list *iv; +- int i; ++ uint64_t i; + rtx newexp = exp; + bool left_alt, right_alt; + +@@ -2779,7 +2779,7 @@ simplify_test_exp (rtx exp, int insn_cod + case EQ_ATTR: + if (XSTR (exp, 0) == alternative_name) + { +- newexp = mk_attr_alt (1 << atoi (XSTR (exp, 1))); ++ newexp = mk_attr_alt (((uint64_t) 1) << atoi (XSTR (exp, 1))); + break; + } + +@@ -5240,10 +5240,11 @@ main (int argc, char **argv) + expand_delays (); + + /* Make `insn_alternatives'. */ +- insn_alternatives = oballocvec (int, insn_code_number); ++ insn_alternatives = oballocvec (uint64_t, insn_code_number); + for (id = defs; id; id = id->next) + if (id->insn_code >= 0) +- insn_alternatives[id->insn_code] = (1 << id->num_alternatives) - 1; ++ insn_alternatives[id->insn_code] ++ = (((uint64_t) 1) << id->num_alternatives) - 1; + + /* Make `insn_n_alternatives'. */ + insn_n_alternatives = oballocvec (int, insn_code_number); +--- gcc/optabs.c 2014-05-15 10:46:12.000000000 +0200 ++++ gcc/optabs.c 2016-05-11 15:53:11.000000000 +0200 +@@ -6659,11 +6659,11 @@ expand_vec_perm (enum machine_mode mode, + enum machine_mode selmode = GET_MODE (sel); + if (u == 2) + sel = expand_simple_binop (selmode, PLUS, sel, sel, +- sel, 0, OPTAB_DIRECT); ++ NULL, 0, OPTAB_DIRECT); + else + sel = expand_simple_binop (selmode, ASHIFT, sel, + GEN_INT (exact_log2 (u)), +- sel, 0, OPTAB_DIRECT); ++ NULL, 0, OPTAB_DIRECT); + gcc_assert (sel != NULL); + + /* Broadcast the low byte each element into each of its bytes. */ +--- gcc/recog.h 2013-09-09 19:16:08.000000000 +0200 ++++ gcc/recog.h 2016-05-11 15:52:48.000000000 +0200 +@@ -21,7 +21,7 @@ along with GCC; see the file COPYING3. 
+ #define GCC_RECOG_H + + /* Random number that should be large enough for all purposes. */ +-#define MAX_RECOG_ALTERNATIVES 30 ++#define MAX_RECOG_ALTERNATIVES 35 + + /* Types of operands. */ + enum op_type { +--- gcc/target.def 2013-03-04 12:46:23.000000000 +0100 ++++ gcc/target.def 2016-05-11 15:41:36.000000000 +0200 +@@ -1834,6 +1834,17 @@ DEFHOOK + rtx, (rtx reg), + hook_rtx_rtx_null) + ++/* Given a register return the mode of the corresponding DWARF frame ++ register. */ ++DEFHOOK ++(dwarf_frame_reg_mode, ++ "Given a register, this hook should return the mode which the\n\ ++corresponding Dwarf frame register should have. This is normally\n\ ++used to return a smaller mode than the raw mode to prevent call\n\ ++clobbered parts of a register altering the frame register size", ++ enum machine_mode, (int regno), ++ default_dwarf_frame_reg_mode) ++ + /* If expand_builtin_init_dwarf_reg_sizes needs to fill in table + entries not corresponding directly to registers below + FIRST_PSEUDO_REGISTER, this hook should generate the necessary +--- gcc/targhooks.c 2013-01-21 16:02:59.000000000 +0100 ++++ gcc/targhooks.c 2016-05-11 15:41:36.000000000 +0200 +@@ -1411,6 +1411,19 @@ default_debug_unwind_info (void) + return UI_NONE; + } + ++/* Determine the correct mode for a Dwarf frame register that represents ++ register REGNO. */ ++ ++enum machine_mode ++default_dwarf_frame_reg_mode (int regno) ++{ ++ enum machine_mode save_mode = reg_raw_mode[regno]; ++ ++ if (HARD_REGNO_CALL_PART_CLOBBERED (regno, save_mode)) ++ save_mode = choose_hard_reg_mode (regno, 1, true); ++ return save_mode; ++} ++ + /* To be used by targets where reg_raw_mode doesn't return the right + mode for registers used in apply_builtin_return and apply_builtin_arg. */ + +--- gcc/targhooks.h 2013-01-21 16:03:00.000000000 +0100 ++++ gcc/targhooks.h 2016-05-11 15:42:21.000000000 +0200 +@@ -186,6 +186,7 @@ extern int default_label_align_max_skip + extern int default_jump_align_max_skip (rtx); + extern section * default_function_section(tree decl, enum node_frequency freq, + bool startup, bool exit); ++extern enum machine_mode default_dwarf_frame_reg_mode (int); + extern enum machine_mode default_get_reg_raw_mode(int); + + extern void *default_get_pch_validity (size_t *); +--- gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11b.c 2012-12-13 11:28:46.000000000 +0100 ++++ gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11b.c 2016-05-11 17:30:16.000000000 +0200 +@@ -1,5 +1,6 @@ + /* { dg-do run { target vect_cmdline_needed } } */ + /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */ ++/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -mno-vx" { target { s390*-*-* } } } */ + /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -mno-sse" { target { i?86-*-* x86_64-*-* } } } */ + + #include +--- gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c 2012-12-13 11:28:46.000000000 +0100 ++++ gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c 2016-05-11 17:30:16.000000000 +0200 +@@ -1,5 +1,6 @@ + /* { dg-do run { target vect_cmdline_needed } } */ + /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */ ++/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -mno-vx" { target { s390*-*-* } } } */ + /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -mno-sse" { target { i?86-*-* x86_64-*-* } } } */ + + #include +--- gcc/testsuite/gcc.target/s390/htm-builtins-z13-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/htm-builtins-z13-1.c 2016-05-11 17:34:03.000000000 +0200 +@@ -0,0 +1,34 
@@ ++/* Verify if VRs are saved and restored. */ ++ ++/* { dg-do run } */ ++/* { dg-require-effective-target vector } */ ++/* { dg-options "-O3 -march=z13 -mzarch" } */ ++ ++typedef int __attribute__((vector_size(16))) v4si; ++ ++v4si __attribute__((noinline)) ++foo (v4si a) ++{ ++ a += (v4si){ 1, 1, 1, 1 }; ++ if (__builtin_tbegin (0) == 0) ++ { ++ a += (v4si){ 1, 1, 1, 1 }; ++ __builtin_tabort (256); ++ __builtin_tend (); ++ } ++ else ++ a -= (v4si){ 1, 1, 1, 1 }; ++ ++ return a; ++} ++ ++int ++main () ++{ ++ v4si a = (v4si){ 0, 0, 0, 0 }; ++ ++ a = foo (a); ++ ++ if (a[0] != 0) ++ __builtin_abort (); ++} +--- gcc/testsuite/gcc.target/s390/s390.exp 2015-06-18 16:32:12.000000000 +0200 ++++ gcc/testsuite/gcc.target/s390/s390.exp 2016-05-11 17:12:20.000000000 +0200 +@@ -37,6 +37,21 @@ proc check_effective_target_htm { } { + }] "-march=zEC12 -mzarch" ] } { return 0 } else { return 1 } + } + ++# Return 1 if vector (va - vector add) instructions are understood by ++# the assembler and can be executed. This also covers checking for ++# the VX kernel feature. A kernel without that feature does not ++# enable the vector facility and the following check will die with a ++# signal. ++proc check_effective_target_vector { } { ++ if { ![check_runtime s390_check_vector [subst { ++ int main (void) ++ { ++ asm ("va %%v24, %%v26, %%v28, 3" : : : "v24", "v26", "v28"); ++ return 0; ++ } ++ }] "-march=z13 -mzarch" ] } { return 0 } else { return 1 } ++} ++ + # If a testcase doesn't have special options, use these. + global DEFAULT_CFLAGS + if ![info exists DEFAULT_CFLAGS] then { +@@ -59,5 +74,8 @@ set-torture-options $HOTPATCH_TEST_OPTS + gcc-dg-runtest [lsort [glob -nocomplain $hotpatch_tests]] $DEFAULT_CFLAGS + torture-finish + ++dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*vector*/*.\[cS\]]] \ ++ "" $DEFAULT_CFLAGS ++ + # All done. + dg-finish +--- gcc/testsuite/gcc.target/s390/vector/int128-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/int128-1.c 2016-05-11 18:10:56.000000000 +0200 +@@ -0,0 +1,47 @@ ++/* Check that vaq/vsq are used for int128 operations. */ ++ ++/* { dg-do compile { target { lp64 } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++ ++const __int128 c = (__int128)0x0123456789abcd55 + ((__int128)7 << 64); ++ ++ ++__int128 ++addreg(__int128 a, __int128 b) ++{ ++ return a + b; ++} ++ ++__int128 ++addconst(__int128 a) ++{ ++ return a + c; ++} ++ ++__int128 ++addmem(__int128 *a, __int128_t *b) ++{ ++ return *a + *b; ++} ++ ++__int128 ++subreg(__int128 a, __int128 b) ++{ ++ return a - b; ++} ++ ++__int128 ++subconst(__int128 a) ++{ ++ return a - c; /* This becomes vaq as well. */ ++} ++ ++__int128 ++submem(__int128 *a, __int128_t *b) ++{ ++ return *a - *b; ++} ++ ++/* { dg-final { scan-assembler-times "vaq" 4 } } */ ++/* { dg-final { scan-assembler-times "vsq" 2 } } */ +--- gcc/testsuite/gcc.target/s390/vector/stpcpy-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/stpcpy-1.c 2016-05-11 18:11:22.000000000 +0200 +@@ -0,0 +1,100 @@ ++/* The z13 stpcpy implementation plays some alignment tricks for good ++ performance. This test tries to make sure it works correctly and ++ does not access bytes beyond the source and destination ++ strings. 
*/ ++ ++/* { dg-do run } */ ++/* { dg-require-effective-target vector } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++#include <stdio.h> ++#include <sys/mman.h> ++ ++#define PAGE_SIZE 4096 ++ ++struct { ++ char unused[PAGE_SIZE - 32]; ++ char m32[15]; /* page bndry - 32 */ ++ char m17[1]; ++ char m16[1]; ++ char m15[14]; ++ char m1[1]; ++ char next_page[PAGE_SIZE]; ++} s, d __attribute__((aligned(PAGE_SIZE))); ++ ++char *__attribute__((noinline)) ++my_stpcpy(char *dest, const char *src) ++{ ++ return __builtin_stpcpy (dest, src); ++} ++ ++void __attribute__ ((noinline)) ++check (char *dest, char *src, size_t len) ++{ ++ char *result; ++ ++ result = my_stpcpy (dest, src); ++ if (result != dest + len) ++ __builtin_abort (); ++ if (__builtin_memcmp (src, dest, len) != 0) ++ __builtin_abort (); ++} ++ ++int ++main () ++{ ++ char *src[5] = { s.m32, s.m17, s.m16, s.m15, s.m1 }; ++ char *dst[5] = { d.m32, d.m17, d.m16, d.m15, d.m1 }; ++ int len[8] = { 33, 32, 31, 17, 16, 15, 1, 0 }; ++ int i, j, k; ++ char backup; ++ ++ for (i = 0; i < sizeof (s); i++) ++ ((char*)&s)[i] = i % 26 + 97; ++ ++ for (i = 0; i < 5; i++) ++ for (j = 0; j < 5; j++) ++ for (k = 0; k < 8; k++) ++ { ++ backup = src[j][len[k]]; ++ src[j][len[k]] = 0; ++ __builtin_memset (&d, 0, sizeof (d)); ++ check (dst[i], src[j], len[k]); ++ src[j][len[k]] = backup; ++ } ++ ++ /* Make all source strings end before the page boundary. */ ++ backup = s.m1[0]; ++ s.m1[0] = 0; ++ ++ if (mprotect (&s.next_page, PAGE_SIZE, PROT_NONE) == -1) ++ perror ("mprotect src"); ++ ++ for (i = 0; i < 5; i++) ++ for (j = 0; j < 5; j++) ++ check (dst[i], src[j], ++ PAGE_SIZE - ((unsigned long)src[j] & ((1UL << 12) - 1)) - 1); ++ ++ if (mprotect (&s.next_page, PAGE_SIZE, PROT_READ | PROT_WRITE) == -1) ++ perror ("mprotect src"); ++ ++ s.m1[0] = backup; ++ ++ if (mprotect (&d.next_page, PAGE_SIZE, PROT_NONE) == -1) ++ perror ("mprotect dst"); ++ ++ for (i = 0; i < 5; i++) ++ for (j = 0; j < 5; j++) ++ { ++ int len = PAGE_SIZE - ((unsigned long)dst[i] & ((1UL << 12) - 1)) - 1; ++ char backup = src[j][len]; ++ ++ src[j][len] = 0; ++ __builtin_memset (&d, 0, ++ (unsigned long)&d.next_page - (unsigned long)&d); ++ check (dst[i], src[j], len); ++ src[j][len] = backup; ++ } ++ ++ return 0; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-abi-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-1.c 2016-05-11 17:32:39.000000000 +0200 +@@ -0,0 +1,18 @@ ++/* Check calling convention in the vector ABI. */ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* Make sure the last argument is fetched from the argument overflow area. */ ++/* { dg-final { scan-assembler "vl\t%v\[0-9\]*,160\\(%r15\\)" { target lp64 } } } */ ++/* { dg-final { scan-assembler "vl\t%v\[0-9\]*,96\\(%r15\\)" { target ilp32 } } } */ ++/* { dg-final { scan-assembler "gnu_attribute 8, 2" } } */ ++ ++typedef double v2df __attribute__((vector_size(16))); ++ ++v2df ++add (v2df a, v2df b, v2df c, v2df d, ++ v2df e, v2df f, v2df g, v2df h, v2df i) ++{ ++ return a + b + c + d + e + f + g + h + i; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-abi-2.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-2.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,15 @@ ++/* Check calling convention in the vector ABI.
*/ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* This needs to be v24 = v24 * v26 + v28 */ ++/* { dg-final { scan-assembler "vfmadb\t%v24,%v24,%v26,%v28" } } */ ++ ++typedef double v2df __attribute__((vector_size(16))); ++ ++v2df ++madd (v2df a, v2df b, v2df c) ++{ ++ return a * b + c; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-abi-3.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-3.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,101 @@ ++/* Check calling convention in the vector ABI regarding vector like structs. */ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* addA */ ++/* { dg-final { scan-assembler-times "vfadb\t%v24,%v24,%v26" 1 } } */ ++ ++/* addB and addE */ ++/* { dg-final { scan-assembler-times "vah\t%v24,%v\[0-9\]*,%v\[0-9\]*" 2 } } */ ++ ++/* addC */ ++/* { dg-final { scan-assembler-times "vag\t%v24,%v\[0-9\]*,%v\[0-9\]*" 1 } } */ ++ ++/* addB and addC are expected to read the arguments via pointers in r2 and r3 */ ++/* { dg-final { scan-assembler-times "vl\t%v\[0-9\]*,0\\(%r2\\)" 2 } } */ ++/* { dg-final { scan-assembler-times "vl\t%v\[0-9\]*,0\\(%r3\\)" 2 } } */ ++ ++/* addD */ ++/* { dg-final { scan-assembler-times "vaf\t%v24,%v24,%v26" 1 } } */ ++ ++/* addE */ ++/* { dg-final { scan-assembler-times "vah\t%v24,%v24,%v26" 1 } } */ ++ ++/* addF */ ++/* { dg-final { scan-assembler-times "vab\t%v24,%v\[0-9\]*,%v\[0-9\]*" 1 } } */ ++/* { dg-final { scan-assembler-times "srlg\t%r\[0-9\]*,%r2,32" 1 { target lp64 } } } */ ++/* { dg-final { scan-assembler-times "srlg\t%r\[0-9\]*,%r3,32" 1 { target lp64 } } } */ ++/* { dg-final { scan-assembler-times "llgfr\t%.*,%r2" 1 { target { ! lp64 } } } } */ ++/* { dg-final { scan-assembler-times "llgfr\t%.*,%r4" 1 { target { ! lp64 } } } } */ ++ ++ ++typedef double v2df __attribute__((vector_size(16))); ++typedef long long v2di __attribute__((vector_size(16))); ++typedef int v4si __attribute__((vector_size(16))); ++typedef short v8hi __attribute__((vector_size(16))); ++ ++typedef short v2hi __attribute__((vector_size(4))); ++typedef char v4qi __attribute__((vector_size(4))); ++ ++/* Vector like structs are passed in VRs. */ ++struct A { v2df a; }; ++ ++v2df ++addA (struct A a, struct A b) ++{ ++ return a.a + b.a; ++} ++ ++/* Only single element vectors qualify as vector type parms. This one ++ is passed as a struct. Since it is bigger than 8 bytes it is passed ++ on the stack with the reference being put into r2/r3. */ ++struct B { v8hi a; char b;}; ++ ++v8hi ++addB (struct B a, struct B b) ++{ ++ return a.a + b.a; ++} ++ ++/* The resulting struct is bigger than 16 bytes and therefore passed ++ on the stack with the references residing in r2/r3. */ ++struct C { v2di __attribute__((aligned(32))) a; }; ++ ++v2di ++addC (struct C a, struct C b) ++{ ++ return a.a + b.a; ++} ++ ++/* The attribute here does not have any effect. So this struct stays ++ vector like and hence is passed in a VR. */ ++struct D { v4si __attribute__((aligned(16))) a; }; ++ ++v4si ++addD (struct D a, struct D b) ++{ ++ return a.a + b.a; ++} ++ ++ ++/* Smaller vectors are passed in vector registers. This also applies ++ for vector like structs. */ ++struct E { v2hi a; }; ++ ++v2hi ++addE (struct E a, struct E b) ++{ ++ return a.a + b.a; ++} ++ ++/* This struct is not passed in VRs because of padding. But since it ++ fits in a GPR and has a power of two size, it is passed in ++ GPRs.
++struct F { v4qi __attribute__((aligned(8))) a; };
++
++v4qi
++addF (struct F a, struct F b)
++{
++  return a.a + b.a;
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-4.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-4.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,19 @@
++/* Check calling convention in the vector ABI.  Smaller vectors need to
++   be placed left-justified in the stack slot.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* { dg-final { scan-assembler-times "lde\t%.*,160\\\(%r15\\\)" 1 { target lp64 } } } */
++/* { dg-final { scan-assembler-times "lde\t%.*,168\\\(%r15\\\)" 1 { target lp64 } } } */
++/* { dg-final { scan-assembler-times "lde\t%.*,96\\\(%r15\\\)" 1 { target { ! lp64 } } } } */
++/* { dg-final { scan-assembler-times "lde\t%.*,100\\\(%r15\\\)" 1 { target { ! lp64 } } } } */
++
++typedef char __attribute__((vector_size(4))) v4qi;
++
++v4qi
++foo (v4qi a, v4qi b, v4qi c, v4qi d, v4qi e,
++     v4qi f, v4qi g, v4qi h, v4qi i, v4qi j)
++{
++  return (a + b + c + d + e + f + g + h + i + j);
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-align-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-align-1.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,48 @@
++/* Check alignment convention in the vector ABI.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++#include <stddef.h>
++
++/* Vector types get an 8 byte alignment.  */
++typedef double v2df __attribute__((vector_size(16)));
++typedef struct
++{
++  char a;
++  v2df b;
++} A;
++char c1[offsetof (A, b) == 8 ? 0 : -1];
++
++/* Smaller vectors allow for smaller alignments.  */
++typedef char v4qi __attribute__((vector_size(4)));
++typedef struct
++{
++  char a;
++  v4qi b;
++} B;
++char c2[offsetof (B, b) == 4 ? 0 : -1];
++
++
++typedef double v4df __attribute__((vector_size(32)));
++typedef struct
++{
++  char a;
++  v4df b;
++} C;
++char c3[offsetof (C, b) == 8 ? 0 : -1];
++
++/* However, we allow the programmer to choose a bigger alignment.  */
++typedef struct
++{
++  char a;
++  v2df b __attribute__((aligned(16)));
++} D;
++char c4[offsetof (D, b) == 16 ? 0 : -1];
++
++typedef struct
++{
++  char a;
++  v2df b;
++} __attribute__((packed)) E;
++char c5[offsetof (E, b) == 1 ? 0 : -1];
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-1.c	2016-05-11 17:32:39.000000000 +0200
+@@ -0,0 +1,18 @@
++/* Check calling convention in the vector ABI.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13 -mno-vx" } */
++
++/* The function passes arguments whose calling conventions change with
++   -mvx/-mno-vx.  In that case GCC has to emit the ABI attribute to
++   allow GDB and Binutils to detect this.  */
++/* { dg-final { scan-assembler "gnu_attribute 8, 1" } } */
++
++typedef double v2df __attribute__((vector_size(16)));
++
++v2df
++add (v2df a, v2df b, v2df c, v2df d,
++     v2df e, v2df f, v2df g, v2df h, v2df i)
++{
++  return a + b + c + d + e + f + g + h + i;
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-2.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-2.c	2016-05-11 17:32:39.000000000 +0200
+@@ -0,0 +1,53 @@
++/* Check calling convention in the vector ABI.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* No abi attribute should be emitted when nothing relevant happened.  */
++/* { dg-final { scan-assembler-not "gnu_attribute" } } */
++
++#include <stdarg.h>
++
++/* Local use is ok.  */
++
++typedef int v4si __attribute__((vector_size(16)));
++
++static
++v4si __attribute__((__noinline__))
++foo (v4si a)
++{
++  return a + (v4si){ 1, 2, 3, 4 };
++}
++
++int
++bar (int a)
++{
++  return foo ((v4si){ 1, 1, 1, 1 })[1];
++}
++
++/* Big vector type only used as function argument and return value
++   without being a struct/union member.  The alignment change is not
++   relevant here.  */
++
++typedef double v4df __attribute__((vector_size(32)));
++
++v4df
++add (v4df a, v4df b, v4df c, v4df d,
++     v4df e, v4df f, v4df g, v4df h, v4df i)
++{
++  return a + b + c + d + e + f + g + h + i;
++}
++
++double
++bar2 (int n, ...)
++{
++  double ret;
++  v4df a;
++  va_list va;
++
++  va_start (va, n);
++  ret = va_arg (va, v4df)[2];
++  va_end (va);
++
++  return ret;
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-3.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-3.c	2016-05-11 17:32:39.000000000 +0200
+@@ -0,0 +1,18 @@
++/* Check calling convention in the vector ABI.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* { dg-final { scan-assembler "gnu_attribute 8, 2" } } */
++
++typedef double v4df __attribute__((vector_size(32)));
++typedef struct { v4df a; } s;
++
++s
++add (v4df a, v4df b, v4df c, v4df d,
++     v4df e, v4df f, v4df g, v4df h, v4df i)
++{
++  s t;
++  t.a = a + b + c + d + e + f + g + h + i;
++  return t;
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-4.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-4.c	2016-05-11 17:32:39.000000000 +0200
+@@ -0,0 +1,17 @@
++/* Check calling convention in the vector ABI.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* { dg-final { scan-assembler "gnu_attribute 8, 2" } } */
++
++typedef int __attribute__((vector_size(16))) v4si;
++
++extern void bar (v4si);
++
++void
++foo (int a)
++{
++  v4si b = (v4si){ a, a, a, a };
++  bar (b);
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-5.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-5.c	2016-05-11 17:32:39.000000000 +0200
+@@ -0,0 +1,19 @@
++/* Check calling convention in the vector ABI.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* { dg-final { scan-assembler "gnu_attribute 8, 2" } } */
++
++#include <stdarg.h>
++
++typedef int __attribute__((vector_size(16))) v4si;
++
++extern void bar (int, ...);
++
++void
++foo (int a)
++{
++  v4si b = (v4si){ a, a, a, a };
++  bar (1, b);
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-6.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-6.c	2016-05-11 17:32:39.000000000 +0200
+@@ -0,0 +1,24 @@
++/* Check calling convention in the vector ABI.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* { dg-final { scan-assembler "gnu_attribute 8, 2" } } */
++
++#include <stdarg.h>
++
++typedef int __attribute__((vector_size(16))) v4si;
++
++int
++bar (int n, ...)
++{
++  int ret;
++  v4si a;
++  va_list va;
++
++  va_start (va, n);
++  ret = va_arg (va, v4si)[2];
++  va_end (va);
++
++  return ret;
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-single-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-single-1.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,24 @@
++/* Check calling convention in the vector ABI for single element vectors.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* { dg-final { scan-assembler-times "vlr\t%v24,%v26" 7 } } */
++
++typedef int __attribute__((vector_size(16))) v4si;
++
++typedef char __attribute__((vector_size(1))) v1qi;
++typedef short int __attribute__((vector_size(2))) v1hi;
++typedef int __attribute__((vector_size(4))) v1si;
++typedef long long __attribute__((vector_size(8))) v1di;
++typedef float __attribute__((vector_size(4))) v1sf;
++typedef double __attribute__((vector_size(8))) v1df;
++typedef long double __attribute__((vector_size(16))) v1tf;
++
++v1qi foo1 (v4si a, v1qi b) { return b; }
++v1hi foo2 (v4si a, v1hi b) { return b; }
++v1si foo3 (v4si a, v1si b) { return b; }
++v1di foo4 (v4si a, v1di b) { return b; }
++v1sf foo5 (v4si a, v1sf b) { return b; }
++v1df foo6 (v4si a, v1df b) { return b; }
++v1tf foo7 (v4si a, v1tf b) { return b; }
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-single-2.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-single-2.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,12 @@
++/* Check calling convention in the vector ABI for single element vectors.  */
++
++/* { dg-do compile { target { lp64 } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* { dg-final { scan-assembler-times "vlr\t%v24,%v26" 1 } } */
++
++typedef int __attribute__((vector_size(16))) v4si;
++
++typedef __int128_t __attribute__((vector_size(16))) v1ti;
++
++v1ti foo (v4si a, v1ti b) { return b; }
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-struct-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-struct-1.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,37 @@
++/* Check calling convention in the vector ABI.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* c.i and c.j are passed by reference since a struct with two
++   elements is not a vector type argument.  */
++/* { dg-final { scan-assembler "ld\t%v\[0-9\]*,0\\(%r3\\)" } } */
++/* { dg-final { scan-assembler "ld\t%v\[0-9\]*,8\\(%r3\\)" } } */
++
++/* just_v2si is passed in a vector reg if it is an incoming arg.
++   However, as return value it is passed via hidden first pointer
++   argument.  */
++/* { dg-final { scan-assembler ".*st.*\t%v\[0-9\]*,0\\(%r2\\)" } } */
++
++/* { dg-final { scan-assembler "gnu_attribute 8, 2" } } */
++
++typedef int __attribute__ ((vector_size(8))) v2si;
++
++struct just_v2si
++{
++  v2si i;
++};
++
++struct two_v2si
++{
++  v2si i, j;
++};
++
++struct just_v2si
++add_structvecs (v2si a, struct just_v2si b, struct two_v2si c)
++{
++  struct just_v2si res;
++
++  res.i = a + b.i + c.i + c.j;
++  return res;
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-vararg-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-vararg-1.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,60 @@
++/* Check calling convention with variable argument lists in the vector
++   ABI.  */
++
++/* { dg-do run { target { s390*-*-* } } } */
++/* { dg-require-effective-target vector } */
++/* { dg-options "-O3 -mzarch -march=z13 --save-temps" } */
++
++/* Make sure arguments are fetched from the argument overflow area.  */
++/* { dg-final { scan-assembler "vl\t%v\[0-9\]*,352\\(%r15\\)" { target lp64 } } } */
++/* { dg-final { scan-assembler "ld\t%v\[0-9\]*,368\\(%r15\\)" { target lp64 } } } */
++/* { dg-final { scan-assembler "vl\t%v\[0-9\]*,376\\(%r15\\)" { target lp64 } } } */
++/* { dg-final { scan-assembler "ld\t%v\[0-9\]*,392\\(%r15\\)" { target lp64 } } } */
++
++/* { dg-final { scan-assembler "vl\t%v\[0-9\]*,208\\(%r15\\)" { target ilp32 } } } */
++/* { dg-final { scan-assembler "ld\t%v\[0-9\]*,224\\(%r15\\)" { target ilp32 } } } */
++/* { dg-final { scan-assembler "vl\t%v\[0-9\]*,232\\(%r15\\)" { target ilp32 } } } */
++/* { dg-final { scan-assembler "ld\t%v\[0-9\]*,248\\(%r15\\)" { target ilp32 } } } */
++
++/* { dg-final { cleanup-saved-temps } } */
++
++#include <stdarg.h>
++
++extern void abort (void);
++
++typedef long long v2di __attribute__((vector_size(16)));
++typedef int v2si __attribute__((vector_size(8)));
++
++v2di __attribute__((noinline))
++add (int a, ...)
++{
++  int i;
++  va_list va;
++  v2di di_result = { 0, 0 };
++  v2si si_result = (v2si){ 0, 0 };
++
++  va_start (va, a);
++
++  di_result += va_arg (va, v2di);
++  si_result += va_arg (va, v2si);
++  di_result += va_arg (va, v2di);
++  si_result += va_arg (va, v2si);
++
++  va_end (va);
++
++  di_result[0] += si_result[0];
++  di_result[1] += si_result[1];
++
++  return di_result;
++}
++
++int
++main ()
++{
++  v2di r = add (4, (v2di){ 11, 21 }, (v2si){ 12, 22 }, (v2di){ 13, 23 }, (v2si){ 14, 24 });
++
++  if (r[0] != 50 || r[1] != 90)
++    abort ();
++
++  return 0;
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-vararg-2.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-vararg-2.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,18 @@
++/* Check calling convention in the vector ABI.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13 -Wno-implicit-function-declaration" } */
++
++
++typedef long v2di __attribute__((vector_size(16)));
++extern v2di foo1 (int, v2di);
++extern v2di foo2 (int, int);
++extern v2di foo3 (int, ...);
++
++v2di bar1 (int a) { return foo2 (1, a); }
++v2di bar2 (int a) { return foo3 (1, a); }
++v2di bar3 (v2di a) { return foo1 (1, a); }
++v2di bar4 (v2di a) { return foo3 (1, a); }
++
++int bar5 (int a) { return foo4 (1, a); }
++int bar6 (v2di a) { return foo4 (1, a); } /* { dg-error "Vector argument passed to unprototyped function" } */
+--- gcc/testsuite/gcc.target/s390/vector/vec-clobber-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-clobber-1.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,38 @@
++/* { dg-do run { target { s390*-*-* } } } */
++/* { dg-require-effective-target vector } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* For FP zero checks we use the ltdbr instruction.  Since this is a
++   load and test it actually writes the FPR.  Whenever an FPR gets
++   written the rest of the overlapping VR is clobbered.  */
++typedef double __attribute__((vector_size(16))) v2df;
++
++v2df a = { 1.0, 2.0 };
++
++extern void abort (void);
++
++void __attribute__((noinline))
++foo (v2df a)
++{
++  v2df b = { 1.0, 3.0 };
++
++  b -= a;
++
++  /* Take away all the VRs not overlapping with FPRs.
*/ ++ asm volatile ("" : : : ++ "v16","v17","v18","v19", ++ "v20","v21","v22","v23", ++ "v24","v25","v26","v27", ++ "v28","v29","v30","v31"); ++ if (b[0] != 0.0) /* ltdbr */ ++ abort (); ++ if (b[1] != 1.0) ++ abort (); ++} ++ ++int ++main () ++{ ++ foo (a); ++ return 0; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-cmp-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-cmp-1.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,45 @@ ++/* Check that the proper unsigned compare instructions are being generated. */ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* { dg-final { scan-assembler-times "vchlb" 1 } } */ ++/* { dg-final { scan-assembler-times "vchlh" 1 } } */ ++/* { dg-final { scan-assembler-times "vchlf" 1 } } */ ++/* { dg-final { scan-assembler-times "vchlg" 1 } } */ ++ ++typedef __attribute__((vector_size(16))) signed char v16qi; ++typedef __attribute__((vector_size(16))) unsigned char uv16qi; ++ ++typedef __attribute__((vector_size(16))) signed short v8hi; ++typedef __attribute__((vector_size(16))) unsigned short uv8hi; ++ ++typedef __attribute__((vector_size(16))) signed int v4si; ++typedef __attribute__((vector_size(16))) unsigned int uv4si; ++ ++typedef __attribute__((vector_size(16))) signed long long v2di; ++typedef __attribute__((vector_size(16))) unsigned long long uv2di; ++ ++v16qi ++f (uv16qi a, uv16qi b) ++{ ++ return a > b; ++} ++ ++v8hi ++g (uv8hi a, uv8hi b) ++{ ++ return a > b; ++} ++ ++v4si ++h (uv4si a, uv4si b) ++{ ++ return a > b; ++} ++ ++v2di ++i (uv2di a, uv2di b) ++{ ++ return a > b; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-cmp-2.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-cmp-2.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,38 @@ ++/* Check that the proper signed compare instructions are being generated. 
*/ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* { dg-final { scan-assembler-times "vchb" 1 } } */ ++/* { dg-final { scan-assembler-times "vchh" 1 } } */ ++/* { dg-final { scan-assembler-times "vchf" 1 } } */ ++/* { dg-final { scan-assembler-times "vchg" 1 } } */ ++ ++typedef __attribute__((vector_size(16))) signed char v16qi; ++typedef __attribute__((vector_size(16))) signed short v8hi; ++typedef __attribute__((vector_size(16))) signed int v4si; ++typedef __attribute__((vector_size(16))) signed long long v2di; ++ ++v16qi ++f (v16qi a, v16qi b) ++{ ++ return a > b; ++} ++ ++v8hi ++g (v8hi a, v8hi b) ++{ ++ return a > b; ++} ++ ++v4si ++h (v4si a, v4si b) ++{ ++ return a > b; ++} ++ ++v2di ++i (v2di a, v2di b) ++{ ++ return a > b; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-dbl-math-compile-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-dbl-math-compile-1.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,48 @@ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13 --save-temps" } */ ++ ++typedef __attribute__((vector_size(16))) double v2df; ++ ++v2df ++adddbl (v2df a, v2df b) ++{ ++ return a + b; ++} ++/* { dg-final { scan-assembler-times "vfadb" 1 } } */ ++ ++v2df ++subdbl (v2df a, v2df b) ++{ ++ return a - b; ++} ++/* { dg-final { scan-assembler-times "vfsdb" 1 } } */ ++ ++v2df ++muldbl (v2df a, v2df b) ++{ ++ return a * b; ++} ++/* { dg-final { scan-assembler-times "vfmdb" 1 } } */ ++ ++v2df ++divdbl (v2df a, v2df b) ++{ ++ return a / b; ++} ++/* { dg-final { scan-assembler-times "vfd" 1 } } */ ++ ++v2df ++fmadbl (v2df a, v2df b, v2df c) ++{ ++ return a * b + c; ++} ++/* { dg-final { scan-assembler-times "vfma" 1 } } */ ++ ++v2df ++fmsdbl (v2df a, v2df b, v2df c) ++{ ++ return a * b - c; ++} ++/* { dg-final { scan-assembler-times "vfms" 1 } } */ ++ ++/* { dg-final { cleanup-saved-temps } } */ +--- gcc/testsuite/gcc.target/s390/vector/vec-genbytemask-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-genbytemask-1.c 2016-05-11 17:38:00.000000000 +0200 +@@ -0,0 +1,83 @@ ++/* { dg-do run } */ ++/* { dg-options "-O3 -mzarch -march=z13 --save-temps" } */ ++/* { dg-require-effective-target vector } */ ++/* { dg-require-effective-target int128 } */ ++ ++typedef unsigned char uv16qi __attribute__((vector_size(16))); ++typedef unsigned short uv8hi __attribute__((vector_size(16))); ++typedef unsigned int uv4si __attribute__((vector_size(16))); ++typedef unsigned long long uv2di __attribute__((vector_size(16))); ++typedef unsigned __int128 uv1ti __attribute__((vector_size(16))); ++ ++uv2di __attribute__((noinline)) ++foo1 () ++{ ++ return (uv2di){ 0xff00ff00ff00ff00, 0x00ff00ff00ff00ff }; ++} ++/* { dg-final { scan-assembler-times "vgbm\t%v24,43605" 1 } } */ ++ ++uv4si __attribute__((noinline)) ++foo2 () ++{ ++ return (uv4si){ 0xff0000ff, 0x0000ffff, 0xffff0000, 0x00ffff00 }; ++} ++/* { dg-final { scan-assembler-times "vgbm\t%v24,37830" 1 } } */ ++ ++uv8hi __attribute__((noinline)) ++foo3a () ++{ ++ return (uv8hi){ 0xff00, 0xff00, 0xff00, 0xff00, ++ 0xff00, 0xff00, 0xff00, 0xff00 }; ++} ++/* { dg-final { scan-assembler-times "vgbm\t%v24,43690" 1 } } */ ++ ++uv8hi __attribute__((noinline)) ++foo3b () ++{ ++ return (uv8hi){ 0x00ff, 0x00ff, 0x00ff, 0x00ff, ++ 0x00ff, 0x00ff, 0x00ff, 0x00ff }; ++} ++/* { dg-final { scan-assembler-times "vgbm\t%v24,21845" 1 } } */ ++ ++uv16qi __attribute__((noinline)) ++foo4 () ++{ ++ return 
(uv16qi){ 0xff, 0xff, 0xff, 0xff, ++ 0, 0, 0, 0, ++ 0xff, 0, 0xff, 0, ++ 0, 0xff, 0, 0xff }; ++} ++ ++uv1ti __attribute__((noinline)) ++foo5 () ++{ ++ return (uv1ti){ 0xff00ff00ff00ff00ULL }; ++} ++ ++/* { dg-final { scan-assembler-times "vgbm\t%v24,61605" 1 } } */ ++ ++int ++main () ++{ ++ if (foo1()[1] != 0x00ff00ff00ff00ffULL) ++ __builtin_abort (); ++ ++ if (foo2()[1] != 0x0000ffff) ++ __builtin_abort (); ++ ++ if (foo3a()[1] != 0xff00) ++ __builtin_abort (); ++ ++ if (foo3b()[1] != 0x00ff) ++ __builtin_abort (); ++ ++ if (foo4()[1] != 0xff) ++ __builtin_abort (); ++ ++ if (foo5()[0] != 0xff00ff00ff00ff00ULL) ++ __builtin_abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { cleanup-saved-temps } } */ +--- gcc/testsuite/gcc.target/s390/vector/vec-genbytemask-2.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-genbytemask-2.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,46 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++typedef unsigned char uv16qi __attribute__((vector_size(16))); ++typedef unsigned short uv8hi __attribute__((vector_size(16))); ++typedef unsigned int uv4si __attribute__((vector_size(16))); ++typedef unsigned long long uv2di __attribute__((vector_size(16))); ++ ++/* The elements differ. */ ++uv2di __attribute__((noinline)) ++foo1 () ++{ ++ return (uv2di){ 0x001fffffffffff00, 0x0000ffffffffff00 }; ++} ++ ++/* Non-contiguous bitmasks */ ++ ++uv4si __attribute__((noinline)) ++foo2 () ++{ ++ return (uv4si){ 0xff00100f, 0xff00100f, 0xff00100f, 0xff00100f }; ++} ++ ++uv8hi __attribute__((noinline)) ++foo3a () ++{ ++ return (uv8hi){ 0xf700, 0xf700, 0xf700, 0xf700, ++ 0xf700, 0xf700, 0xf700, 0xf700 }; ++} ++ ++uv8hi __attribute__((noinline)) ++foo3b () ++{ ++ return (uv8hi){ 0x10ff, 0x10ff, 0x10ff, 0x10ff, ++ 0x10ff, 0x10ff, 0x10ff, 0x10ff }; ++} ++ ++uv16qi __attribute__((noinline)) ++foo4 () ++{ ++ return (uv16qi){ 0x82, 0x82, 0x82, 0x82, ++ 0x82, 0x82, 0x82, 0x82, ++ 0x82, 0x82, 0x82, 0x82, ++ 0x82, 0x82, 0x82, 0x82 }; ++} ++/* { dg-final { scan-assembler-not "vgbm" } } */ +--- gcc/testsuite/gcc.target/s390/vector/vec-genmask-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-genmask-1.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,70 @@ ++/* { dg-do run } */ ++/* { dg-options "-O3 -mzarch -march=z13 --save-temps" } */ ++/* { dg-require-effective-target vector } */ ++ ++typedef unsigned char uv16qi __attribute__((vector_size(16))); ++typedef unsigned short uv8hi __attribute__((vector_size(16))); ++typedef unsigned int uv4si __attribute__((vector_size(16))); ++typedef unsigned long long uv2di __attribute__((vector_size(16))); ++ ++uv2di __attribute__((noinline)) ++foo1 () ++{ ++ return (uv2di){ 0x000fffffffffff00, 0x000fffffffffff00 }; ++} ++/* { dg-final { scan-assembler-times "vgmg\t%v24,12,55" 1 } } */ ++ ++uv4si __attribute__((noinline)) ++foo2 () ++{ ++ return (uv4si){ 0xff00000f, 0xff00000f, 0xff00000f, 0xff00000f }; ++} ++/* { dg-final { scan-assembler-times "vgmf\t%v24,28,7" 1 } } */ ++ ++uv8hi __attribute__((noinline)) ++foo3a () ++{ ++ return (uv8hi){ 0xfff0, 0xfff0, 0xfff0, 0xfff0, ++ 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; ++} ++/* { dg-final { scan-assembler-times "vgmh\t%v24,0,11" 1 } } */ ++ ++uv8hi __attribute__((noinline)) ++foo3b () ++{ ++ return (uv8hi){ 0x0fff, 0x0fff, 0x0fff, 0x0fff, ++ 0x0fff, 0x0fff, 0x0fff, 0x0fff }; ++} ++/* { dg-final { scan-assembler-times "vgmh\t%v24,4,15" 1 } } */ ++ ++uv16qi __attribute__((noinline)) ++foo4 () ++{ ++ return 
(uv16qi){ 0x8, 0x8, 0x8, 0x8, ++ 0x8, 0x8, 0x8, 0x8, ++ 0x8, 0x8, 0x8, 0x8, ++ 0x8, 0x8, 0x8, 0x8 }; ++} ++/* { dg-final { scan-assembler-times "vgmb\t%v24,4,4" 1 } } */ ++ ++int ++main () ++{ ++ if (foo1()[1] != 0x000fffffffffff00ULL) ++ __builtin_abort (); ++ ++ if (foo2()[1] != 0xff00000f) ++ __builtin_abort (); ++ ++ if (foo3a()[1] != 0xfff0) ++ __builtin_abort (); ++ ++ if (foo3b()[1] != 0x0fff) ++ __builtin_abort (); ++ ++ if (foo4()[1] != 0x8) ++ __builtin_abort (); ++ return 0; ++} ++ ++/* { dg-final { cleanup-saved-temps } } */ +--- gcc/testsuite/gcc.target/s390/vector/vec-genmask-2.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-genmask-2.c 2016-05-11 17:38:00.000000000 +0200 +@@ -0,0 +1,55 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++/* { dg-require-effective-target int128 } */ ++ ++typedef unsigned char uv16qi __attribute__((vector_size(16))); ++typedef unsigned short uv8hi __attribute__((vector_size(16))); ++typedef unsigned int uv4si __attribute__((vector_size(16))); ++typedef unsigned long long uv2di __attribute__((vector_size(16))); ++typedef unsigned __int128 uv1ti __attribute__((vector_size(16))); ++ ++/* The elements differ. */ ++uv2di __attribute__((noinline)) ++foo1 () ++{ ++ return (uv2di){ 0x000fffffffffff00, 0x0000ffffffffff00 }; ++} ++ ++/* Non-contiguous bitmasks */ ++ ++uv4si __attribute__((noinline)) ++foo2 () ++{ ++ return (uv4si){ 0xff00100f, 0xff00100f, 0xff00100f, 0xff00100f }; ++} ++ ++uv8hi __attribute__((noinline)) ++foo3a () ++{ ++ return (uv8hi){ 0xf700, 0xf700, 0xf700, 0xf700, ++ 0xf700, 0xf700, 0xf700, 0xf700 }; ++} ++ ++uv8hi __attribute__((noinline)) ++foo3b () ++{ ++ return (uv8hi){ 0x10ff, 0x10ff, 0x10ff, 0x10ff, ++ 0x10ff, 0x10ff, 0x10ff, 0x10ff }; ++} ++ ++uv16qi __attribute__((noinline)) ++foo4 () ++{ ++ return (uv16qi){ 0x82, 0x82, 0x82, 0x82, ++ 0x82, 0x82, 0x82, 0x82, ++ 0x82, 0x82, 0x82, 0x82, ++ 0x82, 0x82, 0x82, 0x82 }; ++} ++ ++/* We do not have vgmq. */ ++uv1ti ++foo5() ++{ ++ return (uv1ti){ ((unsigned __int128)1 << 53) - 1 }; ++} ++/* { dg-final { scan-assembler-not "vgm" } } */ +--- gcc/testsuite/gcc.target/s390/vector/vec-init-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-init-1.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,68 @@ ++/* Check that the vec_init expander does its job. 
*/ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++ ++ ++ ++ ++typedef __attribute__((vector_size(16))) signed int v4si; ++ ++extern v4si G; ++ ++v4si ++f (signed int a) ++{ ++ return G == a; ++} ++/* { dg-final { scan-assembler-times "vrepf" 1 } } */ ++ ++v4si ++g (signed int *a) ++{ ++ return G == *a; ++} ++/* { dg-final { scan-assembler-times "vlrepf" 1 } } */ ++ ++v4si ++h () ++{ ++ return G == 1; ++} ++/* { dg-final { scan-assembler-times "vgmf\t%v.*,31,31" 1 } } */ ++ ++v4si ++i () ++{ ++ return G == -1; ++} ++/* { dg-final { scan-assembler-times "vone" 1 } } */ ++ ++v4si ++j () ++{ ++ return G == 0; ++} ++/* { dg-final { scan-assembler-times "vzero" 1 } } */ ++ ++v4si ++k () ++{ ++ return G == (v4si){ 0xff80, 0xff80, 0xff80, 0xff80 }; ++} ++/* { dg-final { scan-assembler-times "vgmf\t%v.*,16,24" 1 } } */ ++ ++v4si ++l () ++{ ++ return G == (v4si){ 0xf000000f, 0xf000000f, 0xf000000f, 0xf000000f }; ++} ++/* { dg-final { scan-assembler-times "vgmf\t%v.*,28,3" 1 } } */ ++ ++v4si ++m () ++{ ++ return G == (v4si){ 0x00ff00ff, 0x0000ffff, 0xffff0000, 0xff00ff00 }; ++} ++/* { dg-final { scan-assembler-times "vgbm\t%v.*,21450" 1 } } */ +--- gcc/testsuite/gcc.target/s390/vector/vec-int-math-compile-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-int-math-compile-1.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,40 @@ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++typedef __attribute__((vector_size(16))) signed int v4si; ++ ++v4si ++adddbl (v4si a, v4si b) ++{ ++ return a + b; ++} ++ ++v4si ++subdbl (v4si a, v4si b) ++{ ++ return a - b; ++} ++ ++v4si ++muldbl (v4si a, v4si b) ++{ ++ return a * b; ++} ++ ++v4si ++divdbl (v4si a, v4si b) ++{ ++ return a / b; ++} ++ ++v4si ++fmadbl (v4si a, v4si b, v4si c) ++{ ++ return a * b + c; ++} ++ ++v4si ++fmsdbl (v4si a, v4si b, v4si c) ++{ ++ return a * b - c; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-scalar-cmp-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-scalar-cmp-1.c 2016-05-11 17:12:28.000000000 +0200 +@@ -0,0 +1,49 @@ ++/* Check that we use the scalar variants of vector compares. 
*/ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* { dg-final { scan-assembler-times "wfcedbs\t%v\[0-9\]*,%v0,%v2" 2 } } */ ++/* { dg-final { scan-assembler-times "wfchdbs\t%v\[0-9\]*,%v0,%v2" 1 } } */ ++/* { dg-final { scan-assembler-times "wfchedbs\t%v\[0-9\]*,%v2,%v0" 1 } } */ ++/* { dg-final { scan-assembler-times "wfchdbs\t%v\[0-9\]*,%v2,%v0" 1 } } */ ++/* { dg-final { scan-assembler-times "wfchedbs\t%v\[0-9\]*,%v2,%v0" 1 } } */ ++/* { dg-final { scan-assembler-times "locrne" 5 } } */ ++/* { dg-final { scan-assembler-times "locrno" 1 } } */ ++ ++ ++int ++eq (double a, double b) ++{ ++ return a == b; ++} ++ ++int ++ne (double a, double b) ++{ ++ return a != b; ++} ++ ++int ++gt (double a, double b) ++{ ++ return a > b; ++} ++ ++int ++ge (double a, double b) ++{ ++ return a >= b; ++} ++ ++int ++lt (double a, double b) ++{ ++ return a < b; ++} ++ ++int ++le (double a, double b) ++{ ++ return a <= b; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-shift-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-shift-1.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,108 @@ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* { dg-final { scan-assembler-times "veslb" 2 } } */ ++/* { dg-final { scan-assembler-times "veslh" 2 } } */ ++/* { dg-final { scan-assembler-times "veslf" 2 } } */ ++/* { dg-final { scan-assembler-times "veslg" 2 } } */ ++ ++/* { dg-final { scan-assembler-times "vesrab" 1 } } */ ++/* { dg-final { scan-assembler-times "vesrah" 1 } } */ ++/* { dg-final { scan-assembler-times "vesraf" 1 } } */ ++/* { dg-final { scan-assembler-times "vesrag" 1 } } */ ++ ++/* { dg-final { scan-assembler-times "vesrlb" 1 } } */ ++/* { dg-final { scan-assembler-times "vesrlh" 1 } } */ ++/* { dg-final { scan-assembler-times "vesrlf" 1 } } */ ++/* { dg-final { scan-assembler-times "vesrlg" 1 } } */ ++ ++/* { dg-final { scan-assembler-times "veslvb" 2 } } */ ++/* { dg-final { scan-assembler-times "veslvh" 2 } } */ ++/* { dg-final { scan-assembler-times "veslvf" 2 } } */ ++/* { dg-final { scan-assembler-times "veslvg" 2 } } */ ++ ++/* { dg-final { scan-assembler-times "vesravb" 1 } } */ ++/* { dg-final { scan-assembler-times "vesravh" 1 } } */ ++/* { dg-final { scan-assembler-times "vesravf" 1 } } */ ++/* { dg-final { scan-assembler-times "vesravg" 1 } } */ ++ ++/* { dg-final { scan-assembler-times "vesrlvb" 1 } } */ ++/* { dg-final { scan-assembler-times "vesrlvh" 1 } } */ ++/* { dg-final { scan-assembler-times "vesrlvf" 1 } } */ ++/* { dg-final { scan-assembler-times "vesrlvg" 1 } } */ ++ ++typedef __attribute__((vector_size(16))) signed char v16qi; ++typedef __attribute__((vector_size(16))) unsigned char uv16qi; ++ ++typedef __attribute__((vector_size(16))) signed short v8hi; ++typedef __attribute__((vector_size(16))) unsigned short uv8hi; ++ ++typedef __attribute__((vector_size(16))) signed int v4si; ++typedef __attribute__((vector_size(16))) unsigned int uv4si; ++ ++typedef __attribute__((vector_size(16))) signed long long v2di; ++typedef __attribute__((vector_size(16))) unsigned long long uv2di; ++ ++uv16qi g_uvqi0, g_uvqi1, g_uvqi2; ++v16qi g_vqi0, g_vqi1, g_vqi2; ++ ++uv8hi g_uvhi0, g_uvhi1, g_uvhi2; ++v8hi g_vhi0, g_vhi1, g_vhi2; ++ ++uv4si g_uvsi0, g_uvsi1, g_uvsi2; ++v4si g_vsi0, g_vsi1, g_vsi2; ++ ++uv2di g_uvdi0, g_uvdi1, g_uvdi2; ++v2di g_vdi0, g_vdi1, g_vdi2; ++ ++void ++shift_left_by_scalar (int s) ++{ ++ g_uvqi0 = g_uvqi1 << s; ++ 
g_vqi0 = g_vqi1 << s; ++ g_uvhi0 = g_uvhi1 << s; ++ g_vhi0 = g_vhi1 << s; ++ g_uvsi0 = g_uvsi1 << s; ++ g_vsi0 = g_vsi1 << s; ++ g_uvdi0 = g_uvdi1 << s; ++ g_vdi0 = g_vdi1 << s; ++} ++ ++void ++shift_right_by_scalar (int s) ++{ ++ g_uvqi0 = g_uvqi1 >> s; ++ g_vqi0 = g_vqi1 >> s; ++ g_uvhi0 = g_uvhi1 >> s; ++ g_vhi0 = g_vhi1 >> s; ++ g_uvsi0 = g_uvsi1 >> s; ++ g_vsi0 = g_vsi1 >> s; ++ g_uvdi0 = g_uvdi1 >> s; ++ g_vdi0 = g_vdi1 >> s; ++} ++ ++void ++shift_left_by_vector () ++{ ++ g_uvqi0 = g_uvqi1 << g_uvqi2; ++ g_vqi0 = g_vqi1 << g_vqi2; ++ g_uvhi0 = g_uvhi1 << g_uvhi2; ++ g_vhi0 = g_vhi1 << g_vhi2; ++ g_uvsi0 = g_uvsi1 << g_uvsi2; ++ g_vsi0 = g_vsi1 << g_vsi2; ++ g_uvdi0 = g_uvdi1 << g_uvdi2; ++ g_vdi0 = g_vdi1 << g_vdi2; ++} ++ ++void ++shift_right_by_vector () ++{ ++ g_uvqi0 = g_uvqi1 >> g_uvqi2; ++ g_vqi0 = g_vqi1 >> g_vqi2; ++ g_uvhi0 = g_uvhi1 >> g_uvhi2; ++ g_vhi0 = g_vhi1 >> g_vhi2; ++ g_uvsi0 = g_uvsi1 >> g_uvsi2; ++ g_vsi0 = g_vsi1 >> g_vsi2; ++ g_uvdi0 = g_uvdi1 >> g_uvdi2; ++ g_vdi0 = g_vdi1 >> g_vdi2; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-sub-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-sub-1.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,51 @@ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* { dg-final { scan-assembler-times "vsb" 2 } } */ ++/* { dg-final { scan-assembler-times "vsh" 2 } } */ ++/* { dg-final { scan-assembler-times "vsf" 2 } } */ ++/* { dg-final { scan-assembler-times "vsg" 2 } } */ ++/* { dg-final { scan-assembler-times "vfs" 1 } } */ ++ ++ ++typedef unsigned char uv16qi __attribute__((vector_size(16))); ++typedef signed char v16qi __attribute__((vector_size(16))); ++typedef unsigned short uv8hi __attribute__((vector_size(16))); ++typedef signed short v8hi __attribute__((vector_size(16))); ++typedef unsigned int uv4si __attribute__((vector_size(16))); ++typedef signed int v4si __attribute__((vector_size(16))); ++typedef unsigned long long uv2di __attribute__((vector_size(16))); ++typedef signed long long v2di __attribute__((vector_size(16))); ++typedef double v2df __attribute__((vector_size(16))); ++ ++uv16qi g_uvqi0, g_uvqi1, g_uvqi2; ++v16qi g_vqi0, g_vqi1, g_vqi2; ++ ++uv8hi g_uvhi0, g_uvhi1, g_uvhi2; ++v8hi g_vhi0, g_vhi1, g_vhi2; ++ ++uv4si g_uvsi0, g_uvsi1, g_uvsi2; ++v4si g_vsi0, g_vsi1, g_vsi2; ++ ++uv2di g_uvdi0, g_uvdi1, g_uvdi2; ++v2di g_vdi0, g_vdi1, g_vdi2; ++ ++v2df g_vdf0, g_vdf1, g_vdf2; ++ ++void ++sub1 () ++{ ++ g_vqi0 = g_vqi1 - g_vqi2; ++ g_uvqi0 = g_uvqi1 - g_uvqi2; ++ ++ g_vhi0 = g_vhi1 - g_vhi2; ++ g_uvhi0 = g_uvhi1 - g_uvhi2; ++ ++ g_vsi0 = g_vsi1 - g_vsi2; ++ g_uvsi0 = g_uvsi1 - g_uvsi2; ++ ++ g_vdi0 = g_vdi1 - g_vdi2; ++ g_uvdi0 = g_uvdi1 - g_uvdi2; ++ ++ g_vdf0 = g_vdf1 - g_vdf2; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-vcond-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-vcond-1.c 2016-05-11 18:08:10.000000000 +0200 +@@ -0,0 +1,23 @@ ++/* A const vector operand is forced into a register in ++ s390_expand_vcond. ++ This testcase once failed because the target mode (v2di) was picked ++ for the reg instead of the mode of the other comparison ++ operand. 
*/
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++typedef __attribute__((vector_size(16))) long v2di;
++typedef __attribute__((vector_size(16))) double v2df;
++
++v2di
++foo (v2df a)
++{
++  return a == (v2df){ 0.0, 0.0 };
++}
++
++v2di
++bar (v2df a)
++{
++  return (v2df){ 1.0, 1.0 } == (v2df){ 0.0, 0.0 };
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-vrepi-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-vrepi-1.c	2016-05-11 17:41:29.000000000 +0200
+@@ -0,0 +1,58 @@
++/* { dg-do run } */
++/* { dg-options "-O3 -mzarch -march=z13 --save-temps" } */
++/* { dg-require-effective-target vector } */
++
++typedef unsigned char uv16qi __attribute__((vector_size(16)));
++typedef unsigned short uv8hi __attribute__((vector_size(16)));
++typedef unsigned int uv4si __attribute__((vector_size(16)));
++typedef unsigned long long uv2di __attribute__((vector_size(16)));
++
++uv2di __attribute__((noinline))
++foo1 ()
++{
++  return (uv2di){ 0x7f0f, 0x7f0f };
++}
++/* { dg-final { scan-assembler-times "vrepig\t%v24,32527" 1 } } */
++
++uv4si __attribute__((noinline))
++foo2 ()
++{
++  return (uv4si){ 0x7f0f, 0x7f0f, 0x7f0f, 0x7f0f };
++}
++/* { dg-final { scan-assembler-times "vrepif\t%v24,32527" 1 } } */
++
++uv8hi __attribute__((noinline))
++foo3 ()
++{
++  return (uv8hi){ 0x7f0f, 0x7f0f, 0x7f0f, 0x7f0f,
++                  0x7f0f, 0x7f0f, 0x7f0f, 0x7f0f };
++}
++/* { dg-final { scan-assembler-times "vrepih\t%v24,32527" 1 } } */
++
++uv16qi __attribute__((noinline))
++foo4 ()
++{
++  return (uv16qi){ 0x77, 0x77, 0x77, 0x77,
++                   0x77, 0x77, 0x77, 0x77,
++                   0x77, 0x77, 0x77, 0x77,
++                   0x77, 0x77, 0x77, 0x77 };
++}
++/* { dg-final { scan-assembler-times "vrepib\t%v24,119" 1 } } */
++
++int
++main ()
++{
++  if (foo1()[1] != 0x7f0f)
++    __builtin_abort ();
++
++  if (foo2()[1] != 0x7f0f)
++    __builtin_abort ();
++
++  if (foo3()[1] != 0x7f0f)
++    __builtin_abort ();
++
++  if (foo4()[1] != 0x77)
++    __builtin_abort ();
++
++  return 0;
++}
+--- gcc/testsuite/gcc.target/s390/zvector/vec-dbl-math-compile-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-dbl-math-compile-1.c	2016-05-11 17:30:06.000000000 +0200
+@@ -0,0 +1,67 @@
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13 -mzvector --save-temps" } */
++
++/* { dg-final { scan-assembler-times "vfcedb\t" 1 } } */
++/* { dg-final { scan-assembler-times "vfchdb\t" 2 } } */
++/* { dg-final { scan-assembler-times "vfchedb\t" 2 } } */
++
++/* { dg-final { scan-assembler-times "vfcedbs\t" 2 } } */
++/* { dg-final { scan-assembler-times "vfchdbs\t" 2 } } */
++
++/* { dg-final { cleanup-saved-temps } } */
++
++#include <vecintrin.h>
++
++vector bool long long
++cmpeq (vector double a, vector double b)
++{
++  return vec_cmpeq (a, b); /* vfcedb */
++}
++
++vector bool long long
++cmpgt (vector double a, vector double b)
++{
++  return vec_cmpgt (a, b); /* vfchdb */
++}
++
++vector bool long long
++cmpge (vector double a, vector double b)
++{
++  return vec_cmpge (a, b); /* vfchedb */
++}
++
++vector bool long long
++cmplt (vector double a, vector double b)
++{
++  return vec_cmplt (a, b); /* vfchdb */
++}
++
++vector bool long long
++cmple (vector double a, vector double b)
++{
++  return vec_cmple (a, b); /* vfchedb */
++}
++
++int
++all_eq (vector double a, vector double b)
++{
++  return vec_all_eq (a, b);
++}
++
++int
++any_eq (vector double a, vector double b)
++{
++  return vec_any_eq (a, b);
++}
++
++int
++all_lt (vector double a, vector double b)
++{
++  return vec_all_lt (a, b);
++}
++
++int
++any_lt (vector double a, vector double b)
++{
++  return vec_any_lt (a, b);
++}
+--- gcc/testsuite/gcc.target/s390/zvector/vec-elem-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-elem-1.c	2016-05-11 17:30:06.000000000 +0200
+@@ -0,0 +1,11 @@
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13 -mzvector" } */
++
++/* { dg-final { scan-assembler "nilf\t%r2,15" } } */
++/* { dg-final { scan-assembler "vlgvb" } } */
++
++signed char
++foo(unsigned char uc)
++{
++  return __builtin_s390_vec_extract((__vector signed char){ 0 }, uc);
++}
+--- gcc/testsuite/gcc.target/s390/zvector/vec-genbytemask-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-genbytemask-1.c	2016-05-11 17:30:06.000000000 +0200
+@@ -0,0 +1,21 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mzarch -march=z13 -mzvector" } */
++
++#include <vecintrin.h>
++
++
++vector unsigned char a, b, c, d;
++
++int
++foo ()
++{
++  a = vec_genmask (0);
++  b = vec_genmask (65535);
++  c = vec_genmask (43605);
++  d = vec_genmask (37830);
++}
++
++/* { dg-final { scan-assembler-times "vzero" 1 } } */
++/* { dg-final { scan-assembler-times "vone" 1 } } */
++/* { dg-final { scan-assembler-times "vgbm\t%v.*,43605" 1 } } */
++/* { dg-final { scan-assembler-times "vgbm\t%v.*,37830" 1 } } */
+--- gcc/testsuite/gcc.target/s390/zvector/vec-genmask-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-genmask-1.c	2016-05-11 17:30:06.000000000 +0200
+@@ -0,0 +1,24 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mzarch -march=z13 -mzvector" } */
++
++#include <vecintrin.h>
++
++
++vector unsigned int a, b, c, d, e, f;
++
++int
++foo ()
++{
++  a = vec_genmasks_32 (0, 31);
++  b = vec_genmasks_32 (0, 0);
++  c = vec_genmasks_32 (31, 31);
++  d = vec_genmasks_32 (5, 5);
++  e = vec_genmasks_32 (31, 0);
++  f = vec_genmasks_32 (6, 5);
++}
++/* { dg-final { scan-assembler-times "vone" 1 } } */
++/* { dg-final { scan-assembler-times "vgmf\t%v.*,0,0" 1 } } */
++/* { dg-final { scan-assembler-times "vgmf\t%v.*,31,31" 1 } } */
++/* { dg-final { scan-assembler-times "vgmf\t%v.*,5,5" 1 } } */
++/* { dg-final { scan-assembler-times "vgmf\t%v.*,31,0" 1 } } */
++/* { dg-final { scan-assembler-times "vone" 1 } } */
+--- gcc/testsuite/gcc.target/s390/zvector/vec-lcbb-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-lcbb-1.c	2016-05-11 17:30:06.000000000 +0200
+@@ -0,0 +1,31 @@
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13 -mzvector" } */
++
++/* { dg-final { scan-assembler-times "\tlcbb\t" 4 } } */
++
++#include <vecintrin.h>
++
++/* CC will be extracted into a GPR and returned.  */
++int
++foo1 (void *ptr)
++{
++  return __lcbb (ptr, 64);
++}
++
++int
++foo2 (void *ptr)
++{
++  return __lcbb (ptr, 128) > 16;
++}
++
++int
++foo3 (void *ptr)
++{
++  return __lcbb (ptr, 256) == 16;
++}
++
++int
++foo4 (void *ptr)
++{
++  return __lcbb (ptr, 512) < 16;
++}
+--- gcc/testsuite/gcc.target/s390/zvector/vec-load_bndry-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-load_bndry-1.c	2016-05-11 17:34:31.000000000 +0200
+@@ -0,0 +1,80 @@
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O0 -mzarch -march=z13 -mzvector" } */
++
++#include <vecintrin.h>
++
++signed char
++foo64 (signed char *p)
++{
++  return vec_load_bndry (p, 64)[0];
++  /* { dg-final { scan-assembler-times "\tvlbb\t%v..?,0\\(%r..?\\),0" 1 } } */
++}
++
++signed char
++foo128 (signed char *p)
++{
++  return
++    vec_load_bndry (p, 128)[0]
++    + vec_load_bndry (p + 16, 128)[0];
++  /* { dg-final { scan-assembler-times "\tvlbb\t%v..?,0\\(%r..?\\),1" 2 } } */
++}
++
++signed char
++foo256 (signed char *p)
++{
++  return
++    vec_load_bndry (p, 256)[0]
++    + vec_load_bndry (p + 16, 256)[0]
++    + vec_load_bndry (p + 32, 256)[0];
++  /* { dg-final { scan-assembler-times "\tvlbb\t%v..?,0\\(%r..?\\),2" 3 } } */
++}
++
++signed char
++foo512 (signed char *p)
++{
++  return
++    vec_load_bndry (p, 512)[0]
++    + vec_load_bndry (p + 16, 512)[0]
++    + vec_load_bndry (p + 32, 512)[0]
++    + vec_load_bndry (p + 48, 512)[0];
++  /* { dg-final { scan-assembler-times "\tvlbb\t%v..?,0\\(%r..?\\),3" 4 } } */
++}
++
++signed char
++foo1024 (signed char *p)
++{
++  return
++    vec_load_bndry (p, 1024)[0]
++    + vec_load_bndry (p + 16, 1024)[0]
++    + vec_load_bndry (p + 32, 1024)[0]
++    + vec_load_bndry (p + 48, 1024)[0]
++    + vec_load_bndry (p + 64, 1024)[0];
++  /* { dg-final { scan-assembler-times "\tvlbb\t%v..?,0\\(%r..?\\),4" 5 } } */
++}
++
++signed char
++foo2048 (signed char *p)
++{
++  return
++    vec_load_bndry (p, 2048)[0]
++    + vec_load_bndry (p + 16, 2048)[0]
++    + vec_load_bndry (p + 32, 2048)[0]
++    + vec_load_bndry (p + 48, 2048)[0]
++    + vec_load_bndry (p + 64, 2048)[0]
++    + vec_load_bndry (p + 80, 2048)[0];
++  /* { dg-final { scan-assembler-times "\tvlbb\t%v..?,0\\(%r..?\\),5" 6 } } */
++}
++
++signed char
++foo4096 (signed char *p)
++{
++  return
++    vec_load_bndry (p, 4096)[0]
++    + vec_load_bndry (p + 16, 4096)[0]
++    + vec_load_bndry (p + 32, 4096)[0]
++    + vec_load_bndry (p + 48, 4096)[0]
++    + vec_load_bndry (p + 64, 4096)[0]
++    + vec_load_bndry (p + 80, 4096)[0]
++    + vec_load_bndry (p + 96, 4096)[0];
++  /* { dg-final { scan-assembler-times "\tvlbb\t%v..?,0\\(%r..?\\),6" 7 } } */
++}
+--- gcc/testsuite/gcc.target/s390/zvector/vec-overloading-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-overloading-1.c	2016-05-11 17:30:06.000000000 +0200
+@@ -0,0 +1,77 @@
++/* Test whether overloading works as expected.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-march=z13 -mzarch -mzvector -fdump-tree-original" } */
++
++__vector int var_v4si;
++__vector unsigned var_uv4si;
++__vector bool var_bv4si;
++__vector long long var_v2di;
++__vector unsigned long long var_uv2di;
++__vector bool long long var_bv2di;
++__vector double var_v2df;
++
++int *intptr;
++unsigned *uintptr;
++double *dblptr;
++unsigned long long ull;
++const int *cintptr;
++long long* llptr;
++unsigned long long* ullptr;
++
++typedef __vector int v4si;
++typedef __vector unsigned int uv4si;
++
++v4si var2_v4si;
++uv4si var2_uv4si;
++
++void
++foo ()
++{
++  __builtin_s390_vec_scatter_element (var_v4si, var_uv4si, intptr, (unsigned long long)0);
++  __builtin_s390_vec_scatter_element (var2_v4si, var2_uv4si, intptr, (unsigned long long)0);
++  __builtin_s390_vec_scatter_element (var_bv4si, var_uv4si, uintptr, (unsigned long long)0);
++  __builtin_s390_vec_scatter_element (var_uv4si, var_uv4si, uintptr, (unsigned long long)0);
++  __builtin_s390_vec_scatter_element (var_v2di, var_uv2di, llptr, (unsigned long long)0);
++  __builtin_s390_vec_scatter_element (var_bv2di, var_uv2di, ullptr, (unsigned long long)0);
++  __builtin_s390_vec_scatter_element (var_uv2di, var_uv2di, ullptr, (unsigned long long)0);
++  __builtin_s390_vec_scatter_element (var_v2df, var_uv2di, dblptr, (unsigned long long)0);
++
++  /* While the last argument is an int there is a way to convert it to
++     unsigned long long, so this variant is supposed to match.  */
++  __builtin_s390_vec_scatter_element (var_v4si, var_uv4si, intptr, 0);
++
++  __builtin_s390_vec_insert_and_zero (intptr);
++  __builtin_s390_vec_insert_and_zero (cintptr);
++
++  __builtin_s390_vec_promote ((signed char)1, 1);
++  __builtin_s390_vec_promote ((unsigned char)1, 1);
++  __builtin_s390_vec_promote ((short int)1, 1);
++  __builtin_s390_vec_promote ((unsigned short int)1, 1);
++  __builtin_s390_vec_promote ((int)1, 1);
++  __builtin_s390_vec_promote ((unsigned)1, 1);
++  __builtin_s390_vec_promote ((long long)1, 1);
++  __builtin_s390_vec_promote ((unsigned long long)1, 1);
++  __builtin_s390_vec_promote ((double)1, 1);
++
++  /* This is supposed to match vec_promote_s32 */
++  __builtin_s390_vec_promote (1, (signed char) -1);
++
++  /* Constants in C usually are considered int.  */
++  __builtin_s390_vec_promote (1, 1);
++
++  /* And (unsigned) long if they are too big for int.  */
++  __builtin_s390_vec_promote (1ULL << 32, 1);
++  __builtin_s390_vec_promote (1LL << 32, 1);
++}
++
++/* { dg-final { scan-tree-dump-times "__builtin_s390_vscef " 5 "original" } } */
++/* { dg-final { scan-tree-dump-times "__builtin_s390_vsceg " 4 "original" } } */
++
++/* { dg-final { scan-tree-dump-times "__builtin_s390_vllezf " 2 "original" } } */
++
++/* { dg-final { scan-tree-dump-times "__builtin_s390_vlvgb_noin " 2 "original" } } */
++/* { dg-final { scan-tree-dump-times "__builtin_s390_vlvgh_noin " 2 "original" } } */
++/* { dg-final { scan-tree-dump-times "__builtin_s390_vlvgf_noin " 4 "original" } } */
++/* { dg-final { scan-tree-dump-times "__builtin_s390_vlvgg_noin " 4 "original" } } */
++/* { dg-final { scan-tree-dump-times "__builtin_s390_vlvgg_dbl_noin " 1 "original" } } */
+--- gcc/testsuite/gcc.target/s390/zvector/vec-overloading-2.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-overloading-2.c	2016-05-11 17:30:06.000000000 +0200
+@@ -0,0 +1,54 @@
++/* Test whether overloading works as expected.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-march=z13 -mzarch -mzvector" } */
++
++__vector int v4si;
++__vector unsigned uv4si;
++__vector bool bv4si;
++__vector long long v2di;
++__vector unsigned long long uv2di;
++__vector bool long long bv2di;
++__vector double v2df;
++int *intptr;
++unsigned *uintptr;
++double *dblptr;
++long long ll;
++unsigned long long ull;
++const int *cintptr;
++long long* llptr;
++unsigned long long* ullptr;
++
++void
++foo ()
++{
++  __builtin_s390_vec_scatter_element (v4si, uv4si, (int*)0, 0); /* ok */
++  __builtin_s390_vec_insert_and_zero (intptr); /* ok */
++
++  /* The unsigned pointer must not match the signed pointer.  */
++  __builtin_s390_vec_scatter_element (v4si, uv4si, uintptr, 0); /* { dg-error "invalid parameter combination for intrinsic" } */
++
++  /* Make sure signed int pointers don't match unsigned int pointers.  */
++  __builtin_s390_vec_scatter_element (bv4si, uv4si, intptr, 0); /* { dg-error "invalid parameter combination for intrinsic" } */
++
++  /* Const pointers do not match unqualified operands.  */
++  __builtin_s390_vec_scatter_element (v4si, uv4si, cintptr, 0); /* { dg-error "invalid parameter combination for intrinsic" } */
++
++  /* Volatile pointers do not match unqualified operands.  */
++  __builtin_s390_vec_scatter_element (v4si, uv4si, cintptr, 0); /* { dg-error "invalid parameter combination for intrinsic" } */
++
++  /* The third operand needs to be double *.  */
++  __builtin_s390_vec_scatter_element (v2df, uv4si, intptr, 0); /* { dg-error "invalid parameter combination for intrinsic" } */
++
++  /* This is an ambiguous overload.  */
++  __builtin_s390_vec_scatter_element (v4si, uv4si, 0, 0); /* { dg-error "invalid parameter combination for intrinsic" } */
++
++  /* Pointer to vector must not match.  */
++  __builtin_s390_vec_scatter_element (v4si, uv4si, &v4si, 0); /* { dg-error "invalid parameter combination for intrinsic" } */
++
++  /* Don't accept const int* for int*.  */
++  __builtin_s390_vec_scatter_element (v4si, uv4si, cintptr, 0); /* { dg-error "invalid parameter combination for intrinsic" } */
++
++  __builtin_s390_vec_load_pair (ll, ull); /* { dg-error "ambiguous overload for intrinsic" } */
++  __builtin_s390_vec_load_pair (ull, ll); /* { dg-error "ambiguous overload for intrinsic" } */
++}
+--- gcc/testsuite/gcc.target/s390/zvector/vec-overloading-3.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-overloading-3.c	2016-05-11 17:30:06.000000000 +0200
+@@ -0,0 +1,19 @@
++/* Check for error messages supposed to be issued during overloading.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-march=z13 -mzarch -mzvector" } */
++
++__vector int v4si;
++__vector unsigned uv4si;
++
++int *intptr;
++unsigned long long ull;
++const unsigned int *ucintptr;
++
++void
++foo ()
++{
++  /* A backend check makes sure the fourth operand is a literal.  */
++  __builtin_s390_vec_gather_element (uv4si, uv4si, ucintptr, 256); /* { dg-error "constant argument 4 for builtin.*is out of range for target type" } */
++  __builtin_s390_vec_gather_element (uv4si, uv4si, ucintptr, 5); /* { dg-error "constant argument 4 for builtin.*is out of range" } */
++}
+--- gcc/testsuite/gcc.target/s390/zvector/vec-overloading-4.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-overloading-4.c	2016-05-11 17:30:06.000000000 +0200
+@@ -0,0 +1,18 @@
++/* Check for error messages supposed to be issued during builtin expansion.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-march=z13 -mzarch -mzvector" } */
++
++__vector int v4si;
++__vector unsigned uv4si;
++
++int *intptr;
++unsigned long long ull;
++const unsigned int *ucintptr;
++
++void
++foo ()
++{
++  /* A backend check makes sure the fourth operand is a literal.  */
++  __builtin_s390_vec_scatter_element (v4si, uv4si, intptr, ull); /* { dg-error "constant value required for builtin" } */
++}
+--- gcc/testsuite/gcc.target/s390/zvector/vec-splat-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-splat-1.c	2016-05-11 17:41:24.000000000 +0200
+@@ -0,0 +1,42 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mzarch -march=z13 -mzvector" } */
++
++#include <vecintrin.h>
++
++vector signed char v16qi;
++vector short v8hi;
++vector int v4si;
++vector long long v2di;
++
++vector unsigned char uv16qi;
++vector unsigned short uv8hi;
++vector unsigned int uv4si;
++vector unsigned long long uv2di;
++
++int
++foo ()
++{
++  v16qi = vec_splats ((signed char)0x77);
++  uv16qi = vec_splats ((unsigned char)0x77);
++
++  v8hi = vec_splats ((short int)0x7f0f);
++  uv8hi = vec_splats ((unsigned short int)0x7f0f);
++
++  v4si = vec_splats ((int)0x7f0f);
++  uv4si = vec_splats ((unsigned int)0x7f0f);
++
++  v2di = vec_splats ((long long)0x7f0f);
++  uv2di = vec_splats ((unsigned long long)0x7f0f);
++}
++
++/* { dg-final { scan-assembler-times "vrepib\t%v.*,119" 1 } } */
++/* { dg-final { scan-assembler-times "vrepib\t%v.*,119" 1 } } */
++
++/* { dg-final { scan-assembler-times "vrepih\t%v.*,32527" 1 } } */
++/* { dg-final { scan-assembler-times "vrepih\t%v.*,32527" 1 } } */
++
++/* { dg-final { scan-assembler-times "vrepif\t%v.*,32527" 1 } } */
++/* { dg-final { scan-assembler-times "vrepif\t%v.*,32527" 1 } } */
++
++/* { dg-final { scan-assembler-times "vrepig\t%v.*,32527" 1 } } */
++/* { dg-final { scan-assembler-times "vrepig\t%v.*,32527" 1 } } */
+--- gcc/testsuite/gcc.target/s390/zvector/vec-splat-2.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-splat-2.c	2016-05-11 17:53:39.000000000 +0200
+@@ -0,0 +1,42 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mzarch -march=z13 -mzvector" } */
++
++#include <vecintrin.h>
++
++vector signed char v16qi;
++vector short v8hi;
++vector int v4si;
++vector long long v2di;
++
++vector unsigned char uv16qi;
++vector unsigned short uv8hi;
++vector unsigned int uv4si;
++vector unsigned long long uv2di;
++
++int
++foo ()
++{
++  v16qi = vec_splat_s8 (-112);
++  uv16qi = vec_splat_u8 (215);
++
++  v8hi = vec_splat_s16 (-32000);
++  uv8hi = vec_splat_u16 (64000);
++
++  v4si = vec_splat_s32 (-32000);
++  uv4si = vec_splat_u32 (64000);
++
++  v2di = vec_splat_s64 (-32000);
++  uv2di = vec_splat_u64 (64000);
++}
++
++/* { dg-final { scan-assembler-times "vrepib\t%v.*,-112" 1 } } */
++/* { dg-final { scan-assembler-times "vrepib\t%v.*,-41" 1 } } */
++
++/* { dg-final { scan-assembler-times "vrepih\t%v.*,-32000" 1 } } */
++/* { dg-final { scan-assembler-times "vrepih\t%v.*,-1536" 1 } } */
++
++/* { dg-final { scan-assembler-times "vrepif\t%v.*,-32000" 1 } } */
++/* { dg-final { scan-assembler-times "vrepif\t%v.*,-1536" 1 } } */
++
++/* { dg-final { scan-assembler-times "vrepig\t%v.*,-32000" 1 } } */
++/* { dg-final { scan-assembler-times "vrepig\t%v.*,-1536" 1 } } */
+--- gcc/testsuite/gcc.target/s390/zvector/vec-test-mask-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-test-mask-1.c	2016-05-11 17:30:06.000000000 +0200
+@@ -0,0 +1,25 @@
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13 -mzvector" } */
++
++/* { dg-final { scan-assembler-times "vtm" 2 } } */
++/* { dg-final { scan-assembler-times "ipm" 1 } } */
++
++#include <vecintrin.h>
++
++/* CC will be extracted into a GPR and returned.  */
++int
++foo (vector unsigned int a, vector unsigned b)
++{
++  return vec_test_mask (a, b);
++}
++
++extern void baz (void);
++
++/* In that case the ipm/srl is supposed to be optimized out by
++   combine/s390_canonicalize_comparison.  */
++int
++bar (vector unsigned int a, vector unsigned b)
++{
++  if (vec_test_mask (a, b) == 2)
++    baz ();
++}
+--- gcc/testsuite/lib/target-supports.exp	2015-06-18 16:32:16.000000000 +0200
++++ gcc/testsuite/lib/target-supports.exp	2016-05-11 17:32:08.000000000 +0200
+@@ -3800,7 +3800,8 @@ proc check_effective_target_vect_natural
+	verbose "check_effective_target_vect_natural_alignment: using cached result" 2
+     } else {
+	set et_vect_natural_alignment_saved 1
+-	if { [check_effective_target_arm_eabi] } {
++	if { [check_effective_target_arm_eabi]
++	     || [istarget s390*-*-*] } {
+	    set et_vect_natural_alignment_saved 0
+	}
+     }
diff --git a/gcc48-sparc-config-detection.patch b/gcc48-sparc-config-detection.patch
new file mode 100644
index 0000000..b669a5c
--- /dev/null
+++ b/gcc48-sparc-config-detection.patch
@@ -0,0 +1,40 @@
+--- gcc/config.gcc.jj	2008-04-24 15:42:46.000000000 -0500
++++ gcc/config.gcc	2008-04-24 15:44:51.000000000 -0500
+@@ -2478,7 +2478,7 @@ sparc-*-rtems*)
+	tm_file="${tm_file} dbxelf.h elfos.h sparc/sysv4.h sparc/sp-elf.h sparc/rtemself.h rtems.h newlib-stdint.h"
+	tmake_file="sparc/t-sparc sparc/t-rtems t-rtems"
+	;;
+-sparc-*-linux*)
++sparc-*-linux* | sparcv9-*-linux*)
+	tm_file="${tm_file} dbxelf.h elfos.h sparc/sysv4.h gnu-user.h linux.h glibc-stdint.h sparc/tso.h"
+	extra_options="${extra_options} sparc/long-double-switch.opt"
+	case ${target} in
+@@ -2532,7 +2532,7 @@ sparc64-*-rtems*)
+	extra_options="${extra_options}"
+	tmake_file="${tmake_file} sparc/t-sparc sparc/t-rtems-64 t-rtems"
+	;;
+-sparc64-*-linux*)
++sparc64*-*-linux*)
+	tm_file="sparc/biarch64.h ${tm_file} dbxelf.h elfos.h sparc/sysv4.h gnu-user.h linux.h glibc-stdint.h sparc/default-64.h sparc/linux64.h sparc/tso.h"
+	extra_options="${extra_options} sparc/long-double-switch.opt"
+	tmake_file="${tmake_file} sparc/t-sparc sparc/t-linux64"
+--- libgcc/config.host.jj	2008-04-24 15:46:19.000000000 -0500
++++ libgcc/config.host	2008-04-24 15:46:49.000000000 -0500
+@@ -1002,7 +1002,7 @@ sparc-*-elf*)
+	tmake_file="${tmake_file} t-fdpbit t-crtfm"
+	extra_parts="$extra_parts crti.o crtn.o crtfastmath.o"
+	;;
+-sparc-*-linux*)	# SPARC's running GNU/Linux, libc6
++sparc-*-linux* | sparcv9-*-linux*)	# SPARC's running GNU/Linux, libc6
+	tmake_file="${tmake_file} t-crtfm"
+	if test "${host_address}" = 64; then
+		tmake_file="$tmake_file sparc/t-linux64"
+@@ -1050,7 +1050,7 @@ sparc64-*-freebsd*|ultrasparc-*-freebsd*
+	tmake_file="$tmake_file t-crtfm"
+	extra_parts="$extra_parts crtfastmath.o"
+	;;
+-sparc64-*-linux*)	# 64-bit SPARC's running GNU/Linux
++sparc64*-*-linux*)	# 64-bit SPARC's running GNU/Linux
+	extra_parts="$extra_parts crtfastmath.o"
+	tmake_file="${tmake_file} t-crtfm sparc/t-linux"
+	if test "${host_address}" = 64; then
diff --git a/gcc48-ucontext.patch b/gcc48-ucontext.patch
new file mode 100644
index 0000000..01b54f4
--- /dev/null
+++ b/gcc48-ucontext.patch
@@ -0,0 +1,121 @@
+2017-07-04  Joseph Myers  <joseph@codesourcery.com>
+
+	* config/aarch64/linux-unwind.h (aarch64_fallback_frame_state),
+	config/alpha/linux-unwind.h (alpha_fallback_frame_state),
+	config/bfin/linux-unwind.h (bfin_fallback_frame_state),
+	config/i386/linux-unwind.h (x86_64_fallback_frame_state,
+	x86_fallback_frame_state), config/m68k/linux-unwind.h (struct
+	uw_ucontext), config/pa/linux-unwind.h (pa32_fallback_frame_state),
+	config/sh/linux-unwind.h (sh_fallback_frame_state),
+	config/tilepro/linux-unwind.h (tile_fallback_frame_state),
+	config/xtensa/linux-unwind.h (xtensa_fallback_frame_state): Use
+	ucontext_t instead of struct ucontext.
+
+--- libgcc/config/aarch64/linux-unwind.h
++++ libgcc/config/aarch64/linux-unwind.h
+@@ -52,7 +52,7 @@ aarch64_fallback_frame_state (struct _Unwind_Context *context,
+   struct rt_sigframe
+   {
+     siginfo_t info;
+-    struct ucontext uc;
++    ucontext_t uc;
+   };
+
+   struct rt_sigframe *rt_;
+--- libgcc/config/alpha/linux-unwind.h
++++ libgcc/config/alpha/linux-unwind.h
+@@ -51,7 +51,7 @@ alpha_fallback_frame_state (struct _Unwind_Context *context,
+     {
+       struct rt_sigframe {
+ 	siginfo_t info;
+-	struct ucontext uc;
++	ucontext_t uc;
+       } *rt_ = context->cfa;
+       sc = &rt_->uc.uc_mcontext;
+     }
+--- libgcc/config/bfin/linux-unwind.h
++++ libgcc/config/bfin/linux-unwind.h
+@@ -52,7 +52,7 @@ bfin_fallback_frame_state (struct _Unwind_Context *context,
+ 	void *puc;
+ 	char retcode[8];
+ 	siginfo_t info;
+-	struct ucontext uc;
++	ucontext_t uc;
+       } *rt_ = context->cfa;
+
+       /* The void * cast is necessary to avoid an aliasing warning.
+--- libgcc/config/i386/linux-unwind.h
++++ libgcc/config/i386/linux-unwind.h
+@@ -58,7 +58,7 @@ x86_64_fallback_frame_state (struct _Unwind_Context *context,
+   if (*(unsigned char *)(pc+0) == 0x48
+       && *(unsigned long long *)(pc+1) == RT_SIGRETURN_SYSCALL)
+     {
+-      struct ucontext *uc_ = context->cfa;
++      ucontext_t *uc_ = context->cfa;
+       /* The void * cast is necessary to avoid an aliasing warning.
+ 	 The aliasing warning is correct, but should not be a problem
+ 	 because it does not alias anything.  */
+@@ -138,7 +138,7 @@ x86_fallback_frame_state (struct _Unwind_Context *context,
+       siginfo_t *pinfo;
+       void *puc;
+       siginfo_t info;
+-      struct ucontext uc;
++      ucontext_t uc;
+     } *rt_ = context->cfa;
+     /* The void * cast is necessary to avoid an aliasing warning.
+        The aliasing warning is correct, but should not be a problem
+--- libgcc/config/m68k/linux-unwind.h
++++ libgcc/config/m68k/linux-unwind.h
+@@ -33,7 +33,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+ /* <sys/ucontext.h> is unfortunately broken right now.  */
+ struct uw_ucontext {
+ 	unsigned long	  uc_flags;
+-	struct ucontext  *uc_link;
++	ucontext_t	 *uc_link;
+ 	stack_t		  uc_stack;
+ 	mcontext_t	  uc_mcontext;
+ 	unsigned long	  uc_filler[80];
+--- libgcc/config/pa/linux-unwind.h
++++ libgcc/config/pa/linux-unwind.h
+@@ -80,7 +80,7 @@ pa32_fallback_frame_state (struct _Unwind_Context *context,
+   struct sigcontext *sc;
+   struct rt_sigframe {
+     siginfo_t info;
+-    struct ucontext uc;
++    ucontext_t uc;
+   } *frame;
+
+   /* rt_sigreturn trampoline:
+--- libgcc/config/sh/linux-unwind.h
++++ libgcc/config/sh/linux-unwind.h
+@@ -180,7 +180,7 @@ sh_fallback_frame_state (struct _Unwind_Context *context,
+     {
+       struct rt_sigframe {
+ 	siginfo_t info;
+-	struct ucontext uc;
++	ucontext_t uc;
+       } *rt_ = context->cfa;
+       /* The void * cast is necessary to avoid an aliasing warning.
+ 	 The aliasing warning is correct, but should not be a problem
+--- libgcc/config/tilepro/linux-unwind.h
++++ libgcc/config/tilepro/linux-unwind.h
+@@ -61,7 +61,7 @@ tile_fallback_frame_state (struct _Unwind_Context *context,
+   struct rt_sigframe {
+     unsigned char save_area[C_ABI_SAVE_AREA_SIZE];
+     siginfo_t info;
+-    struct ucontext uc;
++    ucontext_t uc;
+   } *rt_;
+
+   /* Return if this is not a signal handler.  */
+--- libgcc/config/xtensa/linux-unwind.h
++++ libgcc/config/xtensa/linux-unwind.h
+@@ -67,7 +67,7 @@ xtensa_fallback_frame_state (struct _Unwind_Context *context,
+
+   struct rt_sigframe {
+     siginfo_t info;
+-    struct ucontext uc;
++    ucontext_t uc;
+   } *rt_;
+
+   /* movi a2, __NR_rt_sigreturn; syscall */
diff --git a/sources b/sources
new file mode 100644
index 0000000..9252680
--- /dev/null
+++ b/sources
@@ -0,0 +1 @@
+SHA512 (gcc-4.8.5-20150702.tar.bz2) = 04883cca7424e3cfa282725c37867fd2af5ebbca16c4fafc31f667f083620922537e130363137dfb599e59768470333da5e602b656606854ef68159b6f288f06
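
Editor's note (appended after the diff, not part of it): gcc48-ucontext.patch above exists because glibc 2.26 stopped defining the `struct ucontext` tag in its headers, keeping only the POSIX `ucontext_t` typedef, so libgcc's signal-frame unwinders no longer compiled against newer glibc. Below is a minimal sketch of the pattern being fixed; the `rt_sigframe` layout and the `main` driver are illustrative assumptions, not libgcc's actual code, and the `REG_RIP` access is x86-64-glibc-specific.

#define _GNU_SOURCE           /* exposes REG_RIP in <ucontext.h> on x86-64 glibc */
#include <signal.h>           /* siginfo_t */
#include <stdio.h>
#include <ucontext.h>         /* ucontext_t; glibc >= 2.26 defines no struct ucontext */

/* Hypothetical kernel signal-frame layout, mirroring the unwinder pattern.  */
struct rt_sigframe {
  siginfo_t info;
  /* 'struct ucontext uc;' fails to compile against glibc >= 2.26; the
     typedef works on both old glibc (where it named struct ucontext)
     and new glibc (where only the typedef exists).  */
  ucontext_t uc;
};

int main (void)
{
  struct rt_sigframe frame = { 0 };
#ifdef REG_RIP
  /* Reading a saved register out of the context, as the fallback
     unwinders do (here just the zero-initialized slot).  */
  printf ("saved pc slot: %lld\n",
          (long long) frame.uc.uc_mcontext.gregs[REG_RIP]);
#endif
  return 0;
}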