From c7904cc667e9a5cdd8c566cef72209edf000791a Mon Sep 17 00:00:00 2001 From: Marek Polacek Date: Fri, 12 Jul 2024 15:41:04 -0400 Subject: [PATCH] 13.3.1-2.1 Fix wrong RTL patterns for vector merge high/low word on LE Resolves: RHEL-45191 --- gcc.spec | 11 +- gcc13-vector-merge-1.patch | 522 +++++++++++++++++++++++++++++++++++++ gcc13-vector-merge-2.patch | 240 +++++++++++++++++ gcc13-vector-merge-3.patch | 306 ++++++++++++++++++++++ 4 files changed, 1078 insertions(+), 1 deletion(-) create mode 100644 gcc13-vector-merge-1.patch create mode 100644 gcc13-vector-merge-2.patch create mode 100644 gcc13-vector-merge-3.patch diff --git a/gcc.spec b/gcc.spec index 4733205..f5d4827 100644 --- a/gcc.spec +++ b/gcc.spec @@ -149,7 +149,7 @@ BuildRequires: scl-utils-build Summary: GCC version %{gcc_major} Name: %{?scl_prefix}gcc Version: %{gcc_version} -Release: %{gcc_release}%{?dist} +Release: %{gcc_release}.1%{?dist} # libgcc, libgfortran, libgomp, libstdc++ and crtstuff have # GCC Runtime Exception. License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD @@ -331,6 +331,9 @@ Patch10: gcc13-rh1574936.patch Patch11: gcc13-d-shared-libphobos.patch Patch12: gcc13-znver5.patch Patch13: gcc13-pr107071.patch +Patch14: gcc13-vector-merge-1.patch +Patch15: gcc13-vector-merge-2.patch +Patch16: gcc13-vector-merge-3.patch Patch50: isl-rh2155127.patch @@ -698,6 +701,9 @@ so that there cannot be any synchronization problems. %patch -P11 -p0 -b .d-shared-libphobos~ %patch -P12 -p1 -b .znver5~ %patch -P13 -p1 -b .pr107071~ +%patch -P14 -p1 -b .vector-merge-1~ +%patch -P15 -p1 -b .vector-merge-2~ +%patch -P16 -p1 -b .vector-merge-3~ %if 0%{?rhel} >= 6 %patch -P100 -p1 -b .fortran-fdec-duplicates~ @@ -2909,6 +2915,9 @@ fi %endif %changelog +* Fri Jul 12 2024 Marek Polacek 13.3.1-2.1 +- fix wrong RTL patterns for vector merge high/low word on LE (RHEL-45191) + * Tue Jun 11 2024 Marek Polacek 13.3.1-2 - update from releases/gcc-13 branch - PRs ada/114398, ada/114708, c/114493, c++/111529, c++/113598, diff --git a/gcc13-vector-merge-1.patch b/gcc13-vector-merge-1.patch new file mode 100644 index 0000000..039fe24 --- /dev/null +++ b/gcc13-vector-merge-1.patch @@ -0,0 +1,522 @@ +commit 361bfcec901ca882130e338aebaa2ebc6ea2dc3b +Author: Kewen Lin +Date: Thu Jun 20 20:23:56 2024 -0500 + + rs6000: Fix wrong RTL patterns for vector merge high/low word on LE + + Commit r12-4496 changes some define_expands and define_insns + for vector merge high/low word, which are altivec_vmrg[hl]w, + vsx_xxmrg[hl]w_. These defines are mainly for + built-in function vec_merge{h,l}, __builtin_vsx_xxmrghw, + __builtin_vsx_xxmrghw_4si and some internal gen function + needs. These functions should consider endianness, taking + vec_mergeh as example, as PVIPR defines, vec_mergeh "Merges + the first halves (in element order) of two vectors", it does + note it's in element order. So it's mapped into vmrghw on + BE while vmrglw on LE respectively. Although the mapped + insns are different, as the discussion in PR106069, the RTL + pattern should be still the same, it is conformed before + commit r12-4496, define_expand altivec_vmrghw got expanded + into: + + (vec_select:VSX_W + (vec_concat: + (match_operand:VSX_W 1 "register_operand" "wa,v") + (match_operand:VSX_W 2 "register_operand" "wa,v")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] + + on both BE and LE then. But commit r12-4496 changed it to + expand into: + + (vec_select:VSX_W + (vec_concat: + (match_operand:VSX_W 1 "register_operand" "wa,v") + (match_operand:VSX_W 2 "register_operand" "wa,v")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] + + on BE, and + + (vec_select:VSX_W + (vec_concat: + (match_operand:VSX_W 1 "register_operand" "wa,v") + (match_operand:VSX_W 2 "register_operand" "wa,v")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] + + on LE, although the mapped insn are still vmrghw on BE and + vmrglw on LE, the associated RTL pattern is completely + wrong and inconsistent with the mapped insn. If optimization + passes leave this pattern alone, even if its pattern doesn't + represent its mapped insn, it's still fine, that's why simple + testing on bif doesn't expose this issue. But once some + optimization pass such as combine does some changes basing + on this wrong pattern, because the pattern doesn't match the + semantics that the expanded insn is intended to represent, + it would cause the unexpected result. + + So this patch is to fix the wrong RTL pattern, ensure the + associated RTL patterns become the same as before which can + have the same semantic as their mapped insns. With the + proposed patch, the expanders like altivec_vmrghw expands + into altivec_vmrghb_direct_be or altivec_vmrglb_direct_le + depending on endianness, "direct" can easily show which + insn would be generated, _be and _le are mainly for the + different RTL patterns as endianness. + + Co-authored-by: Xionghu Luo + + PR target/106069 + PR target/115355 + + gcc/ChangeLog: + + * config/rs6000/altivec.md (altivec_vmrghw_direct_): Rename + to ... + (altivec_vmrghw_direct__be): ... this. Add the condition + BYTES_BIG_ENDIAN. + (altivec_vmrghw_direct__le): New define_insn. + (altivec_vmrglw_direct_): Rename to ... + (altivec_vmrglw_direct__be): ... this. Add the condition + BYTES_BIG_ENDIAN. + (altivec_vmrglw_direct__le): New define_insn. + (altivec_vmrghw): Adjust by calling gen_altivec_vmrghw_direct_v4si_be + for BE and gen_altivec_vmrglw_direct_v4si_le for LE. + (altivec_vmrglw): Adjust by calling gen_altivec_vmrglw_direct_v4si_be + for BE and gen_altivec_vmrghw_direct_v4si_le for LE. + (vec_widen_umult_hi_v8hi): Adjust the call to + gen_altivec_vmrghw_direct_v4si by gen_altivec_vmrghw for BE + and by gen_altivec_vmrglw for LE. + (vec_widen_smult_hi_v8hi): Likewise. + (vec_widen_umult_lo_v8hi): Adjust the call to + gen_altivec_vmrglw_direct_v4si by gen_altivec_vmrglw for BE + and by gen_altivec_vmrghw for LE + (vec_widen_smult_lo_v8hi): Likewise. + * config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace + CODE_FOR_altivec_vmrghw_direct_v4si by + CODE_FOR_altivec_vmrghw_direct_v4si_be for BE and + CODE_FOR_altivec_vmrghw_direct_v4si_le for LE. And replace + CODE_FOR_altivec_vmrglw_direct_v4si by + CODE_FOR_altivec_vmrglw_direct_v4si_be for BE and + CODE_FOR_altivec_vmrglw_direct_v4si_le for LE. + * config/rs6000/vsx.md (vsx_xxmrghw_): Adjust by calling + gen_altivec_vmrghw_direct_v4si_be for BE and + gen_altivec_vmrglw_direct_v4si_le for LE. + (vsx_xxmrglw_): Adjust by calling + gen_altivec_vmrglw_direct_v4si_be for BE and + gen_altivec_vmrghw_direct_v4si_le for LE. + + gcc/testsuite/ChangeLog: + + * g++.target/powerpc/pr106069.C: New test. + * gcc.target/powerpc/pr115355.c: New test. + + (cherry picked from commit 52c112800d9f44457c4832309a48c00945811313) + +diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md +index ad1224e0b57..92e2e4a4090 100644 +--- a/gcc/config/rs6000/altivec.md ++++ b/gcc/config/rs6000/altivec.md +@@ -1212,16 +1212,18 @@ (define_expand "altivec_vmrghw" + (use (match_operand:V4SI 2 "register_operand"))] + "VECTOR_MEM_ALTIVEC_P (V4SImode)" + { +- rtx (*fun) (rtx, rtx, rtx); +- fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrghw_direct_v4si +- : gen_altivec_vmrglw_direct_v4si; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn (gen_altivec_vmrghw_direct_v4si_be (operands[0], ++ operands[1], ++ operands[2])); ++ else ++ emit_insn (gen_altivec_vmrglw_direct_v4si_le (operands[0], ++ operands[2], ++ operands[1])); + DONE; + }) + +-(define_insn "altivec_vmrghw_direct_" ++(define_insn "altivec_vmrghw_direct__be" + [(set (match_operand:VSX_W 0 "register_operand" "=wa,v") + (vec_select:VSX_W + (vec_concat: +@@ -1229,7 +1231,21 @@ (define_insn "altivec_vmrghw_direct_" + (match_operand:VSX_W 2 "register_operand" "wa,v")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] +- "TARGET_ALTIVEC" ++ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" ++ "@ ++ xxmrghw %x0,%x1,%x2 ++ vmrghw %0,%1,%2" ++ [(set_attr "type" "vecperm")]) ++ ++(define_insn "altivec_vmrghw_direct__le" ++ [(set (match_operand:VSX_W 0 "register_operand" "=wa,v") ++ (vec_select:VSX_W ++ (vec_concat: ++ (match_operand:VSX_W 2 "register_operand" "wa,v") ++ (match_operand:VSX_W 1 "register_operand" "wa,v")) ++ (parallel [(const_int 2) (const_int 6) ++ (const_int 3) (const_int 7)])))] ++ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" + "@ + xxmrghw %x0,%x1,%x2 + vmrghw %0,%1,%2" +@@ -1318,16 +1334,18 @@ (define_expand "altivec_vmrglw" + (use (match_operand:V4SI 2 "register_operand"))] + "VECTOR_MEM_ALTIVEC_P (V4SImode)" + { +- rtx (*fun) (rtx, rtx, rtx); +- fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrglw_direct_v4si +- : gen_altivec_vmrghw_direct_v4si; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn (gen_altivec_vmrglw_direct_v4si_be (operands[0], ++ operands[1], ++ operands[2])); ++ else ++ emit_insn (gen_altivec_vmrghw_direct_v4si_le (operands[0], ++ operands[2], ++ operands[1])); + DONE; + }) + +-(define_insn "altivec_vmrglw_direct_" ++(define_insn "altivec_vmrglw_direct__be" + [(set (match_operand:VSX_W 0 "register_operand" "=wa,v") + (vec_select:VSX_W + (vec_concat: +@@ -1335,7 +1353,21 @@ (define_insn "altivec_vmrglw_direct_" + (match_operand:VSX_W 2 "register_operand" "wa,v")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] +- "TARGET_ALTIVEC" ++ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" ++ "@ ++ xxmrglw %x0,%x1,%x2 ++ vmrglw %0,%1,%2" ++ [(set_attr "type" "vecperm")]) ++ ++(define_insn "altivec_vmrglw_direct__le" ++ [(set (match_operand:VSX_W 0 "register_operand" "=wa,v") ++ (vec_select:VSX_W ++ (vec_concat: ++ (match_operand:VSX_W 2 "register_operand" "wa,v") ++ (match_operand:VSX_W 1 "register_operand" "wa,v")) ++ (parallel [(const_int 0) (const_int 4) ++ (const_int 1) (const_int 5)])))] ++ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" + "@ + xxmrglw %x0,%x1,%x2 + vmrglw %0,%1,%2" +@@ -3807,13 +3839,13 @@ (define_expand "vec_widen_umult_hi_v8hi" + { + emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); + } + DONE; + }) +@@ -3832,13 +3864,13 @@ (define_expand "vec_widen_umult_lo_v8hi" + { + emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); + } + DONE; + }) +@@ -3857,13 +3889,13 @@ (define_expand "vec_widen_smult_hi_v8hi" + { + emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); + } + DONE; + }) +@@ -3882,13 +3914,13 @@ (define_expand "vec_widen_smult_lo_v8hi" + { + emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); + } + DONE; + }) +diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc +index 9cfde1a52ea..b3d648312f1 100644 +--- a/gcc/config/rs6000/rs6000.cc ++++ b/gcc/config/rs6000/rs6000.cc +@@ -23174,8 +23174,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + : CODE_FOR_altivec_vmrglh_direct, + {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}}, + {OPTION_MASK_ALTIVEC, +- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si +- : CODE_FOR_altivec_vmrglw_direct_v4si, ++ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si_be ++ : CODE_FOR_altivec_vmrglw_direct_v4si_le, + {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}}, + {OPTION_MASK_ALTIVEC, + BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct +@@ -23186,8 +23186,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + : CODE_FOR_altivec_vmrghh_direct, + {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}}, + {OPTION_MASK_ALTIVEC, +- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si +- : CODE_FOR_altivec_vmrghw_direct_v4si, ++ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si_be ++ : CODE_FOR_altivec_vmrghw_direct_v4si_le, + {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}}, + {OPTION_MASK_P8_VECTOR, + BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct +diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md +index f70d69ee4b9..b9a1bfb5c16 100644 +--- a/gcc/config/rs6000/vsx.md ++++ b/gcc/config/rs6000/vsx.md +@@ -4683,12 +4683,14 @@ (define_expand "vsx_xxmrghw_" + (const_int 1) (const_int 5)])))] + "VECTOR_MEM_VSX_P (mode)" + { +- rtx (*fun) (rtx, rtx, rtx); +- fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrghw_direct_ +- : gen_altivec_vmrglw_direct_; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn (gen_altivec_vmrghw_direct_v4si_be (operands[0], ++ operands[1], ++ operands[2])); ++ else ++ emit_insn (gen_altivec_vmrglw_direct_v4si_le (operands[0], ++ operands[2], ++ operands[1])); + DONE; + } + [(set_attr "type" "vecperm")]) +@@ -4703,12 +4705,14 @@ (define_expand "vsx_xxmrglw_" + (const_int 3) (const_int 7)])))] + "VECTOR_MEM_VSX_P (mode)" + { +- rtx (*fun) (rtx, rtx, rtx); +- fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrglw_direct_ +- : gen_altivec_vmrghw_direct_; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn (gen_altivec_vmrglw_direct_v4si_be (operands[0], ++ operands[1], ++ operands[2])); ++ else ++ emit_insn (gen_altivec_vmrghw_direct_v4si_le (operands[0], ++ operands[2], ++ operands[1])); + DONE; + } + [(set_attr "type" "vecperm")]) +diff --git a/gcc/testsuite/g++.target/powerpc/pr106069.C b/gcc/testsuite/g++.target/powerpc/pr106069.C +new file mode 100644 +index 00000000000..537207d2fe8 +--- /dev/null ++++ b/gcc/testsuite/g++.target/powerpc/pr106069.C +@@ -0,0 +1,119 @@ ++/* { dg-options "-O -fno-tree-forwprop -maltivec" } */ ++/* { dg-require-effective-target vmx_hw } */ ++/* { dg-do run } */ ++ ++typedef __attribute__ ((altivec (vector__))) unsigned native_simd_type; ++ ++union ++{ ++ native_simd_type V; ++ int R[4]; ++} store_le_vec; ++ ++struct S ++{ ++ S () = default; ++ S (unsigned B0) ++ { ++ native_simd_type val{B0}; ++ m_simd = val; ++ } ++ void store_le (unsigned int out[]) ++ { ++ store_le_vec.V = m_simd; ++ unsigned int x0 = store_le_vec.R[0]; ++ __builtin_memcpy (out, &x0, 4); ++ } ++ S rotl (unsigned int r) ++ { ++ native_simd_type rot{r}; ++ return __builtin_vec_rl (m_simd, rot); ++ } ++ void operator+= (S other) ++ { ++ m_simd = __builtin_vec_add (m_simd, other.m_simd); ++ } ++ void operator^= (S other) ++ { ++ m_simd = __builtin_vec_xor (m_simd, other.m_simd); ++ } ++ static void transpose (S &B0, S B1, S B2, S B3) ++ { ++ native_simd_type T0 = __builtin_vec_mergeh (B0.m_simd, B2.m_simd); ++ native_simd_type T1 = __builtin_vec_mergeh (B1.m_simd, B3.m_simd); ++ native_simd_type T2 = __builtin_vec_mergel (B0.m_simd, B2.m_simd); ++ native_simd_type T3 = __builtin_vec_mergel (B1.m_simd, B3.m_simd); ++ B0 = __builtin_vec_mergeh (T0, T1); ++ B3 = __builtin_vec_mergel (T2, T3); ++ } ++ S (native_simd_type x) : m_simd (x) {} ++ native_simd_type m_simd; ++}; ++ ++void ++foo (unsigned int output[], unsigned state[]) ++{ ++ S R00 = state[0]; ++ S R01 = state[0]; ++ S R02 = state[2]; ++ S R03 = state[0]; ++ S R05 = state[5]; ++ S R06 = state[6]; ++ S R07 = state[7]; ++ S R08 = state[8]; ++ S R09 = state[9]; ++ S R10 = state[10]; ++ S R11 = state[11]; ++ S R12 = state[12]; ++ S R13 = state[13]; ++ S R14 = state[4]; ++ S R15 = state[15]; ++ for (int r = 0; r != 10; ++r) ++ { ++ R09 += R13; ++ R11 += R15; ++ R05 ^= R09; ++ R06 ^= R10; ++ R07 ^= R11; ++ R07 = R07.rotl (7); ++ R00 += R05; ++ R01 += R06; ++ R02 += R07; ++ R15 ^= R00; ++ R12 ^= R01; ++ R13 ^= R02; ++ R00 += R05; ++ R01 += R06; ++ R02 += R07; ++ R15 ^= R00; ++ R12 = R12.rotl (8); ++ R13 = R13.rotl (8); ++ R10 += R15; ++ R11 += R12; ++ R08 += R13; ++ R09 += R14; ++ R05 ^= R10; ++ R06 ^= R11; ++ R07 ^= R08; ++ R05 = R05.rotl (7); ++ R06 = R06.rotl (7); ++ R07 = R07.rotl (7); ++ } ++ R00 += state[0]; ++ S::transpose (R00, R01, R02, R03); ++ R00.store_le (output); ++} ++ ++unsigned int res[1]; ++unsigned main_state[]{1634760805, 60878, 2036477234, 6, ++ 0, 825562964, 1471091955, 1346092787, ++ 506976774, 4197066702, 518848283, 118491664, ++ 0, 0, 0, 0}; ++int ++main () ++{ ++ foo (res, main_state); ++ if (res[0] != 0x41fcef98) ++ __builtin_abort (); ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/powerpc/pr115355.c b/gcc/testsuite/gcc.target/powerpc/pr115355.c +new file mode 100644 +index 00000000000..8955126b808 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/powerpc/pr115355.c +@@ -0,0 +1,37 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target p9vector_hw } */ ++/* Force vectorization with -fno-vect-cost-model to have vector unpack ++ which exposes the issue in PR115355. */ ++/* { dg-options "-O2 -mdejagnu-cpu=power9 -fno-vect-cost-model" } */ ++ ++/* Verify it runs successfully. */ ++ ++__attribute__((noipa)) ++void setToIdentityGOOD(unsigned long long *mVec, unsigned int mLen) ++{ ++ #pragma GCC novector ++ for (unsigned int i = 0; i < mLen; i++) ++ mVec[i] = i; ++} ++ ++__attribute__((noipa)) ++void setToIdentityBAD(unsigned long long *mVec, unsigned int mLen) ++{ ++ for (unsigned int i = 0; i < mLen; i++) ++ mVec[i] = i; ++} ++ ++unsigned long long vec1[100]; ++unsigned long long vec2[100]; ++ ++int main() ++{ ++ unsigned int l = 29; ++ setToIdentityGOOD (vec1, 29); ++ setToIdentityBAD (vec2, 29); ++ ++ if (__builtin_memcmp (vec1, vec2, l * sizeof (vec1[0])) != 0) ++ __builtin_abort (); ++ ++ return 0; ++} diff --git a/gcc13-vector-merge-2.patch b/gcc13-vector-merge-2.patch new file mode 100644 index 0000000..3a5cbfb --- /dev/null +++ b/gcc13-vector-merge-2.patch @@ -0,0 +1,240 @@ +commit ffdd377fc07cdc7b62669d354e23f30940eaaffe +Author: Kewen Lin +Date: Wed Jun 26 02:16:17 2024 -0500 + + rs6000: Fix wrong RTL patterns for vector merge high/low char on LE + + Commit r12-4496 changes some define_expands and define_insns + for vector merge high/low char, which are altivec_vmrg[hl]b. + These defines are mainly for built-in function vec_merge{h,l} + and some internal gen function needs. These functions should + consider endianness, taking vec_mergeh as example, as PVIPR + defines, vec_mergeh "Merges the first halves (in element order) + of two vectors", it does note it's in element order. So it's + mapped into vmrghb on BE while vmrglb on LE respectively. + Although the mapped insns are different, as the discussion in + PR106069, the RTL pattern should be still the same, it is + conformed before commit r12-4496, but gets changed into + different patterns on BE and LE starting from commit r12-4496. + Similar to 32-bit element case in commit log of r15-1504, this + 8-bit element pattern on LE doesn't actually match what the + underlying insn is intended to represent, once some optimization + like combine does some changes basing on it, it would cause + the unexpected consequence. The newly constructed test case + pr106069-1.c is a typical example for this issue. + + So this patch is to fix the wrong RTL pattern, ensure the + associated RTL patterns become the same as before which can + have the same semantic as their mapped insns. With the + proposed patch, the expanders like altivec_vmrghb expands + into altivec_vmrghb_direct_be or altivec_vmrglb_direct_le + depending on endianness, "direct" can easily show which + insn would be generated, _be and _le are mainly for the + different RTL patterns as endianness. + + Co-authored-by: Xionghu Luo + + PR target/106069 + PR target/115355 + + gcc/ChangeLog: + + * config/rs6000/altivec.md (altivec_vmrghb_direct): Rename to ... + (altivec_vmrghb_direct_be): ... this. Add condition BYTES_BIG_ENDIAN. + (altivec_vmrghb_direct_le): New define_insn. + (altivec_vmrglb_direct): Rename to ... + (altivec_vmrglb_direct_be): ... this. Add condition BYTES_BIG_ENDIAN. + (altivec_vmrglb_direct_le): New define_insn. + (altivec_vmrghb): Adjust by calling gen_altivec_vmrghb_direct_be + for BE and gen_altivec_vmrglb_direct_le for LE. + (altivec_vmrglb): Adjust by calling gen_altivec_vmrglb_direct_be + for BE and gen_altivec_vmrghb_direct_le for LE. + * config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace + CODE_FOR_altivec_vmrghb_direct by + CODE_FOR_altivec_vmrghb_direct_be for BE and + CODE_FOR_altivec_vmrghb_direct_le for LE. And replace + CODE_FOR_altivec_vmrglb_direct by + CODE_FOR_altivec_vmrglb_direct_be for BE and + CODE_FOR_altivec_vmrglb_direct_le for LE. + + gcc/testsuite/ChangeLog: + + * gcc.target/powerpc/pr106069-1.c: New test. + + (cherry picked from commit 62520e4e9f7e2fe8a16ee57a4bd35da2e921ae22) + +diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md +index 92e2e4a4090..47664204bc5 100644 +--- a/gcc/config/rs6000/altivec.md ++++ b/gcc/config/rs6000/altivec.md +@@ -1152,15 +1152,16 @@ (define_expand "altivec_vmrghb" + (use (match_operand:V16QI 2 "register_operand"))] + "TARGET_ALTIVEC" + { +- rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrghb_direct +- : gen_altivec_vmrglb_direct; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn ( ++ gen_altivec_vmrghb_direct_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn ( ++ gen_altivec_vmrglb_direct_le (operands[0], operands[2], operands[1])); + DONE; + }) + +-(define_insn "altivec_vmrghb_direct" ++(define_insn "altivec_vmrghb_direct_be" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (vec_select:V16QI + (vec_concat:V32QI +@@ -1174,7 +1175,25 @@ (define_insn "altivec_vmrghb_direct" + (const_int 5) (const_int 21) + (const_int 6) (const_int 22) + (const_int 7) (const_int 23)])))] +- "TARGET_ALTIVEC" ++ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" ++ "vmrghb %0,%1,%2" ++ [(set_attr "type" "vecperm")]) ++ ++(define_insn "altivec_vmrghb_direct_le" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (vec_select:V16QI ++ (vec_concat:V32QI ++ (match_operand:V16QI 2 "register_operand" "v") ++ (match_operand:V16QI 1 "register_operand" "v")) ++ (parallel [(const_int 8) (const_int 24) ++ (const_int 9) (const_int 25) ++ (const_int 10) (const_int 26) ++ (const_int 11) (const_int 27) ++ (const_int 12) (const_int 28) ++ (const_int 13) (const_int 29) ++ (const_int 14) (const_int 30) ++ (const_int 15) (const_int 31)])))] ++ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" + "vmrghb %0,%1,%2" + [(set_attr "type" "vecperm")]) + +@@ -1274,15 +1293,16 @@ (define_expand "altivec_vmrglb" + (use (match_operand:V16QI 2 "register_operand"))] + "TARGET_ALTIVEC" + { +- rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrglb_direct +- : gen_altivec_vmrghb_direct; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn ( ++ gen_altivec_vmrglb_direct_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn ( ++ gen_altivec_vmrghb_direct_le (operands[0], operands[2], operands[1])); + DONE; + }) + +-(define_insn "altivec_vmrglb_direct" ++(define_insn "altivec_vmrglb_direct_be" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (vec_select:V16QI + (vec_concat:V32QI +@@ -1296,7 +1316,25 @@ (define_insn "altivec_vmrglb_direct" + (const_int 13) (const_int 29) + (const_int 14) (const_int 30) + (const_int 15) (const_int 31)])))] +- "TARGET_ALTIVEC" ++ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" ++ "vmrglb %0,%1,%2" ++ [(set_attr "type" "vecperm")]) ++ ++(define_insn "altivec_vmrglb_direct_le" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (vec_select:V16QI ++ (vec_concat:V32QI ++ (match_operand:V16QI 2 "register_operand" "v") ++ (match_operand:V16QI 1 "register_operand" "v")) ++ (parallel [(const_int 0) (const_int 16) ++ (const_int 1) (const_int 17) ++ (const_int 2) (const_int 18) ++ (const_int 3) (const_int 19) ++ (const_int 4) (const_int 20) ++ (const_int 5) (const_int 21) ++ (const_int 6) (const_int 22) ++ (const_int 7) (const_int 23)])))] ++ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" + "vmrglb %0,%1,%2" + [(set_attr "type" "vecperm")]) + +diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc +index b3d648312f1..10088033aa1 100644 +--- a/gcc/config/rs6000/rs6000.cc ++++ b/gcc/config/rs6000/rs6000.cc +@@ -23166,8 +23166,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + CODE_FOR_altivec_vpkuwum_direct, + {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}}, + {OPTION_MASK_ALTIVEC, +- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct +- : CODE_FOR_altivec_vmrglb_direct, ++ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct_be ++ : CODE_FOR_altivec_vmrglb_direct_le, + {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}}, + {OPTION_MASK_ALTIVEC, + BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct +@@ -23178,8 +23178,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + : CODE_FOR_altivec_vmrglw_direct_v4si_le, + {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}}, + {OPTION_MASK_ALTIVEC, +- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct +- : CODE_FOR_altivec_vmrghb_direct, ++ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct_be ++ : CODE_FOR_altivec_vmrghb_direct_le, + {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}}, + {OPTION_MASK_ALTIVEC, + BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct +diff --git a/gcc/testsuite/gcc.target/powerpc/pr106069-1.c b/gcc/testsuite/gcc.target/powerpc/pr106069-1.c +new file mode 100644 +index 00000000000..4945d8fedfb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/powerpc/pr106069-1.c +@@ -0,0 +1,39 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2" } */ ++/* { dg-require-effective-target vmx_hw } */ ++ ++/* Test vector merge for 8-bit element size, ++ it will abort if the RTL pattern isn't expected. */ ++ ++#include "altivec.h" ++ ++__attribute__((noipa)) ++signed char elem_6 (vector signed char a, vector signed char b) ++{ ++ vector signed char c = vec_mergeh (a,b); ++ return vec_extract (c, 6); ++} ++ ++__attribute__((noipa)) ++unsigned char elem_15 (vector unsigned char a, vector unsigned char b) ++{ ++ vector unsigned char c = vec_mergel (a,b); ++ return vec_extract (c, 15); ++} ++ ++int ++main () ++{ ++ vector unsigned char v1 ++ = {3, 33, 22, 12, 34, 14, 5, 25, 30, 11, 0, 21, 17, 27, 38, 8}; ++ vector unsigned char v2 ++ = {81, 82, 83, 84, 68, 67, 66, 65, 99, 100, 101, 102, 250, 125, 0, 6}; ++ signed char x1 = elem_6 ((vector signed char) v1, (vector signed char) v2); ++ unsigned char x2 = elem_15 (v1, v2); ++ ++ if (x1 != 12 || x2 != 6) ++ __builtin_abort (); ++ ++ return 0; ++} ++ diff --git a/gcc13-vector-merge-3.patch b/gcc13-vector-merge-3.patch new file mode 100644 index 0000000..c6ee970 --- /dev/null +++ b/gcc13-vector-merge-3.patch @@ -0,0 +1,306 @@ +commit bab38d9271ce3f26cb64b8cb712351eb3fedd559 +Author: Kewen Lin +Date: Wed Jun 26 02:16:17 2024 -0500 + + rs6000: Fix wrong RTL patterns for vector merge high/low short on LE + + Commit r12-4496 changes some define_expands and define_insns + for vector merge high/low short, which are altivec_vmrg[hl]h. + These defines are mainly for built-in function vec_merge{h,l} + and some internal gen function needs. These functions should + consider endianness, taking vec_mergeh as example, as PVIPR + defines, vec_mergeh "Merges the first halves (in element order) + of two vectors", it does note it's in element order. So it's + mapped into vmrghh on BE while vmrglh on LE respectively. + Although the mapped insns are different, as the discussion in + PR106069, the RTL pattern should be still the same, it is + conformed before commit r12-4496, but gets changed into + different patterns on BE and LE starting from commit r12-4496. + Similar to 32-bit element case in commit log of r15-1504, this + 16-bit element pattern on LE doesn't actually match what the + underlying insn is intended to represent, once some optimization + like combine does some changes basing on it, it would cause + the unexpected consequence. The newly constructed test case + pr106069-2.c is a typical example for this issue on element type + short. + + So this patch is to fix the wrong RTL pattern, ensure the + associated RTL patterns become the same as before which can + have the same semantic as their mapped insns. With the + proposed patch, the expanders like altivec_vmrghh expands + into altivec_vmrghh_direct_be or altivec_vmrglh_direct_le + depending on endianness, "direct" can easily show which + insn would be generated, _be and _le are mainly for the + different RTL patterns as endianness. + + Co-authored-by: Xionghu Luo + + PR target/106069 + PR target/115355 + + gcc/ChangeLog: + + * config/rs6000/altivec.md (altivec_vmrghh_direct): Rename to ... + (altivec_vmrghh_direct_be): ... this. Add condition BYTES_BIG_ENDIAN. + (altivec_vmrghh_direct_le): New define_insn. + (altivec_vmrglh_direct): Rename to ... + (altivec_vmrglh_direct_be): ... this. Add condition BYTES_BIG_ENDIAN. + (altivec_vmrglh_direct_le): New define_insn. + (altivec_vmrghh): Adjust by calling gen_altivec_vmrghh_direct_be + for BE and gen_altivec_vmrglh_direct_le for LE. + (altivec_vmrglh): Adjust by calling gen_altivec_vmrglh_direct_be + for BE and gen_altivec_vmrghh_direct_le for LE. + (vec_widen_umult_hi_v16qi): Adjust the call to + gen_altivec_vmrghh_direct by gen_altivec_vmrghh for BE + and by gen_altivec_vmrglh for LE. + (vec_widen_smult_hi_v16qi): Likewise. + (vec_widen_umult_lo_v16qi): Adjust the call to + gen_altivec_vmrglh_direct by gen_altivec_vmrglh for BE + and by gen_altivec_vmrghh for LE. + (vec_widen_smult_lo_v16qi): Likewise. + * config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace + CODE_FOR_altivec_vmrghh_direct by + CODE_FOR_altivec_vmrghh_direct_be for BE and + CODE_FOR_altivec_vmrghh_direct_le for LE. And replace + CODE_FOR_altivec_vmrglh_direct by + CODE_FOR_altivec_vmrglh_direct_be for BE and + CODE_FOR_altivec_vmrglh_direct_le for LE. + + gcc/testsuite/ChangeLog: + + * gcc.target/powerpc/pr106069-2.c: New test. + + (cherry picked from commit 812c70bf4981958488331d4ea5af8709b5321da1) + +diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md +index 47664204bc5..6557393a97c 100644 +--- a/gcc/config/rs6000/altivec.md ++++ b/gcc/config/rs6000/altivec.md +@@ -1203,17 +1203,18 @@ (define_expand "altivec_vmrghh" + (use (match_operand:V8HI 2 "register_operand"))] + "TARGET_ALTIVEC" + { +- rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrghh_direct +- : gen_altivec_vmrglh_direct; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn ( ++ gen_altivec_vmrghh_direct_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn ( ++ gen_altivec_vmrglh_direct_le (operands[0], operands[2], operands[1])); + DONE; + }) + +-(define_insn "altivec_vmrghh_direct" ++(define_insn "altivec_vmrghh_direct_be" + [(set (match_operand:V8HI 0 "register_operand" "=v") +- (vec_select:V8HI ++ (vec_select:V8HI + (vec_concat:V16HI + (match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")) +@@ -1221,7 +1222,21 @@ (define_insn "altivec_vmrghh_direct" + (const_int 1) (const_int 9) + (const_int 2) (const_int 10) + (const_int 3) (const_int 11)])))] +- "TARGET_ALTIVEC" ++ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" ++ "vmrghh %0,%1,%2" ++ [(set_attr "type" "vecperm")]) ++ ++(define_insn "altivec_vmrghh_direct_le" ++ [(set (match_operand:V8HI 0 "register_operand" "=v") ++ (vec_select:V8HI ++ (vec_concat:V16HI ++ (match_operand:V8HI 2 "register_operand" "v") ++ (match_operand:V8HI 1 "register_operand" "v")) ++ (parallel [(const_int 4) (const_int 12) ++ (const_int 5) (const_int 13) ++ (const_int 6) (const_int 14) ++ (const_int 7) (const_int 15)])))] ++ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" + "vmrghh %0,%1,%2" + [(set_attr "type" "vecperm")]) + +@@ -1344,15 +1359,16 @@ (define_expand "altivec_vmrglh" + (use (match_operand:V8HI 2 "register_operand"))] + "TARGET_ALTIVEC" + { +- rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrglh_direct +- : gen_altivec_vmrghh_direct; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn ( ++ gen_altivec_vmrglh_direct_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn ( ++ gen_altivec_vmrghh_direct_le (operands[0], operands[2], operands[1])); + DONE; + }) + +-(define_insn "altivec_vmrglh_direct" ++(define_insn "altivec_vmrglh_direct_be" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (vec_select:V8HI + (vec_concat:V16HI +@@ -1362,7 +1378,21 @@ (define_insn "altivec_vmrglh_direct" + (const_int 5) (const_int 13) + (const_int 6) (const_int 14) + (const_int 7) (const_int 15)])))] +- "TARGET_ALTIVEC" ++ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" ++ "vmrglh %0,%1,%2" ++ [(set_attr "type" "vecperm")]) ++ ++(define_insn "altivec_vmrglh_direct_le" ++ [(set (match_operand:V8HI 0 "register_operand" "=v") ++ (vec_select:V8HI ++ (vec_concat:V16HI ++ (match_operand:V8HI 2 "register_operand" "v") ++ (match_operand:V8HI 1 "register_operand" "v")) ++ (parallel [(const_int 0) (const_int 8) ++ (const_int 1) (const_int 9) ++ (const_int 2) (const_int 10) ++ (const_int 3) (const_int 11)])))] ++ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" + "vmrglh %0,%1,%2" + [(set_attr "type" "vecperm")]) + +@@ -3777,13 +3807,13 @@ (define_expand "vec_widen_umult_hi_v16qi" + { + emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghh_direct (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghh_direct (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrglh (operands[0], ve, vo)); + } + DONE; + }) +@@ -3802,13 +3832,13 @@ (define_expand "vec_widen_umult_lo_v16qi" + { + emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglh_direct (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrglh (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglh_direct (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo)); + } + DONE; + }) +@@ -3827,13 +3857,13 @@ (define_expand "vec_widen_smult_hi_v16qi" + { + emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghh_direct (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghh_direct (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrglh (operands[0], ve, vo)); + } + DONE; + }) +@@ -3852,13 +3882,13 @@ (define_expand "vec_widen_smult_lo_v16qi" + { + emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglh_direct (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrglh (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglh_direct (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo)); + } + DONE; + }) +diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc +index 10088033aa1..76eb89ad529 100644 +--- a/gcc/config/rs6000/rs6000.cc ++++ b/gcc/config/rs6000/rs6000.cc +@@ -23170,8 +23170,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + : CODE_FOR_altivec_vmrglb_direct_le, + {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}}, + {OPTION_MASK_ALTIVEC, +- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct +- : CODE_FOR_altivec_vmrglh_direct, ++ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct_be ++ : CODE_FOR_altivec_vmrglh_direct_le, + {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}}, + {OPTION_MASK_ALTIVEC, + BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si_be +@@ -23182,8 +23182,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + : CODE_FOR_altivec_vmrghb_direct_le, + {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}}, + {OPTION_MASK_ALTIVEC, +- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct +- : CODE_FOR_altivec_vmrghh_direct, ++ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct_be ++ : CODE_FOR_altivec_vmrghh_direct_le, + {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}}, + {OPTION_MASK_ALTIVEC, + BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si_be +diff --git a/gcc/testsuite/gcc.target/powerpc/pr106069-2.c b/gcc/testsuite/gcc.target/powerpc/pr106069-2.c +new file mode 100644 +index 00000000000..283e3290fb3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/powerpc/pr106069-2.c +@@ -0,0 +1,37 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2" } */ ++/* { dg-require-effective-target vmx_hw } */ ++ ++/* Test vector merge for 16-bit element size, ++ it will abort if the RTL pattern isn't expected. */ ++ ++#include "altivec.h" ++ ++__attribute__((noipa)) ++signed short elem_2 (vector signed short a, vector signed short b) ++{ ++ vector signed short c = vec_mergeh (a,b); ++ return vec_extract (c, 2); ++} ++ ++__attribute__((noipa)) ++unsigned short elem_7 (vector unsigned short a, vector unsigned short b) ++{ ++ vector unsigned short c = vec_mergel (a,b); ++ return vec_extract (c, 7); ++} ++ ++int ++main () ++{ ++ vector unsigned short v1 = {3, 22, 12, 34, 5, 25, 30, 11}; ++ vector unsigned short v2 = {84, 168, 267, 966, 65, 399, 999, 99}; ++ signed short x1 = elem_2 ((vector signed short) v1, (vector signed short) v2); ++ unsigned short x2 = elem_7 (v1, v2); ++ ++ if (x1 != 22 || x2 != 99) ++ __builtin_abort (); ++ ++ return 0; ++} ++