13.3.1-2.1

Fix wrong RTL patterns for vector merge high/low word on LE Resolves: RHEL-45191
2024-07-12 15:41:04 -04:00 · 2024-07-12 15:41:04 -04:00 · c7904cc667
commit c7904cc667
parent e385cb85ae
4 changed files with 1078 additions and 1 deletions
--- a/gcc.spec
+++ b/gcc.spec
@ -149,7 +149,7 @@ BuildRequires: scl-utils-build
 Summary: GCC version %{gcc_major}
 Name: %{?scl_prefix}gcc
 Version: %{gcc_version}
-Release: %{gcc_release}%{?dist}
+Release: %{gcc_release}.1%{?dist}
 # libgcc, libgfortran, libgomp, libstdc++ and crtstuff have
 # GCC Runtime Exception.
 License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
@ -331,6 +331,9 @@ Patch10: gcc13-rh1574936.patch
 Patch11: gcc13-d-shared-libphobos.patch
 Patch12: gcc13-znver5.patch
 Patch13: gcc13-pr107071.patch
+Patch14: gcc13-vector-merge-1.patch
+Patch15: gcc13-vector-merge-2.patch
+Patch16: gcc13-vector-merge-3.patch

 Patch50: isl-rh2155127.patch

@ -698,6 +701,9 @@ so that there cannot be any synchronization problems.
 %patch -P11 -p0 -b .d-shared-libphobos~
 %patch -P12 -p1 -b .znver5~
 %patch -P13 -p1 -b .pr107071~
+%patch -P14 -p1 -b .vector-merge-1~
+%patch -P15 -p1 -b .vector-merge-2~
+%patch -P16 -p1 -b .vector-merge-3~

 %if 0%{?rhel} >= 6
 %patch -P100 -p1 -b .fortran-fdec-duplicates~
@ -2909,6 +2915,9 @@ fi
 %endif

 %changelog
+* Fri Jul 12 2024 Marek Polacek <polacek@redhat.com> 13.3.1-2.1
+- fix wrong RTL patterns for vector merge high/low word on LE (RHEL-45191)
+
 * Tue Jun 11 2024 Marek Polacek <polacek@redhat.com> 13.3.1-2
 - update from releases/gcc-13 branch
  - PRs ada/114398, ada/114708, c/114493, c++/111529, c++/113598,
--- a/gcc13-vector-merge-1.patch
+++ b/gcc13-vector-merge-1.patch
@ -0,0 +1,522 @@
+commit 361bfcec901ca882130e338aebaa2ebc6ea2dc3b
+Author: Kewen Lin <linkw@linux.ibm.com>
+Date:   Thu Jun 20 20:23:56 2024 -0500
+
+    rs6000: Fix wrong RTL patterns for vector merge high/low word on LE
+    
+    Commit r12-4496 changes some define_expands and define_insns
+    for vector merge high/low word, which are altivec_vmrg[hl]w,
+    vsx_xxmrg[hl]w_<VSX_W:mode>.  These defines are mainly for
+    built-in function vec_merge{h,l}, __builtin_vsx_xxmrghw,
+    __builtin_vsx_xxmrghw_4si and some internal gen function
+    needs.  These functions should consider endianness, taking
+    vec_mergeh as example, as PVIPR defines, vec_mergeh "Merges
+    the first halves (in element order) of two vectors", it does
+    note it's in element order.  So it's mapped into vmrghw on
+    BE while vmrglw on LE respectively.  Although the mapped
+    insns are different, as the discussion in PR106069, the RTL
+    pattern should be still the same, it is conformed before
+    commit r12-4496, define_expand altivec_vmrghw got expanded
+    into:
+    
+      (vec_select:VSX_W
+         (vec_concat:<VS_double>
+            (match_operand:VSX_W 1 "register_operand" "wa,v")
+            (match_operand:VSX_W 2 "register_operand" "wa,v"))
+            (parallel [(const_int 0) (const_int 4)
+                       (const_int 1) (const_int 5)])))]
+    
+    on both BE and LE then.  But commit r12-4496 changed it to
+    expand into:
+    
+      (vec_select:VSX_W
+         (vec_concat:<VS_double>
+            (match_operand:VSX_W 1 "register_operand" "wa,v")
+            (match_operand:VSX_W 2 "register_operand" "wa,v"))
+            (parallel [(const_int 0) (const_int 4)
+                       (const_int 1) (const_int 5)])))]
+    
+    on BE, and
+    
+      (vec_select:VSX_W
+         (vec_concat:<VS_double>
+            (match_operand:VSX_W 1 "register_operand" "wa,v")
+            (match_operand:VSX_W 2 "register_operand" "wa,v"))
+            (parallel [(const_int 2) (const_int 6)
+                       (const_int 3) (const_int 7)])))]
+    
+    on LE, although the mapped insn are still vmrghw on BE and
+    vmrglw on LE, the associated RTL pattern is completely
+    wrong and inconsistent with the mapped insn.  If optimization
+    passes leave this pattern alone, even if its pattern doesn't
+    represent its mapped insn, it's still fine, that's why simple
+    testing on bif doesn't expose this issue.  But once some
+    optimization pass such as combine does some changes basing
+    on this wrong pattern, because the pattern doesn't match the
+    semantics that the expanded insn is intended to represent,
+    it would cause the unexpected result.
+    
+    So this patch is to fix the wrong RTL pattern, ensure the
+    associated RTL patterns become the same as before which can
+    have the same semantic as their mapped insns.  With the
+    proposed patch, the expanders like altivec_vmrghw expand
+    into altivec_vmrghw_direct_v4si_be or altivec_vmrglw_direct_v4si_le
+    depending on endianness, "direct" can easily show which
+    insn would be generated, _be and _le are mainly for the
+    different RTL patterns as endianness.
+    
+    Co-authored-by: Xionghu Luo <xionghuluo@tencent.com>
+    
+            PR target/106069
+            PR target/115355
+    
+    gcc/ChangeLog:
+    
+            * config/rs6000/altivec.md (altivec_vmrghw_direct_<VSX_W:mode>): Rename
+            to ...
+            (altivec_vmrghw_direct_<VSX_W:mode>_be): ... this.  Add the condition
+            BYTES_BIG_ENDIAN.
+            (altivec_vmrghw_direct_<VSX_W:mode>_le): New define_insn.
+            (altivec_vmrglw_direct_<VSX_W:mode>): Rename to ...
+            (altivec_vmrglw_direct_<VSX_W:mode>_be): ... this.  Add the condition
+            BYTES_BIG_ENDIAN.
+            (altivec_vmrglw_direct_<VSX_W:mode>_le): New define_insn.
+            (altivec_vmrghw): Adjust by calling gen_altivec_vmrghw_direct_v4si_be
+            for BE and gen_altivec_vmrglw_direct_v4si_le for LE.
+            (altivec_vmrglw): Adjust by calling gen_altivec_vmrglw_direct_v4si_be
+            for BE and gen_altivec_vmrghw_direct_v4si_le for LE.
+            (vec_widen_umult_hi_v8hi): Adjust the call to
+            gen_altivec_vmrghw_direct_v4si by gen_altivec_vmrghw for BE
+            and by gen_altivec_vmrglw for LE.
+            (vec_widen_smult_hi_v8hi): Likewise.
+            (vec_widen_umult_lo_v8hi): Adjust the call to
+            gen_altivec_vmrglw_direct_v4si by gen_altivec_vmrglw for BE
+            and by gen_altivec_vmrghw for LE.
+            (vec_widen_smult_lo_v8hi): Likewise.
+            * config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace
+            CODE_FOR_altivec_vmrghw_direct_v4si by
+            CODE_FOR_altivec_vmrghw_direct_v4si_be for BE and
+            CODE_FOR_altivec_vmrghw_direct_v4si_le for LE.  And replace
+            CODE_FOR_altivec_vmrglw_direct_v4si by
+            CODE_FOR_altivec_vmrglw_direct_v4si_be for BE and
+            CODE_FOR_altivec_vmrglw_direct_v4si_le for LE.
+            * config/rs6000/vsx.md (vsx_xxmrghw_<VSX_W:mode>): Adjust by calling
+            gen_altivec_vmrghw_direct_v4si_be for BE and
+            gen_altivec_vmrglw_direct_v4si_le for LE.
+            (vsx_xxmrglw_<VSX_W:mode>): Adjust by calling
+            gen_altivec_vmrglw_direct_v4si_be for BE and
+            gen_altivec_vmrghw_direct_v4si_le for LE.
+    
+    gcc/testsuite/ChangeLog:
+    
+            * g++.target/powerpc/pr106069.C: New test.
+            * gcc.target/powerpc/pr115355.c: New test.
+    
+    (cherry picked from commit 52c112800d9f44457c4832309a48c00945811313)
+
+diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
+index ad1224e0b57..92e2e4a4090 100644
+--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
+@@ -1212,16 +1212,18 @@ (define_expand "altivec_vmrghw"
+    (use (match_operand:V4SI 2 "register_operand"))]
+   "VECTOR_MEM_ALTIVEC_P (V4SImode)"
+ {
+-  rtx (*fun) (rtx, rtx, rtx);
+-  fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrghw_direct_v4si
+-			 : gen_altivec_vmrglw_direct_v4si;
+-  if (!BYTES_BIG_ENDIAN)
+-    std::swap (operands[1], operands[2]);
+-  emit_insn (fun (operands[0], operands[1], operands[2]));
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (gen_altivec_vmrghw_direct_v4si_be (operands[0],
+						  operands[1],
+						  operands[2]));
+  else
+    emit_insn (gen_altivec_vmrglw_direct_v4si_le (operands[0],
+						  operands[2],
+						  operands[1]));
+   DONE;
+ })
+ 
+-(define_insn "altivec_vmrghw_direct_<mode>"
+(define_insn "altivec_vmrghw_direct_<mode>_be"
+   [(set (match_operand:VSX_W 0 "register_operand" "=wa,v")
+ 	(vec_select:VSX_W
+ 	  (vec_concat:<VS_double>
+@@ -1229,7 +1231,21 @@ (define_insn "altivec_vmrghw_direct_<mode>"
+ 	    (match_operand:VSX_W 2 "register_operand" "wa,v"))
+ 	  (parallel [(const_int 0) (const_int 4)
+ 		     (const_int 1) (const_int 5)])))]
+-  "TARGET_ALTIVEC"
+  "TARGET_ALTIVEC && BYTES_BIG_ENDIAN"
+  "@
+   xxmrghw %x0,%x1,%x2
+   vmrghw %0,%1,%2"
+  [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrghw_direct_<mode>_le"
+  [(set (match_operand:VSX_W 0 "register_operand" "=wa,v")
+	(vec_select:VSX_W
+	  (vec_concat:<VS_double>
+	    (match_operand:VSX_W 2 "register_operand" "wa,v")
+	    (match_operand:VSX_W 1 "register_operand" "wa,v"))
+	  (parallel [(const_int 2) (const_int 6)
+		     (const_int 3) (const_int 7)])))]
+  "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN"
+   "@
+    xxmrghw %x0,%x1,%x2
+    vmrghw %0,%1,%2"
+@@ -1318,16 +1334,18 @@ (define_expand "altivec_vmrglw"
+    (use (match_operand:V4SI 2 "register_operand"))]
+   "VECTOR_MEM_ALTIVEC_P (V4SImode)"
+ {
+-  rtx (*fun) (rtx, rtx, rtx);
+-  fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrglw_direct_v4si
+-			 : gen_altivec_vmrghw_direct_v4si;
+-  if (!BYTES_BIG_ENDIAN)
+-    std::swap (operands[1], operands[2]);
+-  emit_insn (fun (operands[0], operands[1], operands[2]));
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (gen_altivec_vmrglw_direct_v4si_be (operands[0],
+						  operands[1],
+						  operands[2]));
+  else
+    emit_insn (gen_altivec_vmrghw_direct_v4si_le (operands[0],
+						  operands[2],
+						  operands[1]));
+   DONE;
+ })
+ 
+-(define_insn "altivec_vmrglw_direct_<mode>"
+(define_insn "altivec_vmrglw_direct_<mode>_be"
+   [(set (match_operand:VSX_W 0 "register_operand" "=wa,v")
+ 	(vec_select:VSX_W
+ 	  (vec_concat:<VS_double>
+@@ -1335,7 +1353,21 @@ (define_insn "altivec_vmrglw_direct_<mode>"
+ 	    (match_operand:VSX_W 2 "register_operand" "wa,v"))
+ 	  (parallel [(const_int 2) (const_int 6)
+ 		     (const_int 3) (const_int 7)])))]
+-  "TARGET_ALTIVEC"
+  "TARGET_ALTIVEC && BYTES_BIG_ENDIAN"
+  "@
+   xxmrglw %x0,%x1,%x2
+   vmrglw %0,%1,%2"
+  [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrglw_direct_<mode>_le"
+  [(set (match_operand:VSX_W 0 "register_operand" "=wa,v")
+	(vec_select:VSX_W
+	  (vec_concat:<VS_double>
+	    (match_operand:VSX_W 2 "register_operand" "wa,v")
+	    (match_operand:VSX_W 1 "register_operand" "wa,v"))
+	  (parallel [(const_int 0) (const_int 4)
+		     (const_int 1) (const_int 5)])))]
+  "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN"
+   "@
+    xxmrglw %x0,%x1,%x2
+    vmrglw %0,%1,%2"
+@@ -3807,13 +3839,13 @@ (define_expand "vec_widen_umult_hi_v8hi"
+     {
+       emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2]));
+       emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2]));
+-      emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], ve, vo));
+      emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
+     }
+   else
+     {
+       emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2]));
+       emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2]));
+-      emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], vo, ve));
+      emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
+     }
+   DONE;
+ })
+@@ -3832,13 +3864,13 @@ (define_expand "vec_widen_umult_lo_v8hi"
+     {
+       emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2]));
+       emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2]));
+-      emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], ve, vo));
+      emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
+     }
+   else
+     {
+       emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2]));
+       emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2]));
+-      emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], vo, ve));
+      emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
+     }
+   DONE;
+ })
+@@ -3857,13 +3889,13 @@ (define_expand "vec_widen_smult_hi_v8hi"
+     {
+       emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2]));
+       emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2]));
+-      emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], ve, vo));
+      emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
+     }
+   else
+     {
+       emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2]));
+       emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2]));
+-      emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], vo, ve));
+      emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
+     }
+   DONE;
+ })
+@@ -3882,13 +3914,13 @@ (define_expand "vec_widen_smult_lo_v8hi"
+     {
+       emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2]));
+       emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2]));
+-      emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], ve, vo));
+      emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
+     }
+   else
+     {
+       emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2]));
+       emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2]));
+-      emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], vo, ve));
+      emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
+     }
+   DONE;
+ })
+diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
+index 9cfde1a52ea..b3d648312f1 100644
+--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
+@@ -23174,8 +23174,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
+ 		      : CODE_FOR_altivec_vmrglh_direct,
+      {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
+     {OPTION_MASK_ALTIVEC,
+-     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si
+-		      : CODE_FOR_altivec_vmrglw_direct_v4si,
+     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si_be
+		      : CODE_FOR_altivec_vmrglw_direct_v4si_le,
+      {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
+     {OPTION_MASK_ALTIVEC,
+      BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
+@@ -23186,8 +23186,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
+ 		      : CODE_FOR_altivec_vmrghh_direct,
+      {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
+     {OPTION_MASK_ALTIVEC,
+-     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si
+-		      : CODE_FOR_altivec_vmrghw_direct_v4si,
+     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si_be
+		      : CODE_FOR_altivec_vmrghw_direct_v4si_le,
+      {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}},
+     {OPTION_MASK_P8_VECTOR,
+      BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
+diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
+index f70d69ee4b9..b9a1bfb5c16 100644
+--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
+@@ -4683,12 +4683,14 @@ (define_expand "vsx_xxmrghw_<mode>"
+ 		     (const_int 1) (const_int 5)])))]
+   "VECTOR_MEM_VSX_P (<MODE>mode)"
+ {
+-  rtx (*fun) (rtx, rtx, rtx);
+-  fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrghw_direct_<mode>
+-			 : gen_altivec_vmrglw_direct_<mode>;
+-  if (!BYTES_BIG_ENDIAN)
+-    std::swap (operands[1], operands[2]);
+-  emit_insn (fun (operands[0], operands[1], operands[2]));
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (gen_altivec_vmrghw_direct_v4si_be (operands[0],
+						  operands[1],
+						  operands[2]));
+  else
+    emit_insn (gen_altivec_vmrglw_direct_v4si_le (operands[0],
+						  operands[2],
+						  operands[1]));
+   DONE;
+ }
+   [(set_attr "type" "vecperm")])
+@@ -4703,12 +4705,14 @@ (define_expand "vsx_xxmrglw_<mode>"
+ 		     (const_int 3) (const_int 7)])))]
+   "VECTOR_MEM_VSX_P (<MODE>mode)"
+ {
+-  rtx (*fun) (rtx, rtx, rtx);
+-  fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrglw_direct_<mode>
+-			 : gen_altivec_vmrghw_direct_<mode>;
+-  if (!BYTES_BIG_ENDIAN)
+-    std::swap (operands[1], operands[2]);
+-  emit_insn (fun (operands[0], operands[1], operands[2]));
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (gen_altivec_vmrglw_direct_v4si_be (operands[0],
+						  operands[1],
+						  operands[2]));
+  else
+    emit_insn (gen_altivec_vmrghw_direct_v4si_le (operands[0],
+						  operands[2],
+						  operands[1]));
+   DONE;
+ }
+   [(set_attr "type" "vecperm")])
+diff --git a/gcc/testsuite/g++.target/powerpc/pr106069.C b/gcc/testsuite/g++.target/powerpc/pr106069.C
+new file mode 100644
+index 00000000000..537207d2fe8
+--- /dev/null
+++ b/gcc/testsuite/g++.target/powerpc/pr106069.C
+@@ -0,0 +1,119 @@
+/* { dg-options "-O -fno-tree-forwprop -maltivec" } */
+/* { dg-require-effective-target vmx_hw } */
+/* { dg-do run } */
+
+typedef __attribute__ ((altivec (vector__))) unsigned native_simd_type;
+
+union
+{
+  native_simd_type V;
+  int R[4];
+} store_le_vec;
+
+struct S
+{
+  S () = default;
+  S (unsigned B0)
+  {
+    native_simd_type val{B0};
+    m_simd = val;
+  }
+  void store_le (unsigned int out[])
+  {
+    store_le_vec.V = m_simd;
+    unsigned int x0 = store_le_vec.R[0];
+    __builtin_memcpy (out, &x0, 4);
+  }
+  S rotl (unsigned int r)
+  {
+    native_simd_type rot{r};
+    return __builtin_vec_rl (m_simd, rot);
+  }
+  void operator+= (S other)
+  {
+    m_simd = __builtin_vec_add (m_simd, other.m_simd);
+  }
+  void operator^= (S other)
+  {
+    m_simd = __builtin_vec_xor (m_simd, other.m_simd);
+  }
+  static void transpose (S &B0, S B1, S B2, S B3)
+  {
+    native_simd_type T0 = __builtin_vec_mergeh (B0.m_simd, B2.m_simd);
+    native_simd_type T1 = __builtin_vec_mergeh (B1.m_simd, B3.m_simd);
+    native_simd_type T2 = __builtin_vec_mergel (B0.m_simd, B2.m_simd);
+    native_simd_type T3 = __builtin_vec_mergel (B1.m_simd, B3.m_simd);
+    B0 = __builtin_vec_mergeh (T0, T1);
+    B3 = __builtin_vec_mergel (T2, T3);
+  }
+  S (native_simd_type x) : m_simd (x) {}
+  native_simd_type m_simd;
+};
+
+void
+foo (unsigned int output[], unsigned state[])
+{
+  S R00 = state[0];
+  S R01 = state[0];
+  S R02 = state[2];
+  S R03 = state[0];
+  S R05 = state[5];
+  S R06 = state[6];
+  S R07 = state[7];
+  S R08 = state[8];
+  S R09 = state[9];
+  S R10 = state[10];
+  S R11 = state[11];
+  S R12 = state[12];
+  S R13 = state[13];
+  S R14 = state[4];
+  S R15 = state[15];
+  for (int r = 0; r != 10; ++r)
+    {
+      R09 += R13;
+      R11 += R15;
+      R05 ^= R09;
+      R06 ^= R10;
+      R07 ^= R11;
+      R07 = R07.rotl (7);
+      R00 += R05;
+      R01 += R06;
+      R02 += R07;
+      R15 ^= R00;
+      R12 ^= R01;
+      R13 ^= R02;
+      R00 += R05;
+      R01 += R06;
+      R02 += R07;
+      R15 ^= R00;
+      R12 = R12.rotl (8);
+      R13 = R13.rotl (8);
+      R10 += R15;
+      R11 += R12;
+      R08 += R13;
+      R09 += R14;
+      R05 ^= R10;
+      R06 ^= R11;
+      R07 ^= R08;
+      R05 = R05.rotl (7);
+      R06 = R06.rotl (7);
+      R07 = R07.rotl (7);
+    }
+  R00 += state[0];
+  S::transpose (R00, R01, R02, R03);
+  R00.store_le (output);
+}
+
+unsigned int res[1];
+unsigned main_state[]{1634760805, 60878,      2036477234, 6,
+		      0,	  825562964,  1471091955, 1346092787,
+		      506976774,  4197066702, 518848283,  118491664,
+		      0,	  0,	      0,	  0};
+int
+main ()
+{
+  foo (res, main_state);
+  if (res[0] != 0x41fcef98)
+    __builtin_abort ();
+  return 0;
+}
+diff --git a/gcc/testsuite/gcc.target/powerpc/pr115355.c b/gcc/testsuite/gcc.target/powerpc/pr115355.c
+new file mode 100644
+index 00000000000..8955126b808
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr115355.c
+@@ -0,0 +1,37 @@
+/* { dg-do run } */
+/* { dg-require-effective-target p9vector_hw } */
+/* Force vectorization with -fno-vect-cost-model to have vector unpack
+   which exposes the issue in PR115355.  */
+/* { dg-options "-O2 -mdejagnu-cpu=power9 -fno-vect-cost-model" } */
+
+/* Verify it runs successfully.  */
+
+__attribute__((noipa))
+void setToIdentityGOOD(unsigned long long *mVec, unsigned int mLen)
+{
+  #pragma GCC novector
+  for (unsigned int i = 0; i < mLen; i++)
+    mVec[i] = i;
+}
+
+__attribute__((noipa))
+void setToIdentityBAD(unsigned long long *mVec, unsigned int mLen)
+{
+  for (unsigned int i = 0; i < mLen; i++)
+    mVec[i] = i;
+}
+
+unsigned long long vec1[100];
+unsigned long long vec2[100];
+
+int main()
+{
+  unsigned int l = 29;
+  setToIdentityGOOD (vec1, 29);
+  setToIdentityBAD (vec2, 29);
+
+  if (__builtin_memcmp (vec1, vec2, l * sizeof (vec1[0])) != 0)
+    __builtin_abort ();
+
+  return 0;
+}
--- a/gcc13-vector-merge-2.patch
+++ b/gcc13-vector-merge-2.patch
@ -0,0 +1,240 @@
+commit ffdd377fc07cdc7b62669d354e23f30940eaaffe
+Author: Kewen Lin <linkw@linux.ibm.com>
+Date:   Wed Jun 26 02:16:17 2024 -0500
+
+    rs6000: Fix wrong RTL patterns for vector merge high/low char on LE
+    
+    Commit r12-4496 changes some define_expands and define_insns
+    for vector merge high/low char, which are altivec_vmrg[hl]b.
+    These defines are mainly for built-in function vec_merge{h,l}
+    and some internal gen function needs.  These functions should
+    consider endianness, taking vec_mergeh as example, as PVIPR
+    defines, vec_mergeh "Merges the first halves (in element order)
+    of two vectors", it does note it's in element order.  So it's
+    mapped into vmrghb on BE while vmrglb on LE respectively.
+    Although the mapped insns are different, as the discussion in
+    PR106069, the RTL pattern should be still the same, it is
+    conformed before commit r12-4496, but gets changed into
+    different patterns on BE and LE starting from commit r12-4496.
+    Similar to 32-bit element case in commit log of r15-1504, this
+    8-bit element pattern on LE doesn't actually match what the
+    underlying insn is intended to represent, once some optimization
+    like combine does some changes basing on it, it would cause
+    the unexpected consequence.  The newly constructed test case
+    pr106069-1.c is a typical example for this issue.
+    
+    So this patch is to fix the wrong RTL pattern, ensure the
+    associated RTL patterns become the same as before which can
+    have the same semantic as their mapped insns.  With the
+    proposed patch, the expanders like altivec_vmrghb expands
+    into altivec_vmrghb_direct_be or altivec_vmrglb_direct_le
+    depending on endianness, "direct" can easily show which
+    insn would be generated, _be and _le are mainly for the
+    different RTL patterns as endianness.
+    
+    Co-authored-by: Xionghu Luo <xionghuluo@tencent.com>
+    
+            PR target/106069
+            PR target/115355
+    
+    gcc/ChangeLog:
+    
+            * config/rs6000/altivec.md (altivec_vmrghb_direct): Rename to ...
+            (altivec_vmrghb_direct_be): ... this.  Add condition BYTES_BIG_ENDIAN.
+            (altivec_vmrghb_direct_le): New define_insn.
+            (altivec_vmrglb_direct): Rename to ...
+            (altivec_vmrglb_direct_be): ... this.  Add condition BYTES_BIG_ENDIAN.
+            (altivec_vmrglb_direct_le): New define_insn.
+            (altivec_vmrghb): Adjust by calling gen_altivec_vmrghb_direct_be
+            for BE and gen_altivec_vmrglb_direct_le for LE.
+            (altivec_vmrglb): Adjust by calling gen_altivec_vmrglb_direct_be
+            for BE and gen_altivec_vmrghb_direct_le for LE.
+            * config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace
+            CODE_FOR_altivec_vmrghb_direct by
+            CODE_FOR_altivec_vmrghb_direct_be for BE and
+            CODE_FOR_altivec_vmrghb_direct_le for LE.  And replace
+            CODE_FOR_altivec_vmrglb_direct by
+            CODE_FOR_altivec_vmrglb_direct_be for BE and
+            CODE_FOR_altivec_vmrglb_direct_le for LE.
+    
+    gcc/testsuite/ChangeLog:
+    
+            * gcc.target/powerpc/pr106069-1.c: New test.
+    
+    (cherry picked from commit 62520e4e9f7e2fe8a16ee57a4bd35da2e921ae22)
+
+diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
+index 92e2e4a4090..47664204bc5 100644
+--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
+@@ -1152,15 +1152,16 @@ (define_expand "altivec_vmrghb"
+    (use (match_operand:V16QI 2 "register_operand"))]
+   "TARGET_ALTIVEC"
+ {
+-  rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrghb_direct
+-						: gen_altivec_vmrglb_direct;
+-  if (!BYTES_BIG_ENDIAN)
+-    std::swap (operands[1], operands[2]);
+-  emit_insn (fun (operands[0], operands[1], operands[2]));
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (
+      gen_altivec_vmrghb_direct_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (
+      gen_altivec_vmrglb_direct_le (operands[0], operands[2], operands[1]));
+   DONE;
+ })
+ 
+-(define_insn "altivec_vmrghb_direct"
+(define_insn "altivec_vmrghb_direct_be"
+   [(set (match_operand:V16QI 0 "register_operand" "=v")
+ 	(vec_select:V16QI
+ 	  (vec_concat:V32QI
+@@ -1174,7 +1175,25 @@ (define_insn "altivec_vmrghb_direct"
+ 		     (const_int 5) (const_int 21)
+ 		     (const_int 6) (const_int 22)
+ 		     (const_int 7) (const_int 23)])))]
+-  "TARGET_ALTIVEC"
+  "TARGET_ALTIVEC && BYTES_BIG_ENDIAN"
+  "vmrghb %0,%1,%2"
+  [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrghb_direct_le"
+  [(set (match_operand:V16QI 0 "register_operand" "=v")
+	(vec_select:V16QI
+	  (vec_concat:V32QI
+	    (match_operand:V16QI 2 "register_operand" "v")
+	    (match_operand:V16QI 1 "register_operand" "v"))
+	  (parallel [(const_int  8) (const_int 24)
+		     (const_int  9) (const_int 25)
+		     (const_int 10) (const_int 26)
+		     (const_int 11) (const_int 27)
+		     (const_int 12) (const_int 28)
+		     (const_int 13) (const_int 29)
+		     (const_int 14) (const_int 30)
+		     (const_int 15) (const_int 31)])))]
+  "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN"
+   "vmrghb %0,%1,%2"
+   [(set_attr "type" "vecperm")])
+ 
+@@ -1274,15 +1293,16 @@ (define_expand "altivec_vmrglb"
+    (use (match_operand:V16QI 2 "register_operand"))]
+   "TARGET_ALTIVEC"
+ {
+-  rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrglb_direct
+-						: gen_altivec_vmrghb_direct;
+-  if (!BYTES_BIG_ENDIAN)
+-    std::swap (operands[1], operands[2]);
+-  emit_insn (fun (operands[0], operands[1], operands[2]));
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (
+      gen_altivec_vmrglb_direct_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (
+      gen_altivec_vmrghb_direct_le (operands[0], operands[2], operands[1]));
+   DONE;
+ })
+ 
+-(define_insn "altivec_vmrglb_direct"
+(define_insn "altivec_vmrglb_direct_be"
+   [(set (match_operand:V16QI 0 "register_operand" "=v")
+ 	(vec_select:V16QI
+ 	  (vec_concat:V32QI
+@@ -1296,7 +1316,25 @@ (define_insn "altivec_vmrglb_direct"
+ 		     (const_int 13) (const_int 29)
+ 		     (const_int 14) (const_int 30)
+ 		     (const_int 15) (const_int 31)])))]
+-  "TARGET_ALTIVEC"
+  "TARGET_ALTIVEC && BYTES_BIG_ENDIAN"
+  "vmrglb %0,%1,%2"
+  [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrglb_direct_le"
+  [(set (match_operand:V16QI 0 "register_operand" "=v")
+	(vec_select:V16QI
+	  (vec_concat:V32QI
+	    (match_operand:V16QI 2 "register_operand" "v")
+	    (match_operand:V16QI 1 "register_operand" "v"))
+	  (parallel [(const_int 0) (const_int 16)
+		     (const_int 1) (const_int 17)
+		     (const_int 2) (const_int 18)
+		     (const_int 3) (const_int 19)
+		     (const_int 4) (const_int 20)
+		     (const_int 5) (const_int 21)
+		     (const_int 6) (const_int 22)
+		     (const_int 7) (const_int 23)])))]
+  "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN"
+   "vmrglb %0,%1,%2"
+   [(set_attr "type" "vecperm")])
+ 
+diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
+index b3d648312f1..10088033aa1 100644
+--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
+@@ -23166,8 +23166,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
+      CODE_FOR_altivec_vpkuwum_direct,
+      {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}},
+     {OPTION_MASK_ALTIVEC,
+-     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
+-		      : CODE_FOR_altivec_vmrglb_direct,
+     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct_be
+		      : CODE_FOR_altivec_vmrglb_direct_le,
+      {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
+     {OPTION_MASK_ALTIVEC,
+      BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
+@@ -23178,8 +23178,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
+ 		      : CODE_FOR_altivec_vmrglw_direct_v4si_le,
+      {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
+     {OPTION_MASK_ALTIVEC,
+-     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
+-		      : CODE_FOR_altivec_vmrghb_direct,
+     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct_be
+		      : CODE_FOR_altivec_vmrghb_direct_le,
+      {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
+     {OPTION_MASK_ALTIVEC,
+      BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
+diff --git a/gcc/testsuite/gcc.target/powerpc/pr106069-1.c b/gcc/testsuite/gcc.target/powerpc/pr106069-1.c
+new file mode 100644
+index 00000000000..4945d8fedfb
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr106069-1.c
+@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target vmx_hw } */
+
+/* Test vector merge for 8-bit element size,
+   it will abort if the RTL pattern isn't expected.  */
+
+#include "altivec.h"
+
+__attribute__((noipa))
+signed char elem_6 (vector signed char a, vector signed char b)
+{
+  vector signed char c = vec_mergeh (a,b);
+  return vec_extract (c, 6);
+}
+
+__attribute__((noipa))
+unsigned char elem_15 (vector unsigned char a, vector unsigned char b)
+{
+  vector unsigned char c = vec_mergel (a,b);
+  return vec_extract (c, 15);
+}
+
+int
+main ()
+{
+  vector unsigned char v1
+    = {3, 33, 22, 12, 34, 14, 5, 25, 30, 11, 0, 21, 17, 27, 38, 8};
+  vector unsigned char v2
+    = {81, 82, 83, 84, 68, 67, 66, 65, 99, 100, 101, 102, 250, 125, 0, 6};
+  signed char x1 = elem_6 ((vector signed char) v1, (vector signed char) v2);
+  unsigned char x2 = elem_15 (v1, v2);
+
+  if (x1 != 12 || x2 != 6)
+    __builtin_abort ();
+
+  return 0;
+}
+
--- a/gcc13-vector-merge-3.patch
+++ b/gcc13-vector-merge-3.patch
@ -0,0 +1,306 @@
+commit bab38d9271ce3f26cb64b8cb712351eb3fedd559
+Author: Kewen Lin <linkw@linux.ibm.com>
+Date:   Wed Jun 26 02:16:17 2024 -0500
+
+    rs6000: Fix wrong RTL patterns for vector merge high/low short on LE
+    
+    Commit r12-4496 changes some define_expands and define_insns
+    for vector merge high/low short, which are altivec_vmrg[hl]h.
+    These defines are mainly for built-in function vec_merge{h,l}
+    and some internal gen function needs.  These functions should
+    consider endianness, taking vec_mergeh as example, as PVIPR
+    defines, vec_mergeh "Merges the first halves (in element order)
+    of two vectors", it does note it's in element order.  So it's
+    mapped into vmrghh on BE while vmrglh on LE respectively.
+    Although the mapped insns are different, as discussed in
+    PR106069, the RTL pattern should still be the same.  That held
+    before commit r12-4496, but starting from that commit the
+    patterns differ between BE and LE.
+    Similar to the 32-bit element case in the commit log of r15-1504,
+    this 16-bit element pattern on LE doesn't actually match what the
+    underlying insn is intended to represent, so once an optimization
+    such as combine transforms code based on it, the result can be
+    unexpected.  The newly constructed test case
+    pr106069-2.c is a typical example of this issue for element type
+    short.
+    
+    So this patch is to fix the wrong RTL pattern, ensure the
+    associated RTL patterns become the same as before which can
+    have the same semantic as their mapped insns.  With the
+    proposed patch, the expanders like altivec_vmrghh expands
+    into altivec_vmrghh_direct_be or altivec_vmrglh_direct_le
+    depending on endianness, "direct" can easily show which
+    insn would be generated, _be and _le are mainly for the
+    different RTL patterns as endianness.
+    
+    Co-authored-by: Xionghu Luo <xionghuluo@tencent.com>
+    
+            PR target/106069
+            PR target/115355
+    
+    gcc/ChangeLog:
+    
+            * config/rs6000/altivec.md (altivec_vmrghh_direct): Rename to ...
+            (altivec_vmrghh_direct_be): ... this.  Add condition BYTES_BIG_ENDIAN.
+            (altivec_vmrghh_direct_le): New define_insn.
+            (altivec_vmrglh_direct): Rename to ...
+            (altivec_vmrglh_direct_be): ... this.  Add condition BYTES_BIG_ENDIAN.
+            (altivec_vmrglh_direct_le): New define_insn.
+            (altivec_vmrghh): Adjust by calling gen_altivec_vmrghh_direct_be
+            for BE and gen_altivec_vmrglh_direct_le for LE.
+            (altivec_vmrglh): Adjust by calling gen_altivec_vmrglh_direct_be
+            for BE and gen_altivec_vmrghh_direct_le for LE.
+            (vec_widen_umult_hi_v16qi): Adjust the call to
+            gen_altivec_vmrghh_direct by gen_altivec_vmrghh for BE
+            and by gen_altivec_vmrglh for LE.
+            (vec_widen_smult_hi_v16qi): Likewise.
+            (vec_widen_umult_lo_v16qi): Adjust the call to
+            gen_altivec_vmrglh_direct by gen_altivec_vmrglh for BE
+            and by gen_altivec_vmrghh for LE.
+            (vec_widen_smult_lo_v16qi): Likewise.
+            * config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace
+            CODE_FOR_altivec_vmrghh_direct by
+            CODE_FOR_altivec_vmrghh_direct_be for BE and
+            CODE_FOR_altivec_vmrghh_direct_le for LE.  And replace
+            CODE_FOR_altivec_vmrglh_direct by
+            CODE_FOR_altivec_vmrglh_direct_be for BE and
+            CODE_FOR_altivec_vmrglh_direct_le for LE.
+    
+    gcc/testsuite/ChangeLog:
+    
+            * gcc.target/powerpc/pr106069-2.c: New test.
+    
+    (cherry picked from commit 812c70bf4981958488331d4ea5af8709b5321da1)
+
+diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
+index 47664204bc5..6557393a97c 100644
+--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
+@@ -1203,17 +1203,18 @@ (define_expand "altivec_vmrghh"
+    (use (match_operand:V8HI 2 "register_operand"))]
+   "TARGET_ALTIVEC"
+ {
+-  rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrghh_direct
+-						: gen_altivec_vmrglh_direct;
+-  if (!BYTES_BIG_ENDIAN)
+-    std::swap (operands[1], operands[2]);
+-  emit_insn (fun (operands[0], operands[1], operands[2]));
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (
+      gen_altivec_vmrghh_direct_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (
+      gen_altivec_vmrglh_direct_le (operands[0], operands[2], operands[1]));
+   DONE;
+ })
+ 
+-(define_insn "altivec_vmrghh_direct"
+(define_insn "altivec_vmrghh_direct_be"
+   [(set (match_operand:V8HI 0 "register_operand" "=v")
+-        (vec_select:V8HI
+	(vec_select:V8HI
+ 	  (vec_concat:V16HI
+ 	    (match_operand:V8HI 1 "register_operand" "v")
+ 	    (match_operand:V8HI 2 "register_operand" "v"))
+@@ -1221,7 +1222,21 @@ (define_insn "altivec_vmrghh_direct"
+ 		     (const_int 1) (const_int 9)
+ 		     (const_int 2) (const_int 10)
+ 		     (const_int 3) (const_int 11)])))]
+-  "TARGET_ALTIVEC"
+  "TARGET_ALTIVEC && BYTES_BIG_ENDIAN"
+  "vmrghh %0,%1,%2"
+  [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrghh_direct_le"
+  [(set (match_operand:V8HI 0 "register_operand" "=v")
+        (vec_select:V8HI
+	  (vec_concat:V16HI
+	    (match_operand:V8HI 2 "register_operand" "v")
+	    (match_operand:V8HI 1 "register_operand" "v"))
+	  (parallel [(const_int 4) (const_int 12)
+		     (const_int 5) (const_int 13)
+		     (const_int 6) (const_int 14)
+		     (const_int 7) (const_int 15)])))]
+  "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN"
+   "vmrghh %0,%1,%2"
+   [(set_attr "type" "vecperm")])
+ 
+@@ -1344,15 +1359,16 @@ (define_expand "altivec_vmrglh"
+    (use (match_operand:V8HI 2 "register_operand"))]
+   "TARGET_ALTIVEC"
+ {
+-  rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrglh_direct
+-						: gen_altivec_vmrghh_direct;
+-  if (!BYTES_BIG_ENDIAN)
+-    std::swap (operands[1], operands[2]);
+-  emit_insn (fun (operands[0], operands[1], operands[2]));
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (
+      gen_altivec_vmrglh_direct_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (
+      gen_altivec_vmrghh_direct_le (operands[0], operands[2], operands[1]));
+   DONE;
+ })
+ 
+-(define_insn "altivec_vmrglh_direct"
+(define_insn "altivec_vmrglh_direct_be"
+   [(set (match_operand:V8HI 0 "register_operand" "=v")
+         (vec_select:V8HI
+ 	  (vec_concat:V16HI
+@@ -1362,7 +1378,21 @@ (define_insn "altivec_vmrglh_direct"
+ 		     (const_int 5) (const_int 13)
+ 		     (const_int 6) (const_int 14)
+ 		     (const_int 7) (const_int 15)])))]
+-  "TARGET_ALTIVEC"
+  "TARGET_ALTIVEC && BYTES_BIG_ENDIAN"
+  "vmrglh %0,%1,%2"
+  [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrglh_direct_le"
+  [(set (match_operand:V8HI 0 "register_operand" "=v")
+	(vec_select:V8HI
+	  (vec_concat:V16HI
+	    (match_operand:V8HI 2 "register_operand" "v")
+	    (match_operand:V8HI 1 "register_operand" "v"))
+	  (parallel [(const_int 0) (const_int 8)
+		     (const_int 1) (const_int 9)
+		     (const_int 2) (const_int 10)
+		     (const_int 3) (const_int 11)])))]
+  "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN"
+   "vmrglh %0,%1,%2"
+   [(set_attr "type" "vecperm")])
+ 
+@@ -3777,13 +3807,13 @@ (define_expand "vec_widen_umult_hi_v16qi"
+     {
+       emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2]));
+       emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2]));
+-      emit_insn (gen_altivec_vmrghh_direct (operands[0], ve, vo));
+      emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
+     }
+   else
+     {
+       emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2]));
+       emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2]));
+-      emit_insn (gen_altivec_vmrghh_direct (operands[0], vo, ve));
+      emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
+     }
+   DONE;
+ })
+@@ -3802,13 +3832,13 @@ (define_expand "vec_widen_umult_lo_v16qi"
+     {
+       emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2]));
+       emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2]));
+-      emit_insn (gen_altivec_vmrglh_direct (operands[0], ve, vo));
+      emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
+     }
+   else
+     {
+       emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2]));
+       emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2]));
+-      emit_insn (gen_altivec_vmrglh_direct (operands[0], vo, ve));
+      emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
+     }
+   DONE;
+ })
+@@ -3827,13 +3857,13 @@ (define_expand "vec_widen_smult_hi_v16qi"
+     {
+       emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2]));
+       emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2]));
+-      emit_insn (gen_altivec_vmrghh_direct (operands[0], ve, vo));
+      emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
+     }
+   else
+     {
+       emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2]));
+       emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2]));
+-      emit_insn (gen_altivec_vmrghh_direct (operands[0], vo, ve));
+      emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
+     }
+   DONE;
+ })
+@@ -3852,13 +3882,13 @@ (define_expand "vec_widen_smult_lo_v16qi"
+     {
+       emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2]));
+       emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2]));
+-      emit_insn (gen_altivec_vmrglh_direct (operands[0], ve, vo));
+      emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
+     }
+   else
+     {
+       emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2]));
+       emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2]));
+-      emit_insn (gen_altivec_vmrglh_direct (operands[0], vo, ve));
+      emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
+     }
+   DONE;
+ })
+diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
+index 10088033aa1..76eb89ad529 100644
+--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
+@@ -23170,8 +23170,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
+ 		      : CODE_FOR_altivec_vmrglb_direct_le,
+      {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
+     {OPTION_MASK_ALTIVEC,
+-     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
+-		      : CODE_FOR_altivec_vmrglh_direct,
+     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct_be
+		      : CODE_FOR_altivec_vmrglh_direct_le,
+      {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
+     {OPTION_MASK_ALTIVEC,
+      BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si_be
+@@ -23182,8 +23182,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
+ 		      : CODE_FOR_altivec_vmrghb_direct_le,
+      {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
+     {OPTION_MASK_ALTIVEC,
+-     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
+-		      : CODE_FOR_altivec_vmrghh_direct,
+     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct_be
+		      : CODE_FOR_altivec_vmrghh_direct_le,
+      {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
+     {OPTION_MASK_ALTIVEC,
+      BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si_be
+diff --git a/gcc/testsuite/gcc.target/powerpc/pr106069-2.c b/gcc/testsuite/gcc.target/powerpc/pr106069-2.c
+new file mode 100644
+index 00000000000..283e3290fb3
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr106069-2.c
+@@ -0,0 +1,37 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target vmx_hw } */
+
+/* Test vector merge for 16-bit element size; the test aborts
+   if the merged vector holds an unexpected element.  */
+
+#include "altivec.h"
+
+__attribute__((noipa)) /* Returns element 2 of vec_mergeh (a, b).  */
+signed short elem_2 (vector signed short a, vector signed short b)
+{
+  vector signed short c = vec_mergeh (a,b); /* Merge in element order.  */
+  return vec_extract (c, 2);
+}
+
+__attribute__((noipa)) /* Returns element 7 of vec_mergel (a, b).  */
+unsigned short elem_7 (vector unsigned short a, vector unsigned short b)
+{
+  vector unsigned short c = vec_mergel (a,b);
+  return vec_extract (c, 7); /* Index 7 is the last of 8 elements.  */
+}
+
+int /* Aborts unless both extracted elements have the expected value.  */
+main ()
+{
+  vector unsigned short v1 = {3, 22, 12, 34, 5, 25, 30, 11};
+  vector unsigned short v2 = {84, 168, 267, 966, 65, 399, 999, 99};
+  signed short x1 = elem_2 ((vector signed short) v1, (vector signed short) v2);
+  unsigned short x2 = elem_7 (v1, v2);
+
+  if (x1 != 22 || x2 != 99) /* 22 is v1[1]; 99 is v2[7].  */
+    __builtin_abort ();
+
+  return 0;
+}
+