241 lines
9.3 KiB
Diff
241 lines
9.3 KiB
Diff
|
commit ffdd377fc07cdc7b62669d354e23f30940eaaffe
|
||
|
Author: Kewen Lin <linkw@linux.ibm.com>
|
||
|
Date: Wed Jun 26 02:16:17 2024 -0500
|
||
|
|
||
|
rs6000: Fix wrong RTL patterns for vector merge high/low char on LE
|
||
|
|
||
|
Commit r12-4496 changes some define_expands and define_insns
|
||
|
for vector merge high/low char, which are altivec_vmrg[hl]b.
|
||
|
These defines are mainly for built-in function vec_merge{h,l}
|
||
|
and some internal gen function needs. These functions should
|
||
|
consider endianness.  Taking vec_mergeh as an example, as PVIPR
|
||
|
defines, vec_mergeh "Merges the first halves (in element order)
|
||
|
of two vectors", it does note it's in element order. So it's
|
||
|
mapped into vmrghb on BE while vmrglb on LE respectively.
|
||
|
Although the mapped insns are different, as the discussion in
|
||
|
PR106069, the RTL pattern should still be the same.  That
|
||
|
was the case before commit r12-4496, but it was changed into
|
||
|
different patterns on BE and LE starting from commit r12-4496.
|
||
|
Similar to 32-bit element case in commit log of r15-1504, this
|
||
|
8-bit element pattern on LE doesn't actually match what the
|
||
|
underlying insn is intended to represent.  Once some optimization
|
||
|
like combine makes changes based on it, it can cause
|
||
|
unexpected consequences.  The newly constructed test case
|
||
|
pr106069-1.c is a typical example for this issue.
|
||
|
|
||
|
So this patch is to fix the wrong RTL pattern, ensure the
|
||
|
associated RTL patterns become the same as before which can
|
||
|
have the same semantics as their mapped insns.  With the
|
||
|
proposed patch, the expanders like altivec_vmrghb expands
|
||
|
into altivec_vmrghb_direct_be or altivec_vmrglb_direct_le
|
||
|
depending on endianness, "direct" can easily show which
|
||
|
insn would be generated, _be and _le are mainly for the
|
||
|
different RTL patterns for each endianness.
|
||
|
|
||
|
Co-authored-by: Xionghu Luo <xionghuluo@tencent.com>
|
||
|
|
||
|
PR target/106069
|
||
|
PR target/115355
|
||
|
|
||
|
gcc/ChangeLog:
|
||
|
|
||
|
* config/rs6000/altivec.md (altivec_vmrghb_direct): Rename to ...
|
||
|
(altivec_vmrghb_direct_be): ... this. Add condition BYTES_BIG_ENDIAN.
|
||
|
(altivec_vmrghb_direct_le): New define_insn.
|
||
|
(altivec_vmrglb_direct): Rename to ...
|
||
|
(altivec_vmrglb_direct_be): ... this. Add condition BYTES_BIG_ENDIAN.
|
||
|
(altivec_vmrglb_direct_le): New define_insn.
|
||
|
(altivec_vmrghb): Adjust by calling gen_altivec_vmrghb_direct_be
|
||
|
for BE and gen_altivec_vmrglb_direct_le for LE.
|
||
|
(altivec_vmrglb): Adjust by calling gen_altivec_vmrglb_direct_be
|
||
|
for BE and gen_altivec_vmrghb_direct_le for LE.
|
||
|
* config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace
|
||
|
CODE_FOR_altivec_vmrghb_direct by
|
||
|
CODE_FOR_altivec_vmrghb_direct_be for BE and
|
||
|
CODE_FOR_altivec_vmrghb_direct_le for LE. And replace
|
||
|
CODE_FOR_altivec_vmrglb_direct by
|
||
|
CODE_FOR_altivec_vmrglb_direct_be for BE and
|
||
|
CODE_FOR_altivec_vmrglb_direct_le for LE.
|
||
|
|
||
|
gcc/testsuite/ChangeLog:
|
||
|
|
||
|
* gcc.target/powerpc/pr106069-1.c: New test.
|
||
|
|
||
|
(cherry picked from commit 62520e4e9f7e2fe8a16ee57a4bd35da2e921ae22)
|
||
|
|
||
|
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
|
||
|
index 92e2e4a4090..47664204bc5 100644
|
||
|
--- a/gcc/config/rs6000/altivec.md
|
||
|
+++ b/gcc/config/rs6000/altivec.md
|
||
|
@@ -1152,15 +1152,16 @@ (define_expand "altivec_vmrghb"
|
||
|
(use (match_operand:V16QI 2 "register_operand"))]
|
||
|
"TARGET_ALTIVEC"
|
||
|
{
|
||
|
- rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrghb_direct
|
||
|
- : gen_altivec_vmrglb_direct;
|
||
|
- if (!BYTES_BIG_ENDIAN)
|
||
|
- std::swap (operands[1], operands[2]);
|
||
|
- emit_insn (fun (operands[0], operands[1], operands[2]));
|
||
|
+ if (BYTES_BIG_ENDIAN)
|
||
|
+ emit_insn (
|
||
|
+ gen_altivec_vmrghb_direct_be (operands[0], operands[1], operands[2]));
|
||
|
+ else
|
||
|
+ emit_insn (
|
||
|
+ gen_altivec_vmrglb_direct_le (operands[0], operands[2], operands[1]));
|
||
|
DONE;
|
||
|
})
|
||
|
|
||
|
-(define_insn "altivec_vmrghb_direct"
|
||
|
+(define_insn "altivec_vmrghb_direct_be"
|
||
|
[(set (match_operand:V16QI 0 "register_operand" "=v")
|
||
|
(vec_select:V16QI
|
||
|
(vec_concat:V32QI
|
||
|
@@ -1174,7 +1175,25 @@ (define_insn "altivec_vmrghb_direct"
|
||
|
(const_int 5) (const_int 21)
|
||
|
(const_int 6) (const_int 22)
|
||
|
(const_int 7) (const_int 23)])))]
|
||
|
- "TARGET_ALTIVEC"
|
||
|
+ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN"
|
||
|
+ "vmrghb %0,%1,%2"
|
||
|
+ [(set_attr "type" "vecperm")])
|
||
|
+
|
||
|
+(define_insn "altivec_vmrghb_direct_le"
|
||
|
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
|
||
|
+ (vec_select:V16QI
|
||
|
+ (vec_concat:V32QI
|
||
|
+ (match_operand:V16QI 2 "register_operand" "v")
|
||
|
+ (match_operand:V16QI 1 "register_operand" "v"))
|
||
|
+ (parallel [(const_int 8) (const_int 24)
|
||
|
+ (const_int 9) (const_int 25)
|
||
|
+ (const_int 10) (const_int 26)
|
||
|
+ (const_int 11) (const_int 27)
|
||
|
+ (const_int 12) (const_int 28)
|
||
|
+ (const_int 13) (const_int 29)
|
||
|
+ (const_int 14) (const_int 30)
|
||
|
+ (const_int 15) (const_int 31)])))]
|
||
|
+ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN"
|
||
|
"vmrghb %0,%1,%2"
|
||
|
[(set_attr "type" "vecperm")])
|
||
|
|
||
|
@@ -1274,15 +1293,16 @@ (define_expand "altivec_vmrglb"
|
||
|
(use (match_operand:V16QI 2 "register_operand"))]
|
||
|
"TARGET_ALTIVEC"
|
||
|
{
|
||
|
- rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrglb_direct
|
||
|
- : gen_altivec_vmrghb_direct;
|
||
|
- if (!BYTES_BIG_ENDIAN)
|
||
|
- std::swap (operands[1], operands[2]);
|
||
|
- emit_insn (fun (operands[0], operands[1], operands[2]));
|
||
|
+ if (BYTES_BIG_ENDIAN)
|
||
|
+ emit_insn (
|
||
|
+ gen_altivec_vmrglb_direct_be (operands[0], operands[1], operands[2]));
|
||
|
+ else
|
||
|
+ emit_insn (
|
||
|
+ gen_altivec_vmrghb_direct_le (operands[0], operands[2], operands[1]));
|
||
|
DONE;
|
||
|
})
|
||
|
|
||
|
-(define_insn "altivec_vmrglb_direct"
|
||
|
+(define_insn "altivec_vmrglb_direct_be"
|
||
|
[(set (match_operand:V16QI 0 "register_operand" "=v")
|
||
|
(vec_select:V16QI
|
||
|
(vec_concat:V32QI
|
||
|
@@ -1296,7 +1316,25 @@ (define_insn "altivec_vmrglb_direct"
|
||
|
(const_int 13) (const_int 29)
|
||
|
(const_int 14) (const_int 30)
|
||
|
(const_int 15) (const_int 31)])))]
|
||
|
- "TARGET_ALTIVEC"
|
||
|
+ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN"
|
||
|
+ "vmrglb %0,%1,%2"
|
||
|
+ [(set_attr "type" "vecperm")])
|
||
|
+
|
||
|
+(define_insn "altivec_vmrglb_direct_le"
|
||
|
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
|
||
|
+ (vec_select:V16QI
|
||
|
+ (vec_concat:V32QI
|
||
|
+ (match_operand:V16QI 2 "register_operand" "v")
|
||
|
+ (match_operand:V16QI 1 "register_operand" "v"))
|
||
|
+ (parallel [(const_int 0) (const_int 16)
|
||
|
+ (const_int 1) (const_int 17)
|
||
|
+ (const_int 2) (const_int 18)
|
||
|
+ (const_int 3) (const_int 19)
|
||
|
+ (const_int 4) (const_int 20)
|
||
|
+ (const_int 5) (const_int 21)
|
||
|
+ (const_int 6) (const_int 22)
|
||
|
+ (const_int 7) (const_int 23)])))]
|
||
|
+ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN"
|
||
|
"vmrglb %0,%1,%2"
|
||
|
[(set_attr "type" "vecperm")])
|
||
|
|
||
|
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
|
||
|
index b3d648312f1..10088033aa1 100644
|
||
|
--- a/gcc/config/rs6000/rs6000.cc
|
||
|
+++ b/gcc/config/rs6000/rs6000.cc
|
||
|
@@ -23166,8 +23166,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
|
||
|
CODE_FOR_altivec_vpkuwum_direct,
|
||
|
{2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}},
|
||
|
{OPTION_MASK_ALTIVEC,
|
||
|
- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
|
||
|
- : CODE_FOR_altivec_vmrglb_direct,
|
||
|
+ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct_be
|
||
|
+ : CODE_FOR_altivec_vmrglb_direct_le,
|
||
|
{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
|
||
|
{OPTION_MASK_ALTIVEC,
|
||
|
BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
|
||
|
@@ -23178,8 +23178,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
|
||
|
: CODE_FOR_altivec_vmrglw_direct_v4si_le,
|
||
|
{0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
|
||
|
{OPTION_MASK_ALTIVEC,
|
||
|
- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
|
||
|
- : CODE_FOR_altivec_vmrghb_direct,
|
||
|
+ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct_be
|
||
|
+ : CODE_FOR_altivec_vmrghb_direct_le,
|
||
|
{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
|
||
|
{OPTION_MASK_ALTIVEC,
|
||
|
BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
|
||
|
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106069-1.c b/gcc/testsuite/gcc.target/powerpc/pr106069-1.c
|
||
|
new file mode 100644
|
||
|
index 00000000000..4945d8fedfb
|
||
|
--- /dev/null
|
||
|
+++ b/gcc/testsuite/gcc.target/powerpc/pr106069-1.c
|
||
|
@@ -0,0 +1,39 @@
|
||
|
+/* { dg-do run } */
|
||
|
+/* { dg-options "-O2" } */
|
||
|
+/* { dg-require-effective-target vmx_hw } */
|
||
|
+
|
||
|
+/* Test vector merge for 8-bit element size,
|
||
|
+ it will abort if the RTL pattern isn't expected. */
|
||
|
+
|
||
|
+#include "altivec.h"
|
||
|
+
|
||
|
+__attribute__((noipa))
|
||
|
+signed char elem_6 (vector signed char a, vector signed char b)
|
||
|
+{
|
||
|
+ vector signed char c = vec_mergeh (a,b);
|
||
|
+ return vec_extract (c, 6);
|
||
|
+}
|
||
|
+
|
||
|
+__attribute__((noipa))
|
||
|
+unsigned char elem_15 (vector unsigned char a, vector unsigned char b)
|
||
|
+{
|
||
|
+ vector unsigned char c = vec_mergel (a,b);
|
||
|
+ return vec_extract (c, 15);
|
||
|
+}
|
||
|
+
|
||
|
+int
|
||
|
+main ()
|
||
|
+{
|
||
|
+ vector unsigned char v1
|
||
|
+ = {3, 33, 22, 12, 34, 14, 5, 25, 30, 11, 0, 21, 17, 27, 38, 8};
|
||
|
+ vector unsigned char v2
|
||
|
+ = {81, 82, 83, 84, 68, 67, 66, 65, 99, 100, 101, 102, 250, 125, 0, 6};
|
||
|
+ signed char x1 = elem_6 ((vector signed char) v1, (vector signed char) v2);
|
||
|
+ unsigned char x2 = elem_15 (v1, v2);
|
||
|
+
|
||
|
+ if (x1 != 12 || x2 != 6)
|
||
|
+ __builtin_abort ();
|
||
|
+
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|