Compare commits
No commits in common. "c8-beta" and "c8" have entirely different histories.
107
SOURCES/gcc12-pr113960.patch
Normal file
107
SOURCES/gcc12-pr113960.patch
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
commit 6f5dcea85a31845ec6f4b6886734b0f02e013718
|
||||||
|
Author: Jonathan Wakely <jwakely@redhat.com>
|
||||||
|
Date: Tue Feb 27 17:50:34 2024 +0000
|
||||||
|
|
||||||
|
libstdc++: Fix conditions for using memcmp in std::lexicographical_compare_three_way [PR113960]
|
||||||
|
|
||||||
|
The change in r11-2981-g2f983fa69005b6 meant that
|
||||||
|
std::lexicographical_compare_three_way started to use memcmp for
|
||||||
|
unsigned integers on big endian targets, but for that to be valid we
|
||||||
|
need the two value types to have the same size and we need to use that
|
||||||
|
size to compute the length passed to memcmp.
|
||||||
|
|
||||||
|
I already defined a __is_memcmp_ordered_with trait that does the right
|
||||||
|
checks, std::lexicographical_compare_three_way just needs to use it.
|
||||||
|
|
||||||
|
libstdc++-v3/ChangeLog:
|
||||||
|
|
||||||
|
PR libstdc++/113960
|
||||||
|
* include/bits/stl_algobase.h (__is_byte_iter): Replace with ...
|
||||||
|
(__memcmp_ordered_with): New concept.
|
||||||
|
(lexicographical_compare_three_way): Use __memcmp_ordered_with
|
||||||
|
instead of __is_byte_iter. Use correct length for memcmp.
|
||||||
|
* testsuite/25_algorithms/lexicographical_compare_three_way/113960.cc:
|
||||||
|
New test.
|
||||||
|
|
||||||
|
(cherry picked from commit f5cdda8acb06c20335855ed353ab9a441c12128a)
|
||||||
|
|
||||||
|
diff --git a/libstdc++-v3/include/bits/stl_algobase.h b/libstdc++-v3/include/bits/stl_algobase.h
|
||||||
|
index 7664301a208..6e648e48ad0 100644
|
||||||
|
--- a/libstdc++-v3/include/bits/stl_algobase.h
|
||||||
|
+++ b/libstdc++-v3/include/bits/stl_algobase.h
|
||||||
|
@@ -1780,11 +1780,14 @@ _GLIBCXX_BEGIN_NAMESPACE_ALGO
|
||||||
|
}
|
||||||
|
|
||||||
|
#if __cpp_lib_three_way_comparison
|
||||||
|
- // Iter points to a contiguous range of unsigned narrow character type
|
||||||
|
- // or std::byte, suitable for comparison by memcmp.
|
||||||
|
- template<typename _Iter>
|
||||||
|
- concept __is_byte_iter = contiguous_iterator<_Iter>
|
||||||
|
- && __is_memcmp_ordered<iter_value_t<_Iter>>::__value;
|
||||||
|
+ // Both iterators refer to contiguous ranges of unsigned narrow characters,
|
||||||
|
+ // or std::byte, or big-endian unsigned integers, suitable for comparison
|
||||||
|
+ // using memcmp.
|
||||||
|
+ template<typename _Iter1, typename _Iter2>
|
||||||
|
+ concept __memcmp_ordered_with
|
||||||
|
+ = (__is_memcmp_ordered_with<iter_value_t<_Iter1>,
|
||||||
|
+ iter_value_t<_Iter2>>::__value)
|
||||||
|
+ && contiguous_iterator<_Iter1> && contiguous_iterator<_Iter2>;
|
||||||
|
|
||||||
|
// Return a struct with two members, initialized to the smaller of x and y
|
||||||
|
// (or x if they compare equal) and the result of the comparison x <=> y.
|
||||||
|
@@ -1834,20 +1837,20 @@ _GLIBCXX_BEGIN_NAMESPACE_ALGO
|
||||||
|
if (!std::__is_constant_evaluated())
|
||||||
|
if constexpr (same_as<_Comp, __detail::_Synth3way>
|
||||||
|
|| same_as<_Comp, compare_three_way>)
|
||||||
|
- if constexpr (__is_byte_iter<_InputIter1>)
|
||||||
|
- if constexpr (__is_byte_iter<_InputIter2>)
|
||||||
|
- {
|
||||||
|
- const auto [__len, __lencmp] = _GLIBCXX_STD_A::
|
||||||
|
- __min_cmp(__last1 - __first1, __last2 - __first2);
|
||||||
|
- if (__len)
|
||||||
|
- {
|
||||||
|
- const auto __c
|
||||||
|
- = __builtin_memcmp(&*__first1, &*__first2, __len) <=> 0;
|
||||||
|
- if (__c != 0)
|
||||||
|
- return __c;
|
||||||
|
- }
|
||||||
|
- return __lencmp;
|
||||||
|
- }
|
||||||
|
+ if constexpr (__memcmp_ordered_with<_InputIter1, _InputIter2>)
|
||||||
|
+ {
|
||||||
|
+ const auto [__len, __lencmp] = _GLIBCXX_STD_A::
|
||||||
|
+ __min_cmp(__last1 - __first1, __last2 - __first2);
|
||||||
|
+ if (__len)
|
||||||
|
+ {
|
||||||
|
+ const auto __blen = __len * sizeof(*__first1);
|
||||||
|
+ const auto __c
|
||||||
|
+ = __builtin_memcmp(&*__first1, &*__first2, __blen) <=> 0;
|
||||||
|
+ if (__c != 0)
|
||||||
|
+ return __c;
|
||||||
|
+ }
|
||||||
|
+ return __lencmp;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
while (__first1 != __last1)
|
||||||
|
{
|
||||||
|
diff --git a/libstdc++-v3/testsuite/25_algorithms/lexicographical_compare_three_way/113960.cc b/libstdc++-v3/testsuite/25_algorithms/lexicographical_compare_three_way/113960.cc
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000000..d51ae1a3d50
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/libstdc++-v3/testsuite/25_algorithms/lexicographical_compare_three_way/113960.cc
|
||||||
|
@@ -0,0 +1,15 @@
|
||||||
|
+// { dg-do run { target c++20 } }
|
||||||
|
+
|
||||||
|
+// PR libstdc++/113960
|
||||||
|
+// std::map with std::vector as input overwrites itself with c++20, on s390x
|
||||||
|
+
|
||||||
|
+#include <algorithm>
|
||||||
|
+#include <testsuite_hooks.h>
|
||||||
|
+
|
||||||
|
+int main()
|
||||||
|
+{
|
||||||
|
+ unsigned short a1[] { 1, 2, 3 };
|
||||||
|
+ unsigned short a2[] { 1, 2, 4 };
|
||||||
|
+ // Incorrect memcmp comparison for big endian targets.
|
||||||
|
+ VERIFY( std::lexicographical_compare_three_way(a1, a1+3, a2, a2+3) < 0 );
|
||||||
|
+}
|
522
SOURCES/gcc12-vector-merge-1.patch
Normal file
522
SOURCES/gcc12-vector-merge-1.patch
Normal file
@ -0,0 +1,522 @@
|
|||||||
|
commit 96ef3367067219c8e3eb88c0474a1090cc7749b4
|
||||||
|
Author: Kewen Lin <linkw@linux.ibm.com>
|
||||||
|
Date: Thu Jun 20 20:23:56 2024 -0500
|
||||||
|
|
||||||
|
rs6000: Fix wrong RTL patterns for vector merge high/low word on LE
|
||||||
|
|
||||||
|
Commit r12-4496 changes some define_expands and define_insns
|
||||||
|
for vector merge high/low word, which are altivec_vmrg[hl]w,
|
||||||
|
vsx_xxmrg[hl]w_<VSX_W:mode>. These defines are mainly for
|
||||||
|
built-in function vec_merge{h,l}, __builtin_vsx_xxmrghw,
|
||||||
|
__builtin_vsx_xxmrghw_4si and some internal gen function
|
||||||
|
needs. These functions should consider endianness, taking
|
||||||
|
vec_mergeh as example, as PVIPR defines, vec_mergeh "Merges
|
||||||
|
the first halves (in element order) of two vectors", it does
|
||||||
|
note it's in element order. So it's mapped into vmrghw on
|
||||||
|
BE while vmrglw on LE respectively. Although the mapped
|
||||||
|
insns are different, as the discussion in PR106069, the RTL
|
||||||
|
pattern should be still the same, it is conformed before
|
||||||
|
commit r12-4496, define_expand altivec_vmrghw got expanded
|
||||||
|
into:
|
||||||
|
|
||||||
|
(vec_select:VSX_W
|
||||||
|
(vec_concat:<VS_double>
|
||||||
|
(match_operand:VSX_W 1 "register_operand" "wa,v")
|
||||||
|
(match_operand:VSX_W 2 "register_operand" "wa,v"))
|
||||||
|
(parallel [(const_int 0) (const_int 4)
|
||||||
|
(const_int 1) (const_int 5)])))]
|
||||||
|
|
||||||
|
on both BE and LE then. But commit r12-4496 changed it to
|
||||||
|
expand into:
|
||||||
|
|
||||||
|
(vec_select:VSX_W
|
||||||
|
(vec_concat:<VS_double>
|
||||||
|
(match_operand:VSX_W 1 "register_operand" "wa,v")
|
||||||
|
(match_operand:VSX_W 2 "register_operand" "wa,v"))
|
||||||
|
(parallel [(const_int 0) (const_int 4)
|
||||||
|
(const_int 1) (const_int 5)])))]
|
||||||
|
|
||||||
|
on BE, and
|
||||||
|
|
||||||
|
(vec_select:VSX_W
|
||||||
|
(vec_concat:<VS_double>
|
||||||
|
(match_operand:VSX_W 1 "register_operand" "wa,v")
|
||||||
|
(match_operand:VSX_W 2 "register_operand" "wa,v"))
|
||||||
|
(parallel [(const_int 2) (const_int 6)
|
||||||
|
(const_int 3) (const_int 7)])))]
|
||||||
|
|
||||||
|
on LE.  Although the mapped insns are still vmrghw on BE and
|
||||||
|
vmrglw on LE, the associated RTL pattern is completely
|
||||||
|
wrong and inconsistent with the mapped insn. If optimization
|
||||||
|
passes leave this pattern alone, even if its pattern doesn't
|
||||||
|
represent its mapped insn, it's still fine, that's why simple
|
||||||
|
testing on bif doesn't expose this issue. But once some
|
||||||
|
optimization pass such as combine makes some changes based
|
||||||
|
on this wrong pattern, because the pattern doesn't match the
|
||||||
|
semantics that the expanded insn is intended to represent,
|
||||||
|
it would cause the unexpected result.
|
||||||
|
|
||||||
|
So this patch is to fix the wrong RTL pattern, ensure the
|
||||||
|
associated RTL patterns become the same as before which can
|
||||||
|
have the same semantics as their mapped insns.  With the
|
||||||
|
proposed patch, the expanders like altivec_vmrghw expands
|
||||||
|
into altivec_vmrghw_direct_v4si_be or altivec_vmrglw_direct_v4si_le
|
||||||
|
depending on endianness, "direct" can easily show which
|
||||||
|
insn would be generated, _be and _le are mainly for the
|
||||||
|
different RTL patterns per endianness.
|
||||||
|
|
||||||
|
Co-authored-by: Xionghu Luo <xionghuluo@tencent.com>
|
||||||
|
|
||||||
|
PR target/106069
|
||||||
|
PR target/115355
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/rs6000/altivec.md (altivec_vmrghw_direct_<VSX_W:mode>): Rename
|
||||||
|
to ...
|
||||||
|
(altivec_vmrghw_direct_<VSX_W:mode>_be): ... this. Add the condition
|
||||||
|
BYTES_BIG_ENDIAN.
|
||||||
|
(altivec_vmrghw_direct_<VSX_W:mode>_le): New define_insn.
|
||||||
|
(altivec_vmrglw_direct_<VSX_W:mode>): Rename to ...
|
||||||
|
(altivec_vmrglw_direct_<VSX_W:mode>_be): ... this. Add the condition
|
||||||
|
BYTES_BIG_ENDIAN.
|
||||||
|
(altivec_vmrglw_direct_<VSX_W:mode>_le): New define_insn.
|
||||||
|
(altivec_vmrghw): Adjust by calling gen_altivec_vmrghw_direct_v4si_be
|
||||||
|
for BE and gen_altivec_vmrglw_direct_v4si_le for LE.
|
||||||
|
(altivec_vmrglw): Adjust by calling gen_altivec_vmrglw_direct_v4si_be
|
||||||
|
for BE and gen_altivec_vmrghw_direct_v4si_le for LE.
|
||||||
|
(vec_widen_umult_hi_v8hi): Adjust the call to
|
||||||
|
gen_altivec_vmrghw_direct_v4si by gen_altivec_vmrghw for BE
|
||||||
|
and by gen_altivec_vmrglw for LE.
|
||||||
|
(vec_widen_smult_hi_v8hi): Likewise.
|
||||||
|
(vec_widen_umult_lo_v8hi): Adjust the call to
|
||||||
|
gen_altivec_vmrglw_direct_v4si by gen_altivec_vmrglw for BE
|
||||||
|
and by gen_altivec_vmrghw for LE.
|
||||||
|
(vec_widen_smult_lo_v8hi): Likewise.
|
||||||
|
* config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace
|
||||||
|
CODE_FOR_altivec_vmrghw_direct_v4si by
|
||||||
|
CODE_FOR_altivec_vmrghw_direct_v4si_be for BE and
|
||||||
|
CODE_FOR_altivec_vmrghw_direct_v4si_le for LE. And replace
|
||||||
|
CODE_FOR_altivec_vmrglw_direct_v4si by
|
||||||
|
CODE_FOR_altivec_vmrglw_direct_v4si_be for BE and
|
||||||
|
CODE_FOR_altivec_vmrglw_direct_v4si_le for LE.
|
||||||
|
* config/rs6000/vsx.md (vsx_xxmrghw_<VSX_W:mode>): Adjust by calling
|
||||||
|
gen_altivec_vmrghw_direct_v4si_be for BE and
|
||||||
|
gen_altivec_vmrglw_direct_v4si_le for LE.
|
||||||
|
(vsx_xxmrglw_<VSX_W:mode>): Adjust by calling
|
||||||
|
gen_altivec_vmrglw_direct_v4si_be for BE and
|
||||||
|
gen_altivec_vmrghw_direct_v4si_le for LE.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* g++.target/powerpc/pr106069.C: New test.
|
||||||
|
* gcc.target/powerpc/pr115355.c: New test.
|
||||||
|
|
||||||
|
(cherry picked from commit 52c112800d9f44457c4832309a48c00945811313)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
|
||||||
|
index 3849db5ca3c..0c408a9e839 100644
|
||||||
|
--- a/gcc/config/rs6000/altivec.md
|
||||||
|
+++ b/gcc/config/rs6000/altivec.md
|
||||||
|
@@ -1212,16 +1212,18 @@ (define_expand "altivec_vmrghw"
|
||||||
|
(use (match_operand:V4SI 2 "register_operand"))]
|
||||||
|
"VECTOR_MEM_ALTIVEC_P (V4SImode)"
|
||||||
|
{
|
||||||
|
- rtx (*fun) (rtx, rtx, rtx);
|
||||||
|
- fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrghw_direct_v4si
|
||||||
|
- : gen_altivec_vmrglw_direct_v4si;
|
||||||
|
- if (!BYTES_BIG_ENDIAN)
|
||||||
|
- std::swap (operands[1], operands[2]);
|
||||||
|
- emit_insn (fun (operands[0], operands[1], operands[2]));
|
||||||
|
+ if (BYTES_BIG_ENDIAN)
|
||||||
|
+ emit_insn (gen_altivec_vmrghw_direct_v4si_be (operands[0],
|
||||||
|
+ operands[1],
|
||||||
|
+ operands[2]));
|
||||||
|
+ else
|
||||||
|
+ emit_insn (gen_altivec_vmrglw_direct_v4si_le (operands[0],
|
||||||
|
+ operands[2],
|
||||||
|
+ operands[1]));
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
|
-(define_insn "altivec_vmrghw_direct_<mode>"
|
||||||
|
+(define_insn "altivec_vmrghw_direct_<mode>_be"
|
||||||
|
[(set (match_operand:VSX_W 0 "register_operand" "=wa,v")
|
||||||
|
(vec_select:VSX_W
|
||||||
|
(vec_concat:<VS_double>
|
||||||
|
@@ -1229,7 +1231,21 @@ (define_insn "altivec_vmrghw_direct_<mode>"
|
||||||
|
(match_operand:VSX_W 2 "register_operand" "wa,v"))
|
||||||
|
(parallel [(const_int 0) (const_int 4)
|
||||||
|
(const_int 1) (const_int 5)])))]
|
||||||
|
- "TARGET_ALTIVEC"
|
||||||
|
+ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN"
|
||||||
|
+ "@
|
||||||
|
+ xxmrghw %x0,%x1,%x2
|
||||||
|
+ vmrghw %0,%1,%2"
|
||||||
|
+ [(set_attr "type" "vecperm")])
|
||||||
|
+
|
||||||
|
+(define_insn "altivec_vmrghw_direct_<mode>_le"
|
||||||
|
+ [(set (match_operand:VSX_W 0 "register_operand" "=wa,v")
|
||||||
|
+ (vec_select:VSX_W
|
||||||
|
+ (vec_concat:<VS_double>
|
||||||
|
+ (match_operand:VSX_W 2 "register_operand" "wa,v")
|
||||||
|
+ (match_operand:VSX_W 1 "register_operand" "wa,v"))
|
||||||
|
+ (parallel [(const_int 2) (const_int 6)
|
||||||
|
+ (const_int 3) (const_int 7)])))]
|
||||||
|
+ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN"
|
||||||
|
"@
|
||||||
|
xxmrghw %x0,%x1,%x2
|
||||||
|
vmrghw %0,%1,%2"
|
||||||
|
@@ -1318,16 +1334,18 @@ (define_expand "altivec_vmrglw"
|
||||||
|
(use (match_operand:V4SI 2 "register_operand"))]
|
||||||
|
"VECTOR_MEM_ALTIVEC_P (V4SImode)"
|
||||||
|
{
|
||||||
|
- rtx (*fun) (rtx, rtx, rtx);
|
||||||
|
- fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrglw_direct_v4si
|
||||||
|
- : gen_altivec_vmrghw_direct_v4si;
|
||||||
|
- if (!BYTES_BIG_ENDIAN)
|
||||||
|
- std::swap (operands[1], operands[2]);
|
||||||
|
- emit_insn (fun (operands[0], operands[1], operands[2]));
|
||||||
|
+ if (BYTES_BIG_ENDIAN)
|
||||||
|
+ emit_insn (gen_altivec_vmrglw_direct_v4si_be (operands[0],
|
||||||
|
+ operands[1],
|
||||||
|
+ operands[2]));
|
||||||
|
+ else
|
||||||
|
+ emit_insn (gen_altivec_vmrghw_direct_v4si_le (operands[0],
|
||||||
|
+ operands[2],
|
||||||
|
+ operands[1]));
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
|
-(define_insn "altivec_vmrglw_direct_<mode>"
|
||||||
|
+(define_insn "altivec_vmrglw_direct_<mode>_be"
|
||||||
|
[(set (match_operand:VSX_W 0 "register_operand" "=wa,v")
|
||||||
|
(vec_select:VSX_W
|
||||||
|
(vec_concat:<VS_double>
|
||||||
|
@@ -1335,7 +1353,21 @@ (define_insn "altivec_vmrglw_direct_<mode>"
|
||||||
|
(match_operand:VSX_W 2 "register_operand" "wa,v"))
|
||||||
|
(parallel [(const_int 2) (const_int 6)
|
||||||
|
(const_int 3) (const_int 7)])))]
|
||||||
|
- "TARGET_ALTIVEC"
|
||||||
|
+ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN"
|
||||||
|
+ "@
|
||||||
|
+ xxmrglw %x0,%x1,%x2
|
||||||
|
+ vmrglw %0,%1,%2"
|
||||||
|
+ [(set_attr "type" "vecperm")])
|
||||||
|
+
|
||||||
|
+(define_insn "altivec_vmrglw_direct_<mode>_le"
|
||||||
|
+ [(set (match_operand:VSX_W 0 "register_operand" "=wa,v")
|
||||||
|
+ (vec_select:VSX_W
|
||||||
|
+ (vec_concat:<VS_double>
|
||||||
|
+ (match_operand:VSX_W 2 "register_operand" "wa,v")
|
||||||
|
+ (match_operand:VSX_W 1 "register_operand" "wa,v"))
|
||||||
|
+ (parallel [(const_int 0) (const_int 4)
|
||||||
|
+ (const_int 1) (const_int 5)])))]
|
||||||
|
+ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN"
|
||||||
|
"@
|
||||||
|
xxmrglw %x0,%x1,%x2
|
||||||
|
vmrglw %0,%1,%2"
|
||||||
|
@@ -3807,13 +3839,13 @@ (define_expand "vec_widen_umult_hi_v8hi"
|
||||||
|
{
|
||||||
|
emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2]));
|
||||||
|
emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2]));
|
||||||
|
- emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], ve, vo));
|
||||||
|
+ emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2]));
|
||||||
|
emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2]));
|
||||||
|
- emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], vo, ve));
|
||||||
|
+ emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
|
||||||
|
}
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
@@ -3832,13 +3864,13 @@ (define_expand "vec_widen_umult_lo_v8hi"
|
||||||
|
{
|
||||||
|
emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2]));
|
||||||
|
emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2]));
|
||||||
|
- emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], ve, vo));
|
||||||
|
+ emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2]));
|
||||||
|
emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2]));
|
||||||
|
- emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], vo, ve));
|
||||||
|
+ emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
|
||||||
|
}
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
@@ -3857,13 +3889,13 @@ (define_expand "vec_widen_smult_hi_v8hi"
|
||||||
|
{
|
||||||
|
emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2]));
|
||||||
|
emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2]));
|
||||||
|
- emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], ve, vo));
|
||||||
|
+ emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2]));
|
||||||
|
emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2]));
|
||||||
|
- emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], vo, ve));
|
||||||
|
+ emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
|
||||||
|
}
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
@@ -3882,13 +3914,13 @@ (define_expand "vec_widen_smult_lo_v8hi"
|
||||||
|
{
|
||||||
|
emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2]));
|
||||||
|
emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2]));
|
||||||
|
- emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], ve, vo));
|
||||||
|
+ emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2]));
|
||||||
|
emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2]));
|
||||||
|
- emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], vo, ve));
|
||||||
|
+ emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
|
||||||
|
}
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
|
||||||
|
index f5db6436dfa..23b553131a9 100644
|
||||||
|
--- a/gcc/config/rs6000/rs6000.cc
|
||||||
|
+++ b/gcc/config/rs6000/rs6000.cc
|
||||||
|
@@ -22979,8 +22979,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
|
||||||
|
: CODE_FOR_altivec_vmrglh_direct,
|
||||||
|
{0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
|
||||||
|
{OPTION_MASK_ALTIVEC,
|
||||||
|
- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si
|
||||||
|
- : CODE_FOR_altivec_vmrglw_direct_v4si,
|
||||||
|
+ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si_be
|
||||||
|
+ : CODE_FOR_altivec_vmrglw_direct_v4si_le,
|
||||||
|
{0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
|
||||||
|
{OPTION_MASK_ALTIVEC,
|
||||||
|
BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
|
||||||
|
@@ -22991,8 +22991,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
|
||||||
|
: CODE_FOR_altivec_vmrghh_direct,
|
||||||
|
{8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
|
||||||
|
{OPTION_MASK_ALTIVEC,
|
||||||
|
- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si
|
||||||
|
- : CODE_FOR_altivec_vmrghw_direct_v4si,
|
||||||
|
+ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si_be
|
||||||
|
+ : CODE_FOR_altivec_vmrghw_direct_v4si_le,
|
||||||
|
{8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}},
|
||||||
|
{OPTION_MASK_P8_VECTOR,
|
||||||
|
BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
|
||||||
|
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
|
||||||
|
index e16f893c073..226a1049917 100644
|
||||||
|
--- a/gcc/config/rs6000/vsx.md
|
||||||
|
+++ b/gcc/config/rs6000/vsx.md
|
||||||
|
@@ -4694,12 +4694,14 @@ (define_expand "vsx_xxmrghw_<mode>"
|
||||||
|
(const_int 1) (const_int 5)])))]
|
||||||
|
"VECTOR_MEM_VSX_P (<MODE>mode)"
|
||||||
|
{
|
||||||
|
- rtx (*fun) (rtx, rtx, rtx);
|
||||||
|
- fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrghw_direct_<mode>
|
||||||
|
- : gen_altivec_vmrglw_direct_<mode>;
|
||||||
|
- if (!BYTES_BIG_ENDIAN)
|
||||||
|
- std::swap (operands[1], operands[2]);
|
||||||
|
- emit_insn (fun (operands[0], operands[1], operands[2]));
|
||||||
|
+ if (BYTES_BIG_ENDIAN)
|
||||||
|
+ emit_insn (gen_altivec_vmrghw_direct_v4si_be (operands[0],
|
||||||
|
+ operands[1],
|
||||||
|
+ operands[2]));
|
||||||
|
+ else
|
||||||
|
+ emit_insn (gen_altivec_vmrglw_direct_v4si_le (operands[0],
|
||||||
|
+ operands[2],
|
||||||
|
+ operands[1]));
|
||||||
|
DONE;
|
||||||
|
}
|
||||||
|
[(set_attr "type" "vecperm")])
|
||||||
|
@@ -4714,12 +4716,14 @@ (define_expand "vsx_xxmrglw_<mode>"
|
||||||
|
(const_int 3) (const_int 7)])))]
|
||||||
|
"VECTOR_MEM_VSX_P (<MODE>mode)"
|
||||||
|
{
|
||||||
|
- rtx (*fun) (rtx, rtx, rtx);
|
||||||
|
- fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrglw_direct_<mode>
|
||||||
|
- : gen_altivec_vmrghw_direct_<mode>;
|
||||||
|
- if (!BYTES_BIG_ENDIAN)
|
||||||
|
- std::swap (operands[1], operands[2]);
|
||||||
|
- emit_insn (fun (operands[0], operands[1], operands[2]));
|
||||||
|
+ if (BYTES_BIG_ENDIAN)
|
||||||
|
+ emit_insn (gen_altivec_vmrglw_direct_v4si_be (operands[0],
|
||||||
|
+ operands[1],
|
||||||
|
+ operands[2]));
|
||||||
|
+ else
|
||||||
|
+ emit_insn (gen_altivec_vmrghw_direct_v4si_le (operands[0],
|
||||||
|
+ operands[2],
|
||||||
|
+ operands[1]));
|
||||||
|
DONE;
|
||||||
|
}
|
||||||
|
[(set_attr "type" "vecperm")])
|
||||||
|
diff --git a/gcc/testsuite/g++.target/powerpc/pr106069.C b/gcc/testsuite/g++.target/powerpc/pr106069.C
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000000..537207d2fe8
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/g++.target/powerpc/pr106069.C
|
||||||
|
@@ -0,0 +1,119 @@
|
||||||
|
+/* { dg-options "-O -fno-tree-forwprop -maltivec" } */
|
||||||
|
+/* { dg-require-effective-target vmx_hw } */
|
||||||
|
+/* { dg-do run } */
|
||||||
|
+
|
||||||
|
+typedef __attribute__ ((altivec (vector__))) unsigned native_simd_type;
|
||||||
|
+
|
||||||
|
+union
|
||||||
|
+{
|
||||||
|
+ native_simd_type V;
|
||||||
|
+ int R[4];
|
||||||
|
+} store_le_vec;
|
||||||
|
+
|
||||||
|
+struct S
|
||||||
|
+{
|
||||||
|
+ S () = default;
|
||||||
|
+ S (unsigned B0)
|
||||||
|
+ {
|
||||||
|
+ native_simd_type val{B0};
|
||||||
|
+ m_simd = val;
|
||||||
|
+ }
|
||||||
|
+ void store_le (unsigned int out[])
|
||||||
|
+ {
|
||||||
|
+ store_le_vec.V = m_simd;
|
||||||
|
+ unsigned int x0 = store_le_vec.R[0];
|
||||||
|
+ __builtin_memcpy (out, &x0, 4);
|
||||||
|
+ }
|
||||||
|
+ S rotl (unsigned int r)
|
||||||
|
+ {
|
||||||
|
+ native_simd_type rot{r};
|
||||||
|
+ return __builtin_vec_rl (m_simd, rot);
|
||||||
|
+ }
|
||||||
|
+ void operator+= (S other)
|
||||||
|
+ {
|
||||||
|
+ m_simd = __builtin_vec_add (m_simd, other.m_simd);
|
||||||
|
+ }
|
||||||
|
+ void operator^= (S other)
|
||||||
|
+ {
|
||||||
|
+ m_simd = __builtin_vec_xor (m_simd, other.m_simd);
|
||||||
|
+ }
|
||||||
|
+ static void transpose (S &B0, S B1, S B2, S B3)
|
||||||
|
+ {
|
||||||
|
+ native_simd_type T0 = __builtin_vec_mergeh (B0.m_simd, B2.m_simd);
|
||||||
|
+ native_simd_type T1 = __builtin_vec_mergeh (B1.m_simd, B3.m_simd);
|
||||||
|
+ native_simd_type T2 = __builtin_vec_mergel (B0.m_simd, B2.m_simd);
|
||||||
|
+ native_simd_type T3 = __builtin_vec_mergel (B1.m_simd, B3.m_simd);
|
||||||
|
+ B0 = __builtin_vec_mergeh (T0, T1);
|
||||||
|
+ B3 = __builtin_vec_mergel (T2, T3);
|
||||||
|
+ }
|
||||||
|
+ S (native_simd_type x) : m_simd (x) {}
|
||||||
|
+ native_simd_type m_simd;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+foo (unsigned int output[], unsigned state[])
|
||||||
|
+{
|
||||||
|
+ S R00 = state[0];
|
||||||
|
+ S R01 = state[0];
|
||||||
|
+ S R02 = state[2];
|
||||||
|
+ S R03 = state[0];
|
||||||
|
+ S R05 = state[5];
|
||||||
|
+ S R06 = state[6];
|
||||||
|
+ S R07 = state[7];
|
||||||
|
+ S R08 = state[8];
|
||||||
|
+ S R09 = state[9];
|
||||||
|
+ S R10 = state[10];
|
||||||
|
+ S R11 = state[11];
|
||||||
|
+ S R12 = state[12];
|
||||||
|
+ S R13 = state[13];
|
||||||
|
+ S R14 = state[4];
|
||||||
|
+ S R15 = state[15];
|
||||||
|
+ for (int r = 0; r != 10; ++r)
|
||||||
|
+ {
|
||||||
|
+ R09 += R13;
|
||||||
|
+ R11 += R15;
|
||||||
|
+ R05 ^= R09;
|
||||||
|
+ R06 ^= R10;
|
||||||
|
+ R07 ^= R11;
|
||||||
|
+ R07 = R07.rotl (7);
|
||||||
|
+ R00 += R05;
|
||||||
|
+ R01 += R06;
|
||||||
|
+ R02 += R07;
|
||||||
|
+ R15 ^= R00;
|
||||||
|
+ R12 ^= R01;
|
||||||
|
+ R13 ^= R02;
|
||||||
|
+ R00 += R05;
|
||||||
|
+ R01 += R06;
|
||||||
|
+ R02 += R07;
|
||||||
|
+ R15 ^= R00;
|
||||||
|
+ R12 = R12.rotl (8);
|
||||||
|
+ R13 = R13.rotl (8);
|
||||||
|
+ R10 += R15;
|
||||||
|
+ R11 += R12;
|
||||||
|
+ R08 += R13;
|
||||||
|
+ R09 += R14;
|
||||||
|
+ R05 ^= R10;
|
||||||
|
+ R06 ^= R11;
|
||||||
|
+ R07 ^= R08;
|
||||||
|
+ R05 = R05.rotl (7);
|
||||||
|
+ R06 = R06.rotl (7);
|
||||||
|
+ R07 = R07.rotl (7);
|
||||||
|
+ }
|
||||||
|
+ R00 += state[0];
|
||||||
|
+ S::transpose (R00, R01, R02, R03);
|
||||||
|
+ R00.store_le (output);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+unsigned int res[1];
|
||||||
|
+unsigned main_state[]{1634760805, 60878, 2036477234, 6,
|
||||||
|
+ 0, 825562964, 1471091955, 1346092787,
|
||||||
|
+ 506976774, 4197066702, 518848283, 118491664,
|
||||||
|
+ 0, 0, 0, 0};
|
||||||
|
+int
|
||||||
|
+main ()
|
||||||
|
+{
|
||||||
|
+ foo (res, main_state);
|
||||||
|
+ if (res[0] != 0x41fcef98)
|
||||||
|
+ __builtin_abort ();
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/powerpc/pr115355.c b/gcc/testsuite/gcc.target/powerpc/pr115355.c
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000000..8955126b808
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/powerpc/pr115355.c
|
||||||
|
@@ -0,0 +1,37 @@
|
||||||
|
+/* { dg-do run } */
|
||||||
|
+/* { dg-require-effective-target p9vector_hw } */
|
||||||
|
+/* Force vectorization with -fno-vect-cost-model to have vector unpack
|
||||||
|
+ which exposes the issue in PR115355. */
|
||||||
|
+/* { dg-options "-O2 -mdejagnu-cpu=power9 -fno-vect-cost-model" } */
|
||||||
|
+
|
||||||
|
+/* Verify it runs successfully. */
|
||||||
|
+
|
||||||
|
+__attribute__((noipa))
|
||||||
|
+void setToIdentityGOOD(unsigned long long *mVec, unsigned int mLen)
|
||||||
|
+{
|
||||||
|
+ #pragma GCC novector
|
||||||
|
+ for (unsigned int i = 0; i < mLen; i++)
|
||||||
|
+ mVec[i] = i;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+__attribute__((noipa))
|
||||||
|
+void setToIdentityBAD(unsigned long long *mVec, unsigned int mLen)
|
||||||
|
+{
|
||||||
|
+ for (unsigned int i = 0; i < mLen; i++)
|
||||||
|
+ mVec[i] = i;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+unsigned long long vec1[100];
|
||||||
|
+unsigned long long vec2[100];
|
||||||
|
+
|
||||||
|
+int main()
|
||||||
|
+{
|
||||||
|
+ unsigned int l = 29;
|
||||||
|
+ setToIdentityGOOD (vec1, 29);
|
||||||
|
+ setToIdentityBAD (vec2, 29);
|
||||||
|
+
|
||||||
|
+ if (__builtin_memcmp (vec1, vec2, l * sizeof (vec1[0])) != 0)
|
||||||
|
+ __builtin_abort ();
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
240
SOURCES/gcc12-vector-merge-2.patch
Normal file
240
SOURCES/gcc12-vector-merge-2.patch
Normal file
@ -0,0 +1,240 @@
|
|||||||
|
commit 13f0528c782c3732052973a5d340769af8182c8f
|
||||||
|
Author: Kewen Lin <linkw@linux.ibm.com>
|
||||||
|
Date: Wed Jun 26 02:16:17 2024 -0500
|
||||||
|
|
||||||
|
rs6000: Fix wrong RTL patterns for vector merge high/low char on LE
|
||||||
|
|
||||||
|
Commit r12-4496 changes some define_expands and define_insns
|
||||||
|
for vector merge high/low char, which are altivec_vmrg[hl]b.
|
||||||
|
These defines are mainly for built-in function vec_merge{h,l}
|
||||||
|
and some internal gen function needs. These functions should
|
||||||
|
consider endianness, taking vec_mergeh as example, as PVIPR
|
||||||
|
defines, vec_mergeh "Merges the first halves (in element order)
|
||||||
|
of two vectors", it does note it's in element order. So it's
|
||||||
|
mapped into vmrghb on BE while vmrglb on LE respectively.
|
||||||
|
Although the mapped insns are different, as the discussion in
|
||||||
|
PR106069, the RTL pattern should be still the same, it is
|
||||||
|
conformed before commit r12-4496, but gets changed into
|
||||||
|
different patterns on BE and LE starting from commit r12-4496.
|
||||||
|
Similar to 32-bit element case in commit log of r15-1504, this
|
||||||
|
8-bit element pattern on LE doesn't actually match what the
|
||||||
|
underlying insn is intended to represent, once some optimization
|
||||||
|
like combine makes some changes based on it, it would cause
|
||||||
|
the unexpected consequence. The newly constructed test case
|
||||||
|
pr106069-1.c is a typical example for this issue.
|
||||||
|
|
||||||
|
So this patch is to fix the wrong RTL pattern, ensure the
|
||||||
|
associated RTL patterns become the same as before which can
|
||||||
|
have the same semantics as their mapped insns.  With the
|
||||||
|
proposed patch, the expanders like altivec_vmrghb expands
|
||||||
|
into altivec_vmrghb_direct_be or altivec_vmrglb_direct_le
|
||||||
|
depending on endianness, "direct" can easily show which
|
||||||
|
insn would be generated, _be and _le are mainly for the
|
||||||
|
different RTL patterns per endianness.
|
||||||
|
|
||||||
|
Co-authored-by: Xionghu Luo <xionghuluo@tencent.com>
|
||||||
|
|
||||||
|
PR target/106069
|
||||||
|
PR target/115355
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/rs6000/altivec.md (altivec_vmrghb_direct): Rename to ...
|
||||||
|
(altivec_vmrghb_direct_be): ... this. Add condition BYTES_BIG_ENDIAN.
|
||||||
|
(altivec_vmrghb_direct_le): New define_insn.
|
||||||
|
(altivec_vmrglb_direct): Rename to ...
|
||||||
|
(altivec_vmrglb_direct_be): ... this. Add condition BYTES_BIG_ENDIAN.
|
||||||
|
(altivec_vmrglb_direct_le): New define_insn.
|
||||||
|
(altivec_vmrghb): Adjust by calling gen_altivec_vmrghb_direct_be
|
||||||
|
for BE and gen_altivec_vmrglb_direct_le for LE.
|
||||||
|
(altivec_vmrglb): Adjust by calling gen_altivec_vmrglb_direct_be
|
||||||
|
for BE and gen_altivec_vmrghb_direct_le for LE.
|
||||||
|
* config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace
|
||||||
|
CODE_FOR_altivec_vmrghb_direct by
|
||||||
|
CODE_FOR_altivec_vmrghb_direct_be for BE and
|
||||||
|
CODE_FOR_altivec_vmrghb_direct_le for LE. And replace
|
||||||
|
CODE_FOR_altivec_vmrglb_direct by
|
||||||
|
CODE_FOR_altivec_vmrglb_direct_be for BE and
|
||||||
|
CODE_FOR_altivec_vmrglb_direct_le for LE.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/powerpc/pr106069-1.c: New test.
|
||||||
|
|
||||||
|
(cherry picked from commit 62520e4e9f7e2fe8a16ee57a4bd35da2e921ae22)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
|
||||||
|
index 0c408a9e839..b8baae679c4 100644
|
||||||
|
--- a/gcc/config/rs6000/altivec.md
|
||||||
|
+++ b/gcc/config/rs6000/altivec.md
|
||||||
|
@@ -1152,15 +1152,16 @@ (define_expand "altivec_vmrghb"
|
||||||
|
(use (match_operand:V16QI 2 "register_operand"))]
|
||||||
|
"TARGET_ALTIVEC"
|
||||||
|
{
|
||||||
|
- rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrghb_direct
|
||||||
|
- : gen_altivec_vmrglb_direct;
|
||||||
|
- if (!BYTES_BIG_ENDIAN)
|
||||||
|
- std::swap (operands[1], operands[2]);
|
||||||
|
- emit_insn (fun (operands[0], operands[1], operands[2]));
|
||||||
|
+ if (BYTES_BIG_ENDIAN)
|
||||||
|
+ emit_insn (
|
||||||
|
+ gen_altivec_vmrghb_direct_be (operands[0], operands[1], operands[2]));
|
||||||
|
+ else
|
||||||
|
+ emit_insn (
|
||||||
|
+ gen_altivec_vmrglb_direct_le (operands[0], operands[2], operands[1]));
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
|
-(define_insn "altivec_vmrghb_direct"
|
||||||
|
+(define_insn "altivec_vmrghb_direct_be"
|
||||||
|
[(set (match_operand:V16QI 0 "register_operand" "=v")
|
||||||
|
(vec_select:V16QI
|
||||||
|
(vec_concat:V32QI
|
||||||
|
@@ -1174,7 +1175,25 @@ (define_insn "altivec_vmrghb_direct"
|
||||||
|
(const_int 5) (const_int 21)
|
||||||
|
(const_int 6) (const_int 22)
|
||||||
|
(const_int 7) (const_int 23)])))]
|
||||||
|
- "TARGET_ALTIVEC"
|
||||||
|
+ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN"
|
||||||
|
+ "vmrghb %0,%1,%2"
|
||||||
|
+ [(set_attr "type" "vecperm")])
|
||||||
|
+
|
||||||
|
+(define_insn "altivec_vmrghb_direct_le"
|
||||||
|
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
|
||||||
|
+ (vec_select:V16QI
|
||||||
|
+ (vec_concat:V32QI
|
||||||
|
+ (match_operand:V16QI 2 "register_operand" "v")
|
||||||
|
+ (match_operand:V16QI 1 "register_operand" "v"))
|
||||||
|
+ (parallel [(const_int 8) (const_int 24)
|
||||||
|
+ (const_int 9) (const_int 25)
|
||||||
|
+ (const_int 10) (const_int 26)
|
||||||
|
+ (const_int 11) (const_int 27)
|
||||||
|
+ (const_int 12) (const_int 28)
|
||||||
|
+ (const_int 13) (const_int 29)
|
||||||
|
+ (const_int 14) (const_int 30)
|
||||||
|
+ (const_int 15) (const_int 31)])))]
|
||||||
|
+ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN"
|
||||||
|
"vmrghb %0,%1,%2"
|
||||||
|
[(set_attr "type" "vecperm")])
|
||||||
|
|
||||||
|
@@ -1274,15 +1293,16 @@ (define_expand "altivec_vmrglb"
|
||||||
|
(use (match_operand:V16QI 2 "register_operand"))]
|
||||||
|
"TARGET_ALTIVEC"
|
||||||
|
{
|
||||||
|
- rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrglb_direct
|
||||||
|
- : gen_altivec_vmrghb_direct;
|
||||||
|
- if (!BYTES_BIG_ENDIAN)
|
||||||
|
- std::swap (operands[1], operands[2]);
|
||||||
|
- emit_insn (fun (operands[0], operands[1], operands[2]));
|
||||||
|
+ if (BYTES_BIG_ENDIAN)
|
||||||
|
+ emit_insn (
|
||||||
|
+ gen_altivec_vmrglb_direct_be (operands[0], operands[1], operands[2]));
|
||||||
|
+ else
|
||||||
|
+ emit_insn (
|
||||||
|
+ gen_altivec_vmrghb_direct_le (operands[0], operands[2], operands[1]));
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
|
-(define_insn "altivec_vmrglb_direct"
|
||||||
|
+(define_insn "altivec_vmrglb_direct_be"
|
||||||
|
[(set (match_operand:V16QI 0 "register_operand" "=v")
|
||||||
|
(vec_select:V16QI
|
||||||
|
(vec_concat:V32QI
|
||||||
|
@@ -1296,7 +1316,25 @@ (define_insn "altivec_vmrglb_direct"
|
||||||
|
(const_int 13) (const_int 29)
|
||||||
|
(const_int 14) (const_int 30)
|
||||||
|
(const_int 15) (const_int 31)])))]
|
||||||
|
- "TARGET_ALTIVEC"
|
||||||
|
+ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN"
|
||||||
|
+ "vmrglb %0,%1,%2"
|
||||||
|
+ [(set_attr "type" "vecperm")])
|
||||||
|
+
|
||||||
|
+(define_insn "altivec_vmrglb_direct_le"
|
||||||
|
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
|
||||||
|
+ (vec_select:V16QI
|
||||||
|
+ (vec_concat:V32QI
|
||||||
|
+ (match_operand:V16QI 2 "register_operand" "v")
|
||||||
|
+ (match_operand:V16QI 1 "register_operand" "v"))
|
||||||
|
+ (parallel [(const_int 0) (const_int 16)
|
||||||
|
+ (const_int 1) (const_int 17)
|
||||||
|
+ (const_int 2) (const_int 18)
|
||||||
|
+ (const_int 3) (const_int 19)
|
||||||
|
+ (const_int 4) (const_int 20)
|
||||||
|
+ (const_int 5) (const_int 21)
|
||||||
|
+ (const_int 6) (const_int 22)
|
||||||
|
+ (const_int 7) (const_int 23)])))]
|
||||||
|
+ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN"
|
||||||
|
"vmrglb %0,%1,%2"
|
||||||
|
[(set_attr "type" "vecperm")])
|
||||||
|
|
||||||
|
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
|
||||||
|
index 23b553131a9..e8ce629182b 100644
|
||||||
|
--- a/gcc/config/rs6000/rs6000.cc
|
||||||
|
+++ b/gcc/config/rs6000/rs6000.cc
|
||||||
|
@@ -22971,8 +22971,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
|
||||||
|
CODE_FOR_altivec_vpkuwum_direct,
|
||||||
|
{2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}},
|
||||||
|
{OPTION_MASK_ALTIVEC,
|
||||||
|
- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
|
||||||
|
- : CODE_FOR_altivec_vmrglb_direct,
|
||||||
|
+ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct_be
|
||||||
|
+ : CODE_FOR_altivec_vmrglb_direct_le,
|
||||||
|
{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
|
||||||
|
{OPTION_MASK_ALTIVEC,
|
||||||
|
BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
|
||||||
|
@@ -22983,8 +22983,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
|
||||||
|
: CODE_FOR_altivec_vmrglw_direct_v4si_le,
|
||||||
|
{0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
|
||||||
|
{OPTION_MASK_ALTIVEC,
|
||||||
|
- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
|
||||||
|
- : CODE_FOR_altivec_vmrghb_direct,
|
||||||
|
+ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct_be
|
||||||
|
+ : CODE_FOR_altivec_vmrghb_direct_le,
|
||||||
|
{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
|
||||||
|
{OPTION_MASK_ALTIVEC,
|
||||||
|
BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106069-1.c b/gcc/testsuite/gcc.target/powerpc/pr106069-1.c
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000000..4945d8fedfb
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/powerpc/pr106069-1.c
|
||||||
|
@@ -0,0 +1,39 @@
|
||||||
|
+/* { dg-do run } */
|
||||||
|
+/* { dg-options "-O2" } */
|
||||||
|
+/* { dg-require-effective-target vmx_hw } */
|
||||||
|
+
|
||||||
|
+/* Test vector merge for 8-bit element size,
|
||||||
|
+ it will abort if the RTL pattern isn't expected. */
|
||||||
|
+
|
||||||
|
+#include "altivec.h"
|
||||||
|
+
|
||||||
|
+__attribute__((noipa))
|
||||||
|
+signed char elem_6 (vector signed char a, vector signed char b)
|
||||||
|
+{
|
||||||
|
+ vector signed char c = vec_mergeh (a,b);
|
||||||
|
+ return vec_extract (c, 6);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+__attribute__((noipa))
|
||||||
|
+unsigned char elem_15 (vector unsigned char a, vector unsigned char b)
|
||||||
|
+{
|
||||||
|
+ vector unsigned char c = vec_mergel (a,b);
|
||||||
|
+ return vec_extract (c, 15);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+main ()
|
||||||
|
+{
|
||||||
|
+ vector unsigned char v1
|
||||||
|
+ = {3, 33, 22, 12, 34, 14, 5, 25, 30, 11, 0, 21, 17, 27, 38, 8};
|
||||||
|
+ vector unsigned char v2
|
||||||
|
+ = {81, 82, 83, 84, 68, 67, 66, 65, 99, 100, 101, 102, 250, 125, 0, 6};
|
||||||
|
+ signed char x1 = elem_6 ((vector signed char) v1, (vector signed char) v2);
|
||||||
|
+ unsigned char x2 = elem_15 (v1, v2);
|
||||||
|
+
|
||||||
|
+ if (x1 != 12 || x2 != 6)
|
||||||
|
+ __builtin_abort ();
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
306
SOURCES/gcc12-vector-merge-3.patch
Normal file
306
SOURCES/gcc12-vector-merge-3.patch
Normal file
@ -0,0 +1,306 @@
|
|||||||
|
commit ca6eea0eb33de8b2e23e0bef3466575bb14ab63f
|
||||||
|
Author: Kewen Lin <linkw@linux.ibm.com>
|
||||||
|
Date: Wed Jun 26 02:16:17 2024 -0500
|
||||||
|
|
||||||
|
rs6000: Fix wrong RTL patterns for vector merge high/low short on LE
|
||||||
|
|
||||||
|
Commit r12-4496 changes some define_expands and define_insns
|
||||||
|
for vector merge high/low short, which are altivec_vmrg[hl]h.
|
||||||
|
These defines are mainly for built-in function vec_merge{h,l}
|
||||||
|
and some internal gen function needs. These functions should
|
||||||
|
consider endianness, taking vec_mergeh as example, as PVIPR
|
||||||
|
defines, vec_mergeh "Merges the first halves (in element order)
|
||||||
|
of two vectors", it does note it's in element order. So it's
|
||||||
|
mapped into vmrghh on BE while vmrglh on LE respectively.
|
||||||
|
Although the mapped insns are different, as the discussion in
|
||||||
|
PR106069, the RTL pattern should be still the same, it is
|
||||||
|
consistent before commit r12-4496, but got changed into
|
||||||
|
different patterns on BE and LE starting from commit r12-4496.
|
||||||
|
Similar to 32-bit element case in commit log of r15-1504, this
|
||||||
|
16-bit element pattern on LE doesn't actually match what the
|
||||||
|
underlying insn is intended to represent, once some optimization
|
||||||
|
like combine makes some changes based on it, it would cause
|
||||||
|
unexpected consequences. The newly constructed test case
|
||||||
|
pr106069-2.c is a typical example for this issue on element type
|
||||||
|
short.
|
||||||
|
|
||||||
|
So this patch is to fix the wrong RTL pattern, ensure the
|
||||||
|
associated RTL patterns become the same as before which can
|
||||||
|
have the same semantic as their mapped insns. With the
|
||||||
|
proposed patch, the expanders like altivec_vmrghh expands
|
||||||
|
into altivec_vmrghh_direct_be or altivec_vmrglh_direct_le
|
||||||
|
depending on endianness, "direct" can easily show which
|
||||||
|
insn would be generated, _be and _le are mainly for the
|
||||||
|
different RTL patterns required by each endianness.
|
||||||
|
|
||||||
|
Co-authored-by: Xionghu Luo <xionghuluo@tencent.com>
|
||||||
|
|
||||||
|
PR target/106069
|
||||||
|
PR target/115355
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/rs6000/altivec.md (altivec_vmrghh_direct): Rename to ...
|
||||||
|
(altivec_vmrghh_direct_be): ... this. Add condition BYTES_BIG_ENDIAN.
|
||||||
|
(altivec_vmrghh_direct_le): New define_insn.
|
||||||
|
(altivec_vmrglh_direct): Rename to ...
|
||||||
|
(altivec_vmrglh_direct_be): ... this. Add condition BYTES_BIG_ENDIAN.
|
||||||
|
(altivec_vmrglh_direct_le): New define_insn.
|
||||||
|
(altivec_vmrghh): Adjust by calling gen_altivec_vmrghh_direct_be
|
||||||
|
for BE and gen_altivec_vmrglh_direct_le for LE.
|
||||||
|
(altivec_vmrglh): Adjust by calling gen_altivec_vmrglh_direct_be
|
||||||
|
for BE and gen_altivec_vmrghh_direct_le for LE.
|
||||||
|
(vec_widen_umult_hi_v16qi): Adjust the call to
|
||||||
|
gen_altivec_vmrghh_direct by gen_altivec_vmrghh for BE
|
||||||
|
and by gen_altivec_vmrglh for LE.
|
||||||
|
(vec_widen_smult_hi_v16qi): Likewise.
|
||||||
|
(vec_widen_umult_lo_v16qi): Adjust the call to
|
||||||
|
gen_altivec_vmrglh_direct by gen_altivec_vmrglh for BE
|
||||||
|
and by gen_altivec_vmrghh for LE.
|
||||||
|
(vec_widen_smult_lo_v16qi): Likewise.
|
||||||
|
* config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace
|
||||||
|
CODE_FOR_altivec_vmrghh_direct by
|
||||||
|
CODE_FOR_altivec_vmrghh_direct_be for BE and
|
||||||
|
CODE_FOR_altivec_vmrghh_direct_le for LE. And replace
|
||||||
|
CODE_FOR_altivec_vmrglh_direct by
|
||||||
|
CODE_FOR_altivec_vmrglh_direct_be for BE and
|
||||||
|
CODE_FOR_altivec_vmrglh_direct_le for LE.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/powerpc/pr106069-2.c: New test.
|
||||||
|
|
||||||
|
(cherry picked from commit 812c70bf4981958488331d4ea5af8709b5321da1)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
|
||||||
|
index b8baae679c4..50689e418ed 100644
|
||||||
|
--- a/gcc/config/rs6000/altivec.md
|
||||||
|
+++ b/gcc/config/rs6000/altivec.md
|
||||||
|
@@ -1203,17 +1203,18 @@ (define_expand "altivec_vmrghh"
|
||||||
|
(use (match_operand:V8HI 2 "register_operand"))]
|
||||||
|
"TARGET_ALTIVEC"
|
||||||
|
{
|
||||||
|
- rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrghh_direct
|
||||||
|
- : gen_altivec_vmrglh_direct;
|
||||||
|
- if (!BYTES_BIG_ENDIAN)
|
||||||
|
- std::swap (operands[1], operands[2]);
|
||||||
|
- emit_insn (fun (operands[0], operands[1], operands[2]));
|
||||||
|
+ if (BYTES_BIG_ENDIAN)
|
||||||
|
+ emit_insn (
|
||||||
|
+ gen_altivec_vmrghh_direct_be (operands[0], operands[1], operands[2]));
|
||||||
|
+ else
|
||||||
|
+ emit_insn (
|
||||||
|
+ gen_altivec_vmrglh_direct_le (operands[0], operands[2], operands[1]));
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
|
-(define_insn "altivec_vmrghh_direct"
|
||||||
|
+(define_insn "altivec_vmrghh_direct_be"
|
||||||
|
[(set (match_operand:V8HI 0 "register_operand" "=v")
|
||||||
|
- (vec_select:V8HI
|
||||||
|
+ (vec_select:V8HI
|
||||||
|
(vec_concat:V16HI
|
||||||
|
(match_operand:V8HI 1 "register_operand" "v")
|
||||||
|
(match_operand:V8HI 2 "register_operand" "v"))
|
||||||
|
@@ -1221,7 +1222,21 @@ (define_insn "altivec_vmrghh_direct"
|
||||||
|
(const_int 1) (const_int 9)
|
||||||
|
(const_int 2) (const_int 10)
|
||||||
|
(const_int 3) (const_int 11)])))]
|
||||||
|
- "TARGET_ALTIVEC"
|
||||||
|
+ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN"
|
||||||
|
+ "vmrghh %0,%1,%2"
|
||||||
|
+ [(set_attr "type" "vecperm")])
|
||||||
|
+
|
||||||
|
+(define_insn "altivec_vmrghh_direct_le"
|
||||||
|
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
|
||||||
|
+ (vec_select:V8HI
|
||||||
|
+ (vec_concat:V16HI
|
||||||
|
+ (match_operand:V8HI 2 "register_operand" "v")
|
||||||
|
+ (match_operand:V8HI 1 "register_operand" "v"))
|
||||||
|
+ (parallel [(const_int 4) (const_int 12)
|
||||||
|
+ (const_int 5) (const_int 13)
|
||||||
|
+ (const_int 6) (const_int 14)
|
||||||
|
+ (const_int 7) (const_int 15)])))]
|
||||||
|
+ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN"
|
||||||
|
"vmrghh %0,%1,%2"
|
||||||
|
[(set_attr "type" "vecperm")])
|
||||||
|
|
||||||
|
@@ -1344,15 +1359,16 @@ (define_expand "altivec_vmrglh"
|
||||||
|
(use (match_operand:V8HI 2 "register_operand"))]
|
||||||
|
"TARGET_ALTIVEC"
|
||||||
|
{
|
||||||
|
- rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrglh_direct
|
||||||
|
- : gen_altivec_vmrghh_direct;
|
||||||
|
- if (!BYTES_BIG_ENDIAN)
|
||||||
|
- std::swap (operands[1], operands[2]);
|
||||||
|
- emit_insn (fun (operands[0], operands[1], operands[2]));
|
||||||
|
+ if (BYTES_BIG_ENDIAN)
|
||||||
|
+ emit_insn (
|
||||||
|
+ gen_altivec_vmrglh_direct_be (operands[0], operands[1], operands[2]));
|
||||||
|
+ else
|
||||||
|
+ emit_insn (
|
||||||
|
+ gen_altivec_vmrghh_direct_le (operands[0], operands[2], operands[1]));
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
|
-(define_insn "altivec_vmrglh_direct"
|
||||||
|
+(define_insn "altivec_vmrglh_direct_be"
|
||||||
|
[(set (match_operand:V8HI 0 "register_operand" "=v")
|
||||||
|
(vec_select:V8HI
|
||||||
|
(vec_concat:V16HI
|
||||||
|
@@ -1362,7 +1378,21 @@ (define_insn "altivec_vmrglh_direct"
|
||||||
|
(const_int 5) (const_int 13)
|
||||||
|
(const_int 6) (const_int 14)
|
||||||
|
(const_int 7) (const_int 15)])))]
|
||||||
|
- "TARGET_ALTIVEC"
|
||||||
|
+ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN"
|
||||||
|
+ "vmrglh %0,%1,%2"
|
||||||
|
+ [(set_attr "type" "vecperm")])
|
||||||
|
+
|
||||||
|
+(define_insn "altivec_vmrglh_direct_le"
|
||||||
|
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
|
||||||
|
+ (vec_select:V8HI
|
||||||
|
+ (vec_concat:V16HI
|
||||||
|
+ (match_operand:V8HI 2 "register_operand" "v")
|
||||||
|
+ (match_operand:V8HI 1 "register_operand" "v"))
|
||||||
|
+ (parallel [(const_int 0) (const_int 8)
|
||||||
|
+ (const_int 1) (const_int 9)
|
||||||
|
+ (const_int 2) (const_int 10)
|
||||||
|
+ (const_int 3) (const_int 11)])))]
|
||||||
|
+ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN"
|
||||||
|
"vmrglh %0,%1,%2"
|
||||||
|
[(set_attr "type" "vecperm")])
|
||||||
|
|
||||||
|
@@ -3777,13 +3807,13 @@ (define_expand "vec_widen_umult_hi_v16qi"
|
||||||
|
{
|
||||||
|
emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2]));
|
||||||
|
emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2]));
|
||||||
|
- emit_insn (gen_altivec_vmrghh_direct (operands[0], ve, vo));
|
||||||
|
+ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2]));
|
||||||
|
emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2]));
|
||||||
|
- emit_insn (gen_altivec_vmrghh_direct (operands[0], vo, ve));
|
||||||
|
+ emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
|
||||||
|
}
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
@@ -3802,13 +3832,13 @@ (define_expand "vec_widen_umult_lo_v16qi"
|
||||||
|
{
|
||||||
|
emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2]));
|
||||||
|
emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2]));
|
||||||
|
- emit_insn (gen_altivec_vmrglh_direct (operands[0], ve, vo));
|
||||||
|
+ emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2]));
|
||||||
|
emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2]));
|
||||||
|
- emit_insn (gen_altivec_vmrglh_direct (operands[0], vo, ve));
|
||||||
|
+ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
|
||||||
|
}
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
@@ -3827,13 +3857,13 @@ (define_expand "vec_widen_smult_hi_v16qi"
|
||||||
|
{
|
||||||
|
emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2]));
|
||||||
|
emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2]));
|
||||||
|
- emit_insn (gen_altivec_vmrghh_direct (operands[0], ve, vo));
|
||||||
|
+ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2]));
|
||||||
|
emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2]));
|
||||||
|
- emit_insn (gen_altivec_vmrghh_direct (operands[0], vo, ve));
|
||||||
|
+ emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
|
||||||
|
}
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
@@ -3852,13 +3882,13 @@ (define_expand "vec_widen_smult_lo_v16qi"
|
||||||
|
{
|
||||||
|
emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2]));
|
||||||
|
emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2]));
|
||||||
|
- emit_insn (gen_altivec_vmrglh_direct (operands[0], ve, vo));
|
||||||
|
+ emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2]));
|
||||||
|
emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2]));
|
||||||
|
- emit_insn (gen_altivec_vmrglh_direct (operands[0], vo, ve));
|
||||||
|
+ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
|
||||||
|
}
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
|
||||||
|
index e8ce629182b..34be43c9f84 100644
|
||||||
|
--- a/gcc/config/rs6000/rs6000.cc
|
||||||
|
+++ b/gcc/config/rs6000/rs6000.cc
|
||||||
|
@@ -22975,8 +22975,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
|
||||||
|
: CODE_FOR_altivec_vmrglb_direct_le,
|
||||||
|
{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
|
||||||
|
{OPTION_MASK_ALTIVEC,
|
||||||
|
- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
|
||||||
|
- : CODE_FOR_altivec_vmrglh_direct,
|
||||||
|
+ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct_be
|
||||||
|
+ : CODE_FOR_altivec_vmrglh_direct_le,
|
||||||
|
{0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
|
||||||
|
{OPTION_MASK_ALTIVEC,
|
||||||
|
BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si_be
|
||||||
|
@@ -22987,8 +22987,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
|
||||||
|
: CODE_FOR_altivec_vmrghb_direct_le,
|
||||||
|
{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
|
||||||
|
{OPTION_MASK_ALTIVEC,
|
||||||
|
- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
|
||||||
|
- : CODE_FOR_altivec_vmrghh_direct,
|
||||||
|
+ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct_be
|
||||||
|
+ : CODE_FOR_altivec_vmrghh_direct_le,
|
||||||
|
{8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
|
||||||
|
{OPTION_MASK_ALTIVEC,
|
||||||
|
BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si_be
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106069-2.c b/gcc/testsuite/gcc.target/powerpc/pr106069-2.c
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000000..283e3290fb3
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/powerpc/pr106069-2.c
|
||||||
|
@@ -0,0 +1,37 @@
|
||||||
|
+/* { dg-do run } */
|
||||||
|
+/* { dg-options "-O2" } */
|
||||||
|
+/* { dg-require-effective-target vmx_hw } */
|
||||||
|
+
|
||||||
|
+/* Test vector merge for 16-bit element size,
|
||||||
|
+ it will abort if the RTL pattern isn't expected. */
|
||||||
|
+
|
||||||
|
+#include "altivec.h"
|
||||||
|
+
|
||||||
|
+__attribute__((noipa))
|
||||||
|
+signed short elem_2 (vector signed short a, vector signed short b)
|
||||||
|
+{
|
||||||
|
+ vector signed short c = vec_mergeh (a,b);
|
||||||
|
+ return vec_extract (c, 2);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+__attribute__((noipa))
|
||||||
|
+unsigned short elem_7 (vector unsigned short a, vector unsigned short b)
|
||||||
|
+{
|
||||||
|
+ vector unsigned short c = vec_mergel (a,b);
|
||||||
|
+ return vec_extract (c, 7);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+main ()
|
||||||
|
+{
|
||||||
|
+ vector unsigned short v1 = {3, 22, 12, 34, 5, 25, 30, 11};
|
||||||
|
+ vector unsigned short v2 = {84, 168, 267, 966, 65, 399, 999, 99};
|
||||||
|
+ signed short x1 = elem_2 ((vector signed short) v1, (vector signed short) v2);
|
||||||
|
+ unsigned short x2 = elem_7 (v1, v2);
|
||||||
|
+
|
||||||
|
+ if (x1 != 22 || x2 != 99)
|
||||||
|
+ __builtin_abort ();
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
@ -1,3 +1,6 @@
|
|||||||
|
%{?scl_package:%global scl gcc-toolset-12}
|
||||||
|
%global scl_prefix gcc-toolset-12-
|
||||||
|
BuildRequires: scl-utils-build
|
||||||
%global __python /usr/bin/python3
|
%global __python /usr/bin/python3
|
||||||
%{?scl:%global __strip %%{_scl_root}/usr/bin/strip}
|
%{?scl:%global __strip %%{_scl_root}/usr/bin/strip}
|
||||||
%{?scl:%global __objdump %%{_scl_root}/usr/bin/objdump}
|
%{?scl:%global __objdump %%{_scl_root}/usr/bin/objdump}
|
||||||
@ -147,7 +150,7 @@
|
|||||||
Summary: GCC version 12
|
Summary: GCC version 12
|
||||||
Name: %{?scl_prefix}gcc
|
Name: %{?scl_prefix}gcc
|
||||||
Version: %{gcc_version}
|
Version: %{gcc_version}
|
||||||
Release: %{gcc_release}.4%{?dist}
|
Release: %{gcc_release}.8%{?dist}
|
||||||
# libgcc, libgfortran, libgomp, libstdc++ and crtstuff have
|
# libgcc, libgfortran, libgomp, libstdc++ and crtstuff have
|
||||||
# GCC Runtime Exception.
|
# GCC Runtime Exception.
|
||||||
License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
|
License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
|
||||||
@ -193,10 +196,8 @@ URL: http://gcc.gnu.org
|
|||||||
# Need binutils which support -plugin
|
# Need binutils which support -plugin
|
||||||
# Need binutils which support .loc view >= 2.30
|
# Need binutils which support .loc view >= 2.30
|
||||||
# Need binutils which support --generate-missing-build-notes=yes >= 2.31
|
# Need binutils which support --generate-missing-build-notes=yes >= 2.31
|
||||||
%if 0%{?scl:1}
|
|
||||||
BuildRequires: %{?scl_prefix}binutils >= 2.31
|
BuildRequires: %{?scl_prefix}binutils >= 2.31
|
||||||
BuildRequires: %{?scl_prefix}gdb >= 7.4.50
|
BuildRequires: %{?scl_prefix}gdb >= 7.4.50
|
||||||
%endif
|
|
||||||
# While gcc doesn't include statically linked binaries, during testing
|
# While gcc doesn't include statically linked binaries, during testing
|
||||||
# -static is used several times.
|
# -static is used several times.
|
||||||
BuildRequires: glibc-static
|
BuildRequires: glibc-static
|
||||||
@ -351,6 +352,10 @@ Patch11: gcc12-d-shared-libphobos.patch
|
|||||||
Patch12: gcc12-pr107468.patch
|
Patch12: gcc12-pr107468.patch
|
||||||
Patch15: gcc12-static-libquadmath.patch
|
Patch15: gcc12-static-libquadmath.patch
|
||||||
Patch16: gcc12-FMA-chains.patch
|
Patch16: gcc12-FMA-chains.patch
|
||||||
|
Patch17: gcc12-pr113960.patch
|
||||||
|
Patch18: gcc12-vector-merge-1.patch
|
||||||
|
Patch19: gcc12-vector-merge-2.patch
|
||||||
|
Patch20: gcc12-vector-merge-3.patch
|
||||||
|
|
||||||
Patch100: gcc12-fortran-fdec-duplicates.patch
|
Patch100: gcc12-fortran-fdec-duplicates.patch
|
||||||
Patch101: gcc12-fortran-flogical-as-integer.patch
|
Patch101: gcc12-fortran-flogical-as-integer.patch
|
||||||
@ -731,6 +736,10 @@ so that there cannot be any synchronization problems.
|
|||||||
%patch12 -p0 -b .pr107468~
|
%patch12 -p0 -b .pr107468~
|
||||||
%patch15 -p0 -b .static-libquadmath~
|
%patch15 -p0 -b .static-libquadmath~
|
||||||
%patch16 -p1 -b .fma~
|
%patch16 -p1 -b .fma~
|
||||||
|
%patch17 -p1 -b .pr113960~
|
||||||
|
%patch18 -p1 -b .vector-merge-1~
|
||||||
|
%patch19 -p1 -b .vector-merge-2~
|
||||||
|
%patch20 -p1 -b .vector-merge-3~
|
||||||
|
|
||||||
%if 0%{?rhel} >= 6
|
%if 0%{?rhel} >= 6
|
||||||
%patch100 -p1 -b .fortran-fdec-duplicates~
|
%patch100 -p1 -b .fortran-fdec-duplicates~
|
||||||
@ -2990,6 +2999,19 @@ fi
|
|||||||
%endif
|
%endif
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Mon Jul 15 2024 Marek Polacek <polacek@redhat.com> 12.2.1-7.8
|
||||||
|
- bump NVR (RHEL-45189)
|
||||||
|
|
||||||
|
* Fri Jul 12 2024 Marek Polacek <polacek@redhat.com> 12.2.1-7.7
|
||||||
|
- fix wrong RTL patterns for vector merge high/low word, byte and short on LE (RHEL-45189)
|
||||||
|
|
||||||
|
* Wed Apr 3 2024 Marek Polacek <polacek@redhat.com> 12.2.1-7.6
|
||||||
|
- bump NVR (RHEL-31253)
|
||||||
|
|
||||||
|
* Tue Mar 26 2024 Marek Polacek <polacek@redhat.com> 12.2.1-7.5
|
||||||
|
- fix conditions for using memcmp in
|
||||||
|
std::lexicographical_compare_three_way (PR libstdc++/113960, RHEL-29952)
|
||||||
|
|
||||||
* Fri Feb 10 2023 Marek Polacek <polacek@redhat.com> 12.2.1-7.4
|
* Fri Feb 10 2023 Marek Polacek <polacek@redhat.com> 12.2.1-7.4
|
||||||
- avoid fma_chain for -march=alderlake and sapphirerapids (#2168917)
|
- avoid fma_chain for -march=alderlake and sapphirerapids (#2168917)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user