4.4.0-15
This commit is contained in:
parent
b4b081811c
commit
aefc846ec6
234
gcc44-pr40811.patch
Normal file
234
gcc44-pr40811.patch
Normal file
@@ -0,0 +1,234 @@
|
||||
2009-07-21 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
* config/i386/sse.md (vec_unpacku_float_hi_v4si): New expander.
|
||||
(vec_unpacku_float_lo_v4si): Ditto.
|
||||
|
||||
2009-07-21 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
PR target/40811
|
||||
* config/i386/sse.md (sse2_cvtudq2ps): New expander.
|
||||
* config/i386/i386.c (enum ix86_builtins): Add IX86_BUILTIN_CVTUDQ2PS.
|
||||
(builtin_description): Add __builtin_ia32_cvtudq2ps.
|
||||
(ix86_vectorize_builtin_conversion): Handle IX86_BUILTIN_CVTUDQ2PS.
|
||||
|
||||
2009-07-21 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
* gcc.target/i386/vectorize8.c: New test.
|
||||
|
||||
2009-07-21 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
PR target/40811
|
||||
* lib/target-supports.exp (check_effective_target_vect_uintfloat_cvt):
|
||||
Add i?86 and x86_64 targets.
|
||||
* gcc.target/i386/vectorize7.c: New test.
|
||||
|
||||
--- gcc/config/i386/sse.md (revision 149860)
|
||||
+++ gcc/config/i386/sse.md (revision 149862)
|
||||
@@ -2420,6 +2420,31 @@ (define_insn "sse2_cvtdq2ps"
|
||||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "mode" "V4SF")])
|
||||
|
||||
+(define_expand "sse2_cvtudq2ps"
|
||||
+ [(set (match_dup 5)
|
||||
+ (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
|
||||
+ (set (match_dup 6)
|
||||
+ (lt:V4SF (match_dup 5) (match_dup 3)))
|
||||
+ (set (match_dup 7)
|
||||
+ (and:V4SF (match_dup 6) (match_dup 4)))
|
||||
+ (set (match_operand:V4SF 0 "register_operand" "")
|
||||
+ (plus:V4SF (match_dup 5) (match_dup 7)))]
|
||||
+ "TARGET_SSE2"
|
||||
+{
|
||||
+ REAL_VALUE_TYPE TWO32r;
|
||||
+ rtx x;
|
||||
+ int i;
|
||||
+
|
||||
+ real_ldexp (&TWO32r, &dconst1, 32);
|
||||
+ x = const_double_from_real_value (TWO32r, SFmode);
|
||||
+
|
||||
+ operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
|
||||
+ operands[4] = force_reg (V4SFmode, ix86_build_const_vector (SFmode, 1, x));
|
||||
+
|
||||
+ for (i = 5; i < 8; i++)
|
||||
+ operands[i] = gen_reg_rtx (V4SFmode);
|
||||
+})
|
||||
+
|
||||
(define_insn "avx_cvtps2dq<avxmodesuffix>"
|
||||
[(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
|
||||
(unspec:AVXMODEDCVTPS2DQ
|
||||
@@ -2945,6 +2970,71 @@ (define_expand "vec_unpacks_float_lo_v4s
|
||||
(parallel [(const_int 0) (const_int 1)]))))]
|
||||
"TARGET_SSE2")
|
||||
|
||||
+(define_expand "vec_unpacku_float_hi_v4si"
|
||||
+ [(set (match_dup 5)
|
||||
+ (vec_select:V4SI
|
||||
+ (match_operand:V4SI 1 "nonimmediate_operand" "")
|
||||
+ (parallel [(const_int 2)
|
||||
+ (const_int 3)
|
||||
+ (const_int 2)
|
||||
+ (const_int 3)])))
|
||||
+ (set (match_dup 6)
|
||||
+ (float:V2DF
|
||||
+ (vec_select:V2SI
|
||||
+ (match_dup 5)
|
||||
+ (parallel [(const_int 0) (const_int 1)]))))
|
||||
+ (set (match_dup 7)
|
||||
+ (lt:V2DF (match_dup 6) (match_dup 3)))
|
||||
+ (set (match_dup 8)
|
||||
+ (and:V2DF (match_dup 7) (match_dup 4)))
|
||||
+ (set (match_operand:V2DF 0 "register_operand" "")
|
||||
+ (plus:V2DF (match_dup 6) (match_dup 8)))]
|
||||
+ "TARGET_SSE2"
|
||||
+{
|
||||
+ REAL_VALUE_TYPE TWO32r;
|
||||
+ rtx x;
|
||||
+ int i;
|
||||
+
|
||||
+ real_ldexp (&TWO32r, &dconst1, 32);
|
||||
+ x = const_double_from_real_value (TWO32r, DFmode);
|
||||
+
|
||||
+ operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
|
||||
+ operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
|
||||
+
|
||||
+ operands[5] = gen_reg_rtx (V4SImode);
|
||||
+
|
||||
+ for (i = 6; i < 9; i++)
|
||||
+ operands[i] = gen_reg_rtx (V2DFmode);
|
||||
+})
|
||||
+
|
||||
+(define_expand "vec_unpacku_float_lo_v4si"
|
||||
+ [(set (match_dup 5)
|
||||
+ (float:V2DF
|
||||
+ (vec_select:V2SI
|
||||
+ (match_operand:V4SI 1 "nonimmediate_operand" "")
|
||||
+ (parallel [(const_int 0) (const_int 1)]))))
|
||||
+ (set (match_dup 6)
|
||||
+ (lt:V2DF (match_dup 5) (match_dup 3)))
|
||||
+ (set (match_dup 7)
|
||||
+ (and:V2DF (match_dup 6) (match_dup 4)))
|
||||
+ (set (match_operand:V2DF 0 "register_operand" "")
|
||||
+ (plus:V2DF (match_dup 5) (match_dup 7)))]
|
||||
+ "TARGET_SSE2"
|
||||
+{
|
||||
+ REAL_VALUE_TYPE TWO32r;
|
||||
+ rtx x;
|
||||
+ int i;
|
||||
+
|
||||
+ real_ldexp (&TWO32r, &dconst1, 32);
|
||||
+ x = const_double_from_real_value (TWO32r, DFmode);
|
||||
+
|
||||
+ operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
|
||||
+ operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
|
||||
+
|
||||
+ for (i = 5; i < 8; i++)
|
||||
+ operands[i] = gen_reg_rtx (V2DFmode);
|
||||
+})
|
||||
+
|
||||
(define_expand "vec_pack_trunc_v2df"
|
||||
[(match_operand:V4SF 0 "register_operand" "")
|
||||
(match_operand:V2DF 1 "nonimmediate_operand" "")
|
||||
--- gcc/config/i386/i386.c (revision 149860)
|
||||
+++ gcc/config/i386/i386.c (revision 149862)
|
||||
@@ -20908,6 +20908,8 @@ enum ix86_builtins
|
||||
IX86_BUILTIN_CPYSGNPS,
|
||||
IX86_BUILTIN_CPYSGNPD,
|
||||
|
||||
+ IX86_BUILTIN_CVTUDQ2PS,
|
||||
+
|
||||
/* SSE5 instructions */
|
||||
IX86_BUILTIN_FMADDSS,
|
||||
IX86_BUILTIN_FMADDSD,
|
||||
@@ -21785,6 +21787,7 @@ static const struct builtin_description
|
||||
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
|
||||
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
|
||||
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
|
||||
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
|
||||
|
||||
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
|
||||
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
|
||||
@@ -25962,9 +25965,7 @@ ix86_veclibabi_acml (enum built_in_funct
|
||||
static tree
|
||||
ix86_vectorize_builtin_conversion (unsigned int code, tree type)
|
||||
{
|
||||
- if (TREE_CODE (type) != VECTOR_TYPE
|
||||
- /* There are only conversions from/to signed integers. */
|
||||
- || TYPE_UNSIGNED (TREE_TYPE (type)))
|
||||
+ if (TREE_CODE (type) != VECTOR_TYPE)
|
||||
return NULL_TREE;
|
||||
|
||||
switch (code)
|
||||
@@ -25973,7 +25974,9 @@ ix86_vectorize_builtin_conversion (unsig
|
||||
switch (TYPE_MODE (type))
|
||||
{
|
||||
case V4SImode:
|
||||
- return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
|
||||
+ return TYPE_UNSIGNED (type)
|
||||
+ ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
|
||||
+ : ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
|
||||
default:
|
||||
return NULL_TREE;
|
||||
}
|
||||
@@ -25982,7 +25985,9 @@ ix86_vectorize_builtin_conversion (unsig
|
||||
switch (TYPE_MODE (type))
|
||||
{
|
||||
case V4SImode:
|
||||
- return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
|
||||
+ return TYPE_UNSIGNED (type)
|
||||
+ ? NULL_TREE
|
||||
+ : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
|
||||
default:
|
||||
return NULL_TREE;
|
||||
}
|
||||
--- gcc/testsuite/gcc.target/i386/vectorize8.c (revision 0)
|
||||
+++ gcc/testsuite/gcc.target/i386/vectorize8.c (revision 149862)
|
||||
@@ -0,0 +1,16 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
|
||||
+
|
||||
+unsigned int a[256];
|
||||
+double b[256];
|
||||
+
|
||||
+void foo(void)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ for (i=0; i<256; ++i)
|
||||
+ b[i] = a[i];
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "cvtdq2pd" } } */
|
||||
+
|
||||
--- gcc/testsuite/gcc.target/i386/vectorize7.c (revision 0)
|
||||
+++ gcc/testsuite/gcc.target/i386/vectorize7.c (revision 149862)
|
||||
@@ -0,0 +1,15 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
|
||||
+
|
||||
+unsigned int a[256];
|
||||
+float b[256];
|
||||
+
|
||||
+void foo(void)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ for (i=0; i<256; ++i)
|
||||
+ b[i] = a[i];
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "cvtdq2ps" } } */
|
||||
--- gcc/testsuite/lib/target-supports.exp (revision 149860)
|
||||
+++ gcc/testsuite/lib/target-supports.exp (revision 149862)
|
||||
@@ -1399,8 +1399,10 @@ proc check_effective_target_vect_uintflo
|
||||
verbose "check_effective_target_vect_uintfloat_cvt: using cached result" 2
|
||||
} else {
|
||||
set et_vect_uintfloat_cvt_saved 0
|
||||
- if { ([istarget powerpc*-*-*]
|
||||
- && ![istarget powerpc-*-linux*paired*]) } {
|
||||
+ if { [istarget i?86-*-*]
|
||||
+ || ([istarget powerpc*-*-*]
|
||||
+ && ![istarget powerpc-*-linux*paired*])
|
||||
+ || [istarget x86_64-*-*] } {
|
||||
set et_vect_uintfloat_cvt_saved 1
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user