From aefc846ec69ee89bee8aee9e53aceabfb10869dd Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 21 Jul 2009 16:55:31 +0000 Subject: [PATCH] 4.4.0-15 --- gcc44-pr40811.patch | 234 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 234 insertions(+) create mode 100644 gcc44-pr40811.patch diff --git a/gcc44-pr40811.patch b/gcc44-pr40811.patch new file mode 100644 index 0000000..d796820 --- /dev/null +++ b/gcc44-pr40811.patch @@ -0,0 +1,234 @@ +2009-07-21 Uros Bizjak + + * config/i386/sse.md (vec_unpacku_float_hi_v4si): New expander. + (vec_unpacku_float_lo_v4si): Ditto. + +2009-07-21 Uros Bizjak + + PR target/40811 + * config/i386/sse.md (sse2_cvtudq2ps): New expander. + * config/i386/i386.c (enum ix86_builtins): Add IX86_BUILTIN_CVTUDQ2PS. + (builtin_description): Add __builtin_ia32_cvtudq2ps. + (ix86_vectorize_builtin_conversion): Handle IX86_BUILTIN_CVTUDQ2PS. + +2009-07-21 Uros Bizjak + + * gcc.target/i386/vectorize8.c: New test. + +2009-07-21 Uros Bizjak + + PR target/40811 + * lib/target-supports.exp (check_effective_target_vect_uintfloat_cvt): + Add i?86 and x86_64 targets. + * gcc.target/i386/vectorize7.c: New test. 
+ +--- gcc/config/i386/sse.md (revision 149860) ++++ gcc/config/i386/sse.md (revision 149862) +@@ -2420,6 +2420,31 @@ (define_insn "sse2_cvtdq2ps" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) + ++(define_expand "sse2_cvtudq2ps" ++ [(set (match_dup 5) ++ (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" ""))) ++ (set (match_dup 6) ++ (lt:V4SF (match_dup 5) (match_dup 3))) ++ (set (match_dup 7) ++ (and:V4SF (match_dup 6) (match_dup 4))) ++ (set (match_operand:V4SF 0 "register_operand" "") ++ (plus:V4SF (match_dup 5) (match_dup 7)))] ++ "TARGET_SSE2" ++{ ++ REAL_VALUE_TYPE TWO32r; ++ rtx x; ++ int i; ++ ++ real_ldexp (&TWO32r, &dconst1, 32); ++ x = const_double_from_real_value (TWO32r, SFmode); ++ ++ operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode)); ++ operands[4] = force_reg (V4SFmode, ix86_build_const_vector (SFmode, 1, x)); ++ ++ for (i = 5; i < 8; i++) ++ operands[i] = gen_reg_rtx (V4SFmode); ++}) ++ + (define_insn "avx_cvtps2dq" + [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x") + (unspec:AVXMODEDCVTPS2DQ +@@ -2945,6 +2970,71 @@ (define_expand "vec_unpacks_float_lo_v4s + (parallel [(const_int 0) (const_int 1)]))))] + "TARGET_SSE2") + ++(define_expand "vec_unpacku_float_hi_v4si" ++ [(set (match_dup 5) ++ (vec_select:V4SI ++ (match_operand:V4SI 1 "nonimmediate_operand" "") ++ (parallel [(const_int 2) ++ (const_int 3) ++ (const_int 2) ++ (const_int 3)]))) ++ (set (match_dup 6) ++ (float:V2DF ++ (vec_select:V2SI ++ (match_dup 5) ++ (parallel [(const_int 0) (const_int 1)])))) ++ (set (match_dup 7) ++ (lt:V2DF (match_dup 6) (match_dup 3))) ++ (set (match_dup 8) ++ (and:V2DF (match_dup 7) (match_dup 4))) ++ (set (match_operand:V2DF 0 "register_operand" "") ++ (plus:V2DF (match_dup 6) (match_dup 8)))] ++ "TARGET_SSE2" ++{ ++ REAL_VALUE_TYPE TWO32r; ++ rtx x; ++ int i; ++ ++ real_ldexp (&TWO32r, &dconst1, 32); ++ x = const_double_from_real_value (TWO32r, DFmode); ++ ++ operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode)); 
++ operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x)); ++ ++ operands[5] = gen_reg_rtx (V4SImode); ++ ++ for (i = 6; i < 9; i++) ++ operands[i] = gen_reg_rtx (V2DFmode); ++}) ++ ++(define_expand "vec_unpacku_float_lo_v4si" ++ [(set (match_dup 5) ++ (float:V2DF ++ (vec_select:V2SI ++ (match_operand:V4SI 1 "nonimmediate_operand" "") ++ (parallel [(const_int 0) (const_int 1)])))) ++ (set (match_dup 6) ++ (lt:V2DF (match_dup 5) (match_dup 3))) ++ (set (match_dup 7) ++ (and:V2DF (match_dup 6) (match_dup 4))) ++ (set (match_operand:V2DF 0 "register_operand" "") ++ (plus:V2DF (match_dup 5) (match_dup 7)))] ++ "TARGET_SSE2" ++{ ++ REAL_VALUE_TYPE TWO32r; ++ rtx x; ++ int i; ++ ++ real_ldexp (&TWO32r, &dconst1, 32); ++ x = const_double_from_real_value (TWO32r, DFmode); ++ ++ operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode)); ++ operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x)); ++ ++ for (i = 5; i < 8; i++) ++ operands[i] = gen_reg_rtx (V2DFmode); ++}) ++ + (define_expand "vec_pack_trunc_v2df" + [(match_operand:V4SF 0 "register_operand" "") + (match_operand:V2DF 1 "nonimmediate_operand" "") +--- gcc/config/i386/i386.c (revision 149860) ++++ gcc/config/i386/i386.c (revision 149862) +@@ -20908,6 +20908,8 @@ enum ix86_builtins + IX86_BUILTIN_CPYSGNPS, + IX86_BUILTIN_CPYSGNPD, + ++ IX86_BUILTIN_CVTUDQ2PS, ++ + /* SSE5 instructions */ + IX86_BUILTIN_FMADDSS, + IX86_BUILTIN_FMADDSD, +@@ -21785,6 +21787,7 @@ static const struct builtin_description + { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI }, ++ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", 
IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF }, +@@ -25962,9 +25965,7 @@ ix86_veclibabi_acml (enum built_in_funct + static tree + ix86_vectorize_builtin_conversion (unsigned int code, tree type) + { +- if (TREE_CODE (type) != VECTOR_TYPE +- /* There are only conversions from/to signed integers. */ +- || TYPE_UNSIGNED (TREE_TYPE (type))) ++ if (TREE_CODE (type) != VECTOR_TYPE) + return NULL_TREE; + + switch (code) +@@ -25973,7 +25974,9 @@ ix86_vectorize_builtin_conversion (unsig + switch (TYPE_MODE (type)) + { + case V4SImode: +- return ix86_builtins[IX86_BUILTIN_CVTDQ2PS]; ++ return TYPE_UNSIGNED (type) ++ ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS] ++ : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]; + default: + return NULL_TREE; + } +@@ -25982,7 +25985,9 @@ ix86_vectorize_builtin_conversion (unsig + switch (TYPE_MODE (type)) + { + case V4SImode: +- return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]; ++ return TYPE_UNSIGNED (type) ++ ? 
NULL_TREE ++ : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]; + default: + return NULL_TREE; + } +--- gcc/testsuite/gcc.target/i386/vectorize8.c (revision 0) ++++ gcc/testsuite/gcc.target/i386/vectorize8.c (revision 149862) +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -msse2" } */ ++ ++unsigned int a[256]; ++double b[256]; ++ ++void foo(void) ++{ ++ int i; ++ ++ for (i=0; i<256; ++i) ++ b[i] = a[i]; ++} ++ ++/* { dg-final { scan-assembler "cvtdq2pd" } } */ ++ +--- gcc/testsuite/gcc.target/i386/vectorize7.c (revision 0) ++++ gcc/testsuite/gcc.target/i386/vectorize7.c (revision 149862) +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -msse2" } */ ++ ++unsigned int a[256]; ++float b[256]; ++ ++void foo(void) ++{ ++ int i; ++ ++ for (i=0; i<256; ++i) ++ b[i] = a[i]; ++} ++ ++/* { dg-final { scan-assembler "cvtdq2ps" } } */ +--- gcc/testsuite/lib/target-supports.exp (revision 149860) ++++ gcc/testsuite/lib/target-supports.exp (revision 149862) +@@ -1399,8 +1399,10 @@ proc check_effective_target_vect_uintflo + verbose "check_effective_target_vect_uintfloat_cvt: using cached result" 2 + } else { + set et_vect_uintfloat_cvt_saved 0 +- if { ([istarget powerpc*-*-*] +- && ![istarget powerpc-*-linux*paired*]) } { ++ if { [istarget i?86-*-*] ++ || ([istarget powerpc*-*-*] ++ && ![istarget powerpc-*-linux*paired*]) ++ || [istarget x86_64-*-*] } { + set et_vect_uintfloat_cvt_saved 1 + } + }