1370 lines
49 KiB
Diff
1370 lines
49 KiB
Diff
|
2009-03-13 Michael Meissner <meissner@linux.vnet.ibm.com>
|
|||
|
|
|||
|
PR target/39457
|
|||
|
* config/rs6000/rs6000.opt (-mdisallow-float-in-lr-ctr): Add
|
|||
|
temporary debug switch.
|
|||
|
|
|||
|
* config/rs6000/rs6000.c (rs6000_hard_regno_mode_ok): Revert
|
|||
|
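behavior of disallowing floating point values in the LR/CTR/MQ registers, unless VSX is enabled or -mdisallow-float-in-lr-ctr is used.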
|
|||
|
|
|||
|
2009-03-13 Michael Meissner <meissner@linux.vnet.ibm.com>
|
|||
|
|
|||
|
* config/rs6000/vector.md (vec_extract_evenv2df): Delete, insn
|
|||
|
causes problems in building spec 2006.
|
|||
|
(vec_extract_oddv2df): Ditto.
|
|||
|
(vec_pack_trunc_v2df): New expanders for VSX vectorized
|
|||
|
conversions.
|
|||
|
(vec_pack_sfix_trunc_v2df): Ditto.
|
|||
|
(vec_pack_ufix_trunc_v2df): Ditto.
|
|||
|
(vec_unpacks_hi_v4sf): Ditto.
|
|||
|
(vec_unpacks_lo_v4sf): Ditto.
|
|||
|
(vec_unpacks_float_hi_v4si): Ditto.
|
|||
|
(vec_unpacks_float_lo_v4si): Ditto.
|
|||
|
(vec_unpacku_float_hi_v4si): Ditto.
|
|||
|
(vec_unpacku_float_lo_v4si): Ditto.
|
|||
|
|
|||
|
* config/rs6000/rs6000-protos.h (rs6000_vector_secondary_reload):
|
|||
|
Declaration for new target hook.
|
|||
|
|
|||
|
* config/rs6000/rs6000.c (TARGET_SECONDARY_RELOAD): Add new target
|
|||
|
hook for eventually fixing up the memory references for Altivec
|
|||
|
and VSX reloads to be reg+reg instead of reg+offset. Right now,
|
|||
|
this is a stub function that prints debug information if
|
|||
|
-mdebug=addr and then calls default_secondary_reload.
|
|||
|
(rs6000_secondary_reload): Ditto.
|
|||
|
(rs6000_vector_secondary_reload): Ditto.
|
|||
|
(rs6000_builtin_conversion): Add support for V2DI/V2DF
|
|||
|
conversions.
|
|||
|
(rs6000_legitimate_offset_address_p): Test for the vector unit
|
|||
|
doing the memory references.
|
|||
|
(rs6000_legitimize_reload_address): Ditto.
|
|||
|
(rs6000_legitimize_address): Print extra \n if -mdebug=addr.
|
|||
|
(rs6000_legitimize_reload_address): Ditto.
|
|||
|
(rs6000_legitimate_address): Ditto.
|
|||
|
(rs6000_mode_dependent_address): Ditto.
|
|||
|
(bdesc_2arg): Add VSX builtins.
|
|||
|
(bdesc_abs): Ditto.
|
|||
|
(bdesc_1arg): Ditto.
|
|||
|
(altivec_init_builtins): Ditto.
|
|||
|
(rs6000_secondary_memory_needed_rtx): Add debug support if
|
|||
|
-mdebug=addr.
|
|||
|
(rs6000_preferred_reload_class): Ditto.
|
|||
|
(rs6000_secondary_memory_needed): Ditto.
|
|||
|
(rs6000_secondary_reload_class): Ditto.
|
|||
|
(rs6000_cannot_change_mode_class): Ditto.
|
|||
|
|
|||
|
* config/rs6000/vsx.md (UNSPEC_VSX_*): Add unspecs for VSX
|
|||
|
conversions.
|
|||
|
(vsx_nabs<mode>): Add generator function.
|
|||
|
(vsx_float<VSi><mode>2): Ditto.
|
|||
|
(vsx_floatuns<VSi><mode>2): Ditto.
|
|||
|
(vsx_xxmrghw): Ditto.
|
|||
|
(vsx_xxmrglw): Ditto.
|
|||
|
(vsx_xvcvdpsp): New VSX vector conversion insn.
|
|||
|
(vsx_xvcvdpsxws): Ditto.
|
|||
|
(vsx_xvcvdpuxws): Ditto.
|
|||
|
(vsx_xvcvspdp): Ditto.
|
|||
|
(vsx_xvcvsxwdp): Ditto.
|
|||
|
(vsx_xvcvuxwdp): Ditto.
|
|||
|
(vsx_reload_*): New insns for reload support.
|
|||
|
|
|||
|
* config/rs6000/rs6000.h: Fix a comment.
|
|||
|
|
|||
|
* config/rs6000/altivec.md (altivec_reload_*): New insns for
|
|||
|
reload support.
|
|||
|
|
|||
|
* config/rs6000/rs6000.md (ptrsize): New mode attribute for the
|
|||
|
pointer size.
|
|||
|
|
|||
|
2009-03-10 Michael Meissner <meissner@linux.vnet.ibm.com>
|
|||
|
|
|||
|
* config/rs6000/vsx.md (vsx_concat_v2df): Add explicit 'f'
|
|||
|
register class for scalar data, correct uses of the xxpermdi
|
|||
|
instruction.
|
|||
|
(vsx_set_v2df): Ditto.
|
|||
|
(vsx_extract_v2df): Ditto.
|
|||
|
(vsx_xxpermdi): Ditto.
|
|||
|
(vsx_splatv2df): Ditto.
|
|||
|
(vsx_xxmrghw): Use wf instead of v constraints.
|
|||
|
(vsx_xxmrglw): Ditto.
|
|||
|
testsuite/
|
|||
|
2009-03-13 Michael Meissner <meissner@linux.vnet.ibm.com>
|
|||
|
|
|||
|
PR target/39457
|
|||
|
* gcc.target/powerpc/pr39457.c: New test for PR39457.
|
|||
|
|
|||
|
2009-03-13 Michael Meissner <meissner@linux.vnet.ibm.com>
|
|||
|
|
|||
|
* gcc.target/powerpc/vsx-builtin-1.c: New test for builtins.
|
|||
|
* gcc.target/powerpc/vsx-builtin-2.c: Ditto.
|
|||
|
|
|||
|
--- gcc/config/rs6000/vector.md (revision 144758)
|
|||
|
+++ gcc/config/rs6000/vector.md (revision 144843)
|
|||
|
@@ -496,23 +496,122 @@ (define_expand "vec_interleave_lowv2df"
|
|||
|
"VECTOR_UNIT_VSX_P (V2DFmode)"
|
|||
|
"")
|
|||
|
|
|||
|
-;; For 2 element vectors, even/odd is the same as high/low
|
|||
|
-(define_expand "vec_extract_evenv2df"
|
|||
|
- [(set (match_operand:V2DF 0 "vfloat_operand" "")
|
|||
|
- (vec_concat:V2DF
|
|||
|
- (vec_select:DF (match_operand:V2DF 1 "vfloat_operand" "")
|
|||
|
- (parallel [(const_int 0)]))
|
|||
|
- (vec_select:DF (match_operand:V2DF 2 "vfloat_operand" "")
|
|||
|
- (parallel [(const_int 0)]))))]
|
|||
|
- "VECTOR_UNIT_VSX_P (V2DFmode)"
|
|||
|
- "")
|
|||
|
+
|
|||
|
+;; Convert double word types to single word types
|
|||
|
+(define_expand "vec_pack_trunc_v2df"
|
|||
|
+ [(match_operand:V4SF 0 "vsx_register_operand" "")
|
|||
|
+ (match_operand:V2DF 1 "vsx_register_operand" "")
|
|||
|
+ (match_operand:V2DF 2 "vsx_register_operand" "")]
|
|||
|
+ "VECTOR_UNIT_VSX_P (V2DFmode) && TARGET_ALTIVEC"
|
|||
|
+{
|
|||
|
+ rtx r1 = gen_reg_rtx (V4SFmode);
|
|||
|
+ rtx r2 = gen_reg_rtx (V4SFmode);
|
|||
|
|
|||
|
-(define_expand "vec_extract_oddv2df"
|
|||
|
- [(set (match_operand:V2DF 0 "vfloat_operand" "")
|
|||
|
- (vec_concat:V2DF
|
|||
|
- (vec_select:DF (match_operand:V2DF 1 "vfloat_operand" "")
|
|||
|
- (parallel [(const_int 1)]))
|
|||
|
- (vec_select:DF (match_operand:V2DF 2 "vfloat_operand" "")
|
|||
|
- (parallel [(const_int 1)]))))]
|
|||
|
- "VECTOR_UNIT_VSX_P (V2DFmode)"
|
|||
|
- "")
|
|||
|
+ emit_insn (gen_vsx_xvcvdpsp (r1, operands[1]));
|
|||
|
+ emit_insn (gen_vsx_xvcvdpsp (r2, operands[2]));
|
|||
|
+ emit_insn (gen_vec_extract_evenv4sf (operands[0], r1, r2));
|
|||
|
+ DONE;
|
|||
|
+})
|
|||
|
+
|
|||
|
+(define_expand "vec_pack_sfix_trunc_v2df"
|
|||
|
+ [(match_operand:V4SI 0 "vsx_register_operand" "")
|
|||
|
+ (match_operand:V2DF 1 "vsx_register_operand" "")
|
|||
|
+ (match_operand:V2DF 2 "vsx_register_operand" "")]
|
|||
|
+ "VECTOR_UNIT_VSX_P (V2DFmode) && TARGET_ALTIVEC"
|
|||
|
+{
|
|||
|
+ rtx r1 = gen_reg_rtx (V4SImode);
|
|||
|
+ rtx r2 = gen_reg_rtx (V4SImode);
|
|||
|
+
|
|||
|
+ emit_insn (gen_vsx_xvcvdpsxws (r1, operands[1]));
|
|||
|
+ emit_insn (gen_vsx_xvcvdpsxws (r2, operands[2]));
|
|||
|
+ emit_insn (gen_vec_extract_evenv4si (operands[0], r1, r2));
|
|||
|
+ DONE;
|
|||
|
+})
|
|||
|
+
|
|||
|
+(define_expand "vec_pack_ufix_trunc_v2df"
|
|||
|
+ [(match_operand:V4SI 0 "vsx_register_operand" "")
|
|||
|
+ (match_operand:V2DF 1 "vsx_register_operand" "")
|
|||
|
+ (match_operand:V2DF 2 "vsx_register_operand" "")]
|
|||
|
+ "VECTOR_UNIT_VSX_P (V2DFmode) && TARGET_ALTIVEC"
|
|||
|
+{
|
|||
|
+ rtx r1 = gen_reg_rtx (V4SImode);
|
|||
|
+ rtx r2 = gen_reg_rtx (V4SImode);
|
|||
|
+
|
|||
|
+ emit_insn (gen_vsx_xvcvdpuxws (r1, operands[1]));
|
|||
|
+ emit_insn (gen_vsx_xvcvdpuxws (r2, operands[2]));
|
|||
|
+ emit_insn (gen_vec_extract_evenv4si (operands[0], r1, r2));
|
|||
|
+ DONE;
|
|||
|
+})
|
|||
|
+
|
|||
|
+;; Convert single word types to double word
|
|||
|
+(define_expand "vec_unpacks_hi_v4sf"
|
|||
|
+ [(match_operand:V2DF 0 "vsx_register_operand" "")
|
|||
|
+ (match_operand:V4SF 1 "vsx_register_operand" "")]
|
|||
|
+ "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)"
|
|||
|
+{
|
|||
|
+ rtx reg = gen_reg_rtx (V4SFmode);
|
|||
|
+
|
|||
|
+ emit_insn (gen_vec_interleave_highv4sf (reg, operands[1], operands[1]));
|
|||
|
+ emit_insn (gen_vsx_xvcvspdp (operands[0], reg));
|
|||
|
+ DONE;
|
|||
|
+})
|
|||
|
+
|
|||
|
+(define_expand "vec_unpacks_lo_v4sf"
|
|||
|
+ [(match_operand:V2DF 0 "vsx_register_operand" "")
|
|||
|
+ (match_operand:V4SF 1 "vsx_register_operand" "")]
|
|||
|
+ "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)"
|
|||
|
+{
|
|||
|
+ rtx reg = gen_reg_rtx (V4SFmode);
|
|||
|
+
|
|||
|
+ emit_insn (gen_vec_interleave_lowv4sf (reg, operands[1], operands[1]));
|
|||
|
+ emit_insn (gen_vsx_xvcvspdp (operands[0], reg));
|
|||
|
+ DONE;
|
|||
|
+})
|
|||
|
+
|
|||
|
+(define_expand "vec_unpacks_float_hi_v4si"
|
|||
|
+ [(match_operand:V2DF 0 "vsx_register_operand" "")
|
|||
|
+ (match_operand:V4SI 1 "vsx_register_operand" "")]
|
|||
|
+ "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SImode)"
|
|||
|
+{
|
|||
|
+ rtx reg = gen_reg_rtx (V4SImode);
|
|||
|
+
|
|||
|
+ emit_insn (gen_vec_interleave_highv4si (reg, operands[1], operands[1]));
|
|||
|
+ emit_insn (gen_vsx_xvcvsxwdp (operands[0], reg));
|
|||
|
+ DONE;
|
|||
|
+})
|
|||
|
+
|
|||
|
+(define_expand "vec_unpacks_float_lo_v4si"
|
|||
|
+ [(match_operand:V2DF 0 "vsx_register_operand" "")
|
|||
|
+ (match_operand:V4SI 1 "vsx_register_operand" "")]
|
|||
|
+ "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SImode)"
|
|||
|
+{
|
|||
|
+ rtx reg = gen_reg_rtx (V4SImode);
|
|||
|
+
|
|||
|
+ emit_insn (gen_vec_interleave_lowv4si (reg, operands[1], operands[1]));
|
|||
|
+ emit_insn (gen_vsx_xvcvsxwdp (operands[0], reg));
|
|||
|
+ DONE;
|
|||
|
+})
|
|||
|
+
|
|||
|
+(define_expand "vec_unpacku_float_hi_v4si"
|
|||
|
+ [(match_operand:V2DF 0 "vsx_register_operand" "")
|
|||
|
+ (match_operand:V4SI 1 "vsx_register_operand" "")]
|
|||
|
+ "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SImode)"
|
|||
|
+{
|
|||
|
+ rtx reg = gen_reg_rtx (V4SImode);
|
|||
|
+
|
|||
|
+ emit_insn (gen_vec_interleave_highv4si (reg, operands[1], operands[1]));
|
|||
|
+ emit_insn (gen_vsx_xvcvuxwdp (operands[0], reg));
|
|||
|
+ DONE;
|
|||
|
+})
|
|||
|
+
|
|||
|
+(define_expand "vec_unpacku_float_lo_v4si"
|
|||
|
+ [(match_operand:V2DF 0 "vsx_register_operand" "")
|
|||
|
+ (match_operand:V4SI 1 "vsx_register_operand" "")]
|
|||
|
+ "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SImode)"
|
|||
|
+{
|
|||
|
+ rtx reg = gen_reg_rtx (V4SImode);
|
|||
|
+
|
|||
|
+ emit_insn (gen_vec_interleave_lowv4si (reg, operands[1], operands[1]));
|
|||
|
+ emit_insn (gen_vsx_xvcvuxwdp (operands[0], reg));
|
|||
|
+ DONE;
|
|||
|
+})
|
|||
|
--- gcc/config/rs6000/rs6000-protos.h (revision 144758)
|
|||
|
+++ gcc/config/rs6000/rs6000-protos.h (revision 144843)
|
|||
|
@@ -72,6 +72,7 @@ extern bool rs6000_secondary_memory_need
|
|||
|
extern bool rs6000_cannot_change_mode_class (enum machine_mode,
|
|||
|
enum machine_mode,
|
|||
|
enum reg_class);
|
|||
|
+extern void rs6000_vector_secondary_reload (rtx, rtx, rtx, bool);
|
|||
|
extern int paired_emit_vector_cond_expr (rtx, rtx, rtx,
|
|||
|
rtx, rtx, rtx);
|
|||
|
extern void paired_expand_vector_move (rtx operands[]);
|
|||
|
--- gcc/config/rs6000/rs6000.opt (revision 144845)
|
|||
|
+++ gcc/config/rs6000/rs6000.opt (revision 144857)
|
|||
|
@@ -139,6 +139,9 @@ mvsx-scalar-memory
|
|||
|
Target Report Var(TARGET_VSX_SCALAR_MEMORY)
|
|||
|
If -mvsx, use VSX scalar memory reference instructions for scalar double (off by default)
|
|||
|
|
|||
|
+mdisallow-float-in-lr-ctr
|
|||
|
+Target Undocumented Var(TARGET_DISALLOW_FLOAT_IN_LR_CTR) Init(-1)
|
|||
|
+
|
|||
|
mupdate
|
|||
|
Target Report Var(TARGET_UPDATE) Init(1)
|
|||
|
Generate load/store with update instructions
|
|||
|
--- gcc/config/rs6000/rs6000.c (revision 144758)
|
|||
|
+++ gcc/config/rs6000/rs6000.c (revision 144843)
|
|||
|
@@ -1004,6 +1004,10 @@ static rtx rs6000_emit_vector_compare (e
|
|||
|
enum machine_mode);
|
|||
|
static tree rs6000_stack_protect_fail (void);
|
|||
|
|
|||
|
+static enum reg_class rs6000_secondary_reload (bool, rtx, enum reg_class,
|
|||
|
+ enum machine_mode,
|
|||
|
+ struct secondary_reload_info *);
|
|||
|
+
|
|||
|
const int INSN_NOT_AVAILABLE = -1;
|
|||
|
static enum machine_mode rs6000_eh_return_filter_mode (void);
|
|||
|
|
|||
|
@@ -1333,6 +1337,9 @@ static const char alt_reg_names[][8] =
|
|||
|
#undef TARGET_INSTANTIATE_DECLS
|
|||
|
#define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
|
|||
|
|
|||
|
+#undef TARGET_SECONDARY_RELOAD
|
|||
|
+#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
|
|||
|
+
|
|||
|
struct gcc_target targetm = TARGET_INITIALIZER;
|
|||
|
|
|||
|
/* Return number of consecutive hard regs needed starting at reg REGNO
|
|||
|
@@ -1448,10 +1448,16 @@ rs6000_hard_regno_mode_ok (int regno, en
|
|||
|
if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
|
|||
|
return 1;
|
|||
|
|
|||
|
- /* Don't allow anything but word sized integers (aka pointers) in CTR/LR. You
|
|||
|
- really don't want to spill your floating point values to those
|
|||
|
- registers. Also do it for the old MQ register in the power. */
|
|||
|
- if (regno == CTR_REGNO || regno == LR_REGNO || regno == MQ_REGNO)
|
|||
|
+ /* Don't allow anything but word sized integers (aka pointers) in CTR/LR.
|
|||
|
+ You really don't want to spill your floating point values to those
|
|||
|
+ registers. Also do it for the old MQ register in the power.
|
|||
|
+
|
|||
|
+ While this is desirable in theory, disabling float to go in LR/CTR does
|
|||
|
+ cause some regressions, so until they are taken care of, revert to the old
|
|||
|
+ behavior by default for most power systems, but enable it for power7. */
|
|||
|
+ if ((TARGET_DISALLOW_FLOAT_IN_LR_CTR > 0
|
|||
|
+ || (TARGET_DISALLOW_FLOAT_IN_LR_CTR < 0 && TARGET_VSX))
|
|||
|
+ && (regno == CTR_REGNO || regno == LR_REGNO || regno == MQ_REGNO))
|
|||
|
return (GET_MODE_CLASS (mode) == MODE_INT
|
|||
|
&& GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
|
|||
|
|
|||
|
@@ -2447,6 +2454,14 @@ rs6000_builtin_conversion (enum tree_cod
|
|||
|
case FIX_TRUNC_EXPR:
|
|||
|
switch (TYPE_MODE (type))
|
|||
|
{
|
|||
|
+ case V2DImode:
|
|||
|
+ if (!VECTOR_UNIT_VSX_P (V2DFmode))
|
|||
|
+ return NULL_TREE;
|
|||
|
+
|
|||
|
+ return TYPE_UNSIGNED (type)
|
|||
|
+ ? rs6000_builtin_decls[VSX_BUILTIN_XVCVDPUXDS]
|
|||
|
+ : rs6000_builtin_decls[VSX_BUILTIN_XVCVDPSXDS];
|
|||
|
+
|
|||
|
case V4SImode:
|
|||
|
if (VECTOR_UNIT_NONE_P (V4SImode) || VECTOR_UNIT_NONE_P (V4SFmode))
|
|||
|
return NULL_TREE;
|
|||
|
@@ -2462,6 +2477,14 @@ rs6000_builtin_conversion (enum tree_cod
     case FLOAT_EXPR:
       switch (TYPE_MODE (type))
         {
+        case V2DImode:  /* V2DI -> V2DF: use the doubleword -> DP builtins.  */
+          if (!VECTOR_UNIT_VSX_P (V2DFmode))
+            return NULL_TREE;
+
+          return TYPE_UNSIGNED (type)
+            ? rs6000_builtin_decls[VSX_BUILTIN_XVCVUXDDP]
+            : rs6000_builtin_decls[VSX_BUILTIN_XVCVSXDDP];
+
         case V4SImode:
           if (VECTOR_UNIT_NONE_P (V4SImode) || VECTOR_UNIT_NONE_P (V4SFmode))
             return NULL_TREE;
|
|||
|
@@ -2469,6 +2492,7 @@ rs6000_builtin_conversion (enum tree_cod
|
|||
|
return TYPE_UNSIGNED (type)
|
|||
|
? rs6000_builtin_decls[VECTOR_BUILTIN_UNSFLOAT_V4SI_V4SF]
|
|||
|
: rs6000_builtin_decls[VECTOR_BUILTIN_FLOAT_V4SI_V4SF];
|
|||
|
+
|
|||
|
default:
|
|||
|
return NULL_TREE;
|
|||
|
}
|
|||
|
@@ -4101,7 +4125,7 @@ rs6000_legitimate_offset_address_p (enum
|
|||
|
case V2DImode:
|
|||
|
/* AltiVec/VSX vector modes. Only reg+reg addressing is valid and
|
|||
|
constant offset zero should not occur due to canonicalization. */
|
|||
|
- if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode))
|
|||
|
+ if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
|
|||
|
return false;
|
|||
|
break;
|
|||
|
|
|||
|
@@ -4441,6 +4465,7 @@ rs6000_legitimize_address (rtx x, rtx ol
|
|||
|
}
|
|||
|
else
|
|||
|
fprintf (stderr, "NULL returned\n");
|
|||
|
+ fprintf (stderr, "\n");
|
|||
|
}
|
|||
|
|
|||
|
return ret;
|
|||
|
@@ -4776,8 +4801,7 @@ rs6000_legitimize_reload_address (rtx x,
|
|||
|
&& REG_MODE_OK_FOR_BASE_P (XEXP (x, 0), mode)
|
|||
|
&& GET_CODE (XEXP (x, 1)) == CONST_INT
|
|||
|
&& (INTVAL (XEXP (x, 1)) & 3) != 0
|
|||
|
- && !ALTIVEC_VECTOR_MODE (mode)
|
|||
|
- && !VSX_VECTOR_MODE (mode)
|
|||
|
+ && VECTOR_MEM_NONE_P (mode)
|
|||
|
&& GET_MODE_SIZE (mode) >= UNITS_PER_WORD
|
|||
|
&& TARGET_POWERPC64)
|
|||
|
{
|
|||
|
@@ -4798,8 +4822,7 @@ rs6000_legitimize_reload_address (rtx x,
|
|||
|
&& !(TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
|
|||
|
|| mode == DDmode || mode == TDmode
|
|||
|
|| mode == DImode))
|
|||
|
- && !ALTIVEC_VECTOR_MODE (mode)
|
|||
|
- && !VSX_VECTOR_MODE (mode))
|
|||
|
+ && VECTOR_MEM_NONE_P (mode))
|
|||
|
{
|
|||
|
HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
|
|||
|
HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
|
|||
|
@@ -4843,6 +4866,7 @@ rs6000_legitimize_reload_address (rtx x,
|
|||
|
/* Don't do this for TFmode or TDmode, since the result isn't
|
|||
|
offsettable. The same goes for DImode without 64-bit gprs and
|
|||
|
DFmode and DDmode without fprs. */
|
|||
|
+ && VECTOR_MEM_NONE_P (mode)
|
|||
|
&& mode != TFmode
|
|||
|
&& mode != TDmode
|
|||
|
&& (mode != DImode || TARGET_POWERPC64)
|
|||
|
@@ -4918,6 +4942,8 @@ rs6000_legitimize_reload_address (rtx x,
|
|||
|
fprintf (stderr, "New address:\n");
|
|||
|
debug_rtx (ret);
|
|||
|
}
|
|||
|
+
|
|||
|
+ fprintf (stderr, "\n");
|
|||
|
}
|
|||
|
|
|||
|
return ret;
|
|||
|
@@ -5035,6 +5061,7 @@ rs6000_legitimate_address (enum machine_
|
|||
|
GET_MODE_NAME (mode),
|
|||
|
reg_ok_strict);
|
|||
|
debug_rtx (orig_x);
|
|||
|
+ fprintf (stderr, "\n");
|
|||
|
}
|
|||
|
|
|||
|
return ret;
|
|||
|
@@ -5082,9 +5109,10 @@ rs6000_mode_dependent_address (rtx addr)
|
|||
|
if (TARGET_DEBUG_ADDR)
|
|||
|
{
|
|||
|
fprintf (stderr,
|
|||
|
- "\nrs6000_mode_dependent_address: ret = %d\n",
|
|||
|
- (int)ret);
|
|||
|
+ "\nrs6000_mode_dependent_address: ret = %s\n",
|
|||
|
+ ret ? "true" : "false");
|
|||
|
debug_rtx (addr);
|
|||
|
+ fprintf (stderr, "\n");
|
|||
|
}
|
|||
|
|
|||
|
return ret;
|
|||
|
@@ -7917,6 +7945,20 @@ static struct builtin_description bdesc_
|
|||
|
{ MASK_ALTIVEC, CODE_FOR_altivec_vsumsws, "__builtin_altivec_vsumsws", ALTIVEC_BUILTIN_VSUMSWS },
|
|||
|
{ MASK_ALTIVEC, CODE_FOR_xorv4si3, "__builtin_altivec_vxor", ALTIVEC_BUILTIN_VXOR },
|
|||
|
|
|||
|
+ { MASK_VSX, CODE_FOR_addv2df3, "__builtin_vsx_xvadddp", VSX_BUILTIN_XVADDDP },
|
|||
|
+ { MASK_VSX, CODE_FOR_subv2df3, "__builtin_vsx_xvsubdp", VSX_BUILTIN_XVSUBDP },
|
|||
|
+ { MASK_VSX, CODE_FOR_mulv2df3, "__builtin_vsx_xvmuldp", VSX_BUILTIN_XVMULDP },
|
|||
|
+ { MASK_VSX, CODE_FOR_divv2df3, "__builtin_vsx_xvdivdp", VSX_BUILTIN_XVDIVDP },
|
|||
|
+ { MASK_VSX, CODE_FOR_sminv2df3, "__builtin_vsx_xvmindp", VSX_BUILTIN_XVMINDP },
|
|||
|
+ { MASK_VSX, CODE_FOR_smaxv2df3, "__builtin_vsx_xvmaxdp", VSX_BUILTIN_XVMAXDP },
|
|||
|
+
|
|||
|
+ { MASK_VSX, CODE_FOR_addv4sf3, "__builtin_vsx_xvaddsp", VSX_BUILTIN_XVADDSP },
|
|||
|
+ { MASK_VSX, CODE_FOR_subv4sf3, "__builtin_vsx_xvsubsp", VSX_BUILTIN_XVSUBSP },
|
|||
|
+ { MASK_VSX, CODE_FOR_mulv4sf3, "__builtin_vsx_xvmulsp", VSX_BUILTIN_XVMULSP },
|
|||
|
+ { MASK_VSX, CODE_FOR_divv4sf3, "__builtin_vsx_xvdivsp", VSX_BUILTIN_XVDIVSP },
|
|||
|
+ { MASK_VSX, CODE_FOR_sminv4sf3, "__builtin_vsx_xvminsp", VSX_BUILTIN_XVMINSP },
|
|||
|
+ { MASK_VSX, CODE_FOR_smaxv4sf3, "__builtin_vsx_xvmaxsp", VSX_BUILTIN_XVMAXSP },
|
|||
|
+
|
|||
|
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_add", ALTIVEC_BUILTIN_VEC_ADD },
|
|||
|
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vaddfp", ALTIVEC_BUILTIN_VEC_VADDFP },
|
|||
|
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vadduwm", ALTIVEC_BUILTIN_VEC_VADDUWM },
|
|||
|
@@ -8288,7 +8330,11 @@ static const struct builtin_description
|
|||
|
{ MASK_ALTIVEC, CODE_FOR_absv16qi2, "__builtin_altivec_abs_v16qi", ALTIVEC_BUILTIN_ABS_V16QI },
|
|||
|
{ MASK_ALTIVEC, CODE_FOR_altivec_abss_v4si, "__builtin_altivec_abss_v4si", ALTIVEC_BUILTIN_ABSS_V4SI },
|
|||
|
{ MASK_ALTIVEC, CODE_FOR_altivec_abss_v8hi, "__builtin_altivec_abss_v8hi", ALTIVEC_BUILTIN_ABSS_V8HI },
|
|||
|
- { MASK_ALTIVEC, CODE_FOR_altivec_abss_v16qi, "__builtin_altivec_abss_v16qi", ALTIVEC_BUILTIN_ABSS_V16QI }
|
|||
|
+ { MASK_ALTIVEC, CODE_FOR_altivec_abss_v16qi, "__builtin_altivec_abss_v16qi", ALTIVEC_BUILTIN_ABSS_V16QI },
|
|||
|
+ { MASK_VSX, CODE_FOR_absv2df2, "__builtin_vsx_xvabsdp", VSX_BUILTIN_XVABSDP },
|
|||
|
+ { MASK_VSX, CODE_FOR_vsx_nabsv2df2, "__builtin_vsx_xvnabsdp", VSX_BUILTIN_XVNABSDP },
|
|||
|
+ { MASK_VSX, CODE_FOR_absv4sf2, "__builtin_vsx_xvabssp", VSX_BUILTIN_XVABSSP },
|
|||
|
+ { MASK_VSX, CODE_FOR_vsx_nabsv4sf2, "__builtin_vsx_xvnabssp", VSX_BUILTIN_XVNABSSP },
|
|||
|
};
|
|||
|
|
|||
|
/* Simple unary operations: VECb = foo (unsigned literal) or VECb =
|
|||
|
@@ -8314,6 +8360,11 @@ static struct builtin_description bdesc_
|
|||
|
{ MASK_ALTIVEC, CODE_FOR_altivec_vupklpx, "__builtin_altivec_vupklpx", ALTIVEC_BUILTIN_VUPKLPX },
|
|||
|
{ MASK_ALTIVEC, CODE_FOR_altivec_vupklsh, "__builtin_altivec_vupklsh", ALTIVEC_BUILTIN_VUPKLSH },
|
|||
|
|
|||
|
+ { MASK_VSX, CODE_FOR_negv2df2, "__builtin_vsx_xvnegdp", VSX_BUILTIN_XVNEGDP },
|
|||
|
+ { MASK_VSX, CODE_FOR_sqrtv2df2, "__builtin_vsx_xvsqrtdp", VSX_BUILTIN_XVSQRTDP },
|
|||
|
+ { MASK_VSX, CODE_FOR_negv4sf2, "__builtin_vsx_xvnegsp", VSX_BUILTIN_XVNEGSP },
|
|||
|
+ { MASK_VSX, CODE_FOR_sqrtv4sf2, "__builtin_vsx_xvsqrtsp", VSX_BUILTIN_XVSQRTSP },
|
|||
|
+
|
|||
|
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_abs", ALTIVEC_BUILTIN_VEC_ABS },
|
|||
|
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_abss", ALTIVEC_BUILTIN_VEC_ABSS },
|
|||
|
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_ceil", ALTIVEC_BUILTIN_VEC_CEIL },
|
|||
|
@@ -8339,6 +8390,15 @@ static struct builtin_description bdesc_
|
|||
|
{ MASK_ALTIVEC|MASK_VSX, CODE_FOR_fix_truncv4sfv4si2, "__builtin_vec_fix_sfsi", VECTOR_BUILTIN_FIX_V4SF_V4SI },
|
|||
|
{ MASK_ALTIVEC|MASK_VSX, CODE_FOR_fixuns_truncv4sfv4si2, "__builtin_vec_fixuns_sfsi", VECTOR_BUILTIN_FIXUNS_V4SF_V4SI },
|
|||
|
|
|||
|
+ { MASK_VSX, CODE_FOR_floatv2div2df2, "__builtin_vsx_xvcvsxddp", VSX_BUILTIN_XVCVSXDDP },
|
|||
|
+ { MASK_VSX, CODE_FOR_unsigned_floatv2div2df2, "__builtin_vsx_xvcvuxddp", VSX_BUILTIN_XVCVUXDDP },
|
|||
|
+ { MASK_VSX, CODE_FOR_fix_truncv2dfv2di2, "__builtin_vsx_xvdpsxds", VSX_BUILTIN_XVCVDPSXDS },
|
|||
|
+ { MASK_VSX, CODE_FOR_fixuns_truncv2dfv2di2, "__builtin_vsx_xvdpuxds", VSX_BUILTIN_XVCVDPUXDS },
|
|||
|
+ { MASK_VSX, CODE_FOR_floatv4siv4sf2, "__builtin_vsx_xvcvsxwsp", VSX_BUILTIN_XVCVSXDSP },
|
|||
|
+ { MASK_VSX, CODE_FOR_unsigned_floatv4siv4sf2, "__builtin_vsx_xvcvuxwsp", VSX_BUILTIN_XVCVUXWSP },
|
|||
|
+ { MASK_VSX, CODE_FOR_fix_truncv4sfv4si2, "__builtin_vsx_xvspsxws", VSX_BUILTIN_XVCVSPSXWS },
|
|||
|
+ { MASK_VSX, CODE_FOR_fixuns_truncv4sfv4si2, "__builtin_vsx_xvspuxws", VSX_BUILTIN_XVCVSPUXWS },
|
|||
|
+
|
|||
|
/* The SPE unary builtins must start with SPE_BUILTIN_EVABS and
|
|||
|
end with SPE_BUILTIN_EVSUBFUSIAAW. */
|
|||
|
{ 0, CODE_FOR_spe_evabs, "__builtin_spe_evabs", SPE_BUILTIN_EVABS },
|
|||
|
@@ -10484,6 +10544,8 @@ altivec_init_builtins (void)
|
|||
|
= build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
|
|||
|
tree v4sf_ftype_v4sf
|
|||
|
= build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
|
|||
|
+ tree v2df_ftype_v2df
|
|||
|
+ = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
|
|||
|
tree void_ftype_pcvoid_int_int
|
|||
|
= build_function_type_list (void_type_node,
|
|||
|
pcvoid_type_node, integer_type_node,
|
|||
|
@@ -10641,6 +10703,9 @@ altivec_init_builtins (void)
|
|||
|
case V4SFmode:
|
|||
|
type = v4sf_ftype_v4sf;
|
|||
|
break;
|
|||
|
+ case V2DFmode:
|
|||
|
+ type = v2df_ftype_v2df;
|
|||
|
+ break;
|
|||
|
default:
|
|||
|
gcc_unreachable ();
|
|||
|
}
|
|||
|
@@ -10960,6 +11025,18 @@ rs6000_common_init_builtins (void)
|
|||
|
tree int_ftype_v8hi_v8hi
|
|||
|
= build_function_type_list (integer_type_node,
|
|||
|
V8HI_type_node, V8HI_type_node, NULL_TREE);
|
|||
|
+ tree v2di_ftype_v2df
|
|||
|
+ = build_function_type_list (V2DI_type_node,
|
|||
|
+ V2DF_type_node, NULL_TREE);
|
|||
|
+ tree v2df_ftype_v2df
|
|||
|
+ = build_function_type_list (V2DF_type_node,
|
|||
|
+ V2DF_type_node, NULL_TREE);
|
|||
|
+ tree v2df_ftype_v2di
|
|||
|
+ = build_function_type_list (V2DF_type_node,
|
|||
|
+ V2DI_type_node, NULL_TREE);
|
|||
|
+ tree v2df_ftype_v2df_v2df
|
|||
|
+ = build_function_type_list (V2DF_type_node,
|
|||
|
+ V2DF_type_node, V2DF_type_node, NULL_TREE);
|
|||
|
tree v2df_ftype_v2df_v2df_v2df
|
|||
|
= build_function_type_list (V2DF_type_node,
|
|||
|
V2DF_type_node, V2DF_type_node,
|
|||
|
@@ -11136,6 +11213,9 @@ rs6000_common_init_builtins (void)
|
|||
|
case VOIDmode:
|
|||
|
type = opaque_ftype_opaque_opaque;
|
|||
|
break;
|
|||
|
+ case V2DFmode:
|
|||
|
+ type = v2df_ftype_v2df_v2df;
|
|||
|
+ break;
|
|||
|
case V4SFmode:
|
|||
|
type = v4sf_ftype_v4sf_v4sf;
|
|||
|
break;
|
|||
|
@@ -11285,6 +11365,8 @@ rs6000_common_init_builtins (void)
|
|||
|
type = v16qi_ftype_int;
|
|||
|
else if (mode0 == VOIDmode && mode1 == VOIDmode)
|
|||
|
type = opaque_ftype_opaque;
|
|||
|
+ else if (mode0 == V2DFmode && mode1 == V2DFmode)
|
|||
|
+ type = v2df_ftype_v2df;
|
|||
|
else if (mode0 == V4SFmode && mode1 == V4SFmode)
|
|||
|
type = v4sf_ftype_v4sf;
|
|||
|
else if (mode0 == V8HImode && mode1 == V16QImode)
|
|||
|
@@ -11310,6 +11392,10 @@ rs6000_common_init_builtins (void)
|
|||
|
type = v4si_ftype_v4sf;
|
|||
|
else if (mode0 == V4SFmode && mode1 == V4SImode)
|
|||
|
type = v4sf_ftype_v4si;
|
|||
|
+ else if (mode0 == V2DImode && mode1 == V2DFmode)
|
|||
|
+ type = v2di_ftype_v2df;
|
|||
|
+ else if (mode0 == V2DFmode && mode1 == V2DImode)
|
|||
|
+ type = v2df_ftype_v2di;
|
|||
|
else
|
|||
|
gcc_unreachable ();
|
|||
|
|
|||
|
@@ -12092,8 +12178,10 @@ rtx
|
|||
|
rs6000_secondary_memory_needed_rtx (enum machine_mode mode)
|
|||
|
{
|
|||
|
static bool eliminated = false;
|
|||
|
+ rtx ret;
|
|||
|
+
|
|||
|
if (mode != SDmode)
|
|||
|
- return assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
|
|||
|
+ ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
|
|||
|
else
|
|||
|
{
|
|||
|
rtx mem = cfun->machine->sdmode_stack_slot;
|
|||
|
@@ -12105,8 +12193,21 @@ rs6000_secondary_memory_needed_rtx (enum
|
|||
|
cfun->machine->sdmode_stack_slot = mem;
|
|||
|
eliminated = true;
|
|||
|
}
|
|||
|
- return mem;
|
|||
|
+ ret = mem;
|
|||
|
+ }
|
|||
|
+
|
|||
|
+ if (TARGET_DEBUG_ADDR)
|
|||
|
+ {
|
|||
|
+ fprintf (stderr, "rs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
|
|||
|
+ GET_MODE_NAME (mode));
|
|||
|
+ if (!ret)
|
|||
|
+ fprintf (stderr, "\tNULL_RTX\n");
|
|||
|
+ else
|
|||
|
+ debug_rtx (ret);
|
|||
|
+ fprintf (stderr, "\n");
|
|||
|
}
|
|||
|
+
|
|||
|
+ return ret;
|
|||
|
}
|
|||
|
|
|||
|
static tree
|
|||
|
@@ -12140,6 +12241,54 @@ rs6000_check_sdmode (tree *tp, int *walk
|
|||
|
return NULL_TREE;
|
|||
|
}
|
|||
|
|
|||
|
+/* Inform reload about cases where moving X with a mode MODE to a register in
|
|||
|
+ RCLASS requires an extra scratch or intermediate register. Return the class
|
|||
|
+ needed for the intermediate register. */
|
|||
|
+
|
|||
|
+static enum reg_class
|
|||
|
+rs6000_secondary_reload (bool in_p,
|
|||
|
+ rtx x,
|
|||
|
+ enum reg_class rclass,
|
|||
|
+ enum machine_mode mode,
|
|||
|
+ secondary_reload_info *sri)
|
|||
|
+{
|
|||
|
+ if (TARGET_DEBUG_ADDR)
|
|||
|
+ {
|
|||
|
+ fprintf (stderr,
|
|||
|
+ "rs6000_secondary_reload, in_p = %s, rclass = %s, mode = %s\n",
|
|||
|
+ in_p ? "true" : "false", reg_class_names[rclass],
|
|||
|
+ GET_MODE_NAME (mode));
|
|||
|
+ debug_rtx (x);
|
|||
|
+ fprintf (stderr, "\n");
|
|||
|
+ }
|
|||
|
+
|
|||
|
+ return default_secondary_reload (in_p, x, rclass, mode, sri);
|
|||
|
+}
|
|||
|
+
|
|||
|
+/* Fixup reload addresses for Altivec or VSX loads/stores to change SP+offset
|
|||
|
+ to SP+reg addressing. */
|
|||
|
+
|
|||
|
+void
|
|||
|
+rs6000_vector_secondary_reload (rtx op0, rtx op1, rtx op2, bool to_mem_p)
|
|||
|
+{
|
|||
|
+ rtx memref = to_mem_p ? op0 : op1;
|
|||
|
+ gcc_assert (MEM_P (memref));
|
|||
|
+
|
|||
|
+ if (TARGET_DEBUG_ADDR)
|
|||
|
+ {
|
|||
|
+ fprintf (stderr, "rs6000_vector_secondary_reload, to_mem_p = %s\n",
|
|||
|
+ to_mem_p ? "true" : "false");
|
|||
|
+ fprintf (stderr, "op0:\n");
|
|||
|
+ debug_rtx (op0);
|
|||
|
+ fprintf (stderr, "op1:\n");
|
|||
|
+ debug_rtx (op1);
|
|||
|
+ fprintf (stderr, "op2:\n");
|
|||
|
+ debug_rtx (op2);
|
|||
|
+ fprintf (stderr, "\n");
|
|||
|
+ }
|
|||
|
+
|
|||
|
+ gcc_unreachable ();
|
|||
|
+}
|
|||
|
|
|||
|
/* Allocate a 64-bit stack slot to be used for copying SDmode
|
|||
|
values through if this function has any SDmode references. */
|
|||
|
@@ -12212,32 +12361,44 @@ enum reg_class
 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
 {
   enum machine_mode mode = GET_MODE (x);
+  enum reg_class ret = rclass;  /* Default: VSX_REGS case may set nothing.  */
 
   if (TARGET_VSX && VSX_VECTOR_MODE (mode) && x == CONST0_RTX (mode)
       && VSX_REG_CLASS_P (rclass))
-    return rclass;
+    ret = rclass;
 
-  if (TARGET_ALTIVEC && ALTIVEC_VECTOR_MODE (mode) && rclass == ALTIVEC_REGS
-      && easy_vector_constant (x, mode))
-    return rclass;
+  else if (TARGET_ALTIVEC && ALTIVEC_VECTOR_MODE (mode)
+           && rclass == ALTIVEC_REGS && easy_vector_constant (x, mode))
+    ret = rclass;
 
-  if (CONSTANT_P (x) && reg_classes_intersect_p (rclass, FLOAT_REGS))
-    return NO_REGS;
+  else if (CONSTANT_P (x) && reg_classes_intersect_p (rclass, FLOAT_REGS))
+    ret = NO_REGS;
 
-  if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
-    return GENERAL_REGS;
+  else if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
+    ret = GENERAL_REGS;
 
   /* For VSX, prefer the traditional registers. */
-  if (rclass == VSX_REGS)
+  else if (rclass == VSX_REGS)
     {
       if (mode == DFmode)
-        return FLOAT_REGS;
+        ret = FLOAT_REGS;
 
       if (ALTIVEC_VECTOR_MODE (mode))
-        return ALTIVEC_REGS;
+        ret = ALTIVEC_REGS;
+    }
+  else
+    ret = rclass;
+
+  if (TARGET_DEBUG_ADDR)
+    {
+      fprintf (stderr,
+               "rs6000_preferred_reload_class, return %s, rclass = %s, x:\n",
+               reg_class_names[ret], reg_class_names[rclass]);
+      debug_rtx (x);
+      fprintf (stderr, "\n");
     }
 
-  return rclass;
+  return ret;
 }
|
|||
|
|
|||
|
/* If we are copying between FP or AltiVec registers and anything else, we need
|
|||
|
@@ -12251,31 +12412,46 @@ rs6000_secondary_memory_needed (enum reg
                                 enum reg_class class2,
                                 enum machine_mode mode)
 {
+  bool ret;
+  bool vsx1 = VSX_REG_CLASS_P (class1);  /* True if CLASS1 is a VSX class.  */
+  bool vsx2 = VSX_REG_CLASS_P (class2);  /* True if CLASS2 is a VSX class.  */
+
   if (class1 == class2)
-    return false;
+    ret = false;
 
-  if (TARGET_VSX && VSX_MOVE_MODE (mode) && VSX_REG_CLASS_P (class1)
-      && VSX_REG_CLASS_P (class2))
-    return false;
+  else if (TARGET_VSX && VECTOR_MEM_VSX_P (mode)
+           /* Both flags are set above, so neither is read uninitialized.  */
+           && (vsx1 || vsx2))
+    ret = (vsx1 != vsx2);
+
+  else if (class1 == FLOAT_REGS
+           && (!TARGET_MFPGPR || !TARGET_POWERPC64
+               || ((mode != DFmode)
+                   && (mode != DDmode)
+                   && (mode != DImode))))
+    ret = true;
+
+  else if (class2 == FLOAT_REGS
+           && (!TARGET_MFPGPR || !TARGET_POWERPC64
+               || ((mode != DFmode)
+                   && (mode != DDmode)
+                   && (mode != DImode))))
+    ret = true;
 
-  if (class1 == FLOAT_REGS
-      && (!TARGET_MFPGPR || !TARGET_POWERPC64
-          || ((mode != DFmode)
-              && (mode != DDmode)
-              && (mode != DImode))))
-    return true;
+  else if (class1 == ALTIVEC_REGS || class2 == ALTIVEC_REGS)
+    ret = true;
 
-  if (class2 == FLOAT_REGS
-      && (!TARGET_MFPGPR || !TARGET_POWERPC64
-          || ((mode != DFmode)
-              && (mode != DDmode)
-              && (mode != DImode))))
-    return true;
+  else
+    ret = false;
 
-  if (class1 == ALTIVEC_REGS || class2 == ALTIVEC_REGS)
-    return true;
+  if (TARGET_DEBUG_ADDR)
+    fprintf (stderr,
+             "rs6000_secondary_memory_needed, return: %s, class1 = %s, "
+             "class2 = %s, mode = %s\n",
+             ret ? "true" : "false", reg_class_names[class1],
+             reg_class_names[class2], GET_MODE_NAME (mode));
 
-  return false;
+  return ret;
 }
|
|||
|
|
|||
|
/* Return the register class of a scratch register needed to copy IN into
|
|||
|
@@ -12287,6 +12463,7 @@ rs6000_secondary_reload_class (enum reg_
|
|||
|
enum machine_mode mode,
|
|||
|
rtx in)
|
|||
|
{
|
|||
|
+ enum reg_class ret = NO_REGS;
|
|||
|
int regno;
|
|||
|
|
|||
|
if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
|
|||
|
@@ -12307,58 +12484,75 @@ rs6000_secondary_reload_class (enum reg_
|
|||
|
|| GET_CODE (in) == HIGH
|
|||
|
|| GET_CODE (in) == LABEL_REF
|
|||
|
|| GET_CODE (in) == CONST))
|
|||
|
- return BASE_REGS;
|
|||
|
+ ret = BASE_REGS;
|
|||
|
}
|
|||
|
|
|||
|
- if (GET_CODE (in) == REG)
|
|||
|
+ if (ret == NO_REGS)
|
|||
|
{
|
|||
|
- regno = REGNO (in);
|
|||
|
- if (regno >= FIRST_PSEUDO_REGISTER)
|
|||
|
+ if (GET_CODE (in) == REG)
|
|||
|
+ {
|
|||
|
+ regno = REGNO (in);
|
|||
|
+ if (regno >= FIRST_PSEUDO_REGISTER)
|
|||
|
+ {
|
|||
|
+ regno = true_regnum (in);
|
|||
|
+ if (regno >= FIRST_PSEUDO_REGISTER)
|
|||
|
+ regno = -1;
|
|||
|
+ }
|
|||
|
+ }
|
|||
|
+ else if (GET_CODE (in) == SUBREG)
|
|||
|
{
|
|||
|
regno = true_regnum (in);
|
|||
|
if (regno >= FIRST_PSEUDO_REGISTER)
|
|||
|
regno = -1;
|
|||
|
}
|
|||
|
- }
|
|||
|
- else if (GET_CODE (in) == SUBREG)
|
|||
|
- {
|
|||
|
- regno = true_regnum (in);
|
|||
|
- if (regno >= FIRST_PSEUDO_REGISTER)
|
|||
|
+ else
|
|||
|
regno = -1;
|
|||
|
- }
|
|||
|
- else
|
|||
|
- regno = -1;
|
|||
|
|
|||
|
- /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
|
|||
|
- into anything. */
|
|||
|
- if (rclass == GENERAL_REGS || rclass == BASE_REGS
|
|||
|
- || (regno >= 0 && INT_REGNO_P (regno)))
|
|||
|
- return NO_REGS;
|
|||
|
-
|
|||
|
- /* Constants, memory, and FP registers can go into FP registers. */
|
|||
|
- if ((regno == -1 || FP_REGNO_P (regno))
|
|||
|
- && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
|
|||
|
- return (mode != SDmode) ? NO_REGS : GENERAL_REGS;
|
|||
|
-
|
|||
|
- /* Memory, and FP/altivec registers can go into fp/altivec registers under
|
|||
|
- VSX. */
|
|||
|
- if (TARGET_VSX
|
|||
|
- && (regno == -1 || VSX_REGNO_P (regno))
|
|||
|
- && VSX_REG_CLASS_P (rclass))
|
|||
|
- return NO_REGS;
|
|||
|
+ /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
|
|||
|
+ into anything. */
|
|||
|
+ if (rclass == GENERAL_REGS || rclass == BASE_REGS
|
|||
|
+ || (regno >= 0 && INT_REGNO_P (regno)))
|
|||
|
+ ret = NO_REGS;
|
|||
|
+
|
|||
|
+ /* Constants, memory, and FP registers can go into FP registers. */
|
|||
|
+ else if ((regno == -1 || FP_REGNO_P (regno))
|
|||
|
+ && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
|
|||
|
+ ret = (mode != SDmode) ? NO_REGS : GENERAL_REGS;
|
|||
|
+
|
|||
|
+ /* Memory, and FP/altivec registers can go into fp/altivec registers under
|
|||
|
+ VSX. */
|
|||
|
+ else if (TARGET_VSX
|
|||
|
+ && (regno == -1 || VSX_REGNO_P (regno))
|
|||
|
+ && VSX_REG_CLASS_P (rclass))
|
|||
|
+ ret = NO_REGS;
|
|||
|
+
|
|||
|
+ /* Memory, and AltiVec registers can go into AltiVec registers. */
|
|||
|
+ else if ((regno == -1 || ALTIVEC_REGNO_P (regno))
|
|||
|
+ && rclass == ALTIVEC_REGS)
|
|||
|
+ ret = NO_REGS;
|
|||
|
+
|
|||
|
+ /* We can copy among the CR registers. */
|
|||
|
+ else if ((rclass == CR_REGS || rclass == CR0_REGS)
|
|||
|
+ && regno >= 0 && CR_REGNO_P (regno))
|
|||
|
+ ret = NO_REGS;
|
|||
|
+
|
|||
|
+ /* Otherwise, we need GENERAL_REGS. */
|
|||
|
+ else
|
|||
|
+ ret = GENERAL_REGS;
|
|||
|
+ }
|
|||
|
|
|||
|
- /* Memory, and AltiVec registers can go into AltiVec registers. */
|
|||
|
- if ((regno == -1 || ALTIVEC_REGNO_P (regno))
|
|||
|
- && rclass == ALTIVEC_REGS)
|
|||
|
- return NO_REGS;
|
|||
|
-
|
|||
|
- /* We can copy among the CR registers. */
|
|||
|
- if ((rclass == CR_REGS || rclass == CR0_REGS)
|
|||
|
- && regno >= 0 && CR_REGNO_P (regno))
|
|||
|
- return NO_REGS;
|
|||
|
+ if (TARGET_DEBUG_ADDR)
|
|||
|
+ {
|
|||
|
+ fprintf (stderr,
|
|||
|
+ "rs6000_secondary_reload_class, return %s, rclass = %s, "
|
|||
|
+ "mode = %s, input rtx:\n",
|
|||
|
+ reg_class_names[ret], reg_class_names[rclass],
|
|||
|
+ GET_MODE_NAME (mode));
|
|||
|
+ debug_rtx (in);
|
|||
|
+ fprintf (stderr, "\n");
|
|||
|
+ }
|
|||
|
|
|||
|
- /* Otherwise, we need GENERAL_REGS. */
|
|||
|
- return GENERAL_REGS;
|
|||
|
+ return ret;
|
|||
|
}
|
|||
|
|
|||
|
/* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
|
|||
|
@@ -12368,19 +12562,29 @@ rs6000_cannot_change_mode_class (enum ma
|
|||
|
enum machine_mode to,
|
|||
|
enum reg_class rclass)
|
|||
|
{
|
|||
|
- return (GET_MODE_SIZE (from) != GET_MODE_SIZE (to)
|
|||
|
- ? ((GET_MODE_SIZE (from) < 8 || GET_MODE_SIZE (to) < 8
|
|||
|
- || TARGET_IEEEQUAD)
|
|||
|
- && reg_classes_intersect_p (FLOAT_REGS, rclass))
|
|||
|
- : (((TARGET_E500_DOUBLE
|
|||
|
- && ((((to) == DFmode) + ((from) == DFmode)) == 1
|
|||
|
- || (((to) == TFmode) + ((from) == TFmode)) == 1
|
|||
|
- || (((to) == DDmode) + ((from) == DDmode)) == 1
|
|||
|
- || (((to) == TDmode) + ((from) == TDmode)) == 1
|
|||
|
- || (((to) == DImode) + ((from) == DImode)) == 1))
|
|||
|
- || (TARGET_SPE
|
|||
|
- && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1))
|
|||
|
- && reg_classes_intersect_p (GENERAL_REGS, rclass)));
|
|||
|
+ bool ret = (GET_MODE_SIZE (from) != GET_MODE_SIZE (to)
|
|||
|
+ ? ((GET_MODE_SIZE (from) < 8 || GET_MODE_SIZE (to) < 8
|
|||
|
+ || TARGET_IEEEQUAD)
|
|||
|
+ && reg_classes_intersect_p (FLOAT_REGS, rclass))
|
|||
|
+ : (((TARGET_E500_DOUBLE
|
|||
|
+ && ((((to) == DFmode) + ((from) == DFmode)) == 1
|
|||
|
+ || (((to) == TFmode) + ((from) == TFmode)) == 1
|
|||
|
+ || (((to) == DDmode) + ((from) == DDmode)) == 1
|
|||
|
+ || (((to) == TDmode) + ((from) == TDmode)) == 1
|
|||
|
+ || (((to) == DImode) + ((from) == DImode)) == 1))
|
|||
|
+ || (TARGET_SPE
|
|||
|
+ && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1))
|
|||
|
+ && reg_classes_intersect_p (GENERAL_REGS, rclass)));
|
|||
|
+
|
|||
|
+ if (TARGET_DEBUG_ADDR)
|
|||
|
+ fprintf (stderr,
|
|||
|
+ "rs6000_cannot_change_mode_class, return %s, from = %s, "
|
|||
|
+ "to = %s, rclass = %s\n",
|
|||
|
+ ret ? "true" : "false",
|
|||
|
+ GET_MODE_NAME (from), GET_MODE_NAME (to),
|
|||
|
+ reg_class_names[rclass]);
|
|||
|
+
|
|||
|
+ return ret;
|
|||
|
}
|
|||
|
|
|||
|
/* Given a comparison operation, return the bit number in CCR to test. We
|
|||
|
--- gcc/config/rs6000/vsx.md (revision 144758)
|
|||
|
+++ gcc/config/rs6000/vsx.md (revision 144843)
|
|||
|
@@ -68,7 +68,13 @@ (define_mode_attr VSbit [(SI "32")
|
|||
|
(DI "64")])
|
|||
|
|
|||
|
(define_constants
|
|||
|
- [(UNSPEC_VSX_CONCAT_V2DF 500)])
|
|||
|
+ [(UNSPEC_VSX_CONCAT_V2DF 500)
|
|||
|
+ (UNSPEC_VSX_XVCVDPSP 501)
|
|||
|
+ (UNSPEC_VSX_XVCVDPSXWS 502)
|
|||
|
+ (UNSPEC_VSX_XVCVDPUXWS 503)
|
|||
|
+ (UNSPEC_VSX_XVCVSPDP 504)
|
|||
|
+ (UNSPEC_VSX_XVCVSXWDP 505)
|
|||
|
+ (UNSPEC_VSX_XVCVUXWDP 506)])
|
|||
|
|
|||
|
;; VSX moves
|
|||
|
(define_insn "*vsx_mov<mode>"
|
|||
|
@@ -245,7 +251,7 @@ (define_insn "*vsx_abs<mode>2"
|
|||
|
"xvabs<VSs> %x0,%x1"
|
|||
|
[(set_attr "type" "vecfloat")])
|
|||
|
|
|||
|
-(define_insn "*vsx_nabs<mode>2"
|
|||
|
+(define_insn "vsx_nabs<mode>2"
|
|||
|
[(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>")
|
|||
|
(neg:VSX_F
|
|||
|
(abs:VSX_F
|
|||
|
@@ -417,14 +423,14 @@ (define_insn "*vsx_ftrunc<mode>2"
|
|||
|
"xvr<VSs>piz %x0,%x1"
|
|||
|
[(set_attr "type" "vecperm")])
|
|||
|
|
|||
|
-(define_insn "*vsx_float<VSi><mode>2"
|
|||
|
+(define_insn "vsx_float<VSi><mode>2"
|
|||
|
[(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>")
|
|||
|
(float:VSX_F (match_operand:<VSI> 1 "vsx_register_operand" "<VSr>")))]
|
|||
|
"VECTOR_UNIT_VSX_P (<MODE>mode)"
|
|||
|
"xvcvsx<VSc><VSs> %x0,%x1"
|
|||
|
[(set_attr "type" "vecfloat")])
|
|||
|
|
|||
|
-(define_insn "*vsx_floatuns<VSi><mode>2"
|
|||
|
+(define_insn "vsx_floatuns<VSi><mode>2"
|
|||
|
[(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>")
|
|||
|
(unsigned_float:VSX_F (match_operand:<VSI> 1 "vsx_register_operand" "<VSr>")))]
|
|||
|
"VECTOR_UNIT_VSX_P (<MODE>mode)"
|
|||
|
@@ -446,6 +452,62 @@ (define_insn "*vsx_fixuns_trunc<mode><VS
|
|||
|
[(set_attr "type" "vecfloat")])
|
|||
|
|
|||
|
|
|||
|
+;; VSX convert to/from double vector
|
|||
|
+
|
|||
|
+;; Convert from 64-bit to 32-bit types
|
|||
|
+;; Note, favor the Altivec registers since the usual use of these instructions
|
|||
|
+;; is in vector converts and we need to use the Altivec vperm instruction.
|
|||
|
+
|
|||
|
+(define_insn "vsx_xvcvdpsp"
|
|||
|
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=v,?wa")
|
|||
|
+ (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
|
|||
|
+ UNSPEC_VSX_XVCVDPSP))]
|
|||
|
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
|
|||
|
+ "xvcvdpsp %x0,%x1"
|
|||
|
+ [(set_attr "type" "vecfloat")])
|
|||
|
+
|
|||
|
+(define_insn "vsx_xvcvdpsxws"
|
|||
|
+ [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
|
|||
|
+ (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
|
|||
|
+ UNSPEC_VSX_XVCVDPSXWS))]
|
|||
|
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
|
|||
|
+ "xvcvdpsxws %x0,%x1"
|
|||
|
+ [(set_attr "type" "vecfloat")])
|
|||
|
+
|
|||
|
+(define_insn "vsx_xvcvdpuxws"
|
|||
|
+ [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
|
|||
|
+ (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
|
|||
|
+ UNSPEC_VSX_XVCVDPUXWS))]
|
|||
|
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
|
|||
|
+ "xvcvdpuxws %x0,%x1"
|
|||
|
+ [(set_attr "type" "vecfloat")])
|
|||
|
+
|
|||
|
+;; Convert from 32-bit to 64-bit types
|
|||
|
+(define_insn "vsx_xvcvspdp"
|
|||
|
+ [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
|
|||
|
+ (unspec:V2DF [(match_operand:V4SF 1 "vsx_register_operand" "wf,wa")]
|
|||
|
+ UNSPEC_VSX_XVCVSPDP))]
|
|||
|
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
|
|||
|
+ "xvcvspdp %x0,%x1"
|
|||
|
+ [(set_attr "type" "vecfloat")])
|
|||
|
+
|
|||
|
+(define_insn "vsx_xvcvsxwdp"
|
|||
|
+ [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
|
|||
|
+ (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
|
|||
|
+ UNSPEC_VSX_XVCVSXWDP))]
|
|||
|
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
|
|||
|
+ "xvcvsxwdp %x0,%x1"
|
|||
|
+ [(set_attr "type" "vecfloat")])
|
|||
|
+
|
|||
|
+(define_insn "vsx_xvcvuxwdp"
|
|||
|
+ [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
|
|||
|
+ (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
|
|||
|
+ UNSPEC_VSX_XVCVUXWDP))]
|
|||
|
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
|
|||
|
+ "xvcvuxwdp %x0,%x1"
|
|||
|
+ [(set_attr "type" "vecfloat")])
|
|||
|
+
|
|||
|
+
|
|||
|
;; VSX scalar double precision floating point operations
|
|||
|
(define_insn"*vsx_adddf3"
|
|||
|
[(set (match_operand:DF 0 "vsx_register_operand" "=ws")
|
|||
|
@@ -753,8 +815,8 @@ (define_insn "*vsx_andc<mode>3"
|
|||
|
(define_insn "vsx_concat_v2df"
|
|||
|
[(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
|
|||
|
(unspec:V2DF
|
|||
|
- [(match_operand:DF 1 "vsx_register_operand" "f,wa")
|
|||
|
- (match_operand:DF 2 "vsx_register_operand" "f,wa")]
|
|||
|
+ [(match_operand:DF 1 "vsx_register_operand" "ws,wa")
|
|||
|
+ (match_operand:DF 2 "vsx_register_operand" "ws,wa")]
|
|||
|
UNSPEC_VSX_CONCAT_V2DF))]
|
|||
|
"VECTOR_UNIT_VSX_P (V2DFmode)"
|
|||
|
"xxpermdi %x0,%x1,%x2,0"
|
|||
|
@@ -762,32 +824,37 @@ (define_insn "vsx_concat_v2df"
|
|||
|
|
|||
|
;; Set a double into one element
|
|||
|
(define_insn "vsx_set_v2df"
|
|||
|
- [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd")
|
|||
|
+ [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
|
|||
|
(vec_merge:V2DF
|
|||
|
- (match_operand:V2DF 1 "vsx_register_operand" "wd")
|
|||
|
- (vec_duplicate:V2DF (match_operand:DF 2 "vsx_register_operand" "ws"))
|
|||
|
- (match_operand:QI 3 "u5bit_cint_operand" "i")))]
|
|||
|
+ (match_operand:V2DF 1 "vsx_register_operand" "wd,wa")
|
|||
|
+ (vec_duplicate:V2DF (match_operand:DF 2 "vsx_register_operand" "ws,f"))
|
|||
|
+ (match_operand:QI 3 "u5bit_cint_operand" "i,i")))]
|
|||
|
"VECTOR_UNIT_VSX_P (V2DFmode)"
|
|||
|
{
|
|||
|
- operands[3] = GEN_INT (INTVAL (operands[3]) & 1);
|
|||
|
- return \"xxpermdi %x0,%x1,%x2,%3\";
|
|||
|
+ if (INTVAL (operands[3]) == 0)
|
|||
|
+ return \"xxpermdi %x0,%x1,%x2,1\";
|
|||
|
+ else if (INTVAL (operands[3]) == 1)
|
|||
|
+ return \"xxpermdi %x0,%x2,%x1,0\";
|
|||
|
+ else
|
|||
|
+ gcc_unreachable ();
|
|||
|
}
|
|||
|
[(set_attr "type" "vecperm")])
|
|||
|
|
|||
|
;; Extract a DF element from V2DF
|
|||
|
(define_insn "vsx_extract_v2df"
|
|||
|
- [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
|
|||
|
- (vec_select:DF (match_operand:V2DF 1 "vsx_register_operand" "wd")
|
|||
|
+ [(set (match_operand:DF 0 "vsx_register_operand" "=ws,f,?wa")
|
|||
|
+ (vec_select:DF (match_operand:V2DF 1 "vsx_register_operand" "wd,wd,wa")
|
|||
|
(parallel
|
|||
|
- [(match_operand:QI 2 "u5bit_cint_operand" "i")])))]
|
|||
|
+ [(match_operand:QI 2 "u5bit_cint_operand" "i,i,i")])))]
|
|||
|
"VECTOR_UNIT_VSX_P (V2DFmode)"
|
|||
|
{
|
|||
|
- operands[3] = GEN_INT (INTVAL (operands[2]) & 1);
|
|||
|
+ gcc_assert (UINTVAL (operands[2]) <= 1);
|
|||
|
+ operands[3] = GEN_INT (INTVAL (operands[2]) << 1);
|
|||
|
return \"xxpermdi %x0,%x1,%x1,%3\";
|
|||
|
}
|
|||
|
[(set_attr "type" "vecperm")])
|
|||
|
|
|||
|
-;; General V2DF permute
|
|||
|
+;; General V2DF permute, extract_{high,low,even,odd}
|
|||
|
(define_insn "vsx_xxpermdi"
|
|||
|
[(set (match_operand:V2DF 0 "vsx_register_operand" "=wd")
|
|||
|
(vec_concat:V2DF
|
|||
|
@@ -799,6 +866,7 @@ (define_insn "vsx_xxpermdi"
|
|||
|
[(match_operand:QI 4 "u5bit_cint_operand" "i")]))))]
|
|||
|
"VECTOR_UNIT_VSX_P (V2DFmode)"
|
|||
|
{
|
|||
|
+ gcc_assert ((UINTVAL (operands[2]) <= 1) && (UINTVAL (operands[4]) <= 1));
|
|||
|
operands[5] = GEN_INT (((INTVAL (operands[2]) & 1) << 1)
|
|||
|
| (INTVAL (operands[4]) & 1));
|
|||
|
return \"xxpermdi %x0,%x1,%x3,%5\";
|
|||
|
@@ -807,14 +875,15 @@ (define_insn "vsx_xxpermdi"
|
|||
|
|
|||
|
;; V2DF splat
|
|||
|
(define_insn "vsx_splatv2df"
|
|||
|
- [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd")
|
|||
|
+ [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,wd")
|
|||
|
(vec_duplicate:V2DF
|
|||
|
- (match_operand:DF 1 "input_operand" "ws,Z")))]
|
|||
|
+ (match_operand:DF 1 "input_operand" "ws,f,Z")))]
|
|||
|
"VECTOR_UNIT_VSX_P (V2DFmode)"
|
|||
|
"@
|
|||
|
xxpermdi %x0,%x1,%x1,0
|
|||
|
+ xxpermdi %x0,%x1,%x1,0
|
|||
|
lxvdsx %x0,%y1"
|
|||
|
- [(set_attr "type" "vecperm,vecload")])
|
|||
|
+ [(set_attr "type" "vecperm,vecperm,vecload")])
|
|||
|
|
|||
|
;; V4SF splat
|
|||
|
(define_insn "*vsx_xxspltw"
|
|||
|
@@ -828,14 +897,14 @@ (define_insn "*vsx_xxspltw"
|
|||
|
[(set_attr "type" "vecperm")])
|
|||
|
|
|||
|
;; V4SF interleave
|
|||
|
-(define_insn "*vsx_xxmrghw"
|
|||
|
- [(set (match_operand:V4SF 0 "register_operand" "=v")
|
|||
|
- (vec_merge:V4SF (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "v")
|
|||
|
+(define_insn "vsx_xxmrghw"
|
|||
|
+ [(set (match_operand:V4SF 0 "register_operand" "=wf")
|
|||
|
+ (vec_merge:V4SF (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "wf")
|
|||
|
(parallel [(const_int 0)
|
|||
|
(const_int 2)
|
|||
|
(const_int 1)
|
|||
|
(const_int 3)]))
|
|||
|
- (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "v")
|
|||
|
+ (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "wf")
|
|||
|
(parallel [(const_int 2)
|
|||
|
(const_int 0)
|
|||
|
(const_int 3)
|
|||
|
@@ -845,15 +914,15 @@ (define_insn "*vsx_xxmrghw"
|
|||
|
"xxmrghw %x0,%x1,%x2"
|
|||
|
[(set_attr "type" "vecperm")])
|
|||
|
|
|||
|
-(define_insn "*vsx_xxmrglw"
|
|||
|
- [(set (match_operand:V4SF 0 "register_operand" "=v")
|
|||
|
+(define_insn "vsx_xxmrglw"
|
|||
|
+ [(set (match_operand:V4SF 0 "register_operand" "=wf")
|
|||
|
(vec_merge:V4SF
|
|||
|
- (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "v")
|
|||
|
+ (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "wf")
|
|||
|
(parallel [(const_int 2)
|
|||
|
(const_int 0)
|
|||
|
(const_int 3)
|
|||
|
(const_int 1)]))
|
|||
|
- (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "v")
|
|||
|
+ (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "wf")
|
|||
|
(parallel [(const_int 0)
|
|||
|
(const_int 2)
|
|||
|
(const_int 1)
|
|||
|
@@ -862,3 +931,26 @@ (define_insn "*vsx_xxmrglw"
|
|||
|
"VECTOR_UNIT_VSX_P (V4SFmode)"
|
|||
|
"xxmrglw %x0,%x1,%x2"
|
|||
|
[(set_attr "type" "vecperm")])
|
|||
|
+
|
|||
|
+
|
|||
|
+;; Reload patterns for VSX loads/stores. We need a scratch register to convert
|
|||
|
+;; the stack temporary address from reg+offset to reg+reg addressing.
|
|||
|
+(define_expand "vsx_reload_<VSX_L:mode>_<P:ptrsize>_to_mem"
|
|||
|
+ [(parallel [(match_operand:VSX_L 0 "memory_operand" "")
|
|||
|
+ (match_operand:VSX_L 1 "register_operand" "=wa")
|
|||
|
+ (match_operand:P 2 "register_operand" "=&b")])]
|
|||
|
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
|
|||
|
+{
|
|||
|
+ rs6000_vector_secondary_reload (operands[0], operands[1], operands[2], true);
|
|||
|
+ DONE;
|
|||
|
+})
|
|||
|
+
|
|||
|
+(define_expand "vsx_reload_<VSX_L:mode>_<P:ptrsize>_to_reg"
|
|||
|
+ [(parallel [(match_operand:VSX_L 0 "register_operand" "=wa")
|
|||
|
+ (match_operand:VSX_L 1 "memory_operand" "")
|
|||
|
+ (match_operand:P 2 "register_operand" "=&b")])]
|
|||
|
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
|
|||
|
+{
|
|||
|
+ rs6000_vector_secondary_reload (operands[0], operands[1], operands[2], false);
|
|||
|
+ DONE;
|
|||
|
+})
|
|||
|
--- gcc/config/rs6000/rs6000.h (revision 144758)
|
|||
|
+++ gcc/config/rs6000/rs6000.h (revision 144843)
|
|||
|
@@ -3388,7 +3388,7 @@ enum rs6000_builtins
|
|||
|
VSX_BUILTIN_XXSPLTW,
|
|||
|
VSX_BUILTIN_XXSWAPD,
|
|||
|
|
|||
|
- /* Combine VSX/Altivec builtins. */
|
|||
|
+ /* Combined VSX/Altivec builtins. */
|
|||
|
VECTOR_BUILTIN_FLOAT_V4SI_V4SF,
|
|||
|
VECTOR_BUILTIN_UNSFLOAT_V4SI_V4SF,
|
|||
|
VECTOR_BUILTIN_FIX_V4SF_V4SI,
|
|||
|
--- gcc/config/rs6000/altivec.md (revision 144758)
|
|||
|
+++ gcc/config/rs6000/altivec.md (revision 144843)
|
|||
|
@@ -2685,3 +2685,27 @@ (define_expand "vec_unpacku_float_lo_v8h
|
|||
|
emit_insn (gen_altivec_vcfux (operands[0], tmp, const0_rtx));
|
|||
|
DONE;
|
|||
|
}")
|
|||
|
+
|
|||
|
+
|
|||
|
+;; Reload patterns for Altivec loads/stores. We need a scratch register to
|
|||
|
+;; convert the stack temporary address from reg+offset to reg+reg addressing.
|
|||
|
+
|
|||
|
+(define_expand "altivec_reload_<V:mode>_<P:ptrsize>_to_mem"
|
|||
|
+ [(parallel [(match_operand:V 0 "memory_operand" "")
|
|||
|
+ (match_operand:V 1 "register_operand" "=v")
|
|||
|
+ (match_operand:P 2 "register_operand" "=&b")])]
|
|||
|
+ "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
|
|||
|
+{
|
|||
|
+ rs6000_vector_secondary_reload (operands[0], operands[1], operands[2], true);
|
|||
|
+ DONE;
|
|||
|
+})
|
|||
|
+
|
|||
|
+(define_expand "altivec_reload_<V:mode>_<P:ptrsize>_to_reg"
|
|||
|
+ [(parallel [(match_operand:V 0 "register_operand" "=v")
|
|||
|
+ (match_operand:V 1 "memory_operand" "")
|
|||
|
+ (match_operand:P 2 "register_operand" "=&b")])]
|
|||
|
+ "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
|
|||
|
+{
|
|||
|
+ rs6000_vector_secondary_reload (operands[0], operands[1], operands[2], false);
|
|||
|
+ DONE;
|
|||
|
+})
|
|||
|
--- gcc/config/rs6000/rs6000.md (revision 144758)
|
|||
|
+++ gcc/config/rs6000/rs6000.md (revision 144843)
|
|||
|
@@ -222,6 +222,10 @@ (define_mode_attr dbits [(QI "56") (HI "
|
|||
|
;; ISEL/ISEL64 target selection
|
|||
|
(define_mode_attr sel [(SI "") (DI "64")])
|
|||
|
|
|||
|
+;; Suffix for reload patterns
|
|||
|
+(define_mode_attr ptrsize [(SI "32bit")
|
|||
|
+ (DI "64bit")])
|
|||
|
+
|
|||
|
|
|||
|
;; Start with fixed-point load and store insns. Here we put only the more
|
|||
|
;; complex forms. Basic data transfer is done later.
|
|||
|
--- gcc/testsuite/gcc.target/powerpc/vsx-builtin-2.c (revision 0)
|
|||
|
+++ gcc/testsuite/gcc.target/powerpc/vsx-builtin-2.c (revision 144843)
|
|||
|
@@ -0,0 +1,29 @@
|
|||
|
+/* { dg-do compile { target { powerpc*-*-* } } } */
|
|||
|
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
|
|||
|
+/* { dg-require-effective-target powerpc_vsx_ok } */
|
|||
|
+/* { dg-options "-O2 -mcpu=power7" } */
|
|||
|
+/* { dg-final { scan-assembler "xvaddsp" } } */
|
|||
|
+/* { dg-final { scan-assembler "xvsubsp" } } */
|
|||
|
+/* { dg-final { scan-assembler "xvmulsp" } } */
|
|||
|
+/* { dg-final { scan-assembler "xvdivsp" } } */
|
|||
|
+/* { dg-final { scan-assembler "xvmaxsp" } } */
|
|||
|
+/* { dg-final { scan-assembler "xvminsp" } } */
|
|||
|
+/* { dg-final { scan-assembler "xvsqrtsp" } } */
|
|||
|
+/* { dg-final { scan-assembler "xvabssp" } } */
|
|||
|
+/* { dg-final { scan-assembler "xvnabssp" } } */
|
|||
|
+
|
|||
|
+void use_builtins (__vector float *p, __vector float *q, __vector float *r)
|
|||
|
+{
|
|||
|
+ __vector float tmp1 = *q;
|
|||
|
+ __vector float tmp2 = *r;
|
|||
|
+
|
|||
|
+ *p++ = __builtin_vsx_xvaddsp (tmp1, tmp2);
|
|||
|
+ *p++ = __builtin_vsx_xvsubsp (tmp1, tmp2);
|
|||
|
+ *p++ = __builtin_vsx_xvmulsp (tmp1, tmp2);
|
|||
|
+ *p++ = __builtin_vsx_xvdivsp (tmp1, tmp2);
|
|||
|
+ *p++ = __builtin_vsx_xvmaxsp (tmp1, tmp2);
|
|||
|
+ *p++ = __builtin_vsx_xvminsp (tmp1, tmp2);
|
|||
|
+ *p++ = __builtin_vsx_xvabssp (tmp1);
|
|||
|
+ *p++ = __builtin_vsx_xvnabssp (tmp1);
|
|||
|
+ *p = __builtin_vsx_xvsqrtsp (tmp1);
|
|||
|
+}
|
|||
|
--- gcc/testsuite/gcc.target/powerpc/vsx-builtin-1.c (revision 0)
|
|||
|
+++ gcc/testsuite/gcc.target/powerpc/vsx-builtin-1.c (revision 144843)
|
|||
|
@@ -0,0 +1,29 @@
|
|||
|
+/* { dg-do compile { target { powerpc*-*-* } } } */
|
|||
|
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
|
|||
|
+/* { dg-require-effective-target powerpc_vsx_ok } */
|
|||
|
+/* { dg-options "-O2 -mcpu=power7" } */
|
|||
|
+/* { dg-final { scan-assembler "xvadddp" } } */
|
|||
|
+/* { dg-final { scan-assembler "xvsubdp" } } */
|
|||
|
+/* { dg-final { scan-assembler "xvmuldp" } } */
|
|||
|
+/* { dg-final { scan-assembler "xvdivdp" } } */
|
|||
|
+/* { dg-final { scan-assembler "xvmaxdp" } } */
|
|||
|
+/* { dg-final { scan-assembler "xvmindp" } } */
|
|||
|
+/* { dg-final { scan-assembler "xvsqrtdp" } } */
|
|||
|
+/* { dg-final { scan-assembler "xvabsdp" } } */
|
|||
|
+/* { dg-final { scan-assembler "xvnabsdp" } } */
|
|||
|
+
|
|||
|
+void use_builtins (__vector double *p, __vector double *q, __vector double *r)
|
|||
|
+{
|
|||
|
+ __vector double tmp1 = *q;
|
|||
|
+ __vector double tmp2 = *r;
|
|||
|
+
|
|||
|
+ *p++ = __builtin_vsx_xvadddp (tmp1, tmp2);
|
|||
|
+ *p++ = __builtin_vsx_xvsubdp (tmp1, tmp2);
|
|||
|
+ *p++ = __builtin_vsx_xvmuldp (tmp1, tmp2);
|
|||
|
+ *p++ = __builtin_vsx_xvdivdp (tmp1, tmp2);
|
|||
|
+ *p++ = __builtin_vsx_xvmaxdp (tmp1, tmp2);
|
|||
|
+ *p++ = __builtin_vsx_xvmindp (tmp1, tmp2);
|
|||
|
+ *p++ = __builtin_vsx_xvabsdp (tmp1);
|
|||
|
+ *p++ = __builtin_vsx_xvnabsdp (tmp1);
|
|||
|
+ *p = __builtin_vsx_xvsqrtdp (tmp1);
|
|||
|
+}
|
|||
|
--- gcc/testsuite/gcc.target/powerpc/pr39457.c (revision 0)
|
|||
|
+++ gcc/testsuite/gcc.target/powerpc/pr39457.c (revision 144857)
|
|||
|
@@ -0,0 +1,56 @@
|
|||
|
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
|
|||
|
+/* { dg-options "-m64 -O2 -mminimal-toc" } */
|
|||
|
+
|
|||
|
+/* PR 39457 -- fix breakage because the compiler ran out of registers and
|
|||
|
+ wanted to stash a floating point value to the LR/CTR register. */
|
|||
|
+
|
|||
|
+/* -O2 -m64 -mminimal-toc */
|
|||
|
+typedef struct { void *s; } S;
|
|||
|
+typedef void (*T1) (void);
|
|||
|
+typedef void (*T2) (void *, void *, int, void *);
|
|||
|
+char *fn1 (const char *, ...);
|
|||
|
+void *fn2 (void);
|
|||
|
+int fn3 (char *, int);
|
|||
|
+int fn4 (const void *);
|
|||
|
+int fn5 (const void *);
|
|||
|
+long fn6 (void) __attribute__ ((__const__));
|
|||
|
+int fn7 (void *, void *, void *);
|
|||
|
+void *fn8 (void *, long);
|
|||
|
+void *fn9 (void *, long, const char *, ...);
|
|||
|
+void *fn10 (void *);
|
|||
|
+long fn11 (void) __attribute__ ((__const__));
|
|||
|
+long fn12 (void *, const char *, T1, T2, void *);
|
|||
|
+void *fn13 (void *);
|
|||
|
+long fn14 (void) __attribute__ ((__const__));
|
|||
|
+extern void *v1;
|
|||
|
+extern char *v2;
|
|||
|
+extern int v3;
|
|||
|
+
|
|||
|
+void
|
|||
|
+foo (void *x, char *z)
|
|||
|
+{
|
|||
|
+ void *i1, *i2;
|
|||
|
+ int y;
|
|||
|
+ if (v1)
|
|||
|
+ return;
|
|||
|
+ v1 = fn9 (fn10 (fn2 ()), fn6 (), "x", 0., "y", 0., 0);
|
|||
|
+ y = 520 - (520 - fn4 (x)) / 2;
|
|||
|
+ fn9 (fn8 (v1, fn6 ()), fn6 (), "wig", fn8 (v1, fn14 ()), "x", 18.0,
|
|||
|
+ "y", 16.0, "wid", 80.0, "hi", 500.0, 0);
|
|||
|
+ fn9 (fn10 (v1), fn6 (), "x1", 0., "y1", 0., "x2", 80.0, "y2",
|
|||
|
+ 500.0, "f", fn3 ("fff", 0x0D0DFA00), 0);
|
|||
|
+ fn13 (((S *) fn8 (v1, fn6 ()))->s);
|
|||
|
+ fn12 (fn8 (v1, fn11 ()), "ev", (T1) fn7, 0, fn8 (v1, fn6 ()));
|
|||
|
+ fn9 (fn8 (v1, fn6 ()), fn6 (), "wig",
|
|||
|
+ fn8 (v1, fn14 ()), "x", 111.0, "y", 14.0, "wid", 774.0, "hi",
|
|||
|
+ 500.0, 0);
|
|||
|
+ v1 = fn9 (fn10 (v1), fn6 (), "x1", 0., "y1", 0., "x2", 774.0, "y2",
|
|||
|
+ 500.0, "f", fn3 ("gc", 0x0D0DFA00), 0);
|
|||
|
+ fn1 (z, 0);
|
|||
|
+ i1 = fn9 (fn8 (v1, fn6 ()), fn6 (), "pixbuf", x, "x",
|
|||
|
+ 800 - fn5 (x) / 2, "y", y - fn4 (x), 0);
|
|||
|
+ fn12 (fn8 (i1, fn11 ()), "ev", (T1) fn7, 0, "/ok/");
|
|||
|
+ fn12 (fn8 (i1, fn11 ()), "ev", (T1) fn7, 0, 0);
|
|||
|
+ i2 = fn9 (fn8 (v1, fn6 ()), fn6 (), "txt", "OK", "fnt", v2, "x",
|
|||
|
+ 800, "y", y - fn4 (x) + 15, "ar", 0, "f", v3, 0);
|
|||
|
+}
|