287 lines
8.5 KiB
Diff
287 lines
8.5 KiB
Diff
|
2009-03-27 Jakub Jelinek <jakub@redhat.com>
|
||
|
|
||
|
PR rtl-optimization/39543
|
||
|
* fwprop.c (forward_propagate_asm): New function.
|
||
|
(forward_propagate_and_simplify): Propagate also into __asm, if it
|
||
|
doesn't increase the number of referenced registers.
|
||
|
|
||
|
* gcc.target/i386/pr39543-1.c: New test.
|
||
|
* gcc.target/i386/pr39543-2.c: New test.
|
||
|
* gcc.target/i386/pr39543-3.c: New test.
|
||
|
|
||
|
--- gcc/fwprop.c.jj 2009-03-27 07:55:33.000000000 +0100
|
||
|
+++ gcc/fwprop.c 2009-03-27 10:00:48.000000000 +0100
|
||
|
@@ -1,5 +1,5 @@
|
||
|
/* RTL-based forward propagation pass for GNU compiler.
|
||
|
- Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
|
||
|
+ Copyright (C) 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
|
||
|
Contributed by Paolo Bonzini and Steven Bosscher.
|
||
|
|
||
|
This file is part of GCC.
|
||
|
@@ -852,6 +852,73 @@ forward_propagate_subreg (df_ref use, rt
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
+/* Try to replace REG at USE with SET_SRC of DEF_SET (from DEF_INSN) in __asm; return true if changed. */
|
||
|
+
|
||
|
+static bool
|
||
|
+forward_propagate_asm (df_ref use, rtx def_insn, rtx def_set, rtx reg)
|
||
|
+{
|
||
|
+ rtx use_insn = DF_REF_INSN (use), src, use_pat, asm_operands, new_rtx, *loc;
|
||
|
+ int speed_p, i;
|
||
|
+ df_ref *use_vec;
|
||
|
+
|
||
|
+ gcc_assert ((DF_REF_FLAGS (use) & DF_REF_IN_NOTE) == 0);
|
||
|
+
|
||
|
+ src = SET_SRC (def_set);
|
||
|
+ use_pat = PATTERN (use_insn);
|
||
|
+
|
||
|
+ /* In __asm don't replace if src might need more registers than
|
||
|
+ reg, as that could increase register pressure on the __asm. */
|
||
|
+ use_vec = DF_INSN_USES (def_insn);
|
||
|
+ if (use_vec[0] && use_vec[1])
|
||
|
+ return false;
|
||
|
+
|
||
|
+ speed_p = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn));
|
||
|
+ asm_operands = NULL_RTX;
|
||
|
+ switch (GET_CODE (use_pat))
|
||
|
+ {
|
||
|
+ case ASM_OPERANDS:
|
||
|
+ asm_operands = use_pat;
|
||
|
+ break;
|
||
|
+ case SET:
|
||
|
+ loc = &SET_DEST (use_pat);
|
||
|
+ new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
|
||
|
+ if (new_rtx)
|
||
|
+ validate_unshare_change (use_insn, loc, new_rtx, true);
|
||
|
+ asm_operands = SET_SRC (use_pat);
|
||
|
+ break;
|
||
|
+ case PARALLEL:
|
||
|
+ for (i = 0; i < XVECLEN (use_pat, 0); i++)
|
||
|
+ if (GET_CODE (XVECEXP (use_pat, 0, i)) == SET)
|
||
|
+ {
|
||
|
+ loc = &SET_DEST (XVECEXP (use_pat, 0, i));
|
||
|
+ new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
|
||
|
+ if (new_rtx)
|
||
|
+ validate_unshare_change (use_insn, loc, new_rtx, true);
|
||
|
+ asm_operands = SET_SRC (XVECEXP (use_pat, 0, i));
|
||
|
+ }
|
||
|
+ else if (GET_CODE (XVECEXP (use_pat, 0, i)) == ASM_OPERANDS)
|
||
|
+ asm_operands = XVECEXP (use_pat, 0, i);
|
||
|
+ break;
|
||
|
+ default:
|
||
|
+ gcc_unreachable ();
|
||
|
+ }
|
||
|
+
|
||
|
+ gcc_assert (asm_operands && GET_CODE (asm_operands) == ASM_OPERANDS);
|
||
|
+ for (i = 0; i < ASM_OPERANDS_INPUT_LENGTH (asm_operands); i++)
|
||
|
+ {
|
||
|
+ loc = &ASM_OPERANDS_INPUT (asm_operands, i);
|
||
|
+ new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
|
||
|
+ if (new_rtx)
|
||
|
+ validate_unshare_change (use_insn, loc, new_rtx, true);
|
||
|
+ }
|
||
|
+
|
||
|
+ if (num_changes_pending () == 0 || !apply_change_group ())
|
||
|
+ return false;
|
||
|
+
|
||
|
+ num_changes++;
|
||
|
+ return true;
|
||
|
+}
|
||
|
+
|
||
|
/* Try to replace USE with SRC (defined in DEF_INSN) and simplify the
|
||
|
result. */
|
||
|
|
||
|
@@ -863,12 +930,16 @@ forward_propagate_and_simplify (df_ref u
|
||
|
rtx src, reg, new_rtx, *loc;
|
||
|
bool set_reg_equal;
|
||
|
enum machine_mode mode;
|
||
|
+ int asm_use = -1;
|
||
|
+
|
||
|
+ if (INSN_CODE (use_insn) < 0)
|
||
|
+ asm_use = asm_noperands (PATTERN (use_insn));
|
||
|
|
||
|
- if (!use_set)
|
||
|
+ if (!use_set && asm_use < 0)
|
||
|
return false;
|
||
|
|
||
|
/* Do not propagate into PC, CC0, etc. */
|
||
|
- if (GET_MODE (SET_DEST (use_set)) == VOIDmode)
|
||
|
+ if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
|
||
|
return false;
|
||
|
|
||
|
/* If def and use are subreg, check if they match. */
|
||
|
@@ -900,7 +971,7 @@ forward_propagate_and_simplify (df_ref u
|
||
|
if (MEM_P (src) && MEM_READONLY_P (src))
|
||
|
{
|
||
|
rtx x = avoid_constant_pool_reference (src);
|
||
|
- if (x != src)
|
||
|
+ if (x != src && use_set)
|
||
|
{
|
||
|
rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
|
||
|
rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (use_set);
|
||
|
@@ -911,6 +982,9 @@ forward_propagate_and_simplify (df_ref u
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
+ if (asm_use >= 0)
|
||
|
+ return forward_propagate_asm (use, def_insn, def_set, reg);
|
||
|
+
|
||
|
/* Else try simplifying. */
|
||
|
|
||
|
if (DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE)
|
||
|
--- gcc/testsuite/gcc.target/i386/pr39543-1.c.jj 2009-03-25 16:40:18.000000000 +0100
|
||
|
+++ gcc/testsuite/gcc.target/i386/pr39543-1.c 2009-03-25 16:40:50.000000000 +0100
|
||
|
@@ -0,0 +1,52 @@
|
||
|
+/* PR rtl-optimization/39543 */
|
||
|
+/* { dg-do compile } */
|
||
|
+/* { dg-options "-O3 -fomit-frame-pointer" } */
|
||
|
+
|
||
|
+float __attribute__ ((aligned (16))) s0[128];
|
||
|
+const float s1 = 0.707;
|
||
|
+float s2[8] __attribute__ ((aligned (16)));
|
||
|
+float s3[8] __attribute__ ((aligned (16)));
|
||
|
+float s4[16] __attribute__ ((aligned (16)));
|
||
|
+float s5[16] __attribute__ ((aligned (16)));
|
||
|
+
|
||
|
+void
|
||
|
+foo (int k, float *x, float *y, const float *d, const float *z)
|
||
|
+{
|
||
|
+ float *a, *b, *c, *e;
|
||
|
+
|
||
|
+ a = x + 2 * k;
|
||
|
+ b = a + 2 * k;
|
||
|
+ c = b + 2 * k;
|
||
|
+ e = y + 2 * k;
|
||
|
+ __asm__ volatile (""
|
||
|
+ : "=m" (x[0]), "=m" (b[0]), "=m" (a[0]), "=m" (c[0])
|
||
|
+ : "m" (y[0]), "m" (y[k * 2]), "m" (x[0]), "m" (a[0])
|
||
|
+ : "memory");
|
||
|
+ for (;;)
|
||
|
+ {
|
||
|
+ __asm__ volatile (""
|
||
|
+ :
|
||
|
+ : "m" (y[2]), "m" (d[2]), "m" (e[2]), "m" (z[2])
|
||
|
+ : "memory");
|
||
|
+ if (!--k)
|
||
|
+ break;
|
||
|
+ }
|
||
|
+ __asm__ volatile (""
|
||
|
+ : "=m" (x[2]), "=m" (x[10]), "=m" (x[6]), "=m" (x[14])
|
||
|
+ : "m" (y[2]), "m" (y[6]), "m" (x[2]), "m" (x[6]),
|
||
|
+ "m" (y[18]), "m" (s1)
|
||
|
+ : "memory");
|
||
|
+}
|
||
|
+
|
||
|
+void
|
||
|
+bar (float *a)
|
||
|
+{
|
||
|
+ foo (4, a, a + 16, s2, s3);
|
||
|
+ foo (8, a, a + 32, s4, s5);
|
||
|
+}
|
||
|
+
|
||
|
+void
|
||
|
+baz (void)
|
||
|
+{
|
||
|
+ bar (s0);
|
||
|
+}
|
||
|
--- gcc/testsuite/gcc.target/i386/pr39543-2.c.jj 2009-03-25 16:40:18.000000000 +0100
|
||
|
+++ gcc/testsuite/gcc.target/i386/pr39543-2.c 2009-03-25 16:40:38.000000000 +0100
|
||
|
@@ -0,0 +1,51 @@
|
||
|
+/* PR rtl-optimization/39543 */
|
||
|
+/* { dg-do compile } */
|
||
|
+/* { dg-options "-O3" } */
|
||
|
+
|
||
|
+float __attribute__ ((aligned (16))) s0[128];
|
||
|
+const float s1 = 0.707;
|
||
|
+float s2[8] __attribute__ ((aligned (16)));
|
||
|
+float s3[8] __attribute__ ((aligned (16)));
|
||
|
+float s4[16] __attribute__ ((aligned (16)));
|
||
|
+float s5[16] __attribute__ ((aligned (16)));
|
||
|
+
|
||
|
+void
|
||
|
+foo (int k, float *x, float *y, const float *d, const float *z)
|
||
|
+{
|
||
|
+ float *a, *b, *c, *e;
|
||
|
+
|
||
|
+ a = x + 2 * k;
|
||
|
+ b = a + 2 * k;
|
||
|
+ c = b + 2 * k;
|
||
|
+ e = y + 2 * k;
|
||
|
+ __asm__ volatile (""
|
||
|
+ : "=m" (x[0]), "=m" (b[0]), "=m" (a[0]), "=m" (c[0])
|
||
|
+ : "m" (y[0]), "m" (y[k * 2]), "m" (x[0]), "m" (a[0])
|
||
|
+ : "memory");
|
||
|
+ for (;;)
|
||
|
+ {
|
||
|
+ __asm__ volatile (""
|
||
|
+ :
|
||
|
+ : "m" (y[2]), "m" (d[2]), "m" (e[2]), "m" (z[2])
|
||
|
+ : "memory");
|
||
|
+ if (!--k)
|
||
|
+ break;
|
||
|
+ }
|
||
|
+ __asm__ volatile (""
|
||
|
+ : "=m" (x[2]), "=m" (x[10]), "=m" (x[6]), "=m" (x[14])
|
||
|
+ : "m" (y[2]), "m" (y[6]), "m" (x[2]), "m" (x[6]), "m" (s1)
|
||
|
+ : "memory");
|
||
|
+}
|
||
|
+
|
||
|
+void
|
||
|
+bar (float *a)
|
||
|
+{
|
||
|
+ foo (4, a, a + 16, s2, s3);
|
||
|
+ foo (8, a, a + 32, s4, s5);
|
||
|
+}
|
||
|
+
|
||
|
+void
|
||
|
+baz (void)
|
||
|
+{
|
||
|
+ bar (s0);
|
||
|
+}
|
||
|
--- gcc/testsuite/gcc.target/i386/pr39543-3.c.jj 2009-03-25 16:41:29.000000000 +0100
|
||
|
+++ gcc/testsuite/gcc.target/i386/pr39543-3.c 2009-03-25 16:41:19.000000000 +0100
|
||
|
@@ -0,0 +1,42 @@
|
||
|
+/* PR rtl-optimization/39543 */
|
||
|
+/* { dg-do compile } */
|
||
|
+/* { dg-options "-O2" } */
|
||
|
+
|
||
|
+int s[128];
|
||
|
+
|
||
|
+void
|
||
|
+f1 (void)
|
||
|
+{
|
||
|
+ int i;
|
||
|
+ asm volatile ("# %0 %1 %2 %3 %4 %5 %6 %7 %8 %9 %10 %11 %12 %13 %14 %15 %16 %17"
|
||
|
+ : "=r" (i)
|
||
|
+ : "m" (s[0]), "m" (s[2]), "m" (s[4]), "m" (s[6]), "m" (s[8]),
|
||
|
+ "m" (s[10]), "m" (s[12]), "m" (s[14]), "m" (s[16]), "m" (s[18]),
|
||
|
+ "m" (s[20]), "m" (s[22]), "m" (s[24]), "m" (s[26]), "m" (s[28]),
|
||
|
+ "m" (s[30]), "m" (s[32]));
|
||
|
+ asm volatile ("# %0 %1 %2 %3 %4 %5 %6 %7 %8 %9 %10 %11 %12 %13 %14 %15 %16 %17"
|
||
|
+ : "=r" (i)
|
||
|
+ : "m" (s[0]), "m" (s[2]), "m" (s[4]), "m" (s[6]), "m" (s[8]),
|
||
|
+ "m" (s[10]), "m" (s[12]), "m" (s[14]), "m" (s[16]), "m" (s[18]),
|
||
|
+ "m" (s[20]), "m" (s[22]), "m" (s[24]), "m" (s[26]), "m" (s[28]),
|
||
|
+ "m" (s[30]), "m" (s[32]));
|
||
|
+}
|
||
|
+
|
||
|
+void
|
||
|
+f2 (int *q)
|
||
|
+{
|
||
|
+ int i;
|
||
|
+ int *p = q + 32;
|
||
|
+ asm volatile ("# %0 %1 %2 %3 %4 %5 %6 %7 %8 %9 %10 %11 %12 %13 %14 %15 %16 %17"
|
||
|
+ : "=r" (i)
|
||
|
+ : "m" (p[0]), "m" (p[2]), "m" (p[4]), "m" (p[6]), "m" (p[8]),
|
||
|
+ "m" (p[10]), "m" (p[12]), "m" (p[14]), "m" (p[16]), "m" (p[18]),
|
||
|
+ "m" (p[20]), "m" (p[22]), "m" (p[24]), "m" (p[26]), "m" (p[28]),
|
||
|
+ "m" (p[30]), "m" (p[32]));
|
||
|
+ asm volatile ("# %0 %1 %2 %3 %4 %5 %6 %7 %8 %9 %10 %11 %12 %13 %14 %15 %16 %17"
|
||
|
+ : "=r" (i)
|
||
|
+ : "m" (p[0]), "m" (p[2]), "m" (p[4]), "m" (p[6]), "m" (p[8]),
|
||
|
+ "m" (p[10]), "m" (p[12]), "m" (p[14]), "m" (p[16]), "m" (p[18]),
|
||
|
+ "m" (p[20]), "m" (p[22]), "m" (p[24]), "m" (p[26]), "m" (p[28]),
|
||
|
+ "m" (p[30]), "m" (p[32]));
|
||
|
+}
|