rs6000.c (rs6000_option_override_internal): Change rs6000_sched_costly_dep default to true_store_to_load_dep_costly.

* config/rs6000/rs6000.c (rs6000_option_override_internal): Change
	rs6000_sched_costly_dep default to true_store_to_load_dep_costly.
	(adjacent_mem_locations): Move some code to and call...
	(get_memref_parts): ...new function.
	(mem_locations_overlap): New function.
	(rs6000_adjust_priority): Adjust calls to is_load_insn/is_store_insn.
	(is_mem_ref): Rename to...
	(find_mem_ref): ...this. Return MEM rtx.
	(get_store_dest): Remove function.
	(is_load_insn1, is_load_insn, is_store_insn1, is_store_insn): Add
	new parameter and adjust calls.
	(rs6000_is_costly_dependence): Update calls for extra arg. Make sure
	mem refs overlap for true_store_to_load_dep_costly.
	(rs6000_sched_reorder2): Update calls for extra arg. Adjust args
	passed to adjacent_mem_locations.
	(is_costly_group): Walk resolved dependency list.
	(force_new_group): Emit group ending nop for Power6/Power7.
	* config/rs6000/rs6000.md (UNSPEC_GRP_END_NOP): New enum value.
	(group_ending_nop): New define_insn.

	* gcc.target/powerpc/lhs-1.c: New.
	* gcc.target/powerpc/lhs-2.c: New.
	* gcc.target/powerpc/lhs-3.c: New.

From-SVN: r187847
This commit is contained in:
Pat Haugen 2012-05-24 20:28:31 +00:00 committed by Pat Haugen
parent 44813fe043
commit 6fac31b418
7 changed files with 226 additions and 101 deletions

View File

@ -1,3 +1,25 @@
2012-05-24 Pat Haugen <pthaugen@us.ibm.com>
* config/rs6000/rs6000.c (rs6000_option_override_internal): Change
rs6000_sched_costly_dep default to true_store_to_load_dep_costly.
(adjacent_mem_locations): Move some code to and call...
(get_memref_parts): ...new function.
(mem_locations_overlap): New function.
(rs6000_adjust_priority): Adjust calls to is_load_insn/is_store_insn.
(is_mem_ref): Rename to...
(find_mem_ref): ...this. Return MEM rtx.
(get_store_dest): Remove function.
(is_load_insn1, is_load_insn, is_store_insn1, is_store_insn): Add
new parameter and adjust calls.
(rs6000_is_costly_dependence): Update calls for extra arg. Make sure
mem refs overlap for true_store_to_load_dep_costly.
(rs6000_sched_reorder2): Update calls for extra arg. Adjust args
passed to adjacent_mem_locations.
(is_costly_group): Walk resolved dependency list.
(force_new_group): Emit group ending nop for Power6/Power7.
* config/rs6000/rs6000.md (UNSPEC_GRP_END_NOP): New enum value.
(group_ending_nop): New define_insn.
2012-05-24 Dodji Seketeli <dodji@redhat.com>
Make unwound macro expansion trace less redundant

View File

@ -936,9 +936,8 @@ static int rs6000_debug_adjust_cost (rtx, rtx, rtx, int);
static bool is_microcoded_insn (rtx);
static bool is_nonpipeline_insn (rtx);
static bool is_cracked_insn (rtx);
static bool is_load_insn (rtx);
static rtx get_store_dest (rtx pat);
static bool is_store_insn (rtx);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx,rtx);
static bool insn_terminates_group_p (rtx , enum group_termination);
static bool insn_must_be_first_in_group (rtx);
@ -2831,7 +2830,7 @@ rs6000_option_override_internal (bool global_init_p)
/* Handle -msched-costly-dep option. */
rs6000_sched_costly_dep
= (rs6000_sched_groups ? store_to_load_dep_costly : no_dep_costly);
= (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
if (rs6000_sched_costly_dep_str)
{
@ -22786,49 +22785,78 @@ set_to_load_agen (rtx out_insn, rtx in_insn)
return false;
}
/* Try to determine base/offset/size parts of the given MEM.
   Return true if successful, false if all the values couldn't
   be determined.

   MEM is the memory reference to decompose.  On success *BASE is set
   to the base register rtx, *OFFSET to the constant displacement (0 for
   a plain register address) and *SIZE to MEM_SIZE (MEM).  On failure
   *BASE and *OFFSET are left untouched; *SIZE may already have been set.

   This function only looks for REG or REG+CONST address forms.
   REG+REG address form will return false, as will any PLUS whose first
   operand is not a REG: callers apply REGNO to *BASE, so a non-register
   base must never be reported as a success.  */
static bool
get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
		  HOST_WIDE_INT *size)
{
  rtx addr_rtx;

  /* Without a known access size nothing useful can be concluded.  */
  if (!MEM_SIZE_KNOWN_P (mem))
    return false;
  *size = MEM_SIZE (mem);

  /* For an update form, analyze the new address expression held in
     operand 1 of the PRE_MODIFY rather than the PRE_MODIFY itself.  */
  addr_rtx = XEXP (mem, 0);
  if (GET_CODE (addr_rtx) == PRE_MODIFY)
    addr_rtx = XEXP (addr_rtx, 1);

  if (GET_CODE (addr_rtx) == REG)
    {
      *base = addr_rtx;
      *offset = 0;
      return true;
    }

  if (GET_CODE (addr_rtx) == PLUS
      && GET_CODE (XEXP (addr_rtx, 0)) == REG
      && CONST_INT_P (XEXP (addr_rtx, 1)))
    {
      *base = XEXP (addr_rtx, 0);
      *offset = INTVAL (XEXP (addr_rtx, 1));
      return true;
    }

  return false;
}
/* The function returns true if the target storage location of
out_insn is adjacent to the target storage location of in_insn */
mem1 is adjacent to the target storage location of mem2 */
/* Return 1 if memory locations are adjacent. */
static bool
adjacent_mem_locations (rtx insn1, rtx insn2)
adjacent_mem_locations (rtx mem1, rtx mem2)
{
rtx reg1, reg2;
HOST_WIDE_INT off1, size1, off2, size2;
rtx a = get_store_dest (PATTERN (insn1));
rtx b = get_store_dest (PATTERN (insn2));
if (get_memref_parts (mem1, &reg1, &off1, &size1)
&& get_memref_parts (mem2, &reg2, &off2, &size2))
return ((REGNO (reg1) == REGNO (reg2))
&& ((off1 + size1 == off2)
|| (off2 + size2 == off1)));
if ((GET_CODE (XEXP (a, 0)) == REG
|| (GET_CODE (XEXP (a, 0)) == PLUS
&& GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
&& (GET_CODE (XEXP (b, 0)) == REG
|| (GET_CODE (XEXP (b, 0)) == PLUS
&& GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
{
HOST_WIDE_INT val0 = 0, val1 = 0, val_diff;
rtx reg0, reg1;
return false;
}
if (GET_CODE (XEXP (a, 0)) == PLUS)
{
reg0 = XEXP (XEXP (a, 0), 0);
val0 = INTVAL (XEXP (XEXP (a, 0), 1));
}
else
reg0 = XEXP (a, 0);
/* This function returns true if it can be determined that the two MEM
locations overlap by at least 1 byte based on base reg/offset/size. */
if (GET_CODE (XEXP (b, 0)) == PLUS)
{
reg1 = XEXP (XEXP (b, 0), 0);
val1 = INTVAL (XEXP (XEXP (b, 0), 1));
}
else
reg1 = XEXP (b, 0);
static bool
mem_locations_overlap (rtx mem1, rtx mem2)
{
rtx reg1, reg2;
HOST_WIDE_INT off1, size1, off2, size2;
val_diff = val1 - val0;
return ((REGNO (reg0) == REGNO (reg1))
&& ((MEM_SIZE_KNOWN_P (a) && val_diff == MEM_SIZE (a))
|| (MEM_SIZE_KNOWN_P (b) && val_diff == -MEM_SIZE (b))));
}
if (get_memref_parts (mem1, &reg1, &off1, &size1)
&& get_memref_parts (mem2, &reg2, &off2, &size2))
return ((REGNO (reg1) == REGNO (reg2))
&& (((off1 <= off2) && (off1 + size1 > off2))
|| ((off2 <= off1) && (off2 + size2 > off1))));
return false;
}
@ -22842,6 +22870,7 @@ adjacent_mem_locations (rtx insn1, rtx insn2)
static int
rs6000_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority)
{
rtx load_mem, str_mem;
/* On machines (like the 750) which have asymmetric integer units,
where one integer unit can do multiply and divides and the other
can't, reduce the priority of multiply/divide so it is scheduled
@ -22893,8 +22922,8 @@ rs6000_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority)
}
if (rs6000_cpu == PROCESSOR_POWER6
&& ((load_store_pendulum == -2 && is_load_insn (insn))
|| (load_store_pendulum == 2 && is_store_insn (insn))))
&& ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
|| (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
/* Attach highest priority to insn if the scheduler has just issued two
stores and this instruction is a load, or two loads and this instruction
is a store. Power6 wants loads and stores scheduled alternately
@ -23019,54 +23048,63 @@ rs6000_use_sched_lookahead_guard (rtx insn)
return 1;
}
/* Determine is PAT refers to memory. */
/* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
and return true. */
static bool
is_mem_ref (rtx pat)
find_mem_ref (rtx pat, rtx *mem_ref)
{
const char * fmt;
int i, j;
bool ret = false;
/* stack_tie does not produce any real memory traffic. */
if (tie_operand (pat, VOIDmode))
return false;
if (GET_CODE (pat) == MEM)
return true;
{
*mem_ref = pat;
return true;
}
/* Recursively process the pattern. */
fmt = GET_RTX_FORMAT (GET_CODE (pat));
for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0 && !ret; i--)
for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
{
if (fmt[i] == 'e')
ret |= is_mem_ref (XEXP (pat, i));
{
if (find_mem_ref (XEXP (pat, i), mem_ref))
return true;
}
else if (fmt[i] == 'E')
for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
ret |= is_mem_ref (XVECEXP (pat, i, j));
{
if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
return true;
}
}
return ret;
return false;
}
/* Determine if PAT is a PATTERN of a load insn. */
static bool
is_load_insn1 (rtx pat)
is_load_insn1 (rtx pat, rtx *load_mem)
{
if (!pat || pat == NULL_RTX)
return false;
if (GET_CODE (pat) == SET)
return is_mem_ref (SET_SRC (pat));
return find_mem_ref (SET_SRC (pat), load_mem);
if (GET_CODE (pat) == PARALLEL)
{
int i;
for (i = 0; i < XVECLEN (pat, 0); i++)
if (is_load_insn1 (XVECEXP (pat, 0, i)))
if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
return true;
}
@ -23076,7 +23114,7 @@ is_load_insn1 (rtx pat)
/* Determine if INSN loads from memory. */
static bool
is_load_insn (rtx insn)
is_load_insn (rtx insn, rtx *load_mem)
{
if (!insn || !INSN_P (insn))
return false;
@ -23084,26 +23122,26 @@ is_load_insn (rtx insn)
if (GET_CODE (insn) == CALL_INSN)
return false;
return is_load_insn1 (PATTERN (insn));
return is_load_insn1 (PATTERN (insn), load_mem);
}
/* Determine if PAT is a PATTERN of a store insn. */
static bool
is_store_insn1 (rtx pat)
is_store_insn1 (rtx pat, rtx *str_mem)
{
if (!pat || pat == NULL_RTX)
return false;
if (GET_CODE (pat) == SET)
return is_mem_ref (SET_DEST (pat));
return find_mem_ref (SET_DEST (pat), str_mem);
if (GET_CODE (pat) == PARALLEL)
{
int i;
for (i = 0; i < XVECLEN (pat, 0); i++)
if (is_store_insn1 (XVECEXP (pat, 0, i)))
if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
return true;
}
@ -23113,38 +23151,12 @@ is_store_insn1 (rtx pat)
/* Determine if INSN stores to memory. */
static bool
is_store_insn (rtx insn)
is_store_insn (rtx insn, rtx *str_mem)
{
if (!insn || !INSN_P (insn))
return false;
return is_store_insn1 (PATTERN (insn));
}
/* Return the dest of a store insn. */
static rtx
get_store_dest (rtx pat)
{
gcc_assert (is_store_insn1 (pat));
if (GET_CODE (pat) == SET)
return SET_DEST (pat);
else if (GET_CODE (pat) == PARALLEL)
{
int i;
for (i = 0; i < XVECLEN (pat, 0); i++)
{
rtx inner_pat = XVECEXP (pat, 0, i);
if (GET_CODE (inner_pat) == SET
&& is_mem_ref (SET_DEST (inner_pat)))
return inner_pat;
}
}
/* We shouldn't get here, because we should have either a simple
store insn or a store with update which are covered above. */
gcc_unreachable();
return is_store_insn1 (PATTERN (insn), str_mem);
}
/* Returns whether the dependence between INSN and NEXT is considered
@ -23155,6 +23167,7 @@ rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
{
rtx insn;
rtx next;
rtx load_mem, str_mem;
/* If the flag is not enabled - no dependence is considered costly;
allow all dependent insns in the same group.
@ -23172,15 +23185,16 @@ rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
next = DEP_CON (dep);
if (rs6000_sched_costly_dep == store_to_load_dep_costly
&& is_load_insn (next)
&& is_store_insn (insn))
&& is_load_insn (next, &load_mem)
&& is_store_insn (insn, &str_mem))
/* Prevent load after store in the same group. */
return true;
if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
&& is_load_insn (next)
&& is_store_insn (insn)
&& DEP_TYPE (dep) == REG_DEP_TRUE)
&& is_load_insn (next, &load_mem)
&& is_store_insn (insn, &str_mem)
&& DEP_TYPE (dep) == REG_DEP_TRUE
&& mem_locations_overlap(str_mem, load_mem))
/* Prevent load after store in the same group if it is a true
dependence. */
return true;
@ -23307,12 +23321,12 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready,
{
int pos;
int i;
rtx tmp;
rtx tmp, load_mem, str_mem;
if (is_store_insn (last_scheduled_insn))
if (is_store_insn (last_scheduled_insn, &str_mem))
/* Issuing a store, swing the load_store_pendulum to the left */
load_store_pendulum--;
else if (is_load_insn (last_scheduled_insn))
else if (is_load_insn (last_scheduled_insn, &load_mem))
/* Issuing a load, swing the load_store_pendulum to the right */
load_store_pendulum++;
else
@ -23331,7 +23345,7 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready,
while (pos >= 0)
{
if (is_load_insn (ready[pos]))
if (is_load_insn (ready[pos], &load_mem))
{
/* Found a load. Move it to the head of the ready list,
and adjust its priority so that it is more likely to
@ -23357,7 +23371,7 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready,
while (pos >= 0)
{
if (is_load_insn (ready[pos])
if (is_load_insn (ready[pos], &load_mem)
&& !sel_sched_p ()
&& INSN_PRIORITY_KNOWN (ready[pos]))
{
@ -23384,15 +23398,16 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready,
while (pos >= 0)
{
if (is_store_insn (ready[pos]))
if (is_store_insn (ready[pos], &str_mem))
{
rtx str_mem2;
/* Maintain the index of the first store found on the
list */
if (first_store_pos == -1)
first_store_pos = pos;
if (is_store_insn (last_scheduled_insn)
&& adjacent_mem_locations (last_scheduled_insn,ready[pos]))
if (is_store_insn (last_scheduled_insn, &str_mem2)
&& adjacent_mem_locations (str_mem, str_mem2))
{
/* Found an adjacent store. Move it to the head of the
ready list, and adjust its priority so that it is
@ -23436,7 +23451,7 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready,
while (pos >= 0)
{
if (is_store_insn (ready[pos])
if (is_store_insn (ready[pos], &str_mem)
&& !sel_sched_p ()
&& INSN_PRIORITY_KNOWN (ready[pos]))
{
@ -23720,7 +23735,7 @@ is_costly_group (rtx *group_insns, rtx next_insn)
if (!insn)
continue;
FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
{
rtx next = DEP_CON (dep);
@ -23784,12 +23799,20 @@ force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
if (can_issue_more && !is_branch_slot_insn (next_insn))
can_issue_more--;
while (can_issue_more > 0)
/* Power6 and Power7 have special group ending nop. */
if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7)
{
nop = gen_nop ();
nop = gen_group_ending_nop ();
emit_insn_before (nop, next_insn);
can_issue_more--;
can_issue_more = 0;
}
else
while (can_issue_more > 0)
{
nop = gen_nop ();
emit_insn_before (nop, next_insn);
can_issue_more--;
}
*group_end = true;
return 0;

View File

@ -126,6 +126,7 @@
UNSPEC_LFIWAX
UNSPEC_LFIWZX
UNSPEC_FCTIWUZ
UNSPEC_GRP_END_NOP
])
;;
@ -15594,6 +15595,16 @@
[(const_int 0)]
""
"{cror 0,0,0|nop}")
;; Group ending nop.  The pattern is an unspec (UNSPEC_GRP_END_NOP) of a
;; dummy constant.  The output code picks the instruction text from
;; rs6000_cpu_attr: "ori 1,1,0" for CPU_POWER6, "ori 2,2,0" for every
;; other value.
(define_insn "group_ending_nop"
[(unspec [(const_int 0)] UNSPEC_GRP_END_NOP)]
""
"*
{
if (rs6000_cpu_attr == CPU_POWER6)
return \"ori 1,1,0\";
return \"ori 2,2,0\";
}")
;; Define the subtract-one-and-jump insns, starting with the template
;; so loop.c knows what to generate.

View File

@ -1,3 +1,8 @@
2012-05-24 Pat Haugen <pthaugen@us.ibm.com>
* gcc.target/powerpc/lhs-1.c: New.
* gcc.target/powerpc/lhs-2.c: New.
* gcc.target/powerpc/lhs-3.c: New.
2012-05-24 Dodji Seketeli <dodji@redhat.com>
Make unwound macro expansion trace less redundant

View File

@ -0,0 +1,22 @@
/* { dg-do compile { target { powerpc*-*-* } } } */
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
/* { dg-options "-O2 -mcpu=power5" } */
/* { dg-final { scan-assembler-times "nop" 3 } } */
/* Test generation of nops in load hit store situation. */
/* Overlay of a double with a pair of unsigned int words: the anonymous
   struct shares its storage with VAL, so a value written as a double
   can be read back as integer words.  */
typedef union {
double val;
struct {
unsigned int w1;
unsigned int w2;
};
} words;
/* Write D through the double member, then read the second word of the
   same union.  The load depends on memory just written by the store,
   which is the load-hit-store situation this test exercises; keep the
   statements as they are, since the test scans the generated assembly.  */
unsigned int f (double d)
{
words u;
u.val = d;
return u.w2;
}

View File

@ -0,0 +1,21 @@
/* { dg-do compile { target { powerpc*-*-* } } } */
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
/* { dg-options "-O2 -mcpu=power6 -msched-groups" } */
/* { dg-final { scan-assembler "ori 1,1,0" } } */
/* Test generation of group ending nop in load hit store situation. */
/* Overlay of a double with a pair of unsigned int words: the anonymous
   struct shares its storage with VAL, so a value written as a double
   can be read back as integer words.  */
typedef union {
double val;
struct {
unsigned int w1;
unsigned int w2;
};
} words;
/* Write D through the double member, then read the second word of the
   same union.  The load depends on memory just written by the store,
   which is the load-hit-store situation this test exercises; keep the
   statements as they are, since the test scans the generated assembly
   for the group ending nop.  */
unsigned int f (double d)
{
words u;
u.val = d;
return u.w2;
}

View File

@ -0,0 +1,21 @@
/* { dg-do compile { target { powerpc*-*-* } } } */
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
/* { dg-options "-O2 -mcpu=power7" } */
/* { dg-final { scan-assembler "ori 2,2,0" } } */
/* Test generation of group ending nop in load hit store situation. */
/* Overlay of a double with a pair of unsigned int words: the anonymous
   struct shares its storage with VAL, so a value written as a double
   can be read back as integer words.  */
typedef union {
double val;
struct {
unsigned int w1;
unsigned int w2;
};
} words;
/* Write D through the double member, then read the second word of the
   same union.  The load depends on memory just written by the store,
   which is the load-hit-store situation this test exercises; keep the
   statements as they are, since the test scans the generated assembly
   for the group ending nop.  */
unsigned int f (double d)
{
words u;
u.val = d;
return u.w2;
}