mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-02-24 07:39:13 +08:00
l_fma_float_?.c: Update.
* gcc.target/i386/l_fma_float_?.c: Update. * gcc.target/i386/l_fma_double_?.c: Update. * tree-vect-loop-manip.c (vect_do_peeling_for_loop_bound, vect_do_peeling_for_alignment): Fix loop bound computation. * tree-vect-loop.c (vect_transform_loop): Maintain loop bounds. From-SVN: r193241
This commit is contained in:
parent
c8fef899e8
commit
22458c5af7
@ -1,3 +1,9 @@
|
||||
2012-11-06 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* tree-vect-loop-manip.c (vect_do_peeling_for_loop_bound,
|
||||
vect_do_peeling_for_alignment): Fix loop bound computation.
|
||||
* tree-vect-loop.c (vect_transform_loop): Maintain loop bounds.
|
||||
|
||||
2012-11-06 Oleg Endo <olegendo@gcc.gnu.org>
|
||||
|
||||
PR target/54089
|
||||
|
@ -1,3 +1,8 @@
|
||||
2012-11-06 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* gcc.target/i386/l_fma_float_?.c: Update.
|
||||
* gcc.target/i386/l_fma_double_?.c: Update.
|
||||
|
||||
2012-11-06 Oleg Endo <olegendo@gcc.gnu.org>
|
||||
|
||||
PR target/54089
|
||||
|
@ -16,11 +16,11 @@
|
||||
/* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub231pd" 4 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd132sd" 20 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd213sd" 20 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub132sd" 20 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub213sd" 20 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132sd" 20 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd213sd" 20 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132sd" 20 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub213sd" 20 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd132sd" 16 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd213sd" 16 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub132sd" 16 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub213sd" 16 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132sd" 16 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd213sd" 16 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132sd" 16 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub213sd" 16 } } */
|
||||
|
@ -12,7 +12,7 @@
|
||||
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd132sd" 40 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub132sd" 40 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132sd" 40 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132sd" 40 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */
|
||||
|
@ -16,11 +16,11 @@
|
||||
/* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub231pd" 4 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd132sd" 20 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd213sd" 20 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub132sd" 20 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub213sd" 20 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132sd" 20 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd213sd" 20 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132sd" 20 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub213sd" 20 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd132sd" 16 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd213sd" 16 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub132sd" 16 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub213sd" 16 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132sd" 16 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd213sd" 16 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132sd" 16 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub213sd" 16 } } */
|
||||
|
@ -12,7 +12,7 @@
|
||||
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd132sd" 40 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub132sd" 40 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132sd" 40 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132sd" 40 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */
|
||||
|
@ -12,7 +12,7 @@
|
||||
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd132sd" 40 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub132sd" 40 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132sd" 40 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132sd" 40 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */
|
||||
|
@ -12,7 +12,7 @@
|
||||
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd132sd" 40 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub132sd" 40 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132sd" 40 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132sd" 40 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */
|
||||
|
@ -16,11 +16,11 @@
|
||||
/* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub231ps" 4 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd132ss" 36 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd213ss" 36 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub132ss" 36 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub213ss" 36 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132ss" 36 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd213ss" 36 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132ss" 36 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub213ss" 36 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd132ss" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd213ss" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub132ss" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub213ss" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132ss" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd213ss" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132ss" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub213ss" 32 } } */
|
||||
|
@ -12,7 +12,7 @@
|
||||
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd132ss" 72 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub132ss" 72 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132ss" 72 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132ss" 72 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */
|
||||
|
@ -16,11 +16,11 @@
|
||||
/* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub231ps" 4 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd132ss" 36 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd213ss" 36 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub132ss" 36 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub213ss" 36 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132ss" 36 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd213ss" 36 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132ss" 36 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub213ss" 36 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd132ss" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd213ss" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub132ss" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub213ss" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132ss" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd213ss" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132ss" 32 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub213ss" 32 } } */
|
||||
|
@ -12,7 +12,7 @@
|
||||
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd132ss" 72 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub132ss" 72 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132ss" 72 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132ss" 72 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */
|
||||
|
@ -12,7 +12,7 @@
|
||||
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd132ss" 72 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub132ss" 72 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132ss" 72 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132ss" 72 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */
|
||||
|
@ -12,7 +12,7 @@
|
||||
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd132ss" 72 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub132ss" 72 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132ss" 72 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132ss" 72 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */
|
||||
/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */
|
||||
/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */
|
||||
|
@ -1954,9 +1954,16 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
|
||||
by ratio_mult_vf_name steps. */
|
||||
vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
|
||||
|
||||
max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
|
||||
/* For vectorization factor N, we need to copy the last N-1 values in the epilogue
|
||||
and this means N-2 loopback edge executions.
|
||||
|
||||
PEELING_FOR_GAPS works by subtracting the last iteration and thus the epilogue
|
||||
will execute at least LOOP_VINFO_VECT_FACTOR times. */
|
||||
max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
|
||||
? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
|
||||
: LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
|
||||
if (check_profitability)
|
||||
max_iter = MAX (max_iter, (int) th);
|
||||
max_iter = MAX (max_iter, (int) th - 1);
|
||||
record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
|
||||
dump_printf (MSG_OPTIMIZED_LOCATIONS,
|
||||
"Setting upper bound of nb iterations for epilogue "
|
||||
@ -2186,9 +2193,11 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo,
|
||||
#ifdef ENABLE_CHECKING
|
||||
slpeel_verify_cfg_after_peeling (new_loop, loop);
|
||||
#endif
|
||||
max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
|
||||
/* For vectorization factor N, we need to copy at most N-1 values
|
||||
for alignment and this means N-2 loopback edge executions. */
|
||||
max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
|
||||
if (check_profitability)
|
||||
max_iter = MAX (max_iter, (int) th);
|
||||
max_iter = MAX (max_iter, (int) th - 1);
|
||||
record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
|
||||
dump_printf (MSG_OPTIMIZED_LOCATIONS,
|
||||
"Setting upper bound of nb iterations for prologue "
|
||||
|
@ -5448,10 +5448,16 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|
||||
bool transform_pattern_stmt = false;
|
||||
bool check_profitability = false;
|
||||
int th;
|
||||
/* Record number of iterations before we started tampering with the profile. */
|
||||
gcov_type expected_iterations = expected_loop_iterations_unbounded (loop);
|
||||
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===");
|
||||
|
||||
/* If the profile is imprecise, we have a chance to fix it up. */
|
||||
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
|
||||
expected_iterations = LOOP_VINFO_INT_NITERS (loop_vinfo);
|
||||
|
||||
/* Use the more conservative vectorization threshold. If the number
|
||||
of iterations is constant assume the cost check has been performed
|
||||
by our caller. If the threshold makes all loops profitable that
|
||||
@ -5735,6 +5741,25 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|
||||
|
||||
slpeel_make_loop_iterate_ntimes (loop, ratio);
|
||||
|
||||
/* Reduce loop iterations by the vectorization factor. */
|
||||
scale_loop_profile (loop, RDIV (REG_BR_PROB_BASE , vectorization_factor),
|
||||
expected_iterations / vectorization_factor);
|
||||
loop->nb_iterations_upper_bound
|
||||
= loop->nb_iterations_upper_bound.udiv (double_int::from_uhwi (vectorization_factor),
|
||||
FLOOR_DIV_EXPR);
|
||||
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
|
||||
&& loop->nb_iterations_upper_bound != double_int_zero)
|
||||
loop->nb_iterations_upper_bound = loop->nb_iterations_upper_bound - double_int_one;
|
||||
if (loop->any_estimate)
|
||||
{
|
||||
loop->nb_iterations_estimate
|
||||
= loop->nb_iterations_estimate.udiv (double_int::from_uhwi (vectorization_factor),
|
||||
FLOOR_DIV_EXPR);
|
||||
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
|
||||
&& loop->nb_iterations_estimate != double_int_zero)
|
||||
loop->nb_iterations_estimate = loop->nb_iterations_estimate - double_int_one;
|
||||
}
|
||||
|
||||
/* The memory tags and pointers in vectorized statements need to
|
||||
have their SSA forms updated. FIXME, why can't this be delayed
|
||||
until all the loops have been transformed? */
|
||||
|
Loading…
Reference in New Issue
Block a user