mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-02-26 01:25:55 +08:00
re PR tree-optimization/23109 (compiler generates wrong code leading to spurious division by zero with -funsafe-math-optimizations (instead of -ftrapping-math))
gcc: 2006-01-11 Paolo Bonzini <bonzini@gnu.org> PR tree-optimization/23109 PR tree-optimization/23948 PR tree-optimization/24123 * Makefile.in (tree-ssa-math-opts.o): Adjust dependencies. * tree-cfg.c (single_noncomplex_succ): New. * tree-flow.h (single_noncomplex_succ): Declare it. * tree-ssa-math-opts.c (enum place_reciprocal): Remove. * tree-ssa-math-opts.c (enum place_reciprocal): Remove. (struct occurrence, occ_head, occ_pool, is_divide_by, compute_merit, insert_bb, register_division_in, insert_reciprocals, replace_reciprocal, free_bb): New. (execute_cse_reciprocals_1): Rewritten. (execute_cse_reciprocals): Adjust calls to execute_cse_reciprocals_1. Do not commit any edge insertion. Always compute dominators and create the allocation pool. * target-def.h (TARGET_MIN_DIVISIONS_FOR_RECIP_MUL): New. * target.h (struct gcc_target): Add min_divistions_for_recip_mul. * targhooks.c (default_min_divistions_for_recip_mul): New. * targhooks.h (default_min_divistions_for_recip_mul): New prototype. * passes.c (init_optimization_passes): Run recip after tree loop optimizations. * doc/tm.texi (Misc): Document TARGET_MIN_DIVISIONS_FOR_RECIP_MUL. gcc/testsuite: 2006-01-11 Paolo Bonzini <bonzini@gnu.org> PR tree-optimization/23109 PR tree-optimization/23948 PR tree-optimization/24123 * gcc.dg/tree-ssa/recip-3.c, gcc.dg/tree-ssa/recip-4.c, gcc.dg/tree-ssa/recip-5.c, gcc.dg/tree-ssa/recip-6.c, gcc.dg/tree-ssa/recip-7.c, gcc.dg/tree-ssa/pr23109.c, g++.dg/tree-ssa/pr23948.C: New testcases. * gcc.dg/tree-ssa/recip-2.c, gcc.dg/tree-ssa/pr23234.c: Provide three divisions in order to do the optimization. From-SVN: r109578
This commit is contained in:
parent
4d779342f0
commit
bc23502b7f
@ -1,3 +1,29 @@
|
||||
2006-01-11 Paolo Bonzini <bonzini@gnu.org>
|
||||
|
||||
PR tree-optimization/23109
|
||||
PR tree-optimization/23948
|
||||
PR tree-optimization/24123
|
||||
|
||||
* Makefile.in (tree-ssa-math-opts.o): Adjust dependencies.
|
||||
* tree-cfg.c (single_noncomplex_succ): New.
|
||||
* tree-flow.h (single_noncomplex_succ): Declare it.
|
||||
* tree-ssa-math-opts.c (enum place_reciprocal): Remove.
|
||||
* tree-ssa-math-opts.c (enum place_reciprocal): Remove.
|
||||
(struct occurrence, occ_head, occ_pool, is_division_by, compute_merit,
|
||||
insert_bb, register_division_in, insert_reciprocals,
|
||||
replace_reciprocal, free_bb): New.
|
||||
(execute_cse_reciprocals_1): Rewritten.
|
||||
(execute_cse_reciprocals): Adjust calls to execute_cse_reciprocals_1.
|
||||
Do not commit any edge insertion. Always compute dominators and
|
||||
create the allocation pool.
|
||||
* target-def.h (TARGET_MIN_DIVISIONS_FOR_RECIP_MUL): New.
|
||||
* target.h (struct gcc_target): Add min_divisions_for_recip_mul.
|
||||
* targhooks.c (default_min_divisions_for_recip_mul): New.
|
||||
* targhooks.h (default_min_divisions_for_recip_mul): New prototype.
|
||||
* passes.c (init_optimization_passes): Run recip after tree loop
|
||||
optimizations.
|
||||
* doc/tm.texi (Misc): Document TARGET_MIN_DIVISIONS_FOR_RECIP_MUL.
|
||||
|
||||
2005-01-11 Danny Berlin <dberlin@dberlin.org>
|
||||
Kenneth Zadeck <zadeck@naturalbridge.com>
|
||||
|
||||
@ -151,31 +177,31 @@
|
||||
2006-01-10 John David Anglin <dave.anglin@nrc-cnrc.gc.ca>
|
||||
|
||||
PR target/20754
|
||||
* pa.md: Create separate 32 and 64-bit move patterns for SI, DI, SF
|
||||
and DF modes. Add alternatives to copy between general and floating
|
||||
point registers to the 32-bit patterns.
|
||||
* pa-64.h (SECONDARY_MEMORY_NEEDED_RTX): Delete undefine.
|
||||
* pa.h (SECONDARY_MEMORY_NEEDED_RTX): Delete define.
|
||||
* config/pa/pa.md: Create separate 32 and 64-bit move patterns
|
||||
for SI, DI, SF and DF modes. Add alternatives to copy between
|
||||
general and floating point registers to the 32-bit patterns.
|
||||
* config/pa/pa-64.h (SECONDARY_MEMORY_NEEDED_RTX): Delete undefine.
|
||||
* config/pa/pa.h (SECONDARY_MEMORY_NEEDED_RTX): Delete define.
|
||||
(SECONDARY_MEMORY_NEEDED): Secondary memory is only needed when
|
||||
generating 64-bit code.
|
||||
* pa.c (output_move_double): Handle copies between general and
|
||||
floating registers.
|
||||
* config/pa/pa.c (output_move_double): Handle copies between general
|
||||
and floating registers.
|
||||
|
||||
2006-01-10 Stuart Hastings <stuart@apple.com>
|
||||
|
||||
* gcc/config/i386/i386.md (set_got): Update.
|
||||
* config/i386/i386.md (set_got): Update.
|
||||
(set_got_labelled): New. (UNSPEC_LD_MPIC): New.
|
||||
(builtin_setjmp_receiver): Mach-O support.
|
||||
* gcc/config/i386/darwin.h (TARGET_ASM_FILE_END) Define.
|
||||
* config/i386/darwin.h (TARGET_ASM_FILE_END) Define.
|
||||
(GOT_SYMBOL_NAME): Define.
|
||||
(FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN): New.
|
||||
(TARGET_DEEP_BRANCH_PREDICTION): Remove.
|
||||
* gcc/config/i386/i386.c (override_options): Revise for Darwin.
|
||||
* config/i386/i386.c (override_options): Revise for Darwin.
|
||||
(USE_HIDDEN_LINKONCE): Enable for Mach-O. (ix86_file_end): Mach-O
|
||||
support. (darwin_x86_file_end): New. (output_set_got): Add label
|
||||
parameter, revise for Mach-O. (x86_output_mi_thunk): Likewise.
|
||||
* gcc/config/i386/i386-protos.h (output_set_got): Likewise.
|
||||
* gcc/config/darwin.c (machopic_legitimize_pic_address): Update
|
||||
* config/i386/i386-protos.h (output_set_got): Likewise.
|
||||
* config/darwin.c (machopic_legitimize_pic_address): Update
|
||||
regs_ever_live[].
|
||||
|
||||
2006-01-10 Kaz Kojima <kkojima@gcc.gnu.org>
|
||||
@ -604,7 +630,7 @@
|
||||
|
||||
2006-01-03 Adrian Straetling <straetling@de.ibm.com>
|
||||
|
||||
* gcc/builtins.c (get_builtin_sync_mem): New function.
|
||||
* builtins.c (get_builtin_sync_mem): New function.
|
||||
(expand_builtin_sync_operation, expand_builtin_compare_and_swap,
|
||||
expand_builtin_lock_test_and_set, expand_builtin_lock_release):
|
||||
Call get_builtin_sync_mem to generate mem rtx.
|
||||
|
@ -1970,7 +1970,8 @@ tree-ssa-loop-im.o : tree-ssa-loop-im.c $(TREE_FLOW_H) $(CONFIG_H) \
|
||||
$(TREE_DUMP_H) tree-pass.h $(FLAGS_H) real.h $(BASIC_BLOCK_H) \
|
||||
hard-reg-set.h
|
||||
tree-ssa-math-opts.o : tree-ssa-math-opts.c $(TREE_FLOW_H) $(CONFIG_H) \
|
||||
$(SYSTEM_H) $(TREE_H) $(TIMEVAR_H) tree-pass.h $(TM_H) $(FLAGS_H)
|
||||
$(SYSTEM_H) $(TREE_H) $(TIMEVAR_H) tree-pass.h $(TM_H) $(FLAGS_H) \
|
||||
alloc-pool.h $(BASIC_BLOCK_H) $(TARGET_H)
|
||||
tree-ssa-alias.o : tree-ssa-alias.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
|
||||
$(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) tree-inline.h $(FLAGS_H) \
|
||||
function.h $(TIMEVAR_H) convert.h $(TM_H) coretypes.h langhooks.h \
|
||||
|
@ -8893,6 +8893,15 @@ point number to a signed fixed point number also convert validly to an
|
||||
unsigned one.
|
||||
@end defmac
|
||||
|
||||
@deftypefn {Target Hook} {unsigned int} TARGET_MIN_DIVISIONS_FOR_RECIP_MUL (enum machine_mode @var{mode})
|
||||
When @option{-ffast-math} is in effect, GCC tries to optimize
|
||||
divisions by the same divisor, by turning them into multiplications by
|
||||
the reciprocal. This target hook specifies the minimum number of divisions
|
||||
that should be there for GCC to perform the optimization for a variable
|
||||
of mode @var{mode}. The default implementation returns 3 if the machine
|
||||
has an instruction for the division, and 2 if it does not.
|
||||
@end deftypefn
|
||||
|
||||
@defmac MOVE_MAX
|
||||
The maximum number of bytes that a single instruction can move quickly
|
||||
between memory and registers or between two memory locations.
|
||||
|
@ -551,12 +551,12 @@ init_optimization_passes (void)
|
||||
we add may_alias right after fold builtins
|
||||
which can create arbitrary GIMPLE. */
|
||||
NEXT_PASS (pass_may_alias);
|
||||
NEXT_PASS (pass_cse_reciprocals);
|
||||
NEXT_PASS (pass_split_crit_edges);
|
||||
NEXT_PASS (pass_pre);
|
||||
NEXT_PASS (pass_may_alias);
|
||||
NEXT_PASS (pass_sink_code);
|
||||
NEXT_PASS (pass_tree_loop);
|
||||
NEXT_PASS (pass_cse_reciprocals);
|
||||
NEXT_PASS (pass_reassoc);
|
||||
NEXT_PASS (pass_dominator);
|
||||
|
||||
|
@ -336,6 +336,10 @@ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
#define TARGET_SHIFT_TRUNCATION_MASK default_shift_truncation_mask
|
||||
#endif
|
||||
|
||||
#ifndef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
|
||||
#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL default_min_divisions_for_recip_mul
|
||||
#endif
|
||||
|
||||
#ifndef TARGET_VALID_POINTER_MODE
|
||||
#define TARGET_VALID_POINTER_MODE default_valid_pointer_mode
|
||||
#endif
|
||||
@ -588,6 +592,7 @@ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
TARGET_ENCODE_SECTION_INFO, \
|
||||
TARGET_STRIP_NAME_ENCODING, \
|
||||
TARGET_SHIFT_TRUNCATION_MASK, \
|
||||
TARGET_MIN_DIVISIONS_FOR_RECIP_MUL, \
|
||||
TARGET_VALID_POINTER_MODE, \
|
||||
TARGET_SCALAR_MODE_SUPPORTED_P, \
|
||||
TARGET_VECTOR_MODE_SUPPORTED_P, \
|
||||
|
@ -440,6 +440,11 @@ struct gcc_target
|
||||
return the mask that they apply. Return 0 otherwise. */
|
||||
unsigned HOST_WIDE_INT (* shift_truncation_mask) (enum machine_mode mode);
|
||||
|
||||
/* Return the number of divisions in the given MODE that should be present,
|
||||
so that it is profitable to turn the division into a multiplication by
|
||||
the reciprocal. */
|
||||
unsigned int (* min_divisions_for_recip_mul) (enum machine_mode mode);
|
||||
|
||||
/* True if MODE is valid for a pointer in __attribute__((mode("MODE"))). */
|
||||
bool (* valid_pointer_mode) (enum machine_mode mode);
|
||||
|
||||
|
@ -148,6 +148,14 @@ default_shift_truncation_mask (enum machine_mode mode)
|
||||
return SHIFT_COUNT_TRUNCATED ? GET_MODE_BITSIZE (mode) - 1 : 0;
|
||||
}
|
||||
|
||||
/* The default implementation of TARGET_MIN_DIVISIONS_FOR_RECIP_MUL. */
|
||||
|
||||
unsigned int
|
||||
default_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED)
|
||||
{
|
||||
return have_insn_for (DIV, mode) ? 3 : 2;
|
||||
}
|
||||
|
||||
/* Generic hook that takes a CUMULATIVE_ARGS pointer and returns true. */
|
||||
|
||||
bool
|
||||
|
@ -33,6 +33,7 @@ extern bool default_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
|
||||
extern enum machine_mode default_eh_return_filter_mode (void);
|
||||
extern unsigned HOST_WIDE_INT default_shift_truncation_mask
|
||||
(enum machine_mode);
|
||||
extern unsigned int default_min_divisions_for_recip_mul (enum machine_mode);
|
||||
|
||||
extern tree default_stack_protect_guard (void);
|
||||
extern tree default_external_stack_protect_fail (void);
|
||||
|
@ -1,3 +1,16 @@
|
||||
2006-01-11 Paolo Bonzini <bonzini@gnu.org>
|
||||
|
||||
PR tree-optimization/23109
|
||||
PR tree-optimization/23948
|
||||
PR tree-optimization/24123
|
||||
|
||||
* gcc.dg/tree-ssa/recip-3.c, gcc.dg/tree-ssa/recip-4.c,
|
||||
gcc.dg/tree-ssa/recip-5.c, gcc.dg/tree-ssa/recip-6.c,
|
||||
gcc.dg/tree-ssa/recip-7.c, gcc.dg/tree-ssa/pr23109.c,
|
||||
g++.dg/tree-ssa/pr23948.C: New testcases.
|
||||
* gcc.dg/tree-ssa/recip-2.c, gcc.dg/tree-ssa/pr23234.c: Provide
|
||||
three divisions in order to do the optimization.
|
||||
|
||||
2005-01-11 Zdenek Dvorak <dvorakz@suse.cz>
|
||||
|
||||
PR c++/25632
|
||||
|
19
gcc/testsuite/g++.dg/tree-ssa/pr23948.C
Normal file
19
gcc/testsuite/g++.dg/tree-ssa/pr23948.C
Normal file
@ -0,0 +1,19 @@
|
||||
/* { dg-options "-O1 -ffast-math -fdump-tree-recip" } */
|
||||
/* { dg-do compile } */
|
||||
|
||||
struct MIOFILE {
|
||||
~MIOFILE();
|
||||
};
|
||||
double potentially_runnable_resource_share();
|
||||
void f1(double);
|
||||
int make_scheduler_request(double a, double b)
|
||||
{
|
||||
MIOFILE mf;
|
||||
double prrs = potentially_runnable_resource_share();
|
||||
f1(a/prrs);
|
||||
f1(1/prrs);
|
||||
f1(b/prrs);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times " / " 1 "recip" } } */
|
||||
/* { dg-final { cleanup-tree-dump "recip" } } */
|
34
gcc/testsuite/gcc.dg/tree-ssa/pr23109.c
Normal file
34
gcc/testsuite/gcc.dg/tree-ssa/pr23109.c
Normal file
@ -0,0 +1,34 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -funsafe-math-optimizations -fdump-tree-recip -fdump-tree-lim" } */
|
||||
|
||||
double F[2] = { 0., 0. }, e = 0.;
|
||||
|
||||
int main()
|
||||
{
|
||||
int i;
|
||||
double E, W, P, d;
|
||||
|
||||
/* make sure the program crashes on FP exception */
|
||||
unsigned short int Mask;
|
||||
|
||||
W = 1.;
|
||||
d = 2.*e;
|
||||
E = 1. - d;
|
||||
|
||||
for( i=0; i < 2; i++ )
|
||||
if( d > 0.01 )
|
||||
{
|
||||
P = ( W < E ) ? (W - E)/d : (E - W)/d;
|
||||
F[i] += P;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* LIM only performs the transformation in the no-trapping-math case. In
|
||||
the future we will do it for trapping-math as well in recip, check that
|
||||
this is not wrongly optimized. */
|
||||
/* { dg-final { scan-tree-dump-not "reciptmp" "lim" } } */
|
||||
/* { dg-final { scan-tree-dump-not "reciptmp" "recip" } } */
|
||||
/* { dg-final { cleanup-tree-dump "recip" } } */
|
||||
|
@ -9,6 +9,7 @@ double
|
||||
f1 (double a, double b, double c)
|
||||
{
|
||||
double y0;
|
||||
double y1;
|
||||
|
||||
if (a == 0.0)
|
||||
{
|
||||
@ -16,7 +17,8 @@ f1 (double a, double b, double c)
|
||||
return y0;
|
||||
}
|
||||
y0 = c / b;
|
||||
return y0;
|
||||
y1 = a / b;
|
||||
return y0 * y1;
|
||||
}
|
||||
|
||||
/* Labels may end up in the middle of a block. Also bad. */
|
||||
@ -24,6 +26,7 @@ double
|
||||
f2 (double a, double b, double c)
|
||||
{
|
||||
double y0;
|
||||
double y1;
|
||||
|
||||
a_label:
|
||||
another_label:
|
||||
@ -33,7 +36,8 @@ another_label:
|
||||
return y0;
|
||||
}
|
||||
y0 = c / b;
|
||||
return y0;
|
||||
y1 = a / b;
|
||||
return y0 * y1;
|
||||
}
|
||||
|
||||
/* Uses must still be dominated by their defs. */
|
||||
@ -41,6 +45,7 @@ double
|
||||
f3 (double a, double b, double c)
|
||||
{
|
||||
double y0;
|
||||
double y1;
|
||||
|
||||
y0 = -c / b;
|
||||
if (a == 0.0)
|
||||
@ -48,5 +53,6 @@ f3 (double a, double b, double c)
|
||||
return y0;
|
||||
}
|
||||
y0 = c / b;
|
||||
return y0;
|
||||
y1 = a / b;
|
||||
return y0 * y1;
|
||||
}
|
||||
|
@ -10,14 +10,19 @@ float e(float a, float b, float c, float d, float e, float f)
|
||||
}
|
||||
|
||||
/* The PHI nodes for these divisions should be combined. */
|
||||
d = d / a;
|
||||
e = e / a;
|
||||
f = f / a;
|
||||
|
||||
a = a / c;
|
||||
b = b / c;
|
||||
|
||||
return a + b + e + f;
|
||||
/* This should not be left as a multiplication. */
|
||||
c = 1 / c;
|
||||
|
||||
return a + b + c + d + e + f;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times " / " 2 "recip" } } */
|
||||
/* { dg-final { scan-tree-dump-times " \\* " 5 "recip" } } */
|
||||
/* { dg-final { cleanup-tree-dump "recip" } } */
|
||||
|
27
gcc/testsuite/gcc.dg/tree-ssa/recip-3.c
Normal file
27
gcc/testsuite/gcc.dg/tree-ssa/recip-3.c
Normal file
@ -0,0 +1,27 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O1 -fno-trapping-math -funsafe-math-optimizations -fdump-tree-recip" } */
|
||||
|
||||
double F[2] = { 0.0, 0.0 }, e;
|
||||
|
||||
/* In this case the optimization is interesting. */
|
||||
float h ()
|
||||
{
|
||||
int i;
|
||||
double E, W, P, d;
|
||||
|
||||
W = 1.;
|
||||
d = 2.*e;
|
||||
E = 1. - d;
|
||||
|
||||
for( i=0; i < 2; i++ )
|
||||
if( d > 0.01 )
|
||||
{
|
||||
P = ( W < E ) ? (W - E)/d : (E - W)/d;
|
||||
F[i] += P;
|
||||
}
|
||||
|
||||
F[0] += E / d;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times " / " 1 "recip" } } */
|
||||
/* { dg-final { cleanup-tree-dump "recip" } } */
|
45
gcc/testsuite/gcc.dg/tree-ssa/recip-4.c
Normal file
45
gcc/testsuite/gcc.dg/tree-ssa/recip-4.c
Normal file
@ -0,0 +1,45 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O1 -fno-trapping-math -funsafe-math-optimizations -fdump-tree-recip" } */
|
||||
|
||||
/* based on the test case in pr23109 */
|
||||
|
||||
double F[2] = { 0., 0. }, e = 0.;
|
||||
|
||||
/* Nope, we cannot prove the optimization is worthwhile in this case. */
|
||||
void f ()
|
||||
{
|
||||
int i;
|
||||
double E, W, P, d;
|
||||
|
||||
W = 1.;
|
||||
d = 2.*e;
|
||||
E = 1. - d;
|
||||
|
||||
if( d > 0.01 )
|
||||
{
|
||||
P = ( W < E ) ? (W - E)/d : (E - W)/d;
|
||||
F[i] += P;
|
||||
}
|
||||
}
|
||||
|
||||
/* We also cannot prove the optimization is worthwhile in this case. */
|
||||
float g ()
|
||||
{
|
||||
int i;
|
||||
double E, W, P, d;
|
||||
|
||||
W = 1.;
|
||||
d = 2.*e;
|
||||
E = 1. - d;
|
||||
|
||||
if( d > 0.01 )
|
||||
{
|
||||
P = ( W < E ) ? (W - E)/d : (E - W)/d;
|
||||
F[i] += P;
|
||||
}
|
||||
|
||||
return 1.0 / d;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-not "reciptmp" "recip" } } */
|
||||
/* { dg-final { cleanup-tree-dump "recip" } } */
|
31
gcc/testsuite/gcc.dg/tree-ssa/recip-5.c
Normal file
31
gcc/testsuite/gcc.dg/tree-ssa/recip-5.c
Normal file
@ -0,0 +1,31 @@
|
||||
/* { dg-options "-O1 -funsafe-math-optimizations -ftrapping-math -fdump-tree-recip -fdump-tree-optimized" } */
|
||||
/* { dg-do compile } */
|
||||
|
||||
/* Test the reciprocal optimizations together with trapping math. */
|
||||
|
||||
extern int f2();
|
||||
|
||||
double f1(double y, double z, double w, double j, double k)
|
||||
{
|
||||
double b, c, d, e, f, g;
|
||||
|
||||
if (f2 ())
|
||||
/* inserts one division here */
|
||||
b = 1 / y, c = z / y, d = j / y;
|
||||
else
|
||||
/* one division here */
|
||||
b = 3 / y, c = w / y, d = k / y;
|
||||
|
||||
/* and one here, that should be removed afterwards but is not right now */
|
||||
e = b / y;
|
||||
f = c / y;
|
||||
g = d / y;
|
||||
|
||||
return e + f + g;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times " / " 3 "recip" } } */
|
||||
/* { dg-final { scan-tree-dump-times " / " 2 "optimized" { xfail *-*-* } } } */
|
||||
/* { dg-final { cleanup-tree-dump "recip" } } */
|
||||
/* { dg-final { cleanup-tree-dump "optimized" } } */
|
||||
|
26
gcc/testsuite/gcc.dg/tree-ssa/recip-6.c
Normal file
26
gcc/testsuite/gcc.dg/tree-ssa/recip-6.c
Normal file
@ -0,0 +1,26 @@
|
||||
/* { dg-options "-O1 -funsafe-math-optimizations -fno-trapping-math -fdump-tree-recip" } */
|
||||
/* { dg-do compile } */
|
||||
|
||||
/* Test inserting in a block that does not contain a division. */
|
||||
|
||||
extern int f2();
|
||||
|
||||
double f1(double y, double z, double w)
|
||||
{
|
||||
double b, c, d, e, f;
|
||||
|
||||
if (g ())
|
||||
b = 1 / y, c = z / y;
|
||||
else
|
||||
b = 3 / y, c = w / y;
|
||||
|
||||
d = b / y;
|
||||
e = c / y;
|
||||
f = 1 / y;
|
||||
|
||||
return d + e + f;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times " / " 1 "recip" } } */
|
||||
/* { dg-final { cleanup-tree-dump "recip" } } */
|
||||
|
27
gcc/testsuite/gcc.dg/tree-ssa/recip-7.c
Normal file
27
gcc/testsuite/gcc.dg/tree-ssa/recip-7.c
Normal file
@ -0,0 +1,27 @@
|
||||
/* { dg-options "-O1 -funsafe-math-optimizations -fno-trapping-math -fdump-tree-recip" } */
|
||||
/* { dg-do compile } */
|
||||
|
||||
/* Test inserting in a block that does not contain a division. */
|
||||
|
||||
extern double h();
|
||||
|
||||
double f(int x, double z, double w)
|
||||
{
|
||||
double b, c, d, e, f;
|
||||
double y = h ();
|
||||
|
||||
if (x)
|
||||
b = 1 / y, c = z / y;
|
||||
else
|
||||
b = 3 / y, c = w / y;
|
||||
|
||||
d = b / y;
|
||||
e = c / y;
|
||||
f = 1 / y;
|
||||
|
||||
return d + e + f;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times " / " 1 "recip" } } */
|
||||
/* { dg-final { cleanup-tree-dump "recip" } } */
|
||||
|
@ -1389,6 +1389,30 @@ tree_merge_blocks (basic_block a, basic_block b)
|
||||
}
|
||||
|
||||
|
||||
/* Return the one of two successors of BB that is not
|
||||
reached by a complex edge, if there is one. Else, return BB. We use
|
||||
this in optimizations that use post-dominators for their heuristics,
|
||||
to catch the cases in C++ where function calls are involved. */
|
||||
|
||||
basic_block
|
||||
single_noncomplex_succ (basic_block bb)
|
||||
{
|
||||
edge e0, e1;
|
||||
if (EDGE_COUNT (bb->succs) != 2)
|
||||
return bb;
|
||||
|
||||
e0 = EDGE_SUCC (bb, 0);
|
||||
e1 = EDGE_SUCC (bb, 1);
|
||||
if (e0->flags & EDGE_COMPLEX)
|
||||
return e1->dest;
|
||||
if (e1->flags & EDGE_COMPLEX)
|
||||
return e0->dest;
|
||||
|
||||
return bb;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Walk the function tree removing unnecessary statements.
|
||||
|
||||
* Empty statement nodes are removed
|
||||
|
@ -487,6 +487,7 @@ extern bool is_ctrl_stmt (tree);
|
||||
extern bool is_ctrl_altering_stmt (tree);
|
||||
extern bool computed_goto_p (tree);
|
||||
extern bool simple_goto_p (tree);
|
||||
extern basic_block single_noncomplex_succ (basic_block bb);
|
||||
extern void tree_dump_bb (basic_block, FILE *, int);
|
||||
extern void debug_tree_bb (basic_block);
|
||||
extern basic_block debug_tree_bb_n (int);
|
||||
|
@ -35,7 +35,55 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
z = z * rmodulus;
|
||||
|
||||
We do this for loop invariant divisors, and with this pass whenever
|
||||
we notice that a division has the same divisor multiple times. */
|
||||
we notice that a division has the same divisor multiple times.
|
||||
|
||||
Of course, like in PRE, we don't insert a division if a dominator
|
||||
already has one. However, this cannot be done as an extension of
|
||||
PRE for several reasons.
|
||||
|
||||
First of all, with some experiments it was found out that the
|
||||
transformation is not always useful if there are only two divisions
|
||||
by the same divisor. This is probably because modern processors
|
||||
can pipeline the divisions; on older, in-order processors it should
|
||||
still be effective to optimize two divisions by the same number.
|
||||
We make this a target hook, and it shall be called N in the remainder of
|
||||
this comment.
|
||||
|
||||
Second, if trapping math is active, we have less freedom on where
|
||||
to insert divisions: we can only do so in basic blocks that already
|
||||
contain one. (If divisions don't trap, instead, we can insert
|
||||
divisions elsewhere, which will be in blocks that are common dominators
|
||||
of those that have the division).
|
||||
|
||||
We really don't want to compute the reciprocal unless a division will
|
||||
be found. To do this, we won't insert the division in a basic block
|
||||
that has less than N divisions *post-dominating* it.
|
||||
|
||||
The algorithm constructs a subset of the dominator tree, holding the
|
||||
blocks containing the divisions and the common dominators to them,
|
||||
and walks it twice. The first walk is in post-order, and it annotates
|
||||
each block with the number of divisions that post-dominate it: this
|
||||
gives information on where divisions can be inserted profitably.
|
||||
The second walk is in pre-order, and it inserts divisions as explained
|
||||
above, and replaces divisions by multiplications.
|
||||
|
||||
In the best case, the cost of the pass is O(n_statements). In the
|
||||
worst-case, the cost is due to creating the dominator tree subset,
|
||||
with a cost of O(n_basic_blocks ^ 2); however this can only happen
|
||||
for n_statements / n_basic_blocks statements. So, the amortized cost
|
||||
of creating the dominator tree subset is O(n_basic_blocks) and the
|
||||
worst-case cost of the pass is O(n_statements * n_basic_blocks).
|
||||
|
||||
More practically, the cost will be small because there are few
|
||||
divisions, and they tend to be in the same basic block, so insert_bb
|
||||
is called very few times.
|
||||
|
||||
If we did this using domwalk.c, an efficient implementation would have
|
||||
to work on all the variables in a single pass, because we could not
|
||||
work on just a subset of the dominator tree, as we do now, and the
|
||||
cost would also be something like O(n_statements * n_basic_blocks).
|
||||
The data structures would be more complex in order to work on all the
|
||||
variables in a single pass. */
|
||||
|
||||
#include "config.h"
|
||||
#include "system.h"
|
||||
@ -47,6 +95,348 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
#include "real.h"
|
||||
#include "timevar.h"
|
||||
#include "tree-pass.h"
|
||||
#include "alloc-pool.h"
|
||||
#include "basic-block.h"
|
||||
#include "target.h"
|
||||
|
||||
|
||||
/* This structure represents one basic block that either computes a
|
||||
division, or is a common dominator for basic blocks that compute a
|
||||
division. */
|
||||
struct occurrence {
|
||||
/* The basic block represented by this structure. */
|
||||
basic_block bb;
|
||||
|
||||
/* If non-NULL, the SSA_NAME holding the definition for a reciprocal
|
||||
inserted in BB. */
|
||||
tree recip_def;
|
||||
|
||||
/* If non-NULL, the MODIFY_EXPR for a reciprocal computation that
|
||||
was inserted in BB. */
|
||||
tree recip_def_stmt;
|
||||
|
||||
/* Pointer to a list of "struct occurrence"s for blocks dominated
|
||||
by BB. */
|
||||
struct occurrence *children;
|
||||
|
||||
/* Pointer to the next "struct occurrence" in the list of blocks
|
||||
sharing a common dominator. */
|
||||
struct occurrence *next;
|
||||
|
||||
/* The number of divisions that are in BB before compute_merit. The
|
||||
number of divisions that are in BB or post-dominate it after
|
||||
compute_merit. */
|
||||
int num_divisions;
|
||||
|
||||
/* True if the basic block has a division, false if it is a common
|
||||
dominator for basic blocks that do. If it is false and trapping
|
||||
math is active, BB is not a candidate for inserting a reciprocal. */
|
||||
bool bb_has_division;
|
||||
};
|
||||
|
||||
|
||||
/* The instance of "struct occurrence" representing the highest
|
||||
interesting block in the dominator tree. */
|
||||
static struct occurrence *occ_head;
|
||||
|
||||
/* Allocation pool for getting instances of "struct occurrence". */
|
||||
static alloc_pool occ_pool;
|
||||
|
||||
|
||||
|
||||
/* Allocate and return a new struct occurrence for basic block BB, and
|
||||
whose children list is headed by CHILDREN. */
|
||||
static struct occurrence *
|
||||
occ_new (basic_block bb, struct occurrence *children)
|
||||
{
|
||||
struct occurrence *occ;
|
||||
|
||||
occ = bb->aux = pool_alloc (occ_pool);
|
||||
memset (occ, 0, sizeof (struct occurrence));
|
||||
|
||||
occ->bb = bb;
|
||||
occ->children = children;
|
||||
return occ;
|
||||
}
|
||||
|
||||
|
||||
/* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
|
||||
list of "struct occurrence"s, one per basic block, having IDOM as
|
||||
their common dominator.
|
||||
|
||||
We try to insert NEW_OCC as deep as possible in the tree, and we also
|
||||
insert any other block that is a common dominator for BB and one
|
||||
block already in the tree. */
|
||||
|
||||
static void
|
||||
insert_bb (struct occurrence *new_occ, basic_block idom,
|
||||
struct occurrence **p_head)
|
||||
{
|
||||
struct occurrence *occ, **p_occ;
|
||||
|
||||
for (p_occ = p_head; (occ = *p_occ) != NULL; )
|
||||
{
|
||||
basic_block bb = new_occ->bb, occ_bb = occ->bb;
|
||||
basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
|
||||
if (dom == bb)
|
||||
{
|
||||
/* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
|
||||
from its list. */
|
||||
*p_occ = occ->next;
|
||||
occ->next = new_occ->children;
|
||||
new_occ->children = occ;
|
||||
|
||||
/* Try the next block (it may as well be dominated by BB). */
|
||||
}
|
||||
|
||||
else if (dom == occ_bb)
|
||||
{
|
||||
/* OCC_BB dominates BB. Tail recurse to look deeper. */
|
||||
insert_bb (new_occ, dom, &occ->children);
|
||||
return;
|
||||
}
|
||||
|
||||
else if (dom != idom)
|
||||
{
|
||||
gcc_assert (!dom->aux);
|
||||
|
||||
/* There is a dominator between IDOM and BB, add it and make
|
||||
two children out of NEW_OCC and OCC. First, remove OCC from
|
||||
its list. */
|
||||
*p_occ = occ->next;
|
||||
new_occ->next = occ;
|
||||
occ->next = NULL;
|
||||
|
||||
/* None of the previous blocks has DOM as a dominator: if we tail
|
||||
recursed, we would reexamine them uselessly. Just switch BB with
|
||||
DOM, and go on looking for blocks dominated by DOM. */
|
||||
new_occ = occ_new (dom, new_occ);
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
/* Nothing special, go on with the next element. */
|
||||
p_occ = &occ->next;
|
||||
}
|
||||
}
|
||||
|
||||
/* No block in the list dominates BB. Put NEW_OCC at the head of the list that P_HEAD points to. */
|
||||
new_occ->next = *p_head;
|
||||
*p_head = new_occ;
|
||||
}
|
||||
|
||||
/* Register that we found a division in BB. */
|
||||
|
||||
static inline void
|
||||
register_division_in (basic_block bb)
|
||||
{
|
||||
struct occurrence *occ;
|
||||
|
||||
occ = (struct occurrence *) bb->aux;
|
||||
if (!occ)
|
||||
{
|
||||
occ = occ_new (bb, NULL);
|
||||
insert_bb (occ, ENTRY_BLOCK_PTR, &occ_head);
|
||||
}
|
||||
|
||||
occ->bb_has_division = true;
|
||||
occ->num_divisions++;
|
||||
}
|
||||
|
||||
|
||||
/* Compute the number of divisions that postdominate each block in OCC and
|
||||
its children. */
|
||||
|
||||
static void
|
||||
compute_merit (struct occurrence *occ)
|
||||
{
|
||||
struct occurrence *occ_child;
|
||||
basic_block dom = occ->bb;
|
||||
|
||||
for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
|
||||
{
|
||||
basic_block bb;
|
||||
if (occ_child->children)
|
||||
compute_merit (occ_child);
|
||||
|
||||
if (flag_exceptions)
|
||||
bb = single_noncomplex_succ (dom);
|
||||
else
|
||||
bb = dom;
|
||||
|
||||
if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
|
||||
occ->num_divisions += occ_child->num_divisions;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Return whether USE_STMT is a floating-point division by DEF. */
|
||||
static inline bool
|
||||
is_division_by (tree use_stmt, tree def)
|
||||
{
|
||||
return TREE_CODE (use_stmt) == MODIFY_EXPR
|
||||
&& TREE_CODE (TREE_OPERAND (use_stmt, 1)) == RDIV_EXPR
|
||||
&& TREE_OPERAND (TREE_OPERAND (use_stmt, 1), 1) == def;
|
||||
}
|
||||
|
||||
/* Walk the subset of the dominator tree rooted at OCC, setting the
|
||||
RECIP_DEF field to a definition of 1.0 / DEF that can be used in
|
||||
the given basic block. The field may be left NULL, of course,
|
||||
if it is not possible or profitable to do the optimization.
|
||||
|
||||
DEF_BSI is an iterator pointing at the statement defining DEF.
|
||||
If RECIP_DEF is set, a dominator already has a computation that can
|
||||
be used. */
|
||||
|
||||
static void
|
||||
insert_reciprocals (block_stmt_iterator *def_bsi, struct occurrence *occ,
|
||||
tree def, tree recip_def, int threshold)
|
||||
{
|
||||
tree type, new_stmt;
|
||||
block_stmt_iterator bsi;
|
||||
struct occurrence *occ_child;
|
||||
|
||||
if (!recip_def
|
||||
&& (occ->bb_has_division || !flag_trapping_math)
|
||||
&& occ->num_divisions >= threshold)
|
||||
{
|
||||
/* Make a variable with the replacement and substitute it. */
|
||||
type = TREE_TYPE (def);
|
||||
recip_def = make_rename_temp (type, "reciptmp");
|
||||
new_stmt = build2 (MODIFY_EXPR, void_type_node, recip_def,
|
||||
fold_build2 (RDIV_EXPR, type,
|
||||
build_real (type, dconst1), def));
|
||||
|
||||
|
||||
if (occ->bb_has_division)
|
||||
{
|
||||
/* Case 1: insert before an existing division. */
|
||||
bsi = bsi_after_labels (occ->bb);
|
||||
while (!bsi_end_p (bsi) && !is_division_by (bsi_stmt (bsi), def))
|
||||
bsi_next (&bsi);
|
||||
|
||||
bsi_insert_before (&bsi, new_stmt, BSI_SAME_STMT);
|
||||
}
|
||||
else if (def_bsi && occ->bb == def_bsi->bb)
|
||||
{
|
||||
/* Case 2: insert right after the definition. Note that this will
|
||||
never happen if the definition statement can throw, because in
|
||||
that case the sole successor of the statement's basic block will
|
||||
dominate all the uses as well. */
|
||||
bsi_insert_after (def_bsi, new_stmt, BSI_NEW_STMT);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Case 3: insert in a basic block not containing defs/uses. */
|
||||
bsi = bsi_after_labels (occ->bb);
|
||||
bsi_insert_before (&bsi, new_stmt, BSI_SAME_STMT);
|
||||
}
|
||||
|
||||
occ->recip_def_stmt = new_stmt;
|
||||
}
|
||||
|
||||
occ->recip_def = recip_def;
|
||||
for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
|
||||
insert_reciprocals (def_bsi, occ_child, def, recip_def, threshold);
|
||||
}
|
||||
|
||||
|
||||
/* Replace the division at USE_P with a multiplication by the reciprocal, if
|
||||
possible. */
|
||||
|
||||
static inline void
|
||||
replace_reciprocal (use_operand_p use_p)
|
||||
{
|
||||
tree use_stmt = USE_STMT (use_p);
|
||||
basic_block bb = bb_for_stmt (use_stmt);
|
||||
struct occurrence *occ = (struct occurrence *) bb->aux;
|
||||
|
||||
if (occ->recip_def && use_stmt != occ->recip_def_stmt)
|
||||
{
|
||||
TREE_SET_CODE (TREE_OPERAND (use_stmt, 1), MULT_EXPR);
|
||||
SET_USE (use_p, occ->recip_def);
|
||||
fold_stmt_inplace (use_stmt);
|
||||
update_stmt (use_stmt);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Free OCC and return one more "struct occurrence" to be freed. */
|
||||
|
||||
static struct occurrence *
|
||||
free_bb (struct occurrence *occ)
|
||||
{
|
||||
struct occurrence *child, *next;
|
||||
|
||||
/* First get the two pointers hanging off OCC. */
|
||||
next = occ->next;
|
||||
child = occ->children;
|
||||
occ->bb->aux = NULL;
|
||||
pool_free (occ_pool, occ);
|
||||
|
||||
/* Now ensure that we don't recurse unless it is necessary. */
|
||||
if (!child)
|
||||
return next;
|
||||
else
|
||||
{
|
||||
while (next)
|
||||
next = free_bb (next);
|
||||
|
||||
return child;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Look for floating-point divisions among DEF's uses, and try to
|
||||
replace them by multiplications with the reciprocal. Add
|
||||
as many statements computing the reciprocal as needed.
|
||||
|
||||
DEF must be a GIMPLE register of a floating-point type. */
|
||||
|
||||
static void
|
||||
execute_cse_reciprocals_1 (block_stmt_iterator *def_bsi, tree def)
|
||||
{
|
||||
use_operand_p use_p;
|
||||
imm_use_iterator use_iter;
|
||||
struct occurrence *occ;
|
||||
int count = 0, threshold;
|
||||
|
||||
gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
|
||||
|
||||
FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
|
||||
{
|
||||
tree use_stmt = USE_STMT (use_p);
|
||||
if (is_division_by (use_stmt, def))
|
||||
{
|
||||
register_division_in (bb_for_stmt (use_stmt));
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Do the expensive part only if we can hope to optimize something. */
|
||||
threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
|
||||
if (count >= threshold)
|
||||
{
|
||||
for (occ = occ_head; occ; occ = occ->next)
|
||||
{
|
||||
compute_merit (occ);
|
||||
insert_reciprocals (def_bsi, occ, def, NULL, threshold);
|
||||
}
|
||||
|
||||
FOR_EACH_IMM_USE_SAFE (use_p, use_iter, def)
|
||||
{
|
||||
tree use_stmt = USE_STMT (use_p);
|
||||
if (is_division_by (use_stmt, def))
|
||||
replace_reciprocal (use_p);
|
||||
}
|
||||
}
|
||||
|
||||
for (occ = occ_head; occ; )
|
||||
occ = free_bb (occ);
|
||||
|
||||
occ_head = NULL;
|
||||
}
|
||||
|
||||
|
||||
static bool
|
||||
gate_cse_reciprocals (void)
|
||||
@ -54,135 +444,58 @@ gate_cse_reciprocals (void)
|
||||
return optimize && !optimize_size && flag_unsafe_math_optimizations;
|
||||
}
|
||||
|
||||
/* Placement strategies for the statement that computes a reciprocal.  */
enum place_reciprocal
{
  /* Insert it with bsi_insert_before.  */
  PR_BEFORE_BSI,

  /* Insert it with bsi_insert_after.  */
  PR_AFTER_BSI,

  /* Insert it on the edge between the entry block and the first basic
     block.  */
  PR_ON_ENTRY_EDGE
};
|
||||
|
||||
/* Check if DEF's uses include more than one floating-point division,
|
||||
and if so replace them by multiplications with the reciprocal. Add
|
||||
the statement computing the reciprocal according to WHERE.
|
||||
|
||||
Does not check the type of DEF, nor that DEF is a GIMPLE register.
|
||||
This is done in the caller for speed, because otherwise this routine
|
||||
would be called for every definition and phi node. */
|
||||
static void
|
||||
execute_cse_reciprocals_1 (block_stmt_iterator *bsi, tree def,
|
||||
enum place_reciprocal where)
|
||||
{
|
||||
use_operand_p use_p;
|
||||
imm_use_iterator use_iter;
|
||||
tree t, new_stmt, type;
|
||||
int count = 0;
|
||||
bool ok = !flag_trapping_math;
|
||||
|
||||
/* Find uses. */
|
||||
FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
|
||||
{
|
||||
tree use_stmt = USE_STMT (use_p);
|
||||
if (TREE_CODE (use_stmt) == MODIFY_EXPR
|
||||
&& TREE_CODE (TREE_OPERAND (use_stmt, 1)) == RDIV_EXPR
|
||||
&& TREE_OPERAND (TREE_OPERAND (use_stmt, 1), 1) == def)
|
||||
{
|
||||
++count;
|
||||
/* Check if this use post-dominates the insertion point. */
|
||||
if (ok || dominated_by_p (CDI_POST_DOMINATORS, bsi->bb,
|
||||
bb_for_stmt (use_stmt)))
|
||||
ok = true;
|
||||
}
|
||||
if (count >= 2 && ok)
|
||||
break;
|
||||
}
|
||||
|
||||
if (count < 2 || !ok)
|
||||
return;
|
||||
|
||||
/* Make a variable with the replacement and substitute it. */
|
||||
type = TREE_TYPE (def);
|
||||
t = make_rename_temp (type, "reciptmp");
|
||||
new_stmt = build2 (MODIFY_EXPR, void_type_node, t,
|
||||
fold_build2 (RDIV_EXPR, type, build_real (type, dconst1),
|
||||
def));
|
||||
|
||||
if (where == PR_BEFORE_BSI)
|
||||
bsi_insert_before (bsi, new_stmt, BSI_SAME_STMT);
|
||||
else if (where == PR_AFTER_BSI)
|
||||
bsi_insert_after (bsi, new_stmt, BSI_NEW_STMT);
|
||||
else if (where == PR_ON_ENTRY_EDGE)
|
||||
bsi_insert_on_edge (single_succ_edge (ENTRY_BLOCK_PTR), new_stmt);
|
||||
else
|
||||
gcc_unreachable ();
|
||||
|
||||
FOR_EACH_IMM_USE_SAFE (use_p, use_iter, def)
|
||||
{
|
||||
tree use_stmt = USE_STMT (use_p);
|
||||
if (use_stmt != new_stmt
|
||||
&& TREE_CODE (use_stmt) == MODIFY_EXPR
|
||||
&& TREE_CODE (TREE_OPERAND (use_stmt, 1)) == RDIV_EXPR
|
||||
&& TREE_OPERAND (TREE_OPERAND (use_stmt, 1), 1) == def)
|
||||
{
|
||||
TREE_SET_CODE (TREE_OPERAND (use_stmt, 1), MULT_EXPR);
|
||||
SET_USE (use_p, t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Go through all the floating-point SSA_NAMEs, and call
|
||||
execute_cse_reciprocals_1 on each of them. */
|
||||
static void
|
||||
execute_cse_reciprocals (void)
|
||||
{
|
||||
basic_block bb;
|
||||
tree arg;
|
||||
|
||||
if (flag_trapping_math)
|
||||
calculate_dominance_info (CDI_POST_DOMINATORS);
|
||||
occ_pool = create_alloc_pool ("dominators for recip",
|
||||
sizeof (struct occurrence),
|
||||
n_basic_blocks / 3 + 1);
|
||||
|
||||
if (single_succ_p (ENTRY_BLOCK_PTR))
|
||||
for (arg = DECL_ARGUMENTS (cfun->decl); arg; arg = TREE_CHAIN (arg))
|
||||
if (default_def (arg))
|
||||
{
|
||||
block_stmt_iterator bsi;
|
||||
bsi = bsi_start (single_succ (ENTRY_BLOCK_PTR));
|
||||
execute_cse_reciprocals_1 (&bsi, default_def (arg),
|
||||
PR_ON_ENTRY_EDGE);
|
||||
}
|
||||
calculate_dominance_info (CDI_DOMINATORS | CDI_POST_DOMINATORS);
|
||||
|
||||
#ifdef ENABLE_CHECKING
|
||||
FOR_EACH_BB (bb)
|
||||
gcc_assert (!bb->aux);
|
||||
#endif
|
||||
|
||||
for (arg = DECL_ARGUMENTS (cfun->decl); arg; arg = TREE_CHAIN (arg))
|
||||
if (default_def (arg)
|
||||
&& FLOAT_TYPE_P (TREE_TYPE (arg))
|
||||
&& is_gimple_reg (arg))
|
||||
execute_cse_reciprocals_1 (NULL, default_def (arg));
|
||||
|
||||
FOR_EACH_BB (bb)
|
||||
{
|
||||
block_stmt_iterator bsi;
|
||||
tree phi, def;
|
||||
for (bsi = bsi_start (bb);
|
||||
!bsi_end_p (bsi) && TREE_CODE (bsi_stmt (bsi)) == LABEL_EXPR;
|
||||
bsi_next (&bsi))
|
||||
;
|
||||
|
||||
for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
|
||||
{
|
||||
def = PHI_RESULT (phi);
|
||||
if (FLOAT_TYPE_P (TREE_TYPE (def))
|
||||
&& is_gimple_reg (def))
|
||||
execute_cse_reciprocals_1 (&bsi, def, PR_BEFORE_BSI);
|
||||
execute_cse_reciprocals_1 (NULL, def);
|
||||
}
|
||||
|
||||
for (; !bsi_end_p (bsi); bsi_next (&bsi))
|
||||
for (bsi = bsi_after_labels (bb); !bsi_end_p (bsi); bsi_next (&bsi))
|
||||
{
|
||||
tree stmt = bsi_stmt (bsi);
|
||||
if (TREE_CODE (stmt) == MODIFY_EXPR
|
||||
&& (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
|
||||
&& FLOAT_TYPE_P (TREE_TYPE (def))
|
||||
&& TREE_CODE (def) == SSA_NAME)
|
||||
execute_cse_reciprocals_1 (&bsi, def, PR_AFTER_BSI);
|
||||
execute_cse_reciprocals_1 (&bsi, def);
|
||||
}
|
||||
}
|
||||
|
||||
if (flag_trapping_math)
|
||||
free_dominance_info (CDI_POST_DOMINATORS);
|
||||
|
||||
if (single_succ_p (ENTRY_BLOCK_PTR))
|
||||
bsi_commit_one_edge_insert (single_succ_edge (ENTRY_BLOCK_PTR), NULL);
|
||||
free_dominance_info (CDI_DOMINATORS | CDI_POST_DOMINATORS);
|
||||
free_alloc_pool (occ_pool);
|
||||
}
|
||||
|
||||
struct tree_opt_pass pass_cse_reciprocals =
|
||||
|
Loading…
Reference in New Issue
Block a user