mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-03-31 15:11:04 +08:00
invoke.texi: Document -ftree-loop-distribution.
* doc/invoke.texi: Document -ftree-loop-distribution. * tree-loop-distribution.c: New. * tree-pass.h (pass_loop_distribution): New. * graphds.h (struct graph): Add htab_t indices. * timevar.def (TV_TREE_LOOP_DISTRIBUTION): New. * tree-vectorizer.c (rename_variables_in_loop): Extern. (slpeel_tree_duplicate_loop_to_edge_cfg): Init PENDING_STMT to NULL. * tree-vectorizer.h (tree_duplicate_loop_on_edge): Declared. * tree-data-ref.c (debug_data_dependence_relations): New. (dump_data_dependence_relation): Also print data references. (free_data_ref): Extern. (same_access_functions): Moved... (find_vertex_for_stmt): Renamed rdg_vertex_for_stmt. (dump_rdg_vertex, debug_rdg_vertex, dump_rdg_component, debug_rdg_component, dump_rdg, debug_rdg, dot_rdg_1, dot_rdg, struct rdg_vertex_info, rdg_vertex_for_stmt): New. (create_rdg_edge_for_ddr, create_rdg_vertices): Cleaned up. (stmts_from_loop): Skip LABEL_EXPR. (hash_stmt_vertex_info, eq_stmt_vertex_info, hash_stmt_vertex_del): New. (build_rdg): Initialize rdg->indices htab. (free_rdg, stores_from_loop, ref_base_address, rdg_defs_used_in_other_loops_p, have_similar_memory_accesses, have_similar_memory_accesses_1, ref_base_address_1, remove_similar_memory_refs): New. * tree-data-ref.h: Depend on tree-chrec.h. (debug_data_dependence_relations, free_data_ref): Declared. (same_access_functions): ... here. (ddr_is_anti_dependent, ddrs_have_anti_deps, ddr_dependence_level): New. (struct rdg_vertex): Add has_mem_write and has_mem_reads. (RDGV_HAS_MEM_WRITE, RDGV_HAS_MEM_READS, RDG_STMT, RDG_MEM_WRITE_STMT, RDG_MEM_READS_STMT): New. (dump_rdg_vertex, debug_rdg_vertex, dump_rdg_component, debug_rdg_component, dump_rdg, debug_rdg, dot_rdg, rdg_vertex_for_stmt): Declared. (struct rdg_edge): Add level. (RDGE_LEVEL): New. (free_rdg, stores_from_loop, remove_similar_memory_refs, rdg_defs_used_in_other_loops_p, have_similar_memory_accesses): Declared. (rdg_has_similar_memory_accesses): New. * tree-vect-analyze.c: Remove unused static decls. 
* lambda.h (dependence_level): New. * common.opt (ftree-loop-distribution): New. * tree-flow.h (mark_virtual_ops_in_bb, slpeel_tree_duplicate_loop_to_edge_cfg, rename_variables_in_loop): Declared. * Makefile.in (TREE_DATA_REF_H): Depend on tree-chrec.h. (OBJS-common): Add tree-loop-distribution.o. (tree-loop-distribution.o): New rule. * tree-cfg.c (mark_virtual_ops_in_bb): New. (mark_virtual_ops_in_region): Use mark_virtual_ops_in_bb. * passes.c (init_optimization_passes): Schedule pass_loop_distribution. * testsuite/gcc.dg/tree-ssa/ldist-{1..12}.c: New. From-SVN: r132745
This commit is contained in:
parent
dde75838e9
commit
dea61d9282
@ -1,3 +1,57 @@
|
||||
2008-02-28 Sebastian Pop <sebastian.pop@amd.com>
|
||||
|
||||
* doc/invoke.texi: Document -ftree-loop-distribution.
|
||||
* tree-loop-distribution.c: New.
|
||||
* tree-pass.h (pass_loop_distribution): New.
|
||||
* graphds.h (struct graph): Add htab_t indices.
|
||||
* timevar.def (TV_TREE_LOOP_DISTRIBUTION): New.
|
||||
* tree-vectorizer.c (rename_variables_in_loop): Extern.
|
||||
(slpeel_tree_duplicate_loop_to_edge_cfg): Init PENDING_STMT to NULL.
|
||||
* tree-vectorizer.h (tree_duplicate_loop_on_edge): Declared.
|
||||
* tree-data-ref.c (debug_data_dependence_relations): New.
|
||||
(dump_data_dependence_relation): Also print data references.
|
||||
(free_data_ref): Extern.
|
||||
(same_access_functions): Moved...
|
||||
(find_vertex_for_stmt): Renamed rdg_vertex_for_stmt.
|
||||
(dump_rdg_vertex, debug_rdg_vertex, dump_rdg_component,
|
||||
debug_rdg_component, dump_rdg, debug_rdg, dot_rdg_1, dot_rdg,
|
||||
struct rdg_vertex_info, rdg_vertex_for_stmt): New.
|
||||
(create_rdg_edge_for_ddr, create_rdg_vertices): Cleaned up.
|
||||
(stmts_from_loop): Skip LABEL_EXPR.
|
||||
(hash_stmt_vertex_info, eq_stmt_vertex_info, hash_stmt_vertex_del): New.
|
||||
(build_rdg): Initialize rdg->indices htab.
|
||||
(free_rdg, stores_from_loop, ref_base_address,
|
||||
rdg_defs_used_in_other_loops_p, have_similar_memory_accesses,
|
||||
have_similar_memory_accesses_1, ref_base_address_1,
|
||||
remove_similar_memory_refs): New.
|
||||
* tree-data-ref.h: Depend on tree-chrec.h.
|
||||
(debug_data_dependence_relations, free_data_ref): Declared.
|
||||
(same_access_functions): ... here.
|
||||
(ddr_is_anti_dependent, ddrs_have_anti_deps, ddr_dependence_level): New.
|
||||
(struct rdg_vertex): Add has_mem_write and has_mem_reads.
|
||||
(RDGV_HAS_MEM_WRITE, RDGV_HAS_MEM_READS, RDG_STMT,
|
||||
RDG_MEM_WRITE_STMT, RDG_MEM_READS_STMT): New.
|
||||
(dump_rdg_vertex, debug_rdg_vertex, dump_rdg_component,
|
||||
debug_rdg_component, dump_rdg, debug_rdg, dot_rdg,
|
||||
rdg_vertex_for_stmt): Declared.
|
||||
(struct rdg_edge): Add level.
|
||||
(RDGE_LEVEL): New.
|
||||
(free_rdg, stores_from_loop, remove_similar_memory_refs,
|
||||
rdg_defs_used_in_other_loops_p, have_similar_memory_accesses): Declared.
|
||||
(rdg_has_similar_memory_accesses): New.
|
||||
* tree-vect-analyze.c: Remove unused static decls.
|
||||
* lambda.h (dependence_level): New.
|
||||
* common.opt (ftree-loop-distribution): New.
|
||||
* tree-flow.h (mark_virtual_ops_in_bb,
|
||||
slpeel_tree_duplicate_loop_to_edge_cfg,
|
||||
rename_variables_in_loop): Declared.
|
||||
* Makefile.in (TREE_DATA_REF_H): Depend on tree-chrec.h.
|
||||
(OBJS-common): Add tree-loop-distribution.o.
|
||||
(tree-loop-distribution.o): New rule.
|
||||
* tree-cfg.c (mark_virtual_ops_in_bb): New.
|
||||
(mark_virtual_ops_in_region): Use mark_virtual_ops_in_bb.
|
||||
* passes.c (init_optimization_passes): Schedule pass_loop_distribution.
|
||||
|
||||
2008-02-28 Joseph Myers <joseph@codesourcery.com>
|
||||
|
||||
PR target/33963
|
||||
|
@ -838,7 +838,7 @@ DIAGNOSTIC_H = diagnostic.h diagnostic.def $(PRETTY_PRINT_H) options.h
|
||||
C_PRETTY_PRINT_H = c-pretty-print.h $(PRETTY_PRINT_H) $(C_COMMON_H) $(TREE_H)
|
||||
SCEV_H = tree-scalar-evolution.h $(GGC_H) tree-chrec.h $(PARAMS_H)
|
||||
LAMBDA_H = lambda.h $(TREE_H) vec.h $(GGC_H)
|
||||
TREE_DATA_REF_H = tree-data-ref.h $(LAMBDA_H) omega.h graphds.h
|
||||
TREE_DATA_REF_H = tree-data-ref.h $(LAMBDA_H) omega.h graphds.h tree-chrec.h
|
||||
VARRAY_H = varray.h $(MACHMODE_H) $(SYSTEM_H) coretypes.h $(TM_H)
|
||||
TREE_INLINE_H = tree-inline.h $(VARRAY_H) pointer-set.h
|
||||
REAL_H = real.h $(MACHMODE_H)
|
||||
@ -1156,6 +1156,7 @@ OBJS-common = \
|
||||
tree-if-conv.o \
|
||||
tree-into-ssa.o \
|
||||
tree-iterator.o \
|
||||
tree-loop-distribution.o \
|
||||
tree-loop-linear.o \
|
||||
tree-nested.o \
|
||||
tree-nrv.o \
|
||||
@ -2283,6 +2284,11 @@ tree-loop-linear.o: tree-loop-linear.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
|
||||
$(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
|
||||
tree-pass.h $(TREE_DATA_REF_H) $(SCEV_H) $(EXPR_H) $(LAMBDA_H) \
|
||||
$(TARGET_H) tree-chrec.h $(OBSTACK_H)
|
||||
tree-loop-distribution.o: tree-loop-distribution.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
|
||||
$(TM_H) $(GGC_H) $(OPTABS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) \
|
||||
$(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
|
||||
tree-pass.h $(TREE_DATA_REF_H) $(SCEV_H) $(EXPR_H) \
|
||||
$(TARGET_H) tree-chrec.h tree-vectorizer.h
|
||||
tree-parloops.o: tree-parloops.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
|
||||
$(TREE_FLOW_H) $(TREE_H) $(RTL_H) $(CFGLOOP_H) $(TREE_DATA_REF_H) $(GGC_H) \
|
||||
$(DIAGNOSTIC_H) tree-pass.h $(SCEV_H) langhooks.h gt-tree-parloops.h \
|
||||
|
@ -1098,6 +1098,10 @@ ftree-fre
|
||||
Common Report Var(flag_tree_fre) Optimization
|
||||
Enable Full Redundancy Elimination (FRE) on trees
|
||||
|
||||
ftree-loop-distribution
|
||||
Common Report Var(flag_tree_loop_distribution)
|
||||
Enable loop distribution on trees
|
||||
|
||||
ftree-loop-im
|
||||
Common Report Var(flag_tree_loop_im) Init(1) Optimization
|
||||
Enable loop invariant motion on trees
|
||||
|
@ -354,6 +354,7 @@ Objective-C and Objective-C++ Dialects}.
|
||||
-fstrict-aliasing -fstrict-overflow -fthread-jumps -ftracer -ftree-ccp @gol
|
||||
-ftree-ch -ftree-copy-prop -ftree-copyrename -ftree-dce @gol
|
||||
-ftree-dominator-opts -ftree-dse -ftree-fre -ftree-loop-im @gol
|
||||
-ftree-loop-distribution @gol
|
||||
-ftree-loop-ivcanon -ftree-loop-linear -ftree-loop-optimize @gol
|
||||
-ftree-parallelize-loops=@var{n} -ftree-pre -ftree-reassoc -ftree-salias @gol
|
||||
-ftree-sink -ftree-sra -ftree-store-ccp -ftree-ter @gol
|
||||
@ -5928,6 +5929,11 @@ performance and allow further loop optimizations to take place.
|
||||
Compare the results of several data dependence analyzers. This option
|
||||
is used for debugging the data dependence analyzers.
|
||||
|
||||
@item -ftree-loop-distribution
|
||||
Perform loop distribution. This flag can improve cache performance on
|
||||
big loop bodies and allow further loop optimizations, like
|
||||
parallelization or vectorization, to take place.
|
||||
|
||||
@item -ftree-loop-im
|
||||
@opindex ftree-loop-im
|
||||
Perform loop invariant motion on trees. This pass moves only invariants that
|
||||
|
@ -47,6 +47,7 @@ struct graph
|
||||
int n_vertices; /* Number of vertices. */
|
||||
struct vertex *vertices;
|
||||
/* The vertices. */
|
||||
htab_t indices; /* Fast lookup for indices. */
|
||||
};
|
||||
|
||||
struct graph *new_graph (int);
|
||||
|
17
gcc/lambda.h
17
gcc/lambda.h
@ -469,5 +469,22 @@ build_linear_expr (tree type, lambda_vector coefs, VEC (tree, heap) *ivs)
|
||||
return expr;
|
||||
}
|
||||
|
||||
/* Returns the dependence level for a vector DIST_VECT of size LENGTH.
|
||||
LEVEL = 0 means a lexicographic dependence, i.e. a dependence due
|
||||
to the sequence of statements, not carried by any loop. Otherwise LEVEL is the 1-based index of the first nonzero entry of DIST_VECT. */
|
||||
|
||||
|
||||
static inline unsigned
|
||||
dependence_level (lambda_vector dist_vect, int length)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < length; i++)
|
||||
if (dist_vect[i] != 0)
|
||||
return i + 1; /* Levels are numbered from 1; 0 is reserved for "no nonzero entry". */
|
||||
|
||||
return 0; /* Every entry of DIST_VECT is zero. */
|
||||
}
|
||||
|
||||
#endif /* LAMBDA_H */
|
||||
|
||||
|
@ -625,6 +625,7 @@ init_optimization_passes (void)
|
||||
NEXT_PASS (pass_empty_loop);
|
||||
NEXT_PASS (pass_record_bounds);
|
||||
NEXT_PASS (pass_check_data_deps);
|
||||
NEXT_PASS (pass_loop_distribution);
|
||||
NEXT_PASS (pass_linear_transform);
|
||||
NEXT_PASS (pass_iv_canon);
|
||||
NEXT_PASS (pass_if_conversion);
|
||||
|
@ -1,3 +1,20 @@
|
||||
2008-02-28 Sebastian Pop <sebastian.pop@amd.com>
|
||||
|
||||
* testsuite/gcc.dg/tree-ssa/ldist-1.c: New.
|
||||
* testsuite/gcc.dg/tree-ssa/ldist-1a.c: New.
|
||||
* testsuite/gcc.dg/tree-ssa/ldist-2.c: New.
|
||||
* testsuite/gcc.dg/tree-ssa/ldist-3.c: New.
|
||||
* testsuite/gcc.dg/tree-ssa/ldist-4.c: New.
|
||||
* testsuite/gcc.dg/tree-ssa/ldist-5.c: New.
|
||||
* testsuite/gcc.dg/tree-ssa/ldist-6.c: New.
|
||||
* testsuite/gcc.dg/tree-ssa/ldist-7.c: New.
|
||||
* testsuite/gcc.dg/tree-ssa/ldist-8.c: New.
|
||||
* testsuite/gcc.dg/tree-ssa/ldist-9.c: New.
|
||||
* testsuite/gcc.dg/tree-ssa/ldist-10.c: New.
|
||||
* testsuite/gcc.dg/tree-ssa/ldist-11.c: New.
|
||||
* testsuite/gcc.dg/tree-ssa/ldist-12.c: New.
|
||||
* testsuite/gfortran.dg/ldist-1.f90: New.
|
||||
|
||||
2008-02-28 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
* gcc.dg/pr34351.c: Compile for x86 targets only. Use %ebx register.
|
||||
|
38
gcc/testsuite/gcc.dg/tree-ssa/ldist-1.c
Normal file
38
gcc/testsuite/gcc.dg/tree-ssa/ldist-1.c
Normal file
@ -0,0 +1,38 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-all" } */
|
||||
|
||||
void foo (int * __restrict__ ia,
|
||||
int * __restrict__ ib,
|
||||
int * __restrict__ oxa,
|
||||
int * __restrict__ oxb,
|
||||
int * __restrict__ oya,
|
||||
int * __restrict__ oyb)
|
||||
{
|
||||
int i;
|
||||
long int mya[52];
|
||||
long int myb[52];
|
||||
|
||||
for (i=0; i < 52; i++)
|
||||
{
|
||||
mya[i] = ia[i] * oxa[i] + ib[i] * oxb[i];
|
||||
myb[i] = -ia[i] * oxb[i] + ib[i] * oxa[i];
|
||||
oya[i] = mya[i] >> 10;
|
||||
oyb[i] = myb[i] >> 10;
|
||||
}
|
||||
|
||||
/* This loop was distributed, but it is not anymore due to the cost
|
||||
model changes: the result of a distribution would look like this:
|
||||
|
||||
| for (i=0; i < 52; i++)
|
||||
| oya[i] = ia[i] * oxa[i] + ib[i] * oxb[i] >> 10;
|
||||
|
|
||||
| for (i=0; i < 52; i++)
|
||||
| oyb[i] = -ia[i] * oxb[i] + ib[i] * oxa[i] >> 10;
|
||||
|
||||
and in this the array IA is read in both tasks. For maximizing
|
||||
the cache reuse, ldist does not distribute this loop anymore.
|
||||
*/
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" } } */
|
||||
/* { dg-final { cleanup-tree-dump "ldist" } } */
|
24
gcc/testsuite/gcc.dg/tree-ssa/ldist-10.c
Normal file
24
gcc/testsuite/gcc.dg/tree-ssa/ldist-10.c
Normal file
@ -0,0 +1,24 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-all" } */
|
||||
|
||||
int loop1 (int k)
|
||||
{
|
||||
unsigned int i;
|
||||
int a[1000], b[1000], c[1000];
|
||||
|
||||
for (i = 1; i < 1000; i ++)
|
||||
{
|
||||
a[i] = c[i]; /* S1 */
|
||||
b[i] = a[i-1]+1; /* S2 */
|
||||
}
|
||||
/* Dependences:
|
||||
S1->S2 (flow, level 1)
|
||||
|
||||
One partition as A is used in both S1 and S2.
|
||||
*/
|
||||
|
||||
return a[1000-2] + b[1000-1] + c[1000-2];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" } } */
|
||||
/* { dg-final { cleanup-tree-dump "ldist" } } */
|
33
gcc/testsuite/gcc.dg/tree-ssa/ldist-11.c
Normal file
33
gcc/testsuite/gcc.dg/tree-ssa/ldist-11.c
Normal file
@ -0,0 +1,33 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-all" } */
|
||||
|
||||
void foo (int * __restrict__ ia,
|
||||
int * __restrict__ ib,
|
||||
int * __restrict__ oxa,
|
||||
int * __restrict__ oxb,
|
||||
int * __restrict__ oya,
|
||||
int * __restrict__ oyb)
|
||||
{
|
||||
int i;
|
||||
long int mya[52];
|
||||
long int myb[52];
|
||||
|
||||
for (i=0; i < 52; i++)
|
||||
{
|
||||
mya[i] = ia[i] * oxa[i] + ib[i] * oxb[i];
|
||||
myb[i] = -ia[i] * oxb[i] + ib[i] * oxa[i];
|
||||
oya[i] = 0;
|
||||
oyb[i] = myb[i] >> 10;
|
||||
}
|
||||
|
||||
/* This loop should be distributed, and the result should look like
|
||||
this:
|
||||
| memset (oya, 0, 208);
|
||||
| for (i=0; i < 52; i++)
|
||||
| oyb[i] = -ia[i] * oxb[i] + ib[i] * oxa[i] >> 10;
|
||||
*/
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */
|
||||
/* { dg-final { scan-tree-dump-times "generated memset zero" 1 "ldist" } } */
|
||||
/* { dg-final { cleanup-tree-dump "ldist" } } */
|
22
gcc/testsuite/gcc.dg/tree-ssa/ldist-12.c
Normal file
22
gcc/testsuite/gcc.dg/tree-ssa/ldist-12.c
Normal file
@ -0,0 +1,22 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-all" } */
|
||||
|
||||
int foo (int * __restrict__ ia,
|
||||
int * __restrict__ ib,
|
||||
int * __restrict__ oxa,
|
||||
int * __restrict__ oxb)
|
||||
{
|
||||
int i;
|
||||
int oya[52], oyb[52];
|
||||
|
||||
for (i=0; i < 52; i++)
|
||||
{
|
||||
oya[i] = (ia[i] * oxa[i]) >> 10;
|
||||
oyb[i] = (ib[i] * oxb[i]) >> 10;
|
||||
}
|
||||
|
||||
return oya[22] + oyb[21];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */
|
||||
/* { dg-final { cleanup-tree-dump "ldist" } } */
|
22
gcc/testsuite/gcc.dg/tree-ssa/ldist-1a.c
Normal file
22
gcc/testsuite/gcc.dg/tree-ssa/ldist-1a.c
Normal file
@ -0,0 +1,22 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-all" } */
|
||||
|
||||
int foo (int * __restrict__ ia,
|
||||
int * __restrict__ ib,
|
||||
int * __restrict__ oxa,
|
||||
int * __restrict__ oxb)
|
||||
{
|
||||
int i;
|
||||
int oya[52], oyb[52];
|
||||
|
||||
for (i=0; i < 52; i++)
|
||||
{
|
||||
oya[i] = (ia[i] * oxa[i] + ib[i] * oxb[i]) >> 10;
|
||||
oyb[i] = (-ia[i] * oxb[i] + ib[i] * oxa[i]) >> 10;
|
||||
}
|
||||
|
||||
return oya[22] + oyb[21];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" } } */
|
||||
/* { dg-final { cleanup-tree-dump "ldist" } } */
|
31
gcc/testsuite/gcc.dg/tree-ssa/ldist-2.c
Normal file
31
gcc/testsuite/gcc.dg/tree-ssa/ldist-2.c
Normal file
@ -0,0 +1,31 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-all" } */
|
||||
|
||||
void foo (int * __restrict__ a,
|
||||
int * __restrict__ b,
|
||||
int * __restrict__ c)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i=1; i < 10; i++)
|
||||
{
|
||||
a[i] += c[i];
|
||||
b[i] = a[i - 1] + 1;
|
||||
}
|
||||
|
||||
/* This loop is not distributed because the cost of splitting it:
|
||||
|
||||
| for (i=1; i < N; i++)
|
||||
| a[i] += c[i];
|
||||
|
|
||||
| for (i=1; i < N; i++)
|
||||
| b[i] = a[i - 1] + 1;
|
||||
|
||||
is higher due to data in array A that is written and then read in
|
||||
another task. The cost model should forbid the transformation in
|
||||
this case.
|
||||
*/
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" } } */
|
||||
/* { dg-final { cleanup-tree-dump "ldist" } } */
|
34
gcc/testsuite/gcc.dg/tree-ssa/ldist-3.c
Normal file
34
gcc/testsuite/gcc.dg/tree-ssa/ldist-3.c
Normal file
@ -0,0 +1,34 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-all" } */
|
||||
|
||||
int loop1 (int k)
|
||||
{
|
||||
unsigned int i;
|
||||
int a[10000], b[10000], c[10000], d[10000];
|
||||
|
||||
a[0] = k; a[3] = k*2;
|
||||
c[1] = k+1;
|
||||
for (i = 2; i < (10000-1); i ++)
|
||||
{
|
||||
a[i] = k * i; /* S1 */
|
||||
b[i] = a[i-2] + k; /* S2 */
|
||||
c[i] = b[i] + a[i+1]; /* S3 */
|
||||
d[i] = c[i-1] + k + i; /* S4 */
|
||||
}
|
||||
/*
|
||||
Dependences:
|
||||
S1 -> S2 (flow, level 1)
|
||||
S1 -> S3 (anti, level 1)
|
||||
S2 -> S3 (flow, level 0)
|
||||
S3 -> S4 (flow, level 1)
|
||||
|
||||
There are three partitions: {S1, S3}, {S2} and {S4}.
|
||||
|
||||
The cost model should fuse together all the partitions, as they
|
||||
are reusing the same data, ending on a single partition.
|
||||
*/
|
||||
return a[10000-2] + b[10000-1] + c[10000-2] + d[10000-2];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "distributed: split to 3 loops" 0 "ldist" } } */
|
||||
/* { dg-final { cleanup-tree-dump "ldist" } } */
|
28
gcc/testsuite/gcc.dg/tree-ssa/ldist-4.c
Normal file
28
gcc/testsuite/gcc.dg/tree-ssa/ldist-4.c
Normal file
@ -0,0 +1,28 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-all" } */
|
||||
|
||||
int loop1 (int k)
|
||||
{
|
||||
unsigned int i;
|
||||
unsigned int j;
|
||||
int a[100], b[100][100];
|
||||
|
||||
a[0] = k;
|
||||
for (i = 1; i < 100; i ++)
|
||||
{
|
||||
for (j = 0; j < 100; j++)
|
||||
{
|
||||
a[j] = k * i;
|
||||
b[i][j] = a[j-1] + k;
|
||||
}
|
||||
}
|
||||
|
||||
return b[100-1][0];
|
||||
}
|
||||
|
||||
/* We used to distribute also innermost loops, but these could produce
|
||||
too much code in the outer loop, degrading performance of scalar
|
||||
code. So this test now expects no distribution, because the cost model
|
||||
of the stand-alone distribution pass has evolved. */
|
||||
/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" } } */
|
||||
/* { dg-final { cleanup-tree-dump "ldist" } } */
|
33
gcc/testsuite/gcc.dg/tree-ssa/ldist-5.c
Normal file
33
gcc/testsuite/gcc.dg/tree-ssa/ldist-5.c
Normal file
@ -0,0 +1,33 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-all" } */
|
||||
|
||||
int loop1 (int k)
|
||||
{
|
||||
unsigned int i;
|
||||
unsigned int j;
|
||||
int a[100][100], b[100][100], c[100][100], d[100][100];
|
||||
|
||||
a[0][0] = k;
|
||||
for (i = 1; i < 100; i ++)
|
||||
for (j = 1; j < (100-1); j++)
|
||||
{
|
||||
a[i][j] = k * i; /* S1 */
|
||||
b[i][j] = a[i][j-1] + k; /* S2 */
|
||||
c[i][j] = b[i][j] + a[i][j+1]; /* S3 */
|
||||
d[i][j] = c[i][j] + k + i; /* S4 */
|
||||
}
|
||||
/* Dependences:
|
||||
S1->S2 (flow, level 2)
|
||||
S1->S3 (anti, level 2)
|
||||
S2->S3 (flow, level 0)
|
||||
S3->S4 (flow, level 0)
|
||||
*/
|
||||
|
||||
return a[100-1][100-1] + b[100-1][100-1] + c[100-1][100-1] + d[100-1][100-1];
|
||||
}
|
||||
|
||||
/* FIXME: This is XFAILed because of a data dependence analysis
|
||||
problem: the dependence test fails with a "don't know" relation. */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" { xfail *-*-* } } } */
|
||||
/* { dg-final { cleanup-tree-dump "ldist" } } */
|
38
gcc/testsuite/gcc.dg/tree-ssa/ldist-6.c
Normal file
38
gcc/testsuite/gcc.dg/tree-ssa/ldist-6.c
Normal file
@ -0,0 +1,38 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-all" } */
|
||||
|
||||
int loop1 (int k)
|
||||
{
|
||||
unsigned int i;
|
||||
int a[1000], b[1000], c[1000], d[1000];
|
||||
|
||||
for (i = 2; i < (1000-1); i ++) {
|
||||
a[i] = k * i; /* S1 */
|
||||
b[i] = a[i-2] + k; /* S2 */
|
||||
c[i] = b[i-1] + a[i+1]; /* S3 */
|
||||
d[i] = c[i-1] + k + i; /* S4 */
|
||||
}
|
||||
/* Dependences:
|
||||
S1->S2 (flow, level 1)
|
||||
S2->S3 (flow, level 1)
|
||||
S3->S1 (anti, level 1)
|
||||
S3->S4 (flow, level 1)
|
||||
|
||||
There are two partitions: {S1, S2, S3} and {S4}.
|
||||
|
||||
{S1, S2, S3} have to be in the same partition because:
|
||||
- S1 (i) has to be executed before S2 (i+2), as S1 produces a[i] that is then consumed 2 iterations later by S2.
|
||||
- S2 (i) has to be executed before S3 (i+1), as S2 produces b[i] that is then consumed one iteration later by S3,
|
||||
- S3 (i) has to be executed before S1 (i+1), as S3 has to read a[i+1] before S1 overwrites it in the next iteration.
|
||||
|
||||
{S4} is the consumer partition: it consumes the values from array "c" produced in S3.
|
||||
|
||||
The cost model should fuse all the tasks together as the cost of
|
||||
fetching data from caches is too high.
|
||||
*/
|
||||
|
||||
return a[1000-2] + b[1000-1] + c[1000-2] + d[1000-2];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" } } */
|
||||
/* { dg-final { cleanup-tree-dump "ldist" } } */
|
32
gcc/testsuite/gcc.dg/tree-ssa/ldist-7.c
Normal file
32
gcc/testsuite/gcc.dg/tree-ssa/ldist-7.c
Normal file
@ -0,0 +1,32 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-all" } */
|
||||
|
||||
int loop1 (int k)
|
||||
{
|
||||
unsigned int i, z;
|
||||
int a[1000], b[1000], c[1000], d[1000];
|
||||
|
||||
for (i = 2; i < (1000-1); i ++) {
|
||||
z = a[i+1]; /* S1 */
|
||||
a[i] = k * i; /* S2 */
|
||||
b[i] = a[i-2] + k; /* S3 */
|
||||
c[i] = b[i-1] + z; /* S4 */
|
||||
d[i] = c[i-1] + b[i+1] + k + i; /* S5 */
|
||||
}
|
||||
/* Dependences:
|
||||
S1->S2 (anti, level 1)
|
||||
S1->S4 (flow, level 1, scalar)
|
||||
S2->S3 (flow, level 1)
|
||||
S3->S4 (flow, level 1)
|
||||
S4->S5 (flow, level 1)
|
||||
S5->S3 (anti, level 1)
|
||||
|
||||
There is a single partition: {S1, S2, S3, S4, S5}, because of the
|
||||
scalar dependence z between the two partitions {S1, S2} and {S3, S4, S5}.
|
||||
*/
|
||||
|
||||
return a[1000-2] + b[1000-1] + c[1000-2] + d[1000-2];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "distributed" 0 "ldist" } } */
|
||||
/* { dg-final { cleanup-tree-dump "ldist" } } */
|
34
gcc/testsuite/gcc.dg/tree-ssa/ldist-8.c
Normal file
34
gcc/testsuite/gcc.dg/tree-ssa/ldist-8.c
Normal file
@ -0,0 +1,34 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-all" } */
|
||||
|
||||
int loop1 (int k)
|
||||
{
|
||||
unsigned int i;
|
||||
int a[1000], b[1000], c[1000], d[1000];
|
||||
|
||||
for (i = 2; i < (1000-1); i ++)
|
||||
{
|
||||
a[i] = k * i; /* S1 */
|
||||
b[i] = a[i+1] + k; /* S2 */
|
||||
c[i] = a[i-1] + b[i-1] + d[i-1]; /* S3 */
|
||||
d[i] = a[i-1] + b[i+1] + k + i; /* S4 */
|
||||
}
|
||||
/* Dependences:
|
||||
S1->S2 (anti, level 1)
|
||||
S1->S3 (flow, level 1)
|
||||
S1->S4 (flow, level 1)
|
||||
S2->S3 (flow, level 1)
|
||||
S2->S4 (anti, level 1)
|
||||
S4->S3 (flow, level 1)
|
||||
|
||||
Two partitions: {S1, S2, S4} produce information that is consumed in {S3}.
|
||||
|
||||
So that means that the current cost model will also fuse these
|
||||
two partitions into a single one for avoiding cache misses.
|
||||
*/
|
||||
|
||||
return a[1000-2] + b[1000-1] + c[1000-2] + d[1000-2];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" } } */
|
||||
/* { dg-final { cleanup-tree-dump "ldist" } } */
|
26
gcc/testsuite/gcc.dg/tree-ssa/ldist-9.c
Normal file
26
gcc/testsuite/gcc.dg/tree-ssa/ldist-9.c
Normal file
@ -0,0 +1,26 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-all" } */
|
||||
|
||||
int loop1 (int k)
|
||||
{
|
||||
unsigned int i;
|
||||
int a[1000], b[1000];
|
||||
|
||||
for (i = 1; i < (1000-1); i ++) {
|
||||
a[i] = a[i+1] + a[i-1]; /* S1 */
|
||||
b[i] = a[i-1] + k; /* S2 */
|
||||
}
|
||||
/*
|
||||
Dependences:
|
||||
S1->S2 (flow, level 1)
|
||||
S1->S1 (anti, level 1)
|
||||
S1->S1 (flow, level 1)
|
||||
|
||||
One partition, because of the cost of cache misses.
|
||||
*/
|
||||
|
||||
return a[1000-2] + b[1000-1];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" } } */
|
||||
/* { dg-final { cleanup-tree-dump "ldist" } } */
|
33
gcc/testsuite/gfortran.dg/ldist-1.f90
Normal file
33
gcc/testsuite/gfortran.dg/ldist-1.f90
Normal file
@ -0,0 +1,33 @@
|
||||
! { dg-do compile }
|
||||
! { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-all" }
|
||||
|
||||
Subroutine PADEC(DKS,DKDS,HVAR,WM,WG,FN,NS,AN,BN,CN,IT)
|
||||
IMPLICIT REAL*8 (A-H, O-Z)
|
||||
DIMENSION DKS(*),DKDS(*),HVAR(*)
|
||||
COMPLEX*16 WM(*),WG(*),FN(*),AN(*),BN(*),CN(*)
|
||||
COMPLEX*16 H2,CONST
|
||||
COMMON/STRCH/ALP,BET,DH,ZH,UG,VG,T1,T2,DT,TOL,ALPHA ,HAMP,BUMP
|
||||
Parameter (F1 = .8333333333333333D0, F2 = .0833333333333333D0)
|
||||
|
||||
SS=DT/(2.0D0)
|
||||
|
||||
do J=2,NS
|
||||
BS=SS*DKS(J)*HVAR(J)*HVAR(J)
|
||||
AN(J)=F1+2.*BS
|
||||
BN(J)=F2-BS
|
||||
CN(J)=F2-BS
|
||||
H2=WM(J+1)
|
||||
|
||||
if(J.EQ.NS) then
|
||||
CONST=CN(J)*H2
|
||||
else
|
||||
CONST=(0.D0,0.D0)
|
||||
endif
|
||||
FN(J)=(BS+F2)*(H2)+(F1-2.D0*BS)-CONST
|
||||
end do
|
||||
|
||||
return
|
||||
end Subroutine PADEC
|
||||
|
||||
! { dg-final { scan-tree-dump-times "distributed: split to 4 loops" 1 "ldist" } }
|
||||
! { dg-final { cleanup-tree-dump "ldist" } }
|
@ -123,6 +123,7 @@ DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling")
|
||||
DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops")
|
||||
DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization")
|
||||
DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear")
|
||||
DEFTIMEVAR (TV_TREE_LOOP_DISTRIBUTION, "tree loop distribution")
|
||||
DEFTIMEVAR (TV_CHECK_DATA_DEPS , "tree check data dependences")
|
||||
DEFTIMEVAR (TV_TREE_PREFETCH , "tree prefetching")
|
||||
DEFTIMEVAR (TV_TREE_LOOP_IVOPTS , "tree iv optimization")
|
||||
|
@ -5646,22 +5646,30 @@ move_stmt_r (tree *tp, int *walk_subtrees, void *data)
|
||||
/* Marks virtual operands of all statements in basic block BB for
|
||||
renaming. */
|
||||
|
||||
static void
|
||||
mark_virtual_ops_in_region (VEC (basic_block,heap) *bbs)
|
||||
void
|
||||
mark_virtual_ops_in_bb (basic_block bb)
|
||||
{
|
||||
tree phi;
|
||||
block_stmt_iterator bsi;
|
||||
|
||||
for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
|
||||
mark_virtual_ops_for_renaming (phi);
|
||||
|
||||
for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
|
||||
mark_virtual_ops_for_renaming (bsi_stmt (bsi));
|
||||
}
|
||||
|
||||
/* Marks virtual operands of all statements in basic blocks BBS for
|
||||
renaming. */
|
||||
|
||||
static void
|
||||
mark_virtual_ops_in_region (VEC (basic_block,heap) *bbs)
|
||||
{
|
||||
basic_block bb;
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; VEC_iterate (basic_block, bbs, i, bb); i++)
|
||||
{
|
||||
for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
|
||||
mark_virtual_ops_for_renaming (phi);
|
||||
|
||||
for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
|
||||
mark_virtual_ops_for_renaming (bsi_stmt (bsi));
|
||||
}
|
||||
mark_virtual_ops_in_bb (bb);
|
||||
}
|
||||
|
||||
/* Move basic block BB from function CFUN to function DEST_FN. The
|
||||
|
@ -88,7 +88,6 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "tree-dump.h"
|
||||
#include "timevar.h"
|
||||
#include "cfgloop.h"
|
||||
#include "tree-chrec.h"
|
||||
#include "tree-data-ref.h"
|
||||
#include "tree-scalar-evolution.h"
|
||||
#include "tree-pass.h"
|
||||
@ -157,6 +156,14 @@ dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
|
||||
dump_data_reference (file, dr);
|
||||
}
|
||||
|
||||
/* Dump to STDERR all the dependence relations from DDRS. */
|
||||
|
||||
void
|
||||
debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
|
||||
{
|
||||
dump_data_dependence_relations (stderr, ddrs);
|
||||
}
|
||||
|
||||
/* Dump into FILE all the dependence relations from DDRS. */
|
||||
|
||||
void
|
||||
@ -354,6 +361,10 @@ dump_data_dependence_relation (FILE *outf,
|
||||
dra = DDR_A (ddr);
|
||||
drb = DDR_B (ddr);
|
||||
fprintf (outf, "(Data Dep: \n");
|
||||
|
||||
dump_data_reference (outf, dra);
|
||||
dump_data_reference (outf, drb);
|
||||
|
||||
if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
|
||||
fprintf (outf, " (don't know)\n");
|
||||
|
||||
@ -808,7 +819,7 @@ dr_address_invariant_p (struct data_reference *dr)
|
||||
|
||||
/* Frees data reference DR. */
|
||||
|
||||
static void
|
||||
void
|
||||
free_data_ref (data_reference_p dr)
|
||||
{
|
||||
BITMAP_FREE (DR_VOPS (dr));
|
||||
@ -2787,22 +2798,6 @@ build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Return true when the DDR contains two data references that have the
|
||||
same access functions. */
|
||||
|
||||
static bool
|
||||
same_access_functions (const struct data_dependence_relation *ddr)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
|
||||
if (!eq_evolutions_p (DR_ACCESS_FN (DDR_A (ddr), i),
|
||||
DR_ACCESS_FN (DDR_B (ddr), i)))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Return true when the DDR contains only constant access functions. */
|
||||
|
||||
static bool
|
||||
@ -4371,48 +4366,219 @@ free_data_refs (VEC (data_reference_p, heap) *datarefs)
|
||||
|
||||
|
||||
|
||||
/* Returns the index of STMT in RDG. */
|
||||
/* Dump vertex I in RDG to FILE. */
|
||||
|
||||
static int
|
||||
find_vertex_for_stmt (const struct graph *rdg, const_tree stmt)
|
||||
void
|
||||
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
|
||||
{
|
||||
struct vertex *v = &(rdg->vertices[i]);
|
||||
struct graph_edge *e;
|
||||
|
||||
/* Header: vertex index, then "w" and/or "r" when the statement writes/reads memory. */
fprintf (file, "(vertex %d: (%s%s) (in:", i,
|
||||
RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
|
||||
RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
|
||||
|
||||
if (v->pred)
|
||||
for (e = v->pred; e; e = e->pred_next)
|
||||
fprintf (file, " %d", e->src); /* Source vertex of each incoming edge. */
|
||||
|
||||
fprintf (file, ") (out:");
|
||||
|
||||
if (v->succ)
|
||||
for (e = v->succ; e; e = e->succ_next)
|
||||
fprintf (file, " %d", e->dest); /* Destination vertex of each outgoing edge. */
|
||||
|
||||
fprintf (file, ") \n");
|
||||
print_generic_stmt (file, RDGV_STMT (v), TDF_VOPS|TDF_MEMSYMS); /* The statement attached to this vertex. */
|
||||
fprintf (file, ")\n");
|
||||
}
|
||||
|
||||
/* Call dump_rdg_vertex on stderr. */
|
||||
|
||||
void
|
||||
debug_rdg_vertex (struct graph *rdg, int i)
|
||||
{
|
||||
dump_rdg_vertex (stderr, rdg, i);
|
||||
}
|
||||
|
||||
/* Dump component C of RDG to FILE. If DUMPED is non-null, record the
|
||||
indices of the dumped vertices in that bitmap. */
|
||||
|
||||
void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < rdg->n_vertices; i++)
|
||||
if (RDGV_STMT (&(rdg->vertices[i])) == stmt)
|
||||
return i;
|
||||
fprintf (file, "(%d\n", c);
|
||||
|
||||
gcc_unreachable ();
|
||||
return 0;
|
||||
for (i = 0; i < rdg->n_vertices; i++)
|
||||
if (rdg->vertices[i].component == c)
|
||||
{
|
||||
if (dumped)
|
||||
bitmap_set_bit (dumped, i);
|
||||
|
||||
dump_rdg_vertex (file, rdg, i);
|
||||
}
|
||||
|
||||
fprintf (file, ")\n");
|
||||
}
|
||||
|
||||
/* Creates an edge in RDG for each distance vector from DDR. */
|
||||
/* Call dump_rdg_component on stderr. */
|
||||
|
||||
void
|
||||
debug_rdg_component (struct graph *rdg, int c)
|
||||
{
|
||||
dump_rdg_component (stderr, rdg, c, NULL);
|
||||
}
|
||||
|
||||
/* Dump the reduced dependence graph RDG to FILE. */
|
||||
|
||||
void
|
||||
dump_rdg (FILE *file, struct graph *rdg)
|
||||
{
|
||||
int i;
|
||||
bitmap dumped = BITMAP_ALLOC (NULL);
|
||||
|
||||
fprintf (file, "(rdg\n");
|
||||
|
||||
for (i = 0; i < rdg->n_vertices; i++)
|
||||
if (!bitmap_bit_p (dumped, i))
|
||||
dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
|
||||
|
||||
fprintf (file, ")\n");
|
||||
BITMAP_FREE (dumped);
|
||||
}
|
||||
|
||||
/* Call dump_rdg on stderr. */
|
||||
|
||||
void
|
||||
debug_rdg (struct graph *rdg)
|
||||
{
|
||||
dump_rdg (stderr, rdg);
|
||||
}
|
||||
|
||||
static void
|
||||
dot_rdg_1 (FILE *file, struct graph *rdg)
|
||||
{
|
||||
int i;
|
||||
|
||||
fprintf (file, "digraph RDG {\n");
|
||||
|
||||
for (i = 0; i < rdg->n_vertices; i++)
|
||||
{
|
||||
struct vertex *v = &(rdg->vertices[i]);
|
||||
struct graph_edge *e;
|
||||
|
||||
/* Highlight reads from memory. */
|
||||
if (RDG_MEM_READS_STMT (rdg, i))
|
||||
fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
|
||||
|
||||
/* Highlight stores to memory. */
|
||||
if (RDG_MEM_WRITE_STMT (rdg, i))
|
||||
fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
|
||||
|
||||
if (v->succ)
|
||||
for (e = v->succ; e; e = e->succ_next)
|
||||
switch (RDGE_TYPE (e))
|
||||
{
|
||||
case input_dd:
|
||||
fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
|
||||
break;
|
||||
|
||||
case output_dd:
|
||||
fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
|
||||
break;
|
||||
|
||||
case flow_dd:
|
||||
/* These are the most common dependences: don't label these. */
|
||||
fprintf (file, "%d -> %d \n", i, e->dest);
|
||||
break;
|
||||
|
||||
case anti_dd:
|
||||
fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
|
||||
break;
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
|
||||
fprintf (file, "}\n\n");
|
||||
}
|
||||
|
||||
/* Display the RDG using dotty. */
|
||||
|
||||
void
|
||||
dot_rdg (struct graph *rdg)
|
||||
{
|
||||
FILE *file = fopen ("/tmp/rdg.dot", "w");
|
||||
gcc_assert (file != NULL);
|
||||
|
||||
dot_rdg_1 (file, rdg);
|
||||
fclose (file);
|
||||
|
||||
system ("dotty /tmp/rdg.dot");
|
||||
}
|
||||
|
||||
|
||||
/* This structure is used for recording the mapping from a statement to its index in
|
||||
the RDG. */
|
||||
|
||||
struct rdg_vertex_info GTY(())
|
||||
{
|
||||
tree stmt;
|
||||
int index;
|
||||
};
|
||||
|
||||
/* Returns the index of STMT in RDG. */
|
||||
|
||||
int
|
||||
rdg_vertex_for_stmt (struct graph *rdg, tree stmt)
|
||||
{
|
||||
struct rdg_vertex_info rvi, *slot;
|
||||
|
||||
rvi.stmt = stmt;
|
||||
slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
|
||||
|
||||
if (!slot)
|
||||
return -1;
|
||||
|
||||
return slot->index;
|
||||
}
|
||||
|
||||
/* Creates an edge in RDG for each distance vector from DDR. The
|
||||
order that we keep track of in the RDG is the order in which
|
||||
statements have to be executed. */
|
||||
|
||||
static void
|
||||
create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
|
||||
{
|
||||
int va, vb;
|
||||
data_reference_p dra;
|
||||
data_reference_p drb;
|
||||
struct graph_edge *e;
|
||||
int va, vb;
|
||||
data_reference_p dra = DDR_A (ddr);
|
||||
data_reference_p drb = DDR_B (ddr);
|
||||
unsigned level = ddr_dependence_level (ddr);
|
||||
|
||||
if (DDR_REVERSED_P (ddr))
|
||||
/* For non scalar dependences, when the dependence is REVERSED,
|
||||
statement B has to be executed before statement A. */
|
||||
if (level > 0
|
||||
&& !DDR_REVERSED_P (ddr))
|
||||
{
|
||||
dra = DDR_B (ddr);
|
||||
drb = DDR_A (ddr);
|
||||
}
|
||||
else
|
||||
{
|
||||
dra = DDR_A (ddr);
|
||||
drb = DDR_B (ddr);
|
||||
data_reference_p tmp = dra;
|
||||
dra = drb;
|
||||
drb = tmp;
|
||||
}
|
||||
|
||||
va = find_vertex_for_stmt (rdg, DR_STMT (dra));
|
||||
vb = find_vertex_for_stmt (rdg, DR_STMT (drb));
|
||||
va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
|
||||
vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
|
||||
|
||||
if (va < 0 || vb < 0)
|
||||
return;
|
||||
|
||||
e = add_edge (rdg, va, vb);
|
||||
e->data = XNEW (struct rdg_edge);
|
||||
|
||||
RDGE_LEVEL (e) = level;
|
||||
|
||||
/* Determines the type of the data dependence. */
|
||||
if (DR_IS_READ (dra) && DR_IS_READ (drb))
|
||||
RDGE_TYPE (e) = input_dd;
|
||||
@ -4435,9 +4601,13 @@ create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
|
||||
|
||||
FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
|
||||
{
|
||||
int use = find_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
|
||||
struct graph_edge *e = add_edge (rdg, idef, use);
|
||||
struct graph_edge *e;
|
||||
int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
|
||||
|
||||
if (use < 0)
|
||||
continue;
|
||||
|
||||
e = add_edge (rdg, idef, use);
|
||||
e->data = XNEW (struct rdg_edge);
|
||||
RDGE_TYPE (e) = flow_dd;
|
||||
}
|
||||
@ -4458,8 +4628,8 @@ create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
|
||||
create_rdg_edge_for_ddr (rdg, ddr);
|
||||
|
||||
for (i = 0; i < rdg->n_vertices; i++)
|
||||
FOR_EACH_PHI_OR_STMT_DEF (def_p, RDGV_STMT (&(rdg->vertices[i])),
|
||||
iter, SSA_OP_ALL_DEFS)
|
||||
FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
|
||||
iter, SSA_OP_DEF)
|
||||
create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
|
||||
}
|
||||
|
||||
@ -4468,19 +4638,50 @@ create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
|
||||
static void
|
||||
create_rdg_vertices (struct graph *rdg, VEC (tree, heap) *stmts)
|
||||
{
|
||||
int i;
|
||||
tree s;
|
||||
int i, j;
|
||||
tree stmt;
|
||||
|
||||
for (i = 0; VEC_iterate (tree, stmts, i, s); i++)
|
||||
for (i = 0; VEC_iterate (tree, stmts, i, stmt); i++)
|
||||
{
|
||||
VEC (data_ref_loc, heap) *references;
|
||||
data_ref_loc *ref;
|
||||
struct vertex *v = &(rdg->vertices[i]);
|
||||
struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
|
||||
struct rdg_vertex_info **slot;
|
||||
|
||||
rvi->stmt = stmt;
|
||||
rvi->index = i;
|
||||
slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
|
||||
|
||||
if (!*slot)
|
||||
*slot = rvi;
|
||||
else
|
||||
free (rvi);
|
||||
|
||||
v->data = XNEW (struct rdg_vertex);
|
||||
RDGV_STMT (v) = s;
|
||||
RDG_STMT (rdg, i) = stmt;
|
||||
|
||||
RDG_MEM_WRITE_STMT (rdg, i) = false;
|
||||
RDG_MEM_READS_STMT (rdg, i) = false;
|
||||
if (TREE_CODE (stmt) == PHI_NODE)
|
||||
continue;
|
||||
|
||||
get_references_in_stmt (stmt, &references);
|
||||
for (j = 0; VEC_iterate (data_ref_loc, references, j, ref); j++)
|
||||
if (!ref->is_read)
|
||||
RDG_MEM_WRITE_STMT (rdg, i) = true;
|
||||
else
|
||||
RDG_MEM_READS_STMT (rdg, i) = true;
|
||||
|
||||
VEC_free (data_ref_loc, heap, references);
|
||||
}
|
||||
}
|
||||
|
||||
/* Initialize STMTS with all the statements and PHI nodes of LOOP. */
|
||||
/* Initialize STMTS with all the statements and PHI nodes of LOOP,
|
||||
skipping LABEL_EXPR statements. The order in
|
||||
which we discover statements is important as
|
||||
generate_loops_for_partition is using the same traversal for
|
||||
identifying statements. */
|
||||
|
||||
static void
|
||||
stmts_from_loop (struct loop *loop, VEC (tree, heap) **stmts)
|
||||
@ -4490,7 +4691,7 @@ stmts_from_loop (struct loop *loop, VEC (tree, heap) **stmts)
|
||||
|
||||
for (i = 0; i < loop->num_nodes; i++)
|
||||
{
|
||||
tree phi;
|
||||
tree phi, stmt;
|
||||
basic_block bb = bbs[i];
|
||||
block_stmt_iterator bsi;
|
||||
|
||||
@ -4498,7 +4699,8 @@ stmts_from_loop (struct loop *loop, VEC (tree, heap) **stmts)
|
||||
VEC_safe_push (tree, heap, *stmts, phi);
|
||||
|
||||
for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
|
||||
VEC_safe_push (tree, heap, *stmts, bsi_stmt (bsi));
|
||||
if (TREE_CODE (stmt = bsi_stmt (bsi)) != LABEL_EXPR)
|
||||
VEC_safe_push (tree, heap, *stmts, stmt);
|
||||
}
|
||||
|
||||
free (bbs);
|
||||
@ -4519,8 +4721,39 @@ known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Build a Reduced Dependence Graph with one vertex per statement of the
|
||||
loop nest and one edge per data dependence or scalar dependence. */
|
||||
/* Computes a hash value for element ELT from its statement pointer. */
|
||||
|
||||
static hashval_t
|
||||
hash_stmt_vertex_info (const void *elt)
|
||||
{
|
||||
struct rdg_vertex_info *rvi = (struct rdg_vertex_info *) elt;
|
||||
tree stmt = rvi->stmt;
|
||||
|
||||
return htab_hash_pointer (stmt);
|
||||
}
|
||||
|
||||
/* Compares hash table elements E1 and E2 by their statements. */
|
||||
|
||||
static int
|
||||
eq_stmt_vertex_info (const void *e1, const void *e2)
|
||||
{
|
||||
const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
|
||||
const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
|
||||
|
||||
return elt1->stmt == elt2->stmt;
|
||||
}
|
||||
|
||||
/* Free the element E. */
|
||||
|
||||
static void
|
||||
hash_stmt_vertex_del (void *e)
|
||||
{
|
||||
free (e);
|
||||
}
|
||||
|
||||
/* Build the Reduced Dependence Graph (RDG) with one vertex per
|
||||
statement of the loop nest, and one edge per data dependence or
|
||||
scalar dependence. */
|
||||
|
||||
struct graph *
|
||||
build_rdg (struct loop *loop)
|
||||
@ -4529,7 +4762,7 @@ build_rdg (struct loop *loop)
|
||||
struct graph *rdg = NULL;
|
||||
VEC (ddr_p, heap) *dependence_relations;
|
||||
VEC (data_reference_p, heap) *datarefs;
|
||||
VEC (tree, heap) *stmts = VEC_alloc (tree, heap, 10);
|
||||
VEC (tree, heap) *stmts = VEC_alloc (tree, heap, nb_data_refs);
|
||||
|
||||
dependence_relations = VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs) ;
|
||||
datarefs = VEC_alloc (data_reference_p, heap, nb_data_refs);
|
||||
@ -4537,12 +4770,15 @@ build_rdg (struct loop *loop)
|
||||
false,
|
||||
&datarefs,
|
||||
&dependence_relations);
|
||||
|
||||
|
||||
if (!known_dependences_p (dependence_relations))
|
||||
goto end_rdg;
|
||||
|
||||
stmts_from_loop (loop, &stmts);
|
||||
rdg = new_graph (VEC_length (tree, stmts));
|
||||
|
||||
rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
|
||||
eq_stmt_vertex_info, hash_stmt_vertex_del);
|
||||
create_rdg_vertices (rdg, stmts);
|
||||
create_rdg_edges (rdg, dependence_relations);
|
||||
|
||||
@ -4553,3 +4789,197 @@ build_rdg (struct loop *loop)
|
||||
|
||||
return rdg;
|
||||
}
|
||||
|
||||
/* Free the reduced dependence graph RDG. */
|
||||
|
||||
void
|
||||
free_rdg (struct graph *rdg)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < rdg->n_vertices; i++)
|
||||
free (rdg->vertices[i].data);
|
||||
|
||||
htab_delete (rdg->indices);
|
||||
free_graph (rdg);
|
||||
}
|
||||
|
||||
/* Initialize STMTS with all the statements of LOOP that contain a
|
||||
store to memory. */
|
||||
|
||||
void
|
||||
stores_from_loop (struct loop *loop, VEC (tree, heap) **stmts)
|
||||
{
|
||||
unsigned int i;
|
||||
basic_block *bbs = get_loop_body_in_dom_order (loop);
|
||||
|
||||
for (i = 0; i < loop->num_nodes; i++)
|
||||
{
|
||||
basic_block bb = bbs[i];
|
||||
block_stmt_iterator bsi;
|
||||
|
||||
for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
|
||||
if (!ZERO_SSA_OPERANDS (bsi_stmt (bsi), SSA_OP_VDEF))
|
||||
VEC_safe_push (tree, heap, *stmts, bsi_stmt (bsi));
|
||||
}
|
||||
|
||||
free (bbs);
|
||||
}
|
||||
|
||||
/* For a data reference REF, return the declaration of its base
|
||||
address or NULL_TREE if the base is not determined. */
|
||||
|
||||
static inline tree
|
||||
ref_base_address (tree stmt, data_ref_loc *ref)
|
||||
{
|
||||
tree base = NULL_TREE;
|
||||
tree base_address;
|
||||
struct data_reference *dr = XCNEW (struct data_reference);
|
||||
|
||||
DR_STMT (dr) = stmt;
|
||||
DR_REF (dr) = *ref->pos;
|
||||
dr_analyze_innermost (dr);
|
||||
base_address = DR_BASE_ADDRESS (dr);
|
||||
|
||||
if (!base_address)
|
||||
goto end;
|
||||
|
||||
switch (TREE_CODE (base_address))
|
||||
{
|
||||
case ADDR_EXPR:
|
||||
base = TREE_OPERAND (base_address, 0);
|
||||
break;
|
||||
|
||||
default:
|
||||
base = base_address;
|
||||
break;
|
||||
}
|
||||
|
||||
end:
|
||||
free_data_ref (dr);
|
||||
return base;
|
||||
}
|
||||
|
||||
/* Determines whether the statement from vertex V of the RDG has a
|
||||
definition used outside the loop that contains this statement. */
|
||||
|
||||
bool
|
||||
rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
|
||||
{
|
||||
tree stmt = RDG_STMT (rdg, v);
|
||||
struct loop *loop = loop_containing_stmt (stmt);
|
||||
use_operand_p imm_use_p;
|
||||
imm_use_iterator iterator;
|
||||
ssa_op_iter it;
|
||||
def_operand_p def_p;
|
||||
|
||||
if (!loop)
|
||||
return true;
|
||||
|
||||
FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
|
||||
{
|
||||
FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
|
||||
{
|
||||
if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Determines whether statements S1 and S2 access similar memory
|
||||
locations. Two memory accesses are considered similar when they
|
||||
have the same base address declaration, i.e. when their
|
||||
ref_base_address is the same. */
|
||||
|
||||
bool
|
||||
have_similar_memory_accesses (tree s1, tree s2)
|
||||
{
|
||||
bool res = false;
|
||||
unsigned i, j;
|
||||
VEC (data_ref_loc, heap) *refs1, *refs2;
|
||||
data_ref_loc *ref1, *ref2;
|
||||
|
||||
get_references_in_stmt (s1, &refs1);
|
||||
get_references_in_stmt (s2, &refs2);
|
||||
|
||||
for (i = 0; VEC_iterate (data_ref_loc, refs1, i, ref1); i++)
|
||||
{
|
||||
tree base1 = ref_base_address (s1, ref1);
|
||||
|
||||
if (base1)
|
||||
for (j = 0; VEC_iterate (data_ref_loc, refs2, j, ref2); j++)
|
||||
if (base1 == ref_base_address (s2, ref2))
|
||||
{
|
||||
res = true;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
|
||||
end:
|
||||
VEC_free (data_ref_loc, heap, refs1);
|
||||
VEC_free (data_ref_loc, heap, refs2);
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Equality callback for the hashtab: see have_similar_memory_accesses. */
|
||||
|
||||
static int
|
||||
have_similar_memory_accesses_1 (const void *s1, const void *s2)
|
||||
{
|
||||
return have_similar_memory_accesses ((tree) s1, (tree) s2);
|
||||
}
|
||||
|
||||
/* Hash callback for the hashtab: hashes the base address of the first write in S. */
|
||||
|
||||
static hashval_t
|
||||
ref_base_address_1 (const void *s)
|
||||
{
|
||||
tree stmt = (tree) s;
|
||||
unsigned i;
|
||||
VEC (data_ref_loc, heap) *refs;
|
||||
data_ref_loc *ref;
|
||||
hashval_t res = 0;
|
||||
|
||||
get_references_in_stmt (stmt, &refs);
|
||||
|
||||
for (i = 0; VEC_iterate (data_ref_loc, refs, i, ref); i++)
|
||||
if (!ref->is_read)
|
||||
{
|
||||
res = htab_hash_pointer (ref_base_address (stmt, ref));
|
||||
break;
|
||||
}
|
||||
|
||||
VEC_free (data_ref_loc, heap, refs);
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Try to remove duplicated write data references from STMTS. */
|
||||
|
||||
void
|
||||
remove_similar_memory_refs (VEC (tree, heap) **stmts)
|
||||
{
|
||||
unsigned i;
|
||||
tree stmt;
|
||||
htab_t seen = htab_create (VEC_length (tree, *stmts), ref_base_address_1,
|
||||
have_similar_memory_accesses_1, NULL);
|
||||
|
||||
for (i = 0; VEC_iterate (tree, *stmts, i, stmt); )
|
||||
{
|
||||
void **slot;
|
||||
|
||||
slot = htab_find_slot (seen, stmt, INSERT);
|
||||
|
||||
if (*slot)
|
||||
VEC_ordered_remove (tree, *stmts, i);
|
||||
else
|
||||
{
|
||||
*slot = (void *) stmt;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
htab_delete (seen);
|
||||
}
|
||||
|
||||
|
@ -24,6 +24,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "graphds.h"
|
||||
#include "lambda.h"
|
||||
#include "omega.h"
|
||||
#include "tree-chrec.h"
|
||||
|
||||
/*
|
||||
innermost_loop_behavior describes the evolution of the address of the memory
|
||||
@ -38,6 +39,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
Example 1 Example 2
|
||||
data-ref a[j].b[i][j] *(p + x + 16B + 4B * j)
|
||||
|
||||
|
||||
innermost_loop_behavior
|
||||
base_address &a p
|
||||
offset i * D_i x
|
||||
@ -319,26 +321,107 @@ extern void debug_data_dependence_relation (struct data_dependence_relation *);
|
||||
extern void dump_data_dependence_relation (FILE *,
|
||||
struct data_dependence_relation *);
|
||||
extern void dump_data_dependence_relations (FILE *, VEC (ddr_p, heap) *);
|
||||
extern void debug_data_dependence_relations (VEC (ddr_p, heap) *);
|
||||
extern void dump_data_dependence_direction (FILE *,
|
||||
enum data_dependence_direction);
|
||||
extern void free_dependence_relation (struct data_dependence_relation *);
|
||||
extern void free_dependence_relations (VEC (ddr_p, heap) *);
|
||||
extern void free_data_ref (data_reference_p);
|
||||
extern void free_data_refs (VEC (data_reference_p, heap) *);
|
||||
struct data_reference *create_data_ref (struct loop *, tree, tree, bool);
|
||||
bool find_loop_nest (struct loop *, VEC (loop_p, heap) **);
|
||||
void compute_all_dependences (VEC (data_reference_p, heap) *,
|
||||
VEC (ddr_p, heap) **, VEC (loop_p, heap) *, bool);
|
||||
|
||||
/* Return true when the DDR contains two data references that have the
|
||||
same access functions. */
|
||||
|
||||
static inline bool
|
||||
same_access_functions (const struct data_dependence_relation *ddr)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
|
||||
if (!eq_evolutions_p (DR_ACCESS_FN (DDR_A (ddr), i),
|
||||
DR_ACCESS_FN (DDR_B (ddr), i)))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Return true when DDR is an anti-dependence relation. */
|
||||
|
||||
static inline bool
|
||||
ddr_is_anti_dependent (ddr_p ddr)
|
||||
{
|
||||
return (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
|
||||
&& DR_IS_READ (DDR_A (ddr))
|
||||
&& !DR_IS_READ (DDR_B (ddr))
|
||||
&& !same_access_functions (ddr));
|
||||
}
|
||||
|
||||
/* Return true when DEPENDENCE_RELATIONS contains an anti-dependence. */
|
||||
|
||||
static inline bool
|
||||
ddrs_have_anti_deps (VEC (ddr_p, heap) *dependence_relations)
|
||||
{
|
||||
unsigned i;
|
||||
ddr_p ddr;
|
||||
|
||||
for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
|
||||
if (ddr_is_anti_dependent (ddr))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Return the dependence level for the DDR relation. */
|
||||
|
||||
static inline unsigned
|
||||
ddr_dependence_level (ddr_p ddr)
|
||||
{
|
||||
unsigned vector;
|
||||
unsigned level = 0;
|
||||
|
||||
if (DDR_DIST_VECTS (ddr))
|
||||
level = dependence_level (DDR_DIST_VECT (ddr, 0), DDR_NB_LOOPS (ddr));
|
||||
|
||||
for (vector = 1; vector < DDR_NUM_DIST_VECTS (ddr); vector++)
|
||||
level = MIN (level, dependence_level (DDR_DIST_VECT (ddr, vector),
|
||||
DDR_NB_LOOPS (ddr)));
|
||||
return level;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* A RDG vertex representing a statement. */
|
||||
/* A Reduced Dependence Graph (RDG) vertex representing a statement. */
|
||||
typedef struct rdg_vertex
|
||||
{
|
||||
/* The statement represented by this vertex. */
|
||||
tree stmt;
|
||||
|
||||
/* True when the statement contains a write to memory. */
|
||||
bool has_mem_write;
|
||||
|
||||
/* True when the statement contains a read from memory. */
|
||||
bool has_mem_reads;
|
||||
} *rdg_vertex_p;
|
||||
|
||||
#define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
|
||||
#define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
|
||||
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
|
||||
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
|
||||
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
|
||||
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
|
||||
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
|
||||
|
||||
void dump_rdg_vertex (FILE *, struct graph *, int);
|
||||
void debug_rdg_vertex (struct graph *, int);
|
||||
void dump_rdg_component (FILE *, struct graph *, int, bitmap);
|
||||
void debug_rdg_component (struct graph *, int);
|
||||
void dump_rdg (FILE *, struct graph *);
|
||||
void debug_rdg (struct graph *);
|
||||
void dot_rdg (struct graph *);
|
||||
int rdg_vertex_for_stmt (struct graph *, tree);
|
||||
|
||||
/* Data dependence type. */
|
||||
|
||||
@ -363,11 +446,17 @@ typedef struct rdg_edge
|
||||
{
|
||||
/* Type of the dependence. */
|
||||
enum rdg_dep_type type;
|
||||
|
||||
/* Levels of the dependence: the depth of the loops that
|
||||
carry the dependence. */
|
||||
unsigned level;
|
||||
} *rdg_edge_p;
|
||||
|
||||
#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
|
||||
#define RDGE_LEVEL(E) ((struct rdg_edge *) ((E)->data))->level
|
||||
|
||||
struct graph *build_rdg (struct loop *);
|
||||
void free_rdg (struct graph *);
|
||||
|
||||
/* Return the index of the variable VAR in the LOOP_NEST array. */
|
||||
|
||||
@ -385,6 +474,21 @@ index_in_loop_nest (int var, VEC (loop_p, heap) *loop_nest)
|
||||
return var_index;
|
||||
}
|
||||
|
||||
void stores_from_loop (struct loop *, VEC (tree, heap) **);
|
||||
void remove_similar_memory_refs (VEC (tree, heap) **);
|
||||
bool rdg_defs_used_in_other_loops_p (struct graph *, int);
|
||||
bool have_similar_memory_accesses (tree, tree);
|
||||
|
||||
/* Determines whether RDG vertices V1 and V2 access similar memory
|
||||
locations, in which case they have to be in the same partition. */
|
||||
|
||||
static inline bool
|
||||
rdg_has_similar_memory_accesses (struct graph *rdg, int v1, int v2)
|
||||
{
|
||||
return have_similar_memory_accesses (RDG_STMT (rdg, v1),
|
||||
RDG_STMT (rdg, v2));
|
||||
}
|
||||
|
||||
/* In lambda-code.c */
|
||||
bool lambda_transform_legal_p (lambda_trans_matrix, int, VEC (ddr_p, heap) *);
|
||||
|
||||
|
@ -792,6 +792,7 @@ extern void end_recording_case_labels (void);
|
||||
extern basic_block move_sese_region_to_fn (struct function *, basic_block,
|
||||
basic_block);
|
||||
void remove_edge_and_dominated_blocks (edge);
|
||||
void mark_virtual_ops_in_bb (basic_block);
|
||||
|
||||
/* In tree-cfgcleanup.c */
|
||||
extern bitmap cfgcleanup_altered_bbs;
|
||||
@ -1022,6 +1023,8 @@ bool tree_duplicate_loop_to_header_edge (struct loop *, edge,
|
||||
unsigned int, sbitmap,
|
||||
edge, VEC (edge, heap) **,
|
||||
int);
|
||||
struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *, edge);
|
||||
void rename_variables_in_loop (struct loop *);
|
||||
struct loop *tree_ssa_loop_version (struct loop *, tree,
|
||||
basic_block *);
|
||||
tree expand_simple_operations (tree);
|
||||
|
1173
gcc/tree-loop-distribution.c
Normal file
1173
gcc/tree-loop-distribution.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -265,6 +265,7 @@ extern struct tree_opt_pass pass_scev_cprop;
|
||||
extern struct tree_opt_pass pass_empty_loop;
|
||||
extern struct tree_opt_pass pass_record_bounds;
|
||||
extern struct tree_opt_pass pass_if_conversion;
|
||||
extern struct tree_opt_pass pass_loop_distribution;
|
||||
extern struct tree_opt_pass pass_vectorize;
|
||||
extern struct tree_opt_pass pass_complete_unroll;
|
||||
extern struct tree_opt_pass pass_parallelize_loops;
|
||||
|
@ -41,28 +41,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "toplev.h"
|
||||
#include "recog.h"
|
||||
|
||||
/* Main analysis functions. */
|
||||
static bool vect_analyze_data_refs (loop_vec_info);
|
||||
static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
|
||||
static void vect_analyze_scalar_cycles (loop_vec_info);
|
||||
static bool vect_analyze_data_ref_accesses (loop_vec_info);
|
||||
static bool vect_analyze_data_ref_dependences (loop_vec_info);
|
||||
static bool vect_analyze_data_refs_alignment (loop_vec_info);
|
||||
static bool vect_compute_data_refs_alignment (loop_vec_info);
|
||||
static bool vect_enhance_data_refs_alignment (loop_vec_info);
|
||||
static bool vect_analyze_operations (loop_vec_info);
|
||||
static bool vect_determine_vectorization_factor (loop_vec_info);
|
||||
|
||||
/* Utility functions for the analyses. */
|
||||
static bool exist_non_indexing_operands_for_use_p (tree, tree);
|
||||
static tree vect_get_loop_niters (struct loop *, tree *);
|
||||
static bool vect_analyze_data_ref_dependence
|
||||
(struct data_dependence_relation *, loop_vec_info);
|
||||
static bool vect_compute_data_ref_alignment (struct data_reference *);
|
||||
static bool vect_analyze_data_ref_access (struct data_reference *);
|
||||
static bool vect_can_advance_ivs_p (loop_vec_info);
|
||||
static void vect_update_misalignment_for_peel
|
||||
(struct data_reference *, struct data_reference *, int npeel);
|
||||
|
||||
/* Function vect_determine_vectorization_factor
|
||||
|
||||
|
@ -146,25 +146,9 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "tree-vectorizer.h"
|
||||
#include "tree-pass.h"
|
||||
|
||||
/*************************************************************************
|
||||
Simple Loop Peeling Utilities
|
||||
*************************************************************************/
|
||||
static void slpeel_update_phis_for_duplicate_loop
|
||||
(struct loop *, struct loop *, bool after);
|
||||
static void slpeel_update_phi_nodes_for_guard1
|
||||
(edge, struct loop *, bool, basic_block *, bitmap *);
|
||||
static void slpeel_update_phi_nodes_for_guard2
|
||||
(edge, struct loop *, bool, basic_block *);
|
||||
static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
|
||||
|
||||
static void rename_use_op (use_operand_p);
|
||||
static void rename_variables_in_bb (basic_block);
|
||||
static void rename_variables_in_loop (struct loop *);
|
||||
|
||||
/*************************************************************************
|
||||
General Vectorization Utilities
|
||||
*************************************************************************/
|
||||
static void vect_set_dump_settings (void);
|
||||
|
||||
/* vect_dump will be set to stderr or dump_file if exist. */
|
||||
FILE *vect_dump;
|
||||
@ -241,7 +225,7 @@ rename_variables_in_bb (basic_block bb)
|
||||
|
||||
/* Renames variables in new generated LOOP. */
|
||||
|
||||
static void
|
||||
void
|
||||
rename_variables_in_loop (struct loop *loop)
|
||||
{
|
||||
unsigned i;
|
||||
@ -806,7 +790,7 @@ slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
|
||||
/* Given LOOP this function generates a new copy of it and puts it
|
||||
on E which is either the entry or exit of LOOP. */
|
||||
|
||||
static struct loop *
|
||||
struct loop *
|
||||
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
|
||||
{
|
||||
struct loop *new_loop;
|
||||
@ -871,6 +855,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
|
||||
if (at_exit) /* Add the loop copy at exit. */
|
||||
{
|
||||
redirect_edge_and_branch_force (e, new_loop->header);
|
||||
PENDING_STMT (e) = NULL;
|
||||
set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
|
||||
if (was_imm_dom)
|
||||
set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
|
||||
@ -888,6 +873,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
|
||||
new_exit_e = EDGE_SUCC (new_loop->header, 1);
|
||||
|
||||
redirect_edge_and_branch_force (new_exit_e, loop->header);
|
||||
PENDING_STMT (new_exit_e) = NULL;
|
||||
set_immediate_dominator (CDI_DOMINATORS, loop->header,
|
||||
new_exit_e->src);
|
||||
|
||||
@ -901,6 +887,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
|
||||
}
|
||||
|
||||
redirect_edge_and_branch_force (entry_e, new_loop->header);
|
||||
PENDING_STMT (entry_e) = NULL;
|
||||
set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
|
||||
}
|
||||
|
||||
|
@ -630,6 +630,7 @@ extern struct loop *slpeel_tree_peel_loop_to_edge
|
||||
(struct loop *, edge, tree, tree, bool, unsigned int, bool);
|
||||
extern void set_prologue_iterations (basic_block, tree,
|
||||
struct loop *, unsigned int);
|
||||
struct loop *tree_duplicate_loop_on_edge (struct loop *, edge);
|
||||
extern void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
|
||||
extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge);
|
||||
#ifdef ENABLE_CHECKING
|
||||
|
Loading…
x
Reference in New Issue
Block a user