Fix PR43023: fuse_partitions_with_similar_memory_accesses.

2010-12-10  Sebastian Pop  <sebastian.pop@amd.com>

	PR tree-optimization/43023
	* tree-data-ref.c (mem_write_stride_of_same_size_as_unit_type_p):
	Removed.
	(stores_zero_from_loop): Call stmt_stores_zero.
	* tree-data-ref.h (stmt_stores_zero): New.
	* tree-loop-distribution.c (generate_memset_zero): Do not return a
	boolean.  Call gcc_assert on stride_of_unit_type_p.
	(generate_builtin): Call stmt_stores_zero.
	(rdg_flag_all_uses): Removed.
	(rdg_flag_similar_memory_accesses): Removed.
	(build_rdg_partition_for_component): Removed parameter
	other_stores.  Removed call to rdg_flag_similar_memory_accesses.
	(can_generate_builtin): New.
	(similar_memory_accesses): New.
	(fuse_partitions_with_similar_memory_accesses): New.
	(rdg_build_partitions): Call
	fuse_partitions_with_similar_memory_accesses.

	* gfortran.dg/ldist-1.f90: Adjust pattern.
	* gfortran.dg/ldist-pr43023.f90: New.

From-SVN: r167697
This commit is contained in:
Sebastian Pop 2010-12-10 19:16:48 +00:00
parent b595b1a11e
commit cfee318d13
7 changed files with 172 additions and 129 deletions

View File

@ -1,3 +1,23 @@
2010-12-10 Sebastian Pop <sebastian.pop@amd.com>
PR tree-optimization/43023
* tree-data-ref.c (mem_write_stride_of_same_size_as_unit_type_p):
Removed.
(stores_zero_from_loop): Call stmt_stores_zero.
* tree-data-ref.h (stmt_stores_zero): New.
* tree-loop-distribution.c (generate_memset_zero): Do not return a
boolean. Call gcc_assert on stride_of_unit_type_p.
(generate_builtin): Call stmt_stores_zero.
(rdg_flag_all_uses): Removed.
(rdg_flag_similar_memory_accesses): Removed.
(build_rdg_partition_for_component): Removed parameter
other_stores. Removed call to rdg_flag_similar_memory_accesses.
(can_generate_builtin): New.
(similar_memory_accesses): New.
(fuse_partitions_with_similar_memory_accesses): New.
(rdg_build_partitions): Call
fuse_partitions_with_similar_memory_accesses.
2010-12-10 Jakub Jelinek <jakub@redhat.com>
PR rtl-optimization/46804
@ -108,9 +128,9 @@
(abshi2): Delete.
(neghi2, negqi2): Use PDPint iterator.
* config/pdp11/pdp11.c (find_addr_reg, output_move_double,
output_move_quad): Delete.
output_move_quad): Delete.
(pdp11_expand_operands, output_move_multiple): New functions.
2010-12-09 Joseph Myers <joseph@codesourcery.com>
* config/vax/linux.h (WCHAR_TYPE, WCHAR_TYPE_SIZE): Define.

View File

@ -1,3 +1,9 @@
2010-12-10 Sebastian Pop <sebastian.pop@amd.com>
PR tree-optimization/43023
* gfortran.dg/ldist-1.f90: Adjust pattern.
* gfortran.dg/ldist-pr43023.f90: New.
2010-12-10 Jakub Jelinek <jakub@redhat.com>
PR rtl-optimization/46804
@ -45,8 +51,8 @@
* obj-c++.dg/class-extension-3.mm: New.
* obj-c++.dg/property/at-property-26.mm: New.
* obj-c++.dg/property/at-property-27.mm: New.
* obj-c++.dg/property/at-property-28.mm: New.
* obj-c++.dg/property/at-property-28.mm: New.
2010-12-09 John David Anglin <dave.anglin@nrc-cnrc.gc.ca>
PR target/46057
@ -113,12 +119,12 @@
* obj-c++.dg/exceptions-7.mm: New.
* obj-c++.dg/exceptions-3.mm: Adjust for new C++ messages.
* obj-c++.dg/exceptions-5.mm: Same change.
2010-12-08 Nicola Pero <nicola.pero@meta-innovation.com>
* objc.dg/foreach-6.m: Updated location of error messages.
* objc.dg/foreach-7.m: Same change.
2010-12-08 Richard Guenther <rguenther@suse.de>
Sebastian Pop <sebastian.pop@amd.com>

View File

@ -29,5 +29,8 @@ Subroutine PADEC(DKS,DKDS,HVAR,WM,WG,FN,NS,AN,BN,CN,IT)
return
end Subroutine PADEC
! { dg-final { scan-tree-dump-times "distributed: split to 4 loops" 1 "ldist" } }
! There are 5 legal partitions in this code. Based on the data
! locality heuristic, this loop should not be split.
! { dg-final { scan-tree-dump-not "distributed: split to" "ldist" } }
! { dg-final { cleanup-tree-dump "ldist" } }

View File

@ -0,0 +1,31 @@
! { dg-do compile }
! { dg-options "-O2 -ftree-loop-distribution" }
MODULE NFT_mod
implicit none
integer :: Nangle
real:: Z0
real, dimension(:,:), allocatable :: Angle
real, dimension(:), allocatable :: exth, ezth, hxth, hyth, hyphi
CONTAINS
SUBROUTINE NFT_Init()
real :: th, fi
integer :: n
do n = 1,Nangle
th = Angle(n,1)
fi = Angle(n,2)
exth(n) = cos(fi)*cos(th)
ezth(n) = -sin(th)
hxth(n) = -sin(fi)
hyth(n) = cos(fi)
hyphi(n) = -sin(fi)
end do
END SUBROUTINE NFT_Init
END MODULE NFT_mod

View File

@ -4509,7 +4509,7 @@ dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
for (e = v->succ; e; e = e->succ_next)
fprintf (file, " %d", e->dest);
fprintf (file, ") \n");
fprintf (file, ")\n");
print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
fprintf (file, ")\n");
}
@ -4976,16 +4976,27 @@ stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
free (bbs);
}
/* Returns true when STMT is an assignment that contains a data
reference on its LHS with a stride of the same size as its unit
type. */
/* Returns true when the statement at STMT is of the form "A[i] = 0"
that contains a data reference on its LHS with a stride of the same
size as its unit type. */
static bool
mem_write_stride_of_same_size_as_unit_type_p (gimple stmt)
bool
stmt_with_adjacent_zero_store_dr_p (gimple stmt)
{
struct data_reference *dr = XCNEW (struct data_reference);
tree op0 = gimple_assign_lhs (stmt);
tree op0, op1;
bool res;
struct data_reference *dr;
if (!stmt
|| !gimple_vdef (stmt)
|| !is_gimple_assign (stmt)
|| !gimple_assign_single_p (stmt)
|| !(op1 = gimple_assign_rhs1 (stmt))
|| !(integer_zerop (op1) || real_zerop (op1)))
return false;
dr = XCNEW (struct data_reference);
op0 = gimple_assign_lhs (stmt);
DR_STMT (dr) = stmt;
DR_REF (dr) = op0;
@ -5007,18 +5018,12 @@ stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
basic_block bb;
gimple_stmt_iterator si;
gimple stmt;
tree op;
basic_block *bbs = get_loop_body_in_dom_order (loop);
for (i = 0; i < loop->num_nodes; i++)
for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
if ((stmt = gsi_stmt (si))
&& gimple_vdef (stmt)
&& is_gimple_assign (stmt)
&& gimple_assign_rhs_code (stmt) == INTEGER_CST
&& (op = gimple_assign_rhs1 (stmt))
&& (integer_zerop (op) || real_zerop (op))
&& mem_write_stride_of_same_size_as_unit_type_p (stmt))
&& stmt_with_adjacent_zero_store_dr_p (stmt))
VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
free (bbs);

View File

@ -602,6 +602,7 @@ void stores_zero_from_loop (struct loop *, VEC (gimple, heap) **);
void remove_similar_memory_refs (VEC (gimple, heap) **);
bool rdg_defs_used_in_other_loops_p (struct graph *, int);
bool have_similar_memory_accesses (gimple, gimple);
bool stmt_with_adjacent_zero_store_dr_p (gimple);
/* Returns true when STRIDE is equal in absolute value to the size of
the unit type of TYPE. */

View File

@ -241,7 +241,7 @@ build_size_arg_loc (location_t loc, tree nb_iter, tree op,
/* Generate a call to memset. Return true when the operation succeeded. */
static bool
static void
generate_memset_zero (gimple stmt, tree op0, tree nb_iter,
gimple_stmt_iterator bsi)
{
@ -255,11 +255,8 @@ generate_memset_zero (gimple stmt, tree op0, tree nb_iter,
DR_STMT (dr) = stmt;
DR_REF (dr) = op0;
if (!dr_analyze_innermost (dr))
goto end;
if (!stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)))
goto end;
res = dr_analyze_innermost (dr);
gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)));
nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
@ -286,14 +283,11 @@ generate_memset_zero (gimple stmt, tree op0, tree nb_iter,
fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes);
gimple_seq_add_stmt (&stmt_list, fn_call);
gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING);
res = true;
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "generated memset zero\n");
end:
free_data_ref (dr);
return res;
}
/* Tries to generate a builtin function for the instructions of LOOP
@ -307,7 +301,6 @@ generate_builtin (struct loop *loop, bitmap partition, bool copy_p)
unsigned i, x = 0;
basic_block *bbs;
gimple write = NULL;
tree op0, op1;
gimple_stmt_iterator bsi;
tree nb_iter = number_of_exit_cond_executions (loop);
@ -343,26 +336,17 @@ generate_builtin (struct loop *loop, bitmap partition, bool copy_p)
}
}
if (!write)
goto end;
op0 = gimple_assign_lhs (write);
op1 = gimple_assign_rhs1 (write);
if (!(TREE_CODE (op0) == ARRAY_REF
|| TREE_CODE (op0) == MEM_REF))
if (!stmt_with_adjacent_zero_store_dr_p (write))
goto end;
/* The new statements will be placed before LOOP. */
bsi = gsi_last_bb (loop_preheader_edge (loop)->src);
if (gimple_assign_rhs_code (write) == INTEGER_CST
&& (integer_zerop (op1) || real_zerop (op1)))
res = generate_memset_zero (write, op0, nb_iter, bsi);
generate_memset_zero (write, gimple_assign_lhs (write), nb_iter, bsi);
res = true;
/* If this is the last partition for which we generate code, we have
to destroy the loop. */
if (res && !copy_p)
if (!copy_p)
{
unsigned nbbs = loop->num_nodes;
edge exit = single_exit (loop);
@ -504,24 +488,6 @@ has_upstream_mem_writes (int u)
static void rdg_flag_vertex_and_dependent (struct graph *, int, bitmap, bitmap,
bitmap, bool *);
/* Flag all the uses of U. */
static void
rdg_flag_all_uses (struct graph *rdg, int u, bitmap partition, bitmap loops,
bitmap processed, bool *part_has_writes)
{
struct graph_edge *e;
for (e = rdg->vertices[u].succ; e; e = e->succ_next)
if (!bitmap_bit_p (processed, e->dest))
{
rdg_flag_vertex_and_dependent (rdg, e->dest, partition, loops,
processed, part_has_writes);
rdg_flag_all_uses (rdg, e->dest, partition, loops, processed,
part_has_writes);
}
}
/* Flag the uses of U stopping following the information from
upstream_mem_writes. */
@ -689,68 +655,13 @@ rdg_flag_loop_exits (struct graph *rdg, bitmap loops, bitmap partition,
}
}
/* Flag all the nodes of RDG containing memory accesses that could
potentially belong to arrays already accessed in the current
PARTITION. */
static void
rdg_flag_similar_memory_accesses (struct graph *rdg, bitmap partition,
bitmap loops, bitmap processed,
VEC (int, heap) **other_stores)
{
bool foo;
unsigned i, n;
int j, k, kk;
bitmap_iterator ii;
struct graph_edge *e;
EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, ii)
if (RDG_MEM_WRITE_STMT (rdg, i)
|| RDG_MEM_READS_STMT (rdg, i))
{
for (j = 0; j < rdg->n_vertices; j++)
if (!bitmap_bit_p (processed, j)
&& (RDG_MEM_WRITE_STMT (rdg, j)
|| RDG_MEM_READS_STMT (rdg, j))
&& rdg_has_similar_memory_accesses (rdg, i, j))
{
/* Flag first the node J itself, and all the nodes that
are needed to compute J. */
rdg_flag_vertex_and_dependent (rdg, j, partition, loops,
processed, &foo);
/* When J is a read, we want to coalesce in the same
PARTITION all the nodes that are using J: this is
needed for better cache locality. */
rdg_flag_all_uses (rdg, j, partition, loops, processed, &foo);
/* Remove from OTHER_STORES the vertex that we flagged. */
if (RDG_MEM_WRITE_STMT (rdg, j))
FOR_EACH_VEC_ELT (int, *other_stores, k, kk)
if (kk == j)
{
VEC_unordered_remove (int, *other_stores, k);
break;
}
}
/* If the node I has two uses, then keep these together in the
same PARTITION. */
for (n = 0, e = rdg->vertices[i].succ; e; e = e->succ_next, n++);
if (n > 1)
rdg_flag_all_uses (rdg, i, partition, loops, processed, &foo);
}
}
/* Returns a bitmap in which all the statements needed for computing
the strongly connected component C of the RDG are flagged, also
including the loop exit conditions. */
static bitmap
build_rdg_partition_for_component (struct graph *rdg, rdgc c,
bool *part_has_writes,
VEC (int, heap) **other_stores)
bool *part_has_writes)
{
int i, v;
bitmap partition = BITMAP_ALLOC (NULL);
@ -762,14 +673,6 @@ build_rdg_partition_for_component (struct graph *rdg, rdgc c,
rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed,
part_has_writes);
/* Also iterate on the array of stores not in the starting vertices,
and determine those vertices that have some memory affinity with
the current nodes in the component: these are stores to the same
arrays, i.e. we're taking care of cache locality. */
if (!flag_tree_loop_distribute_patterns)
rdg_flag_similar_memory_accesses (rdg, partition, loops, processed,
other_stores);
rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes);
BITMAP_FREE (processed);
@ -832,6 +735,79 @@ rdg_build_components (struct graph *rdg, VEC (int, heap) *starting_vertices,
BITMAP_FREE (saved_components);
}
/* Returns true when it is possible to generate a builtin pattern for
the PARTITION of RDG. For the moment we detect only the memset
zero pattern. */
static bool
can_generate_builtin (struct graph *rdg, bitmap partition)
{
unsigned i;
bitmap_iterator bi;
int nb_reads = 0;
int nb_writes = 0;
int stores_zero = 0;
EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, bi)
if (RDG_MEM_READS_STMT (rdg, i))
nb_reads++;
else if (RDG_MEM_WRITE_STMT (rdg, i))
{
nb_writes++;
if (stmt_with_adjacent_zero_store_dr_p (RDG_STMT (rdg, i)))
stores_zero++;
}
return stores_zero == 1 && nb_writes == 1 && nb_reads == 0;
}
/* Returns true when PARTITION1 and PARTITION2 have similar memory
accesses in RDG. */
static bool
similar_memory_accesses (struct graph *rdg, bitmap partition1,
bitmap partition2)
{
unsigned i, j;
bitmap_iterator bi, bj;
EXECUTE_IF_SET_IN_BITMAP (partition1, 0, i, bi)
if (RDG_MEM_WRITE_STMT (rdg, i)
|| RDG_MEM_READS_STMT (rdg, i))
EXECUTE_IF_SET_IN_BITMAP (partition2, 0, j, bj)
if (RDG_MEM_WRITE_STMT (rdg, j)
|| RDG_MEM_READS_STMT (rdg, j))
if (rdg_has_similar_memory_accesses (rdg, i, j))
return true;
return false;
}
/* Fuse all the partitions from PARTITIONS that contain similar memory
references, i.e., we're taking care of cache locality. This
function does not fuse those partitions that contain patterns that
can be code generated with builtins. */
static void
fuse_partitions_with_similar_memory_accesses (struct graph *rdg,
VEC (bitmap, heap) **partitions)
{
int p1, p2;
bitmap partition1, partition2;
FOR_EACH_VEC_ELT (bitmap, *partitions, p1, partition1)
if (!can_generate_builtin (rdg, partition1))
FOR_EACH_VEC_ELT (bitmap, *partitions, p2, partition2)
if (p1 != p2
&& !can_generate_builtin (rdg, partition2)
&& similar_memory_accesses (rdg, partition1, partition2))
{
bitmap_ior_into (partition1, partition2);
VEC_ordered_remove (bitmap, *partitions, p2);
p2--;
}
}
/* Aggregate several components into a useful partition that is
registered in the PARTITIONS vector. Partitions will be
distributed in different loops. */
@ -854,8 +830,7 @@ rdg_build_partitions (struct graph *rdg, VEC (rdgc, heap) *components,
if (bitmap_bit_p (processed, v))
continue;
np = build_rdg_partition_for_component (rdg, x, &part_has_writes,
other_stores);
np = build_rdg_partition_for_component (rdg, x, &part_has_writes);
bitmap_ior_into (partition, np);
bitmap_ior_into (processed, np);
BITMAP_FREE (np);
@ -901,6 +876,8 @@ rdg_build_partitions (struct graph *rdg, VEC (rdgc, heap) *components,
VEC_safe_push (bitmap, heap, *partitions, partition);
else
BITMAP_FREE (partition);
fuse_partitions_with_similar_memory_accesses (rdg, partitions);
}
/* Dump to FILE the PARTITIONS. */