mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-03-18 14:30:42 +08:00
Fix PR43023: fuse_partitions_with_similar_memory_accesses.
2010-12-10 Sebastian Pop <sebastian.pop@amd.com> PR tree-optimization/43023 * tree-data-ref.c (mem_write_stride_of_same_size_as_unit_type_p): Removed. (stores_zero_from_loop): Call stmt_with_adjacent_zero_store_dr_p. * tree-data-ref.h (stmt_with_adjacent_zero_store_dr_p): New. * tree-loop-distribution.c (generate_memset_zero): Do not return a boolean. Call gcc_assert on stride_of_unit_type_p. (generate_builtin): Call stmt_with_adjacent_zero_store_dr_p. (rdg_flag_all_uses): Removed. (rdg_flag_similar_memory_accesses): Removed. (build_rdg_partition_for_component): Removed parameter other_stores. Removed call to rdg_flag_similar_memory_accesses. (can_generate_builtin): New. (similar_memory_accesses): New. (fuse_partitions_with_similar_memory_accesses): New. (rdg_build_partitions): Call fuse_partitions_with_similar_memory_accesses. * gfortran.dg/ldist-1.f90: Adjust pattern. * gfortran.dg/ldist-pr43023.f90: New. From-SVN: r167697
This commit is contained in:
parent
b595b1a11e
commit
cfee318d13
@ -1,3 +1,23 @@
|
||||
2010-12-10 Sebastian Pop <sebastian.pop@amd.com>
|
||||
|
||||
PR tree-optimization/43023
|
||||
* tree-data-ref.c (mem_write_stride_of_same_size_as_unit_type_p):
|
||||
Removed.
|
||||
(stores_zero_from_loop): Call stmt_with_adjacent_zero_store_dr_p.
|
||||
* tree-data-ref.h (stmt_with_adjacent_zero_store_dr_p): New.
|
||||
* tree-loop-distribution.c (generate_memset_zero): Do not return a
|
||||
boolean. Call gcc_assert on stride_of_unit_type_p.
|
||||
(generate_builtin): Call stmt_with_adjacent_zero_store_dr_p.
|
||||
(rdg_flag_all_uses): Removed.
|
||||
(rdg_flag_similar_memory_accesses): Removed.
|
||||
(build_rdg_partition_for_component): Removed parameter
|
||||
other_stores. Removed call to rdg_flag_similar_memory_accesses.
|
||||
(can_generate_builtin): New.
|
||||
(similar_memory_accesses): New.
|
||||
(fuse_partitions_with_similar_memory_accesses): New.
|
||||
(rdg_build_partitions): Call
|
||||
fuse_partitions_with_similar_memory_accesses.
|
||||
|
||||
2010-12-10 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR rtl-optimization/46804
|
||||
@ -108,9 +128,9 @@
|
||||
(abshi2): Delete.
|
||||
(neghi2, negqi2): Use PDPint iterator.
|
||||
* config/pdp11/pdp11.c (find_addr_reg, output_move_double,
|
||||
output_move_quad): Delete.
|
||||
output_move_quad): Delete.
|
||||
(pdp11_expand_operands, output_move_multiple): New functions.
|
||||
|
||||
|
||||
2010-12-09 Joseph Myers <joseph@codesourcery.com>
|
||||
|
||||
* config/vax/linux.h (WCHAR_TYPE, WCHAR_TYPE_SIZE): Define.
|
||||
|
@ -1,3 +1,9 @@
|
||||
2010-12-10 Sebastian Pop <sebastian.pop@amd.com>
|
||||
|
||||
PR tree-optimization/43023
|
||||
* gfortran.dg/ldist-1.f90: Adjust pattern.
|
||||
* gfortran.dg/ldist-pr43023.f90: New.
|
||||
|
||||
2010-12-10 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR rtl-optimization/46804
|
||||
@ -45,8 +51,8 @@
|
||||
* obj-c++.dg/class-extension-3.mm: New.
|
||||
* obj-c++.dg/property/at-property-26.mm: New.
|
||||
* obj-c++.dg/property/at-property-27.mm: New.
|
||||
* obj-c++.dg/property/at-property-28.mm: New.
|
||||
|
||||
* obj-c++.dg/property/at-property-28.mm: New.
|
||||
|
||||
2010-12-09 John David Anglin <dave.anglin@nrc-cnrc.gc.ca>
|
||||
|
||||
PR target/46057
|
||||
@ -113,12 +119,12 @@
|
||||
* obj-c++.dg/exceptions-7.mm: New.
|
||||
* obj-c++.dg/exceptions-3.mm: Adjust for new C++ messages.
|
||||
* obj-c++.dg/exceptions-5.mm: Same change.
|
||||
|
||||
|
||||
2010-12-08 Nicola Pero <nicola.pero@meta-innovation.com>
|
||||
|
||||
* objc.dg/foreach-6.m: Updated location of error messages.
|
||||
* objc.dg/foreach-7.m: Same change.
|
||||
|
||||
|
||||
2010-12-08 Richard Guenther <rguenther@suse.de>
|
||||
Sebastian Pop <sebastian.pop@amd.com>
|
||||
|
||||
|
@ -29,5 +29,8 @@ Subroutine PADEC(DKS,DKDS,HVAR,WM,WG,FN,NS,AN,BN,CN,IT)
|
||||
return
|
||||
end Subroutine PADEC
|
||||
|
||||
! { dg-final { scan-tree-dump-times "distributed: split to 4 loops" 1 "ldist" } }
|
||||
! There are 5 legal partitions in this code. Based on the data
|
||||
! locality heuristic, this loop should not be split.
|
||||
|
||||
! { dg-final { scan-tree-dump-not "distributed: split to" "ldist" } }
|
||||
! { dg-final { cleanup-tree-dump "ldist" } }
|
||||
|
31
gcc/testsuite/gfortran.dg/ldist-pr43023.f90
Normal file
31
gcc/testsuite/gfortran.dg/ldist-pr43023.f90
Normal file
@ -0,0 +1,31 @@
|
||||
! { dg-do compile }
|
||||
! { dg-options "-O2 -ftree-loop-distribution" }
|
||||
|
||||
MODULE NFT_mod
|
||||
|
||||
implicit none
|
||||
integer :: Nangle
|
||||
real:: Z0
|
||||
real, dimension(:,:), allocatable :: Angle
|
||||
real, dimension(:), allocatable :: exth, ezth, hxth, hyth, hyphi
|
||||
|
||||
CONTAINS
|
||||
|
||||
SUBROUTINE NFT_Init()
|
||||
|
||||
real :: th, fi
|
||||
integer :: n
|
||||
|
||||
do n = 1,Nangle
|
||||
th = Angle(n,1)
|
||||
fi = Angle(n,2)
|
||||
|
||||
exth(n) = cos(fi)*cos(th)
|
||||
ezth(n) = -sin(th)
|
||||
hxth(n) = -sin(fi)
|
||||
hyth(n) = cos(fi)
|
||||
hyphi(n) = -sin(fi)
|
||||
end do
|
||||
END SUBROUTINE NFT_Init
|
||||
|
||||
END MODULE NFT_mod
|
@ -4509,7 +4509,7 @@ dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
|
||||
for (e = v->succ; e; e = e->succ_next)
|
||||
fprintf (file, " %d", e->dest);
|
||||
|
||||
fprintf (file, ") \n");
|
||||
fprintf (file, ")\n");
|
||||
print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
|
||||
fprintf (file, ")\n");
|
||||
}
|
||||
@ -4976,16 +4976,27 @@ stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
|
||||
free (bbs);
|
||||
}
|
||||
|
||||
/* Returns true when STMT is an assignment that contains a data
|
||||
reference on its LHS with a stride of the same size as its unit
|
||||
type. */
|
||||
/* Returns true when the statement at STMT is of the form "A[i] = 0"
|
||||
that contains a data reference on its LHS with a stride of the same
|
||||
size as its unit type. */
|
||||
|
||||
static bool
|
||||
mem_write_stride_of_same_size_as_unit_type_p (gimple stmt)
|
||||
bool
|
||||
stmt_with_adjacent_zero_store_dr_p (gimple stmt)
|
||||
{
|
||||
struct data_reference *dr = XCNEW (struct data_reference);
|
||||
tree op0 = gimple_assign_lhs (stmt);
|
||||
tree op0, op1;
|
||||
bool res;
|
||||
struct data_reference *dr;
|
||||
|
||||
if (!stmt
|
||||
|| !gimple_vdef (stmt)
|
||||
|| !is_gimple_assign (stmt)
|
||||
|| !gimple_assign_single_p (stmt)
|
||||
|| !(op1 = gimple_assign_rhs1 (stmt))
|
||||
|| !(integer_zerop (op1) || real_zerop (op1)))
|
||||
return false;
|
||||
|
||||
dr = XCNEW (struct data_reference);
|
||||
op0 = gimple_assign_lhs (stmt);
|
||||
|
||||
DR_STMT (dr) = stmt;
|
||||
DR_REF (dr) = op0;
|
||||
@ -5007,18 +5018,12 @@ stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
|
||||
basic_block bb;
|
||||
gimple_stmt_iterator si;
|
||||
gimple stmt;
|
||||
tree op;
|
||||
basic_block *bbs = get_loop_body_in_dom_order (loop);
|
||||
|
||||
for (i = 0; i < loop->num_nodes; i++)
|
||||
for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
|
||||
if ((stmt = gsi_stmt (si))
|
||||
&& gimple_vdef (stmt)
|
||||
&& is_gimple_assign (stmt)
|
||||
&& gimple_assign_rhs_code (stmt) == INTEGER_CST
|
||||
&& (op = gimple_assign_rhs1 (stmt))
|
||||
&& (integer_zerop (op) || real_zerop (op))
|
||||
&& mem_write_stride_of_same_size_as_unit_type_p (stmt))
|
||||
&& stmt_with_adjacent_zero_store_dr_p (stmt))
|
||||
VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
|
||||
|
||||
free (bbs);
|
||||
|
@ -602,6 +602,7 @@ void stores_zero_from_loop (struct loop *, VEC (gimple, heap) **);
|
||||
void remove_similar_memory_refs (VEC (gimple, heap) **);
|
||||
bool rdg_defs_used_in_other_loops_p (struct graph *, int);
|
||||
bool have_similar_memory_accesses (gimple, gimple);
|
||||
bool stmt_with_adjacent_zero_store_dr_p (gimple);
|
||||
|
||||
/* Returns true when STRIDE is equal in absolute value to the size of
|
||||
the unit type of TYPE. */
|
||||
|
@ -241,7 +241,7 @@ build_size_arg_loc (location_t loc, tree nb_iter, tree op,
|
||||
|
||||
/* Generate a call to memset. The data reference analysis and the unit-type stride are asserted rather than reported, so nothing is returned. */
|
||||
|
||||
static bool
|
||||
static void
|
||||
generate_memset_zero (gimple stmt, tree op0, tree nb_iter,
|
||||
gimple_stmt_iterator bsi)
|
||||
{
|
||||
@ -255,11 +255,8 @@ generate_memset_zero (gimple stmt, tree op0, tree nb_iter,
|
||||
|
||||
DR_STMT (dr) = stmt;
|
||||
DR_REF (dr) = op0;
|
||||
if (!dr_analyze_innermost (dr))
|
||||
goto end;
|
||||
|
||||
if (!stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)))
|
||||
goto end;
|
||||
res = dr_analyze_innermost (dr);
|
||||
gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)));
|
||||
|
||||
nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
|
||||
addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
|
||||
@ -286,14 +283,11 @@ generate_memset_zero (gimple stmt, tree op0, tree nb_iter,
|
||||
fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes);
|
||||
gimple_seq_add_stmt (&stmt_list, fn_call);
|
||||
gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING);
|
||||
res = true;
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fprintf (dump_file, "generated memset zero\n");
|
||||
|
||||
end:
|
||||
free_data_ref (dr);
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Tries to generate a builtin function for the instructions of LOOP
|
||||
@ -307,7 +301,6 @@ generate_builtin (struct loop *loop, bitmap partition, bool copy_p)
|
||||
unsigned i, x = 0;
|
||||
basic_block *bbs;
|
||||
gimple write = NULL;
|
||||
tree op0, op1;
|
||||
gimple_stmt_iterator bsi;
|
||||
tree nb_iter = number_of_exit_cond_executions (loop);
|
||||
|
||||
@ -343,26 +336,17 @@ generate_builtin (struct loop *loop, bitmap partition, bool copy_p)
|
||||
}
|
||||
}
|
||||
|
||||
if (!write)
|
||||
goto end;
|
||||
|
||||
op0 = gimple_assign_lhs (write);
|
||||
op1 = gimple_assign_rhs1 (write);
|
||||
|
||||
if (!(TREE_CODE (op0) == ARRAY_REF
|
||||
|| TREE_CODE (op0) == MEM_REF))
|
||||
if (!stmt_with_adjacent_zero_store_dr_p (write))
|
||||
goto end;
|
||||
|
||||
/* The new statements will be placed before LOOP. */
|
||||
bsi = gsi_last_bb (loop_preheader_edge (loop)->src);
|
||||
|
||||
if (gimple_assign_rhs_code (write) == INTEGER_CST
|
||||
&& (integer_zerop (op1) || real_zerop (op1)))
|
||||
res = generate_memset_zero (write, op0, nb_iter, bsi);
|
||||
generate_memset_zero (write, gimple_assign_lhs (write), nb_iter, bsi);
|
||||
res = true;
|
||||
|
||||
/* If this is the last partition for which we generate code, we have
|
||||
to destroy the loop. */
|
||||
if (res && !copy_p)
|
||||
if (!copy_p)
|
||||
{
|
||||
unsigned nbbs = loop->num_nodes;
|
||||
edge exit = single_exit (loop);
|
||||
@ -504,24 +488,6 @@ has_upstream_mem_writes (int u)
|
||||
static void rdg_flag_vertex_and_dependent (struct graph *, int, bitmap, bitmap,
|
||||
bitmap, bool *);
|
||||
|
||||
/* Flag all the uses of U. */
|
||||
|
||||
static void
|
||||
rdg_flag_all_uses (struct graph *rdg, int u, bitmap partition, bitmap loops,
|
||||
bitmap processed, bool *part_has_writes)
|
||||
{
|
||||
struct graph_edge *e;
|
||||
|
||||
for (e = rdg->vertices[u].succ; e; e = e->succ_next)
|
||||
if (!bitmap_bit_p (processed, e->dest))
|
||||
{
|
||||
rdg_flag_vertex_and_dependent (rdg, e->dest, partition, loops,
|
||||
processed, part_has_writes);
|
||||
rdg_flag_all_uses (rdg, e->dest, partition, loops, processed,
|
||||
part_has_writes);
|
||||
}
|
||||
}
|
||||
|
||||
/* Flag the uses of U stopping following the information from
|
||||
upstream_mem_writes. */
|
||||
|
||||
@ -689,68 +655,13 @@ rdg_flag_loop_exits (struct graph *rdg, bitmap loops, bitmap partition,
|
||||
}
|
||||
}
|
||||
|
||||
/* Flag all the nodes of RDG containing memory accesses that could
|
||||
potentially belong to arrays already accessed in the current
|
||||
PARTITION. */
|
||||
|
||||
static void
|
||||
rdg_flag_similar_memory_accesses (struct graph *rdg, bitmap partition,
|
||||
bitmap loops, bitmap processed,
|
||||
VEC (int, heap) **other_stores)
|
||||
{
|
||||
bool foo;
|
||||
unsigned i, n;
|
||||
int j, k, kk;
|
||||
bitmap_iterator ii;
|
||||
struct graph_edge *e;
|
||||
|
||||
EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, ii)
|
||||
if (RDG_MEM_WRITE_STMT (rdg, i)
|
||||
|| RDG_MEM_READS_STMT (rdg, i))
|
||||
{
|
||||
for (j = 0; j < rdg->n_vertices; j++)
|
||||
if (!bitmap_bit_p (processed, j)
|
||||
&& (RDG_MEM_WRITE_STMT (rdg, j)
|
||||
|| RDG_MEM_READS_STMT (rdg, j))
|
||||
&& rdg_has_similar_memory_accesses (rdg, i, j))
|
||||
{
|
||||
/* Flag first the node J itself, and all the nodes that
|
||||
are needed to compute J. */
|
||||
rdg_flag_vertex_and_dependent (rdg, j, partition, loops,
|
||||
processed, &foo);
|
||||
|
||||
/* When J is a read, we want to coalesce in the same
|
||||
PARTITION all the nodes that are using J: this is
|
||||
needed for better cache locality. */
|
||||
rdg_flag_all_uses (rdg, j, partition, loops, processed, &foo);
|
||||
|
||||
/* Remove from OTHER_STORES the vertex that we flagged. */
|
||||
if (RDG_MEM_WRITE_STMT (rdg, j))
|
||||
FOR_EACH_VEC_ELT (int, *other_stores, k, kk)
|
||||
if (kk == j)
|
||||
{
|
||||
VEC_unordered_remove (int, *other_stores, k);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* If the node I has two uses, then keep these together in the
|
||||
same PARTITION. */
|
||||
for (n = 0, e = rdg->vertices[i].succ; e; e = e->succ_next, n++);
|
||||
|
||||
if (n > 1)
|
||||
rdg_flag_all_uses (rdg, i, partition, loops, processed, &foo);
|
||||
}
|
||||
}
|
||||
|
||||
/* Returns a bitmap in which all the statements needed for computing
|
||||
the strongly connected component C of the RDG are flagged, also
|
||||
including the loop exit conditions. */
|
||||
|
||||
static bitmap
|
||||
build_rdg_partition_for_component (struct graph *rdg, rdgc c,
|
||||
bool *part_has_writes,
|
||||
VEC (int, heap) **other_stores)
|
||||
bool *part_has_writes)
|
||||
{
|
||||
int i, v;
|
||||
bitmap partition = BITMAP_ALLOC (NULL);
|
||||
@ -762,14 +673,6 @@ build_rdg_partition_for_component (struct graph *rdg, rdgc c,
|
||||
rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed,
|
||||
part_has_writes);
|
||||
|
||||
/* Also iterate on the array of stores not in the starting vertices,
|
||||
and determine those vertices that have some memory affinity with
|
||||
the current nodes in the component: these are stores to the same
|
||||
arrays, i.e. we're taking care of cache locality. */
|
||||
if (!flag_tree_loop_distribute_patterns)
|
||||
rdg_flag_similar_memory_accesses (rdg, partition, loops, processed,
|
||||
other_stores);
|
||||
|
||||
rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes);
|
||||
|
||||
BITMAP_FREE (processed);
|
||||
@ -832,6 +735,79 @@ rdg_build_components (struct graph *rdg, VEC (int, heap) *starting_vertices,
|
||||
BITMAP_FREE (saved_components);
|
||||
}
|
||||
|
||||
/* Returns true when it is possible to generate a builtin pattern for
|
||||
the PARTITION of RDG. For the moment we detect only the memset
|
||||
zero pattern. */
|
||||
|
||||
static bool
|
||||
can_generate_builtin (struct graph *rdg, bitmap partition)
|
||||
{
|
||||
unsigned i;
|
||||
bitmap_iterator bi;
|
||||
int nb_reads = 0;
|
||||
int nb_writes = 0;
|
||||
int stores_zero = 0;
|
||||
|
||||
EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, bi)
|
||||
if (RDG_MEM_READS_STMT (rdg, i))
|
||||
nb_reads++;
|
||||
else if (RDG_MEM_WRITE_STMT (rdg, i))
|
||||
{
|
||||
nb_writes++;
|
||||
if (stmt_with_adjacent_zero_store_dr_p (RDG_STMT (rdg, i)))
|
||||
stores_zero++;
|
||||
}
|
||||
|
||||
return stores_zero == 1 && nb_writes == 1 && nb_reads == 0;
|
||||
}
|
||||
|
||||
/* Returns true when PARTITION1 and PARTITION2 have similar memory
|
||||
accesses in RDG. */
|
||||
|
||||
static bool
|
||||
similar_memory_accesses (struct graph *rdg, bitmap partition1,
|
||||
bitmap partition2)
|
||||
{
|
||||
unsigned i, j;
|
||||
bitmap_iterator bi, bj;
|
||||
|
||||
EXECUTE_IF_SET_IN_BITMAP (partition1, 0, i, bi)
|
||||
if (RDG_MEM_WRITE_STMT (rdg, i)
|
||||
|| RDG_MEM_READS_STMT (rdg, i))
|
||||
EXECUTE_IF_SET_IN_BITMAP (partition2, 0, j, bj)
|
||||
if (RDG_MEM_WRITE_STMT (rdg, j)
|
||||
|| RDG_MEM_READS_STMT (rdg, j))
|
||||
if (rdg_has_similar_memory_accesses (rdg, i, j))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Fuse all the partitions from PARTITIONS that contain similar memory
|
||||
references, i.e., we're taking care of cache locality. This
|
||||
function does not fuse those partitions that contain patterns that
|
||||
can be code generated with builtins. */
|
||||
|
||||
static void
|
||||
fuse_partitions_with_similar_memory_accesses (struct graph *rdg,
|
||||
VEC (bitmap, heap) **partitions)
|
||||
{
|
||||
int p1, p2;
|
||||
bitmap partition1, partition2;
|
||||
|
||||
FOR_EACH_VEC_ELT (bitmap, *partitions, p1, partition1)
|
||||
if (!can_generate_builtin (rdg, partition1))
|
||||
FOR_EACH_VEC_ELT (bitmap, *partitions, p2, partition2)
|
||||
if (p1 != p2
|
||||
&& !can_generate_builtin (rdg, partition2)
|
||||
&& similar_memory_accesses (rdg, partition1, partition2))
|
||||
{
|
||||
bitmap_ior_into (partition1, partition2);
|
||||
VEC_ordered_remove (bitmap, *partitions, p2);
|
||||
p2--;
|
||||
}
|
||||
}
|
||||
|
||||
/* Aggregate several components into a useful partition that is
|
||||
registered in the PARTITIONS vector. Partitions will be
|
||||
distributed in different loops. */
|
||||
@ -854,8 +830,7 @@ rdg_build_partitions (struct graph *rdg, VEC (rdgc, heap) *components,
|
||||
if (bitmap_bit_p (processed, v))
|
||||
continue;
|
||||
|
||||
np = build_rdg_partition_for_component (rdg, x, &part_has_writes,
|
||||
other_stores);
|
||||
np = build_rdg_partition_for_component (rdg, x, &part_has_writes);
|
||||
bitmap_ior_into (partition, np);
|
||||
bitmap_ior_into (processed, np);
|
||||
BITMAP_FREE (np);
|
||||
@ -901,6 +876,8 @@ rdg_build_partitions (struct graph *rdg, VEC (rdgc, heap) *components,
|
||||
VEC_safe_push (bitmap, heap, *partitions, partition);
|
||||
else
|
||||
BITMAP_FREE (partition);
|
||||
|
||||
fuse_partitions_with_similar_memory_accesses (rdg, partitions);
|
||||
}
|
||||
|
||||
/* Dump to FILE the PARTITIONS. */
|
||||
|
Loading…
x
Reference in New Issue
Block a user