diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9b4be737ab47..038980644aee 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,23 @@ +2010-12-10 Sebastian Pop + + PR tree-optimization/43023 + * tree-data-ref.c (mem_write_stride_of_same_size_as_unit_type_p): + Removed. + (stores_zero_from_loop): Call stmt_with_adjacent_zero_store_dr_p. + * tree-data-ref.h (stmt_with_adjacent_zero_store_dr_p): New. + * tree-loop-distribution.c (generate_memset_zero): Do not return a + boolean. Call gcc_assert on stride_of_unit_type_p. + (generate_builtin): Call stmt_with_adjacent_zero_store_dr_p. + (rdg_flag_all_uses): Removed. + (rdg_flag_similar_memory_accesses): Removed. + (build_rdg_partition_for_component): Removed parameter + other_stores. Removed call to rdg_flag_similar_memory_accesses. + (can_generate_builtin): New. + (similar_memory_accesses): New. + (fuse_partitions_with_similar_memory_accesses): New. + (rdg_build_partitions): Call + fuse_partitions_with_similar_memory_accesses. + 2010-12-10 Jakub Jelinek PR rtl-optimization/46804 @@ -108,9 +128,9 @@ (abshi2): Delete. (neghi2, negqi2): Use PDPint iterator. * config/pdp11/pdp11.c (find_addr_reg, output_move_double, - output_move_quad): Delete. + output_move_quad): Delete. (pdp11_expand_operands, output_move_multiple): New functions. - + 2010-12-09 Joseph Myers * config/vax/linux.h (WCHAR_TYPE, WCHAR_TYPE_SIZE): Define. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 47022afe9c9b..7bb46f3a7b5f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2010-12-10 Sebastian Pop + + PR tree-optimization/43023 + * gfortran.dg/ldist-1.f90: Adjust pattern. + * gfortran.dg/ldist-pr43023.f90: New. + 2010-12-10 Jakub Jelinek PR rtl-optimization/46804 @@ -45,8 +51,8 @@ * obj-c++.dg/class-extension-3.mm: New. * obj-c++.dg/property/at-property-26.mm: New. * obj-c++.dg/property/at-property-27.mm: New. - * obj-c++.dg/property/at-property-28.mm: New. - + * obj-c++.dg/property/at-property-28.mm: New. 
+ 2010-12-09 John David Anglin PR target/46057 @@ -113,12 +119,12 @@ * obj-c++.dg/exceptions-7.mm: New. * obj-c++.dg/exceptions-3.mm: Adjust for new C++ messages. * obj-c++.dg/exceptions-5.mm: Same change. - + 2010-12-08 Nicola Pero * objc.dg/foreach-6.m: Updated location of error messages. * objc.dg/foreach-7.m: Same change. - + 2010-12-08 Richard Guenther Sebastian Pop diff --git a/gcc/testsuite/gfortran.dg/ldist-1.f90 b/gcc/testsuite/gfortran.dg/ldist-1.f90 index dd1f02a176b1..bbce2f355e12 100644 --- a/gcc/testsuite/gfortran.dg/ldist-1.f90 +++ b/gcc/testsuite/gfortran.dg/ldist-1.f90 @@ -29,5 +29,8 @@ Subroutine PADEC(DKS,DKDS,HVAR,WM,WG,FN,NS,AN,BN,CN,IT) return end Subroutine PADEC -! { dg-final { scan-tree-dump-times "distributed: split to 4 loops" 1 "ldist" } } +! There are 5 legal partitions in this code. Based on the data +! locality heuristic, this loop should not be split. + +! { dg-final { scan-tree-dump-not "distributed: split to" "ldist" } } ! { dg-final { cleanup-tree-dump "ldist" } } diff --git a/gcc/testsuite/gfortran.dg/ldist-pr43023.f90 b/gcc/testsuite/gfortran.dg/ldist-pr43023.f90 new file mode 100644 index 000000000000..3e2d04c94901 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/ldist-pr43023.f90 @@ -0,0 +1,31 @@ +! { dg-do compile } +! 
{ dg-options "-O2 -ftree-loop-distribution" } + +MODULE NFT_mod + +implicit none +integer :: Nangle +real:: Z0 +real, dimension(:,:), allocatable :: Angle +real, dimension(:), allocatable :: exth, ezth, hxth, hyth, hyphi + +CONTAINS + +SUBROUTINE NFT_Init() + +real :: th, fi +integer :: n + +do n = 1,Nangle + th = Angle(n,1) + fi = Angle(n,2) + + exth(n) = cos(fi)*cos(th) + ezth(n) = -sin(th) + hxth(n) = -sin(fi) + hyth(n) = cos(fi) + hyphi(n) = -sin(fi) +end do +END SUBROUTINE NFT_Init + +END MODULE NFT_mod diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index 4dfcd5cafd4d..9a813702a6a1 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -4509,7 +4509,7 @@ dump_rdg_vertex (FILE *file, struct graph *rdg, int i) for (e = v->succ; e; e = e->succ_next) fprintf (file, " %d", e->dest); - fprintf (file, ") \n"); + fprintf (file, ")\n"); print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS); fprintf (file, ")\n"); } @@ -4976,16 +4976,27 @@ stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts) free (bbs); } -/* Returns true when STMT is an assignment that contains a data - reference on its LHS with a stride of the same size as its unit - type. */ +/* Returns true when the statement at STMT is of the form "A[i] = 0" + that contains a data reference on its LHS with a stride of the same + size as its unit type. 
*/ -static bool -mem_write_stride_of_same_size_as_unit_type_p (gimple stmt) +bool +stmt_with_adjacent_zero_store_dr_p (gimple stmt) { - struct data_reference *dr = XCNEW (struct data_reference); - tree op0 = gimple_assign_lhs (stmt); + tree op0, op1; bool res; + struct data_reference *dr; + + if (!stmt + || !gimple_vdef (stmt) + || !is_gimple_assign (stmt) + || !gimple_assign_single_p (stmt) + || !(op1 = gimple_assign_rhs1 (stmt)) + || !(integer_zerop (op1) || real_zerop (op1))) + return false; + + dr = XCNEW (struct data_reference); + op0 = gimple_assign_lhs (stmt); DR_STMT (dr) = stmt; DR_REF (dr) = op0; @@ -5007,18 +5018,12 @@ stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts) basic_block bb; gimple_stmt_iterator si; gimple stmt; - tree op; basic_block *bbs = get_loop_body_in_dom_order (loop); for (i = 0; i < loop->num_nodes; i++) for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) if ((stmt = gsi_stmt (si)) - && gimple_vdef (stmt) - && is_gimple_assign (stmt) - && gimple_assign_rhs_code (stmt) == INTEGER_CST - && (op = gimple_assign_rhs1 (stmt)) - && (integer_zerop (op) || real_zerop (op)) - && mem_write_stride_of_same_size_as_unit_type_p (stmt)) + && stmt_with_adjacent_zero_store_dr_p (stmt)) VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si)); free (bbs); diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h index d929f315bf1b..b4f317f99ed8 100644 --- a/gcc/tree-data-ref.h +++ b/gcc/tree-data-ref.h @@ -602,6 +602,7 @@ void stores_zero_from_loop (struct loop *, VEC (gimple, heap) **); void remove_similar_memory_refs (VEC (gimple, heap) **); bool rdg_defs_used_in_other_loops_p (struct graph *, int); bool have_similar_memory_accesses (gimple, gimple); +bool stmt_with_adjacent_zero_store_dr_p (gimple); /* Returns true when STRIDE is equal in absolute value to the size of the unit type of TYPE. 
*/ diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c index 357f51fe2756..b60320945d46 100644 --- a/gcc/tree-loop-distribution.c +++ b/gcc/tree-loop-distribution.c @@ -241,7 +241,7 @@ build_size_arg_loc (location_t loc, tree nb_iter, tree op, /* Generate a call to memset. Return true when the operation succeeded. */ -static bool +static void generate_memset_zero (gimple stmt, tree op0, tree nb_iter, gimple_stmt_iterator bsi) { @@ -255,11 +255,8 @@ generate_memset_zero (gimple stmt, tree op0, tree nb_iter, DR_STMT (dr) = stmt; DR_REF (dr) = op0; - if (!dr_analyze_innermost (dr)) - goto end; - - if (!stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0))) - goto end; + res = dr_analyze_innermost (dr); + gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0))); nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr)); @@ -286,14 +283,11 @@ generate_memset_zero (gimple stmt, tree op0, tree nb_iter, fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes); gimple_seq_add_stmt (&stmt_list, fn_call); gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING); - res = true; if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "generated memset zero\n"); - end: free_data_ref (dr); - return res; } /* Tries to generate a builtin function for the instructions of LOOP @@ -307,7 +301,6 @@ generate_builtin (struct loop *loop, bitmap partition, bool copy_p) unsigned i, x = 0; basic_block *bbs; gimple write = NULL; - tree op0, op1; gimple_stmt_iterator bsi; tree nb_iter = number_of_exit_cond_executions (loop); @@ -343,26 +336,17 @@ generate_builtin (struct loop *loop, bitmap partition, bool copy_p) } } - if (!write) - goto end; - - op0 = gimple_assign_lhs (write); - op1 = gimple_assign_rhs1 (write); - - if (!(TREE_CODE (op0) == ARRAY_REF - || TREE_CODE (op0) == MEM_REF)) + if (!stmt_with_adjacent_zero_store_dr_p (write)) goto end; /* The 
new statements will be placed before LOOP. */ bsi = gsi_last_bb (loop_preheader_edge (loop)->src); - - if (gimple_assign_rhs_code (write) == INTEGER_CST - && (integer_zerop (op1) || real_zerop (op1))) - res = generate_memset_zero (write, op0, nb_iter, bsi); + generate_memset_zero (write, gimple_assign_lhs (write), nb_iter, bsi); + res = true; /* If this is the last partition for which we generate code, we have to destroy the loop. */ - if (res && !copy_p) + if (!copy_p) { unsigned nbbs = loop->num_nodes; edge exit = single_exit (loop); @@ -504,24 +488,6 @@ has_upstream_mem_writes (int u) static void rdg_flag_vertex_and_dependent (struct graph *, int, bitmap, bitmap, bitmap, bool *); -/* Flag all the uses of U. */ - -static void -rdg_flag_all_uses (struct graph *rdg, int u, bitmap partition, bitmap loops, - bitmap processed, bool *part_has_writes) -{ - struct graph_edge *e; - - for (e = rdg->vertices[u].succ; e; e = e->succ_next) - if (!bitmap_bit_p (processed, e->dest)) - { - rdg_flag_vertex_and_dependent (rdg, e->dest, partition, loops, - processed, part_has_writes); - rdg_flag_all_uses (rdg, e->dest, partition, loops, processed, - part_has_writes); - } -} - /* Flag the uses of U stopping following the information from upstream_mem_writes. */ @@ -689,68 +655,13 @@ rdg_flag_loop_exits (struct graph *rdg, bitmap loops, bitmap partition, } } -/* Flag all the nodes of RDG containing memory accesses that could - potentially belong to arrays already accessed in the current - PARTITION. 
*/ - -static void -rdg_flag_similar_memory_accesses (struct graph *rdg, bitmap partition, - bitmap loops, bitmap processed, - VEC (int, heap) **other_stores) -{ - bool foo; - unsigned i, n; - int j, k, kk; - bitmap_iterator ii; - struct graph_edge *e; - - EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, ii) - if (RDG_MEM_WRITE_STMT (rdg, i) - || RDG_MEM_READS_STMT (rdg, i)) - { - for (j = 0; j < rdg->n_vertices; j++) - if (!bitmap_bit_p (processed, j) - && (RDG_MEM_WRITE_STMT (rdg, j) - || RDG_MEM_READS_STMT (rdg, j)) - && rdg_has_similar_memory_accesses (rdg, i, j)) - { - /* Flag first the node J itself, and all the nodes that - are needed to compute J. */ - rdg_flag_vertex_and_dependent (rdg, j, partition, loops, - processed, &foo); - - /* When J is a read, we want to coalesce in the same - PARTITION all the nodes that are using J: this is - needed for better cache locality. */ - rdg_flag_all_uses (rdg, j, partition, loops, processed, &foo); - - /* Remove from OTHER_STORES the vertex that we flagged. */ - if (RDG_MEM_WRITE_STMT (rdg, j)) - FOR_EACH_VEC_ELT (int, *other_stores, k, kk) - if (kk == j) - { - VEC_unordered_remove (int, *other_stores, k); - break; - } - } - - /* If the node I has two uses, then keep these together in the - same PARTITION. */ - for (n = 0, e = rdg->vertices[i].succ; e; e = e->succ_next, n++); - - if (n > 1) - rdg_flag_all_uses (rdg, i, partition, loops, processed, &foo); - } -} - /* Returns a bitmap in which all the statements needed for computing the strongly connected component C of the RDG are flagged, also including the loop exit conditions. 
*/ static bitmap build_rdg_partition_for_component (struct graph *rdg, rdgc c, - bool *part_has_writes, - VEC (int, heap) **other_stores) + bool *part_has_writes) { int i, v; bitmap partition = BITMAP_ALLOC (NULL); @@ -762,14 +673,6 @@ build_rdg_partition_for_component (struct graph *rdg, rdgc c, rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed, part_has_writes); - /* Also iterate on the array of stores not in the starting vertices, - and determine those vertices that have some memory affinity with - the current nodes in the component: these are stores to the same - arrays, i.e. we're taking care of cache locality. */ - if (!flag_tree_loop_distribute_patterns) - rdg_flag_similar_memory_accesses (rdg, partition, loops, processed, - other_stores); - rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes); BITMAP_FREE (processed); @@ -832,6 +735,79 @@ rdg_build_components (struct graph *rdg, VEC (int, heap) *starting_vertices, BITMAP_FREE (saved_components); } +/* Returns true when it is possible to generate a builtin pattern for + the PARTITION of RDG. For the moment we detect only the memset + zero pattern. */ + +static bool +can_generate_builtin (struct graph *rdg, bitmap partition) +{ + unsigned i; + bitmap_iterator bi; + int nb_reads = 0; + int nb_writes = 0; + int stores_zero = 0; + + EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, bi) + if (RDG_MEM_READS_STMT (rdg, i)) + nb_reads++; + else if (RDG_MEM_WRITE_STMT (rdg, i)) + { + nb_writes++; + if (stmt_with_adjacent_zero_store_dr_p (RDG_STMT (rdg, i))) + stores_zero++; + } + + return stores_zero == 1 && nb_writes == 1 && nb_reads == 0; +} + +/* Returns true when PARTITION1 and PARTITION2 have similar memory + accesses in RDG. 
*/ + +static bool +similar_memory_accesses (struct graph *rdg, bitmap partition1, + bitmap partition2) +{ + unsigned i, j; + bitmap_iterator bi, bj; + + EXECUTE_IF_SET_IN_BITMAP (partition1, 0, i, bi) + if (RDG_MEM_WRITE_STMT (rdg, i) + || RDG_MEM_READS_STMT (rdg, i)) + EXECUTE_IF_SET_IN_BITMAP (partition2, 0, j, bj) + if (RDG_MEM_WRITE_STMT (rdg, j) + || RDG_MEM_READS_STMT (rdg, j)) + if (rdg_has_similar_memory_accesses (rdg, i, j)) + return true; + + return false; +} + +/* Fuse all the partitions from PARTITIONS that contain similar memory + references, i.e., we're taking care of cache locality. This + function does not fuse those partitions that contain patterns that + can be code generated with builtins. */ + +static void +fuse_partitions_with_similar_memory_accesses (struct graph *rdg, + VEC (bitmap, heap) **partitions) +{ + int p1, p2; + bitmap partition1, partition2; + + FOR_EACH_VEC_ELT (bitmap, *partitions, p1, partition1) + if (!can_generate_builtin (rdg, partition1)) + FOR_EACH_VEC_ELT (bitmap, *partitions, p2, partition2) + if (p1 != p2 + && !can_generate_builtin (rdg, partition2) + && similar_memory_accesses (rdg, partition1, partition2)) + { + bitmap_ior_into (partition1, partition2); + VEC_ordered_remove (bitmap, *partitions, p2); + p2--; + } +} + /* Aggregate several components into a useful partition that is registered in the PARTITIONS vector. Partitions will be distributed in different loops. 
*/ @@ -854,8 +830,7 @@ rdg_build_partitions (struct graph *rdg, VEC (rdgc, heap) *components, if (bitmap_bit_p (processed, v)) continue; - np = build_rdg_partition_for_component (rdg, x, &part_has_writes, - other_stores); + np = build_rdg_partition_for_component (rdg, x, &part_has_writes); bitmap_ior_into (partition, np); bitmap_ior_into (processed, np); BITMAP_FREE (np); @@ -901,6 +876,8 @@ rdg_build_partitions (struct graph *rdg, VEC (rdgc, heap) *components, VEC_safe_push (bitmap, heap, *partitions, partition); else BITMAP_FREE (partition); + + fuse_partitions_with_similar_memory_accesses (rdg, partitions); } /* Dump to FILE the PARTITIONS. */