diff --git a/gcc/function.h b/gcc/function.h index d55cbddd0b51..dbe8a585eee1 100644 --- a/gcc/function.h +++ b/gcc/function.h @@ -269,6 +269,13 @@ struct GTY(()) function { /* Value histograms attached to particular statements. */ htab_t GTY((skip)) value_histograms; + /* Different from normal TODO_flags which are handled right at the + beginning or the end of one pass execution, the pending_TODOs + are passed down in the pipeline until one of its consumers can + perform the requested action. Consumers should then clear the + flags for the actions that they have taken. */ + unsigned int pending_TODOs; + /* For function.c. */ /* Points to the FUNCTION_DECL of this function. */ diff --git a/gcc/passes.c b/gcc/passes.c index 079ad1a88f7f..f71f63918f40 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -731,7 +731,54 @@ make_pass_late_compilation (gcc::context *ctxt) return new pass_late_compilation (ctxt); } +/* Pre-SLP scalar cleanup, it has several cleanup passes like FRE, DSE. */ +namespace { + +const pass_data pass_data_pre_slp_scalar_cleanup = +{ + GIMPLE_PASS, /* type */ + "*pre_slp_scalar_cleanup", /* name */ + OPTGROUP_LOOP, /* optinfo_flags */ + TV_SCALAR_CLEANUP, /* tv_id */ + ( PROP_cfg | PROP_ssa ), /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_pre_slp_scalar_cleanup : public gimple_opt_pass +{ +public: + pass_pre_slp_scalar_cleanup (gcc::context *ctxt) + : gimple_opt_pass (pass_data_pre_slp_scalar_cleanup, ctxt) + { + } + + virtual bool + gate (function *fun) + { + return flag_tree_slp_vectorize + && (fun->pending_TODOs & PENDING_TODO_force_next_scalar_cleanup); + } + + virtual unsigned int + execute (function *fun) + { + fun->pending_TODOs &= ~PENDING_TODO_force_next_scalar_cleanup; + return 0; + } + +}; // class pass_pre_slp_scalar_cleanup + +} // anon namespace + +gimple_opt_pass * +make_pass_pre_slp_scalar_cleanup (gcc::context *ctxt) +{ + return new pass_pre_slp_scalar_cleanup (ctxt); +} /* Set the static pass number of pass PASS to ID and record that in the mapping from static pass number to pass. */ diff --git a/gcc/passes.def b/gcc/passes.def index 4ca04de3a742..c68231287b67 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -289,11 +289,16 @@ along with GCC; see the file COPYING3. If not see /* pass_vectorize must immediately follow pass_if_conversion. Please do not add any other passes in between. */ NEXT_PASS (pass_vectorize); - PUSH_INSERT_PASSES_WITHIN (pass_vectorize) + PUSH_INSERT_PASSES_WITHIN (pass_vectorize) NEXT_PASS (pass_dce); - POP_INSERT_PASSES () - NEXT_PASS (pass_predcom); + POP_INSERT_PASSES () + NEXT_PASS (pass_predcom); NEXT_PASS (pass_complete_unroll); + NEXT_PASS (pass_pre_slp_scalar_cleanup); + PUSH_INSERT_PASSES_WITHIN (pass_pre_slp_scalar_cleanup) + NEXT_PASS (pass_fre, false /* may_iterate */); + NEXT_PASS (pass_dse); + POP_INSERT_PASSES () NEXT_PASS (pass_slp_vectorize); NEXT_PASS (pass_loop_prefetch); /* Run IVOPTs after the last pass that uses data-reference analysis diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96789.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96789.c new file mode 100644 index 000000000000..d6139a014d82 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96789.c @@ -0,0 +1,58 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -funroll-loops -ftree-vectorize -fdump-tree-dse-details" } */ + +/* Test if scalar cleanup pass takes effects, mainly check + its secondary pass DSE can remove dead stores on array + tmp. */ + +#include "stdint.h" + +static inline void +foo (int16_t *diff, int i_size, uint8_t *val1, int i_val1, uint8_t *val2, + int i_val2) +{ + for (int y = 0; y < i_size; y++) + { + for (int x = 0; x < i_size; x++) + diff[x + y * i_size] = val1[x] - val2[x]; + val1 += i_val1; + val2 += i_val2; + } +} + +void +bar (int16_t res[16], uint8_t *val1, uint8_t *val2) +{ + int16_t d[16]; + int16_t tmp[16]; + + foo (d, 4, val1, 16, val2, 32); + + for (int i = 0; i < 4; i++) + { + int s03 = d[i * 4 + 0] + d[i * 4 + 3]; + int s12 = d[i * 4 + 1] + d[i * 4 + 2]; + int d03 = d[i * 4 + 0] - d[i * 4 + 3]; + int d12 = d[i * 4 + 1] - d[i * 4 + 2]; + + tmp[0 * 4 + i] = s03 + s12; + tmp[1 * 4 + i] = 2 * d03 + d12; + tmp[2 * 4 + i] = s03 - s12; + tmp[3 * 4 + i] = d03 - 2 * d12; + } + + for (int i = 0; i < 4; i++) + { + int s03 = tmp[i * 4 + 0] + tmp[i * 4 + 3]; + int s12 = tmp[i * 4 + 1] + tmp[i * 4 + 2]; + int d03 = tmp[i * 4 + 0] - tmp[i * 4 + 3]; + int d12 = tmp[i * 4 + 1] - tmp[i * 4 + 2]; + + res[i * 4 + 0] = s03 + s12; + res[i * 4 + 1] = 2 * d03 + d12; + res[i * 4 + 2] = s03 - s12; + res[i * 4 + 3] = d03 - 2 * d12; + } +} + +/* { dg-final { scan-tree-dump {Deleted dead store:.*tmp} "dse3" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-28.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-28.c index d3a1bbc5ce53..b81cabe604a5 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-28.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-28.c @@ -17,5 +17,5 @@ int foo (int *p, int b) /* { dg-final { scan-tree-dump-not "Deleted dead store" "dse1"} } */ /* { dg-final { scan-tree-dump-not "Deleted dead store" "dse2"} } */ -/* { dg-final { scan-tree-dump-not "Deleted dead store" "dse3"} } */ +/* { dg-final { scan-tree-dump-not "Deleted dead store" "dse4"} } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-29.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-29.c index 31529e7caedb..f4ef89c590cd 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-29.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-29.c @@ -22,5 +22,5 @@ foo(int cond, struct z *s) /* { dg-final { scan-tree-dump-times "Deleted dead store" 3 "dse1"} } */ /* { dg-final { scan-tree-dump-not "Deleted dead store" "dse2"} } */ -/* { dg-final { scan-tree-dump-not "Deleted dead store" "dse3"} } */ +/* { dg-final { scan-tree-dump-not "Deleted dead store" "dse4"} } */ diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-41.c b/gcc/testsuite/gcc.dg/vect/bb-slp-41.c index 7de5ed1f5be3..72245115f305 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-41.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-41.c @@ -10,7 +10,10 @@ foo (int *a, int *b) a[i] = b[0] + b[1] + b[i+1] + b[i+2]; } -void bar (int *a, int *b) +/* Disable pre-slp FRE to avoid unexpected SLP on the epilogue + of the 1st loop. */ +void __attribute__((optimize("-fno-tree-fre"))) +bar (int *a, int *b) { int i; for (i = 0; i < (ARR_SIZE - 2); ++i) diff --git a/gcc/timevar.def b/gcc/timevar.def index 08c21c04009e..a30317997007 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -194,6 +194,7 @@ DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching") DEFTIMEVAR (TV_LOOP_SPLIT , "loop splitting") DEFTIMEVAR (TV_LOOP_JAM , "unroll and jam") DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling") +DEFTIMEVAR (TV_SCALAR_CLEANUP , "scalar cleanup") DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops") DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization") DEFTIMEVAR (TV_TREE_SLP_VECTORIZATION, "tree slp vectorization") diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 3410ca9bc022..9cb22acc2439 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -310,6 +310,11 @@ protected: #define TODO_verify_all TODO_verify_il +/* To-do flags for pending_TODOs. */ + +/* Tell the next scalar cleanup pass that there is + work for it to do. */ +#define PENDING_TODO_force_next_scalar_cleanup (1 << 1) /* Register pass info. */ @@ -380,6 +385,7 @@ extern gimple_opt_pass *make_pass_simduid_cleanup (gcc::context *ctxt); extern gimple_opt_pass *make_pass_slp_vectorize (gcc::context *ctxt); extern gimple_opt_pass *make_pass_complete_unroll (gcc::context *ctxt); extern gimple_opt_pass *make_pass_complete_unrolli (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_pre_slp_scalar_cleanup (gcc::context *ctxt); extern gimple_opt_pass *make_pass_parallelize_loops (gcc::context *ctxt); extern gimple_opt_pass *make_pass_loop_prefetch (gcc::context *ctxt); extern gimple_opt_pass *make_pass_iv_optimize (gcc::context *ctxt); diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c index 5bb781dc7fad..33e15d448b2d 100644 --- a/gcc/tree-ssa-loop-ivcanon.c +++ b/gcc/tree-ssa-loop-ivcanon.c @@ -1406,6 +1406,9 @@ tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer, bitmap_clear (father_bbs); bitmap_set_bit (father_bbs, loop_father->header->index); } + else if (unroll_outer) + /* Trigger scalar cleanup once any outermost loop gets unrolled. */ + cfun->pending_TODOs |= PENDING_TODO_force_next_scalar_cleanup; return true; }