mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-01-12 01:54:54 +08:00
re PR libgomp/49490 (suboptimal load balancing in loops)
PR libgomp/49490 * omp-low.c (expand_omp_for_static_nochunk): Only use n ceil/ nthreads size for the first n % nthreads threads in the team instead of all threads except for the last few ones which get less work or none at all. * iter.c (gomp_iter_static_next): For chunk size 0 only use n ceil/ nthreads size for the first n % nthreads threads in the team instead of all threads except for the last few ones which get less work or none at all. * iter_ull.c (gomp_iter_ull_static_next): Likewise. * env.c (parse_schedule): If OMP_SCHEDULE doesn't have chunk argument, set run_sched_modifier to 0 for static resp. 1 for other kinds. If chunk argument is 0 and not static, set value to 1. From-SVN: r175315
This commit is contained in:
parent
4fb489e796
commit
fb79f500af
@ -1,5 +1,12 @@
|
||||
2011-06-22 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR libgomp/49490
|
||||
* omp-low.c (expand_omp_for_static_nochunk): Only
|
||||
use n ceil/ nthreads size for the first
|
||||
n % nthreads threads in the team instead of
|
||||
all threads except for the last few ones which
|
||||
get less work or none at all.
|
||||
|
||||
PR debug/49496
|
||||
* tree-vect-patterns.c (vect_recog_widen_mult_pattern): Ignore debug
|
||||
uses.
|
||||
|
@ -3,7 +3,7 @@
|
||||
marshalling to implement data sharing and copying clauses.
|
||||
Contributed by Diego Novillo <dnovillo@redhat.com>
|
||||
|
||||
Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
|
||||
Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
|
||||
Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
@ -4108,9 +4108,14 @@ expand_omp_for_generic (struct omp_region *region,
|
||||
else
|
||||
n = (adj + N2 - N1) / STEP;
|
||||
q = n / nthreads;
|
||||
q += (q * nthreads != n);
|
||||
s0 = q * threadid;
|
||||
e0 = min(s0 + q, n);
|
||||
tt = n % nthreads;
|
||||
if (threadid < tt) goto L3; else goto L4;
|
||||
L3:
|
||||
tt = 0;
|
||||
q = q + 1;
|
||||
L4:
|
||||
s0 = q * threadid + tt;
|
||||
e0 = s0 + q;
|
||||
V = s0 * STEP + N1;
|
||||
if (s0 >= e0) goto L2; else goto L0;
|
||||
L0:
|
||||
@ -4126,12 +4131,14 @@ static void
|
||||
expand_omp_for_static_nochunk (struct omp_region *region,
|
||||
struct omp_for_data *fd)
|
||||
{
|
||||
tree n, q, s0, e0, e, t, nthreads, threadid;
|
||||
tree n, q, s0, e0, e, t, tt, nthreads, threadid;
|
||||
tree type, itype, vmain, vback;
|
||||
basic_block entry_bb, exit_bb, seq_start_bb, body_bb, cont_bb;
|
||||
basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
|
||||
basic_block body_bb, cont_bb;
|
||||
basic_block fin_bb;
|
||||
gimple_stmt_iterator gsi;
|
||||
gimple stmt;
|
||||
edge ep;
|
||||
|
||||
itype = type = TREE_TYPE (fd->loop.v);
|
||||
if (POINTER_TYPE_P (type))
|
||||
@ -4185,19 +4192,39 @@ expand_omp_for_static_nochunk (struct omp_region *region,
|
||||
t = fold_convert (itype, t);
|
||||
n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
|
||||
|
||||
q = create_tmp_var (itype, "q");
|
||||
t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
|
||||
q = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
|
||||
t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
|
||||
gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
|
||||
|
||||
t = fold_build2 (MULT_EXPR, itype, q, nthreads);
|
||||
t = fold_build2 (NE_EXPR, itype, t, n);
|
||||
t = fold_build2 (PLUS_EXPR, itype, q, t);
|
||||
q = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
|
||||
tt = create_tmp_var (itype, "tt");
|
||||
t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
|
||||
t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
|
||||
gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
|
||||
|
||||
t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
|
||||
stmt = gimple_build_cond_empty (t);
|
||||
gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
|
||||
|
||||
second_bb = split_block (entry_bb, stmt)->dest;
|
||||
gsi = gsi_last_bb (second_bb);
|
||||
gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
|
||||
|
||||
gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
|
||||
GSI_SAME_STMT);
|
||||
stmt = gimple_build_assign_with_ops (PLUS_EXPR, q, q,
|
||||
build_int_cst (itype, 1));
|
||||
gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
|
||||
|
||||
third_bb = split_block (second_bb, stmt)->dest;
|
||||
gsi = gsi_last_bb (third_bb);
|
||||
gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
|
||||
|
||||
t = build2 (MULT_EXPR, itype, q, threadid);
|
||||
t = build2 (PLUS_EXPR, itype, t, tt);
|
||||
s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
|
||||
|
||||
t = fold_build2 (PLUS_EXPR, itype, s0, q);
|
||||
t = fold_build2 (MIN_EXPR, itype, t, n);
|
||||
e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
|
||||
|
||||
t = build2 (GE_EXPR, boolean_type_node, s0, e0);
|
||||
@ -4263,13 +4290,20 @@ expand_omp_for_static_nochunk (struct omp_region *region,
|
||||
gsi_remove (&gsi, true);
|
||||
|
||||
/* Connect all the blocks. */
|
||||
find_edge (entry_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
|
||||
find_edge (entry_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
|
||||
ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
|
||||
ep->probability = REG_BR_PROB_BASE / 4 * 3;
|
||||
ep = find_edge (entry_bb, second_bb);
|
||||
ep->flags = EDGE_TRUE_VALUE;
|
||||
ep->probability = REG_BR_PROB_BASE / 4;
|
||||
find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
|
||||
find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
|
||||
|
||||
find_edge (cont_bb, body_bb)->flags = EDGE_TRUE_VALUE;
|
||||
find_edge (cont_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
|
||||
|
||||
set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, entry_bb);
|
||||
set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
|
||||
set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
|
||||
set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
|
||||
set_immediate_dominator (CDI_DOMINATORS, body_bb,
|
||||
recompute_dominator (CDI_DOMINATORS, body_bb));
|
||||
set_immediate_dominator (CDI_DOMINATORS, fin_bb,
|
||||
|
@ -1,3 +1,17 @@
|
||||
2011-06-22 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR libgomp/49490
|
||||
* iter.c (gomp_iter_static_next): For chunk size 0
|
||||
only use n ceil/ nthreads size for the first
|
||||
n % nthreads threads in the team instead of
|
||||
all threads except for the last few ones which
|
||||
get less work or none at all.
|
||||
* iter_ull.c (gomp_iter_ull_static_next): Likewise.
|
||||
* env.c (parse_schedule): If OMP_SCHEDULE doesn't have
|
||||
chunk argument, set run_sched_modifier to 0 for static
|
||||
resp. 1 for other kinds. If chunk argument is 0
|
||||
and not static, set value to 1.
|
||||
|
||||
2011-05-19 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR c++/49043
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
|
||||
/* Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
|
||||
Free Software Foundation, Inc.
|
||||
Contributed by Richard Henderson <rth@redhat.com>.
|
||||
|
||||
@ -108,7 +108,11 @@ parse_schedule (void)
|
||||
while (isspace ((unsigned char) *env))
|
||||
++env;
|
||||
if (*env == '\0')
|
||||
return;
|
||||
{
|
||||
gomp_global_icv.run_sched_modifier
|
||||
= gomp_global_icv.run_sched_var != GFS_STATIC;
|
||||
return;
|
||||
}
|
||||
if (*env++ != ',')
|
||||
goto unknown;
|
||||
while (isspace ((unsigned char) *env))
|
||||
@ -129,6 +133,8 @@ parse_schedule (void)
|
||||
if ((int)value != value)
|
||||
goto invalid;
|
||||
|
||||
if (value == 0 && gomp_global_icv.run_sched_var != GFS_STATIC)
|
||||
value = 1;
|
||||
gomp_global_icv.run_sched_modifier = value;
|
||||
return;
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2005, 2008, 2009, 2011 Free Software Foundation, Inc.
|
||||
Contributed by Richard Henderson <rth@redhat.com>.
|
||||
|
||||
This file is part of the GNU OpenMP Library (libgomp).
|
||||
@ -59,7 +59,7 @@ gomp_iter_static_next (long *pstart, long *pend)
|
||||
trip through the outer loop. */
|
||||
if (ws->chunk_size == 0)
|
||||
{
|
||||
unsigned long n, q, i;
|
||||
unsigned long n, q, i, t;
|
||||
unsigned long s0, e0;
|
||||
long s, e;
|
||||
|
||||
@ -74,11 +74,14 @@ gomp_iter_static_next (long *pstart, long *pend)
|
||||
/* Compute the "zero-based" start and end points. That is, as
|
||||
if the loop began at zero and incremented by one. */
|
||||
q = n / nthreads;
|
||||
q += (q * nthreads != n);
|
||||
s0 = q * i;
|
||||
t = n % nthreads;
|
||||
if (i < t)
|
||||
{
|
||||
t = 0;
|
||||
q++;
|
||||
}
|
||||
s0 = q * i + t;
|
||||
e0 = s0 + q;
|
||||
if (e0 > n)
|
||||
e0 = n;
|
||||
|
||||
/* Notice when no iterations allocated for this thread. */
|
||||
if (s0 >= e0)
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2005, 2008, 2009, 2011 Free Software Foundation, Inc.
|
||||
Contributed by Richard Henderson <rth@redhat.com>.
|
||||
|
||||
This file is part of the GNU OpenMP Library (libgomp).
|
||||
@ -60,7 +60,7 @@ gomp_iter_ull_static_next (gomp_ull *pstart, gomp_ull *pend)
|
||||
trip through the outer loop. */
|
||||
if (ws->chunk_size_ull == 0)
|
||||
{
|
||||
gomp_ull n, q, i, s0, e0, s, e;
|
||||
gomp_ull n, q, i, t, s0, e0, s, e;
|
||||
|
||||
if (thr->ts.static_trip > 0)
|
||||
return 1;
|
||||
@ -75,11 +75,14 @@ gomp_iter_ull_static_next (gomp_ull *pstart, gomp_ull *pend)
|
||||
/* Compute the "zero-based" start and end points. That is, as
|
||||
if the loop began at zero and incremented by one. */
|
||||
q = n / nthreads;
|
||||
q += (q * nthreads != n);
|
||||
s0 = q * i;
|
||||
t = n % nthreads;
|
||||
if (i < t)
|
||||
{
|
||||
t = 0;
|
||||
q++;
|
||||
}
|
||||
s0 = q * i + t;
|
||||
e0 = s0 + q;
|
||||
if (e0 > n)
|
||||
e0 = n;
|
||||
|
||||
/* Notice when no iterations allocated for this thread. */
|
||||
if (s0 >= e0)
|
||||
|
Loading…
Reference in New Issue
Block a user