Merged in rmlarsen/eigen2 (pull request PR-409)

Fix oversharding bug in parallelFor.
This commit is contained in:
Benoit Steiner 2018-06-21 18:34:57 +00:00
commit b6ffcd22e3

View File

@ -189,9 +189,11 @@ struct ThreadPoolDevice {
// of blocks to be evenly dividable across threads.
double block_size_f = 1.0 / CostModel::taskSize(1, cost);
Index block_size = numext::mini(n, numext::maxi<Index>(1, block_size_f));
const Index max_block_size =
numext::mini(n, numext::maxi<Index>(1, 2 * block_size_f));
const Index max_oversharding_factor = 4;
Index block_size = numext::mini(
n, numext::maxi<Index>(divup<Index>(n, max_oversharding_factor * numThreads()),
block_size_f));
const Index max_block_size = numext::mini(n, 2 * block_size);
if (block_align) {
Index new_block_size = block_align(block_size);
eigen_assert(new_block_size >= block_size);
@ -205,7 +207,8 @@ struct ThreadPoolDevice {
(divup<int>(block_count, numThreads()) * numThreads());
// Now try to increase block size up to max_block_size as long as it
// doesn't decrease parallel efficiency.
for (Index prev_block_count = block_count; prev_block_count > 1;) {
for (Index prev_block_count = block_count;
max_efficiency < 1.0 && prev_block_count > 1;) {
// This is the next block size that divides size into a smaller number
// of blocks than the current block_size.
Index coarser_block_size = divup(n, prev_block_count - 1);