Fix oversharding bug in parallelFor.

This commit is contained in:
Rasmus Munk Larsen 2018-06-20 17:51:48 -07:00
parent b8271bb368
commit 5418154a45

View File

@@ -189,9 +189,11 @@ struct ThreadPoolDevice {
// of blocks to be evenly dividable across threads. // of blocks to be evenly dividable across threads.
double block_size_f = 1.0 / CostModel::taskSize(1, cost); double block_size_f = 1.0 / CostModel::taskSize(1, cost);
Index block_size = numext::mini(n, numext::maxi<Index>(1, block_size_f)); const Index max_oversharding_factor = 4;
const Index max_block_size = Index block_size = numext::mini(
numext::mini(n, numext::maxi<Index>(1, 2 * block_size_f)); n, numext::maxi<Index>(divup<Index>(n, max_oversharding_factor * numThreads()),
block_size_f));
const Index max_block_size = numext::mini(n, 2 * block_size);
if (block_align) { if (block_align) {
Index new_block_size = block_align(block_size); Index new_block_size = block_align(block_size);
eigen_assert(new_block_size >= block_size); eigen_assert(new_block_size >= block_size);
@@ -205,7 +207,8 @@ struct ThreadPoolDevice {
(divup<int>(block_count, numThreads()) * numThreads()); (divup<int>(block_count, numThreads()) * numThreads());
// Now try to increase block size up to max_block_size as long as it // Now try to increase block size up to max_block_size as long as it
// doesn't decrease parallel efficiency. // doesn't decrease parallel efficiency.
for (Index prev_block_count = block_count; prev_block_count > 1;) { for (Index prev_block_count = block_count;
max_efficiency < 1.0 && prev_block_count > 1;) {
// This is the next block size that divides size into a smaller number // This is the next block size that divides size into a smaller number
// of blocks than the current block_size. // of blocks than the current block_size.
Index coarser_block_size = divup(n, prev_block_count - 1); Index coarser_block_size = divup(n, prev_block_count - 1);