mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-27 07:29:52 +08:00
Reduce dispatch overhead in parallelFor by calling thread_pool.Schedule() for only one of the two recursive calls in handleRange. This avoids going through the schedule path to push both recursive calls onto another thread queue in the binary tree, and instead executes one of them on the main thread. At the leaf level this will still activate a full complement of threads, but will save up to 50% of the overhead in Schedule (random number generation, and insertion into the queue, which includes signaling via atomics).
This commit is contained in:
parent
0ee92aa38e
commit
32df1b1046
@ -256,7 +256,7 @@ struct ThreadPoolDevice {
|
|||||||
// Split into halves and submit to the pool.
|
// Split into halves and submit to the pool.
|
||||||
Index mid = first + divup((last - first) / 2, block_size) * block_size;
|
Index mid = first + divup((last - first) / 2, block_size) * block_size;
|
||||||
pool_->Schedule([=, &handleRange]() { handleRange(mid, last); });
|
pool_->Schedule([=, &handleRange]() { handleRange(mid, last); });
|
||||||
pool_->Schedule([=, &handleRange]() { handleRange(first, mid); });
|
handleRange(first, mid);
|
||||||
};
|
};
|
||||||
handleRange(0, n);
|
handleRange(0, n);
|
||||||
barrier.Wait();
|
barrier.Wait();
|
||||||
|
Loading…
Reference in New Issue
Block a user