mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-12 19:20:36 +08:00
Avoid a division in NonBlockingThreadPool::Steal.
Looking at profiles, we spend ~10-20% of the time in Steal simply computing `random % size`. We can reduce a random 32-bit integer into the [0, size) range with a single multiplication and a shift instead. This transformation is described in https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
This commit is contained in:
parent
7769600245
commit
eb6cc29583
@ -335,8 +335,12 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
|
||||
PerThread* pt = GetPerThread();
|
||||
const size_t size = limit - start;
|
||||
unsigned r = Rand(&pt->rand);
|
||||
unsigned victim = r % size;
|
||||
unsigned inc = all_coprimes_[size - 1][r % all_coprimes_[size - 1].size()];
|
||||
// Reduce r into the [0, size) range; this utilizes the trick from
|
||||
// https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
|
||||
eigen_plain_assert(all_coprimes_[size - 1].size() < (1<<30));
|
||||
unsigned victim = ((uint64_t)r * (uint64_t)size) >> 32;
|
||||
unsigned index = ((uint64_t) all_coprimes_[size - 1].size() * (uint64_t)r) >> 32;
|
||||
unsigned inc = all_coprimes_[size - 1][index];
|
||||
|
||||
for (unsigned i = 0; i < size; i++) {
|
||||
eigen_plain_assert(start + victim < limit);
|
||||
|
Loading…
x
Reference in New Issue
Block a user