diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h index 9353f41e2..43a274651 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h @@ -335,8 +335,12 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface { PerThread* pt = GetPerThread(); const size_t size = limit - start; unsigned r = Rand(&pt->rand); - unsigned victim = r % size; - unsigned inc = all_coprimes_[size - 1][r % all_coprimes_[size - 1].size()]; + // Reduce r into [0, size) range, this utilizes trick from + // https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ + eigen_plain_assert(all_coprimes_[size - 1].size() < (1<<30)); + unsigned victim = ((uint64_t)r * (uint64_t)size) >> 32; + unsigned index = ((uint64_t) all_coprimes_[size - 1].size() * (uint64_t)r) >> 32; + unsigned inc = all_coprimes_[size - 1][index]; for (unsigned i = 0; i < size; i++) { eigen_plain_assert(start + victim < limit);