This commit is contained in:
Rasmus Munk Larsen 2019-03-06 11:54:30 -08:00
commit 3c3f639fe2
2 changed files with 6 additions and 1 deletions

View File

@ -317,6 +317,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr
// GPU: the evaluation of the expression is offloaded to a GPU.
#if defined(EIGEN_USE_GPU)
#if defined(EIGEN_GPUCC)
template <typename Expression, bool Vectorizable, bool Tileable>
class TensorExecutor<Expression, GpuDevice, Vectorizable, Tileable> {
@ -326,7 +327,6 @@ class TensorExecutor<Expression, GpuDevice, Vectorizable, Tileable> {
};
#if defined(EIGEN_GPUCC)
template <typename Evaluator, typename StorageIndex, bool Vectorizable>
struct EigenMetaKernelEval {
static __device__ EIGEN_ALWAYS_INLINE

View File

@ -56,6 +56,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
thread_data_[i].thread.reset(
env_.CreateThread([this, i]() { WorkerLoop(i); }));
}
global_steal_partition_ = EncodePartition(0, num_threads_);
#ifndef EIGEN_THREAD_LOCAL
// Wait for workers to initialize per_thread_map_. Otherwise we might race
// with them in Schedule or CurrentThreadId.
@ -237,6 +238,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
MaxSizeVector<ThreadData> thread_data_;
MaxSizeVector<MaxSizeVector<unsigned>> all_coprimes_;
MaxSizeVector<EventCount::Waiter> waiters_;
unsigned global_steal_partition_;
std::atomic<unsigned> blocked_;
std::atomic<bool> spinning_;
std::atomic<bool> done_;
@ -354,6 +356,9 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
Task LocalSteal() {
PerThread* pt = GetPerThread();
unsigned partition = GetStealPartition(pt->thread_id);
// If thread steal partition is the same as global partition, there is no
// need to go through the steal loop twice.
if (global_steal_partition_ == partition) return Task();
unsigned start, limit;
DecodePartition(partition, &start, &limit);
AssertBounds(start, limit);