Merge.

2025-01-30 17:40:05 +08:00 · 2019-03-06 11:54:30 -08:00 · 2019-03-06 11:54:30 -08:00 · 3c3f639fe2
commit 3c3f639fe2
parent f4ec8edea8 41cdc370d0
2 changed files with 6 additions and 1 deletions
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@ -317,6 +317,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr

 // GPU: the evaluation of the expression is offloaded to a GPU.
 #if defined(EIGEN_USE_GPU)
+#if defined(EIGEN_GPUCC)

 template <typename Expression, bool Vectorizable, bool Tileable>
 class TensorExecutor<Expression, GpuDevice, Vectorizable, Tileable> {
@ -326,7 +327,6 @@ class TensorExecutor<Expression, GpuDevice, Vectorizable, Tileable> {
 };


-#if defined(EIGEN_GPUCC)
 template <typename Evaluator, typename StorageIndex, bool Vectorizable>
 struct EigenMetaKernelEval {
  static __device__ EIGEN_ALWAYS_INLINE
--- a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
@ -56,6 +56,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
      thread_data_[i].thread.reset(
          env_.CreateThread([this, i]() { WorkerLoop(i); }));
    }
+    global_steal_partition_ = EncodePartition(0, num_threads_);
 #ifndef EIGEN_THREAD_LOCAL
    // Wait for workers to initialize per_thread_map_. Otherwise we might race
    // with them in Schedule or CurrentThreadId.
@ -237,6 +238,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
  MaxSizeVector<ThreadData> thread_data_;
  MaxSizeVector<MaxSizeVector<unsigned>> all_coprimes_;
  MaxSizeVector<EventCount::Waiter> waiters_;
+  unsigned global_steal_partition_;
  std::atomic<unsigned> blocked_;
  std::atomic<bool> spinning_;
  std::atomic<bool> done_;
@ -354,6 +356,9 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
  Task LocalSteal() {
    PerThread* pt = GetPerThread();
    unsigned partition = GetStealPartition(pt->thread_id);
+    // If thread steal partition is the same as global partition, there is no
+    // need to go through the steal loop twice.
+    if (global_steal_partition_ == partition) return Task();
    unsigned start, limit;
    DecodePartition(partition, &start, &limit);
    AssertBounds(start, limit);