mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-01-30 17:40:05 +08:00
Merge.
This commit is contained in:
commit
3c3f639fe2
@ -317,6 +317,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr
|
||||
|
||||
// GPU: the evaluation of the expression is offloaded to a GPU.
|
||||
#if defined(EIGEN_USE_GPU)
|
||||
#if defined(EIGEN_GPUCC)
|
||||
|
||||
template <typename Expression, bool Vectorizable, bool Tileable>
|
||||
class TensorExecutor<Expression, GpuDevice, Vectorizable, Tileable> {
|
||||
@ -326,7 +327,6 @@ class TensorExecutor<Expression, GpuDevice, Vectorizable, Tileable> {
|
||||
};
|
||||
|
||||
|
||||
#if defined(EIGEN_GPUCC)
|
||||
template <typename Evaluator, typename StorageIndex, bool Vectorizable>
|
||||
struct EigenMetaKernelEval {
|
||||
static __device__ EIGEN_ALWAYS_INLINE
|
||||
|
@ -56,6 +56,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
|
||||
thread_data_[i].thread.reset(
|
||||
env_.CreateThread([this, i]() { WorkerLoop(i); }));
|
||||
}
|
||||
global_steal_partition_ = EncodePartition(0, num_threads_);
|
||||
#ifndef EIGEN_THREAD_LOCAL
|
||||
// Wait for workers to initialize per_thread_map_. Otherwise we might race
|
||||
// with them in Schedule or CurrentThreadId.
|
||||
@ -237,6 +238,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
|
||||
MaxSizeVector<ThreadData> thread_data_;
|
||||
MaxSizeVector<MaxSizeVector<unsigned>> all_coprimes_;
|
||||
MaxSizeVector<EventCount::Waiter> waiters_;
|
||||
unsigned global_steal_partition_;
|
||||
std::atomic<unsigned> blocked_;
|
||||
std::atomic<bool> spinning_;
|
||||
std::atomic<bool> done_;
|
||||
@ -354,6 +356,9 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
|
||||
Task LocalSteal() {
|
||||
PerThread* pt = GetPerThread();
|
||||
unsigned partition = GetStealPartition(pt->thread_id);
|
||||
// If thread steal partition is the same as global partition, there is no
|
||||
// need to go through the steal loop twice.
|
||||
if (global_steal_partition_ == partition) return Task();
|
||||
unsigned start, limit;
|
||||
DecodePartition(partition, &start, &limit);
|
||||
AssertBounds(start, limit);
|
||||
|
Loading…
Reference in New Issue
Block a user