mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-21 07:19:46 +08:00
Fix performance regressions due to https://bitbucket.org/eigen/eigen/pull-requests/662.
The change caused the device struct to be copied for each expression evaluation, and caused, e.g., a 10% regression in the TensorFlow multinomial op on GPU:

Benchmark                          Time(ns)    CPU(ns) Iterations
----------------------------------------------------------------------
BM_Multinomial_gpu_1_100000_4        128173     231326       2922    1.610G items/s

VS

Benchmark                          Time(ns)    CPU(ns) Iterations
----------------------------------------------------------------------
BM_Multinomial_gpu_1_100000_4        146683     246914       2719    1.509G items/s
This commit is contained in:
parent
f22b7283a3
commit
e2999d4c38
@ -164,7 +164,7 @@ struct TensorEvaluator
|
||||
protected:
|
||||
EvaluatorPointerType m_data;
|
||||
Dimensions m_dims;
|
||||
const Device m_device;
|
||||
const Device EIGEN_DEVICE_REF m_device;
|
||||
};
|
||||
|
||||
namespace {
|
||||
@ -302,7 +302,7 @@ struct TensorEvaluator<const Derived, Device>
|
||||
protected:
|
||||
EvaluatorPointerType m_data;
|
||||
Dimensions m_dims;
|
||||
const Device m_device;
|
||||
const Device EIGEN_DEVICE_REF m_device;
|
||||
};
|
||||
|
||||
|
||||
@ -480,7 +480,7 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
|
||||
|
||||
|
||||
private:
|
||||
const Device m_device;
|
||||
const Device EIGEN_DEVICE_REF m_device;
|
||||
const UnaryOp m_functor;
|
||||
TensorEvaluator<ArgType, Device> m_argImpl;
|
||||
};
|
||||
@ -603,7 +603,7 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
|
||||
}
|
||||
#endif
|
||||
private:
|
||||
const Device m_device;
|
||||
const Device EIGEN_DEVICE_REF m_device;
|
||||
const BinaryOp m_functor;
|
||||
TensorEvaluator<LeftArgType, Device> m_leftImpl;
|
||||
TensorEvaluator<RightArgType, Device> m_rightImpl;
|
||||
|
@ -182,7 +182,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
|
||||
private:
|
||||
TensorEvaluator<ArgType, Device> m_impl;
|
||||
const ArgType m_op;
|
||||
const Device m_device;
|
||||
const Device EIGEN_DEVICE_REF m_device;
|
||||
EvaluatorPointerType m_buffer;
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user