mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-15 07:10:37 +08:00
Fix performance regressions due to https://bitbucket.org/eigen/eigen/pull-requests/662.
The change caused the device struct to be copied for each expression evaluation, and caused, e.g., a 10% regression in the TensorFlow multinomial op on GPU:

Benchmark                       Time(ns)  CPU(ns)  Iterations
----------------------------------------------------------------------
BM_Multinomial_gpu_1_100000_4     128173   231326        2922  1.610G items/s

VS

Benchmark                       Time(ns)  CPU(ns)  Iterations
----------------------------------------------------------------------
BM_Multinomial_gpu_1_100000_4     146683   246914        2719  1.509G items/s
This commit is contained in:
parent
f22b7283a3
commit
e2999d4c38
@ -164,7 +164,7 @@ struct TensorEvaluator
|
|||||||
protected:
|
protected:
|
||||||
EvaluatorPointerType m_data;
|
EvaluatorPointerType m_data;
|
||||||
Dimensions m_dims;
|
Dimensions m_dims;
|
||||||
const Device m_device;
|
const Device EIGEN_DEVICE_REF m_device;
|
||||||
};
|
};
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
@ -302,7 +302,7 @@ struct TensorEvaluator<const Derived, Device>
|
|||||||
protected:
|
protected:
|
||||||
EvaluatorPointerType m_data;
|
EvaluatorPointerType m_data;
|
||||||
Dimensions m_dims;
|
Dimensions m_dims;
|
||||||
const Device m_device;
|
const Device EIGEN_DEVICE_REF m_device;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -480,7 +480,7 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
|
|||||||
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const Device m_device;
|
const Device EIGEN_DEVICE_REF m_device;
|
||||||
const UnaryOp m_functor;
|
const UnaryOp m_functor;
|
||||||
TensorEvaluator<ArgType, Device> m_argImpl;
|
TensorEvaluator<ArgType, Device> m_argImpl;
|
||||||
};
|
};
|
||||||
@ -603,7 +603,7 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
private:
|
private:
|
||||||
const Device m_device;
|
const Device EIGEN_DEVICE_REF m_device;
|
||||||
const BinaryOp m_functor;
|
const BinaryOp m_functor;
|
||||||
TensorEvaluator<LeftArgType, Device> m_leftImpl;
|
TensorEvaluator<LeftArgType, Device> m_leftImpl;
|
||||||
TensorEvaluator<RightArgType, Device> m_rightImpl;
|
TensorEvaluator<RightArgType, Device> m_rightImpl;
|
||||||
|
@ -182,7 +182,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
|
|||||||
private:
|
private:
|
||||||
TensorEvaluator<ArgType, Device> m_impl;
|
TensorEvaluator<ArgType, Device> m_impl;
|
||||||
const ArgType m_op;
|
const ArgType m_op;
|
||||||
const Device m_device;
|
const Device EIGEN_DEVICE_REF m_device;
|
||||||
EvaluatorPointerType m_buffer;
|
EvaluatorPointerType m_buffer;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user