mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-21 07:19:46 +08:00
Fix TensorContractionOp evaluators for GPU and SYCL
This commit is contained in:
parent
038b55464b
commit
c95aacab90
@ -505,9 +505,9 @@ template<typename Scalar, typename Index, typename LhsMapper,
|
||||
__global__ void
|
||||
#if defined(EIGEN_HIPCC)
|
||||
__launch_bounds__(512, 1)
|
||||
#else
|
||||
#else
|
||||
__launch_bounds__(512)
|
||||
#endif
|
||||
#endif
|
||||
EigenContractionKernel(const LhsMapper lhs, const RhsMapper rhs,
|
||||
const OutputMapper output,
|
||||
const Index m_size, const Index n_size, const Index k_size) {
|
||||
@ -698,7 +698,7 @@ EigenFloatContractionKernelInternal16x16(const LhsMapper lhs, const RhsMapper rh
|
||||
|
||||
#undef prefetch_lhs
|
||||
#undef add_vals
|
||||
|
||||
|
||||
Index horiz_base = threadIdx.y*4+base_n;
|
||||
if (!CHECK_LHS_BOUNDARY && !CHECK_RHS_BOUNDARY) {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
@ -1137,7 +1137,7 @@ template<typename Index, typename LhsMapper,
|
||||
__global__ void
|
||||
#if defined(EIGEN_HIPCC)
|
||||
__launch_bounds__(256, 1)
|
||||
#else
|
||||
#else
|
||||
__launch_bounds__(256)
|
||||
#endif
|
||||
EigenFloatContractionKernel(const LhsMapper lhs, const RhsMapper rhs,
|
||||
@ -1184,7 +1184,7 @@ template<typename Index, typename LhsMapper,
|
||||
__global__ void
|
||||
#if defined(EIGEN_HIPCC)
|
||||
__launch_bounds__(256, 1)
|
||||
#else
|
||||
#else
|
||||
__launch_bounds__(256)
|
||||
#endif
|
||||
EigenFloatContractionKernel16x16(const LhsMapper lhs, const RhsMapper rhs,
|
||||
|
@ -23,15 +23,18 @@
|
||||
namespace Eigen {
|
||||
|
||||
template <typename Index, typename LhsScalar, typename RhsScalar,bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered> struct LaunchSyclKernels;
|
||||
template<typename Indices, typename LeftArgType, typename RightArgType>
|
||||
struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, const Eigen::SyclDevice> :
|
||||
public TensorContractionEvaluatorBase<TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, const Eigen::SyclDevice> > {
|
||||
template<typename Indices, typename LeftArgType, typename RightArgType, typename OutputKernelType>
|
||||
struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType>, const Eigen::SyclDevice> :
|
||||
public TensorContractionEvaluatorBase<TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType>, const Eigen::SyclDevice> > {
|
||||
|
||||
static_assert(std::is_same<OutputKernelType, const NoOpOutputKernel>::value,
|
||||
"SYCL tensor contraction does not support output kernels.");
|
||||
|
||||
typedef const Eigen::SyclDevice Device;
|
||||
|
||||
typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> Self;
|
||||
typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType>, Device> Self;
|
||||
typedef TensorContractionEvaluatorBase<Self> Base;
|
||||
typedef TensorContractionOp<Indices, LeftArgType, RightArgType> XprType;
|
||||
typedef TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType> XprType;
|
||||
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
|
||||
typedef typename XprType::Index Index;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
|
Loading…
Reference in New Issue
Block a user