mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-21 07:19:46 +08:00
Add tiled evaluation for TensorForcedEvalOp
This commit is contained in:
parent
694084ecbd
commit
b95941e5c2
@ -90,14 +90,21 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
|
|||||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
IsAligned = true,
|
IsAligned = true,
|
||||||
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
||||||
BlockAccess = false,
|
BlockAccess = internal::is_arithmetic<CoeffReturnType>::value,
|
||||||
PreferBlockAccess = false,
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
RawAccess = true
|
RawAccess = true
|
||||||
};
|
};
|
||||||
|
|
||||||
|
typedef typename internal::TensorBlock<
|
||||||
|
CoeffReturnType, Index, internal::traits<ArgType>::NumDimensions, Layout>
|
||||||
|
TensorBlock;
|
||||||
|
typedef typename internal::TensorBlockReader<
|
||||||
|
CoeffReturnType, Index, internal::traits<ArgType>::NumDimensions, Layout>
|
||||||
|
TensorBlockReader;
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
|
EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
|
||||||
/// op_ is used for sycl
|
/// op_ is used for sycl
|
||||||
: m_impl(op.expression(), device), m_op(op.expression()), m_device(device), m_buffer(NULL)
|
: m_impl(op.expression(), device), m_op(op.expression()), m_device(device), m_buffer(NULL)
|
||||||
@ -139,6 +146,14 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
|
|||||||
return internal::ploadt<PacketReturnType, LoadMode>(m_buffer + index);
|
return internal::ploadt<PacketReturnType, LoadMode>(m_buffer + index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
|
||||||
|
std::vector<internal::TensorOpResourceRequirements>*) const {}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(TensorBlock* block) const {
|
||||||
|
assert(m_buffer != NULL);
|
||||||
|
TensorBlockReader::Run(block, m_buffer);
|
||||||
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
|
||||||
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
|
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
|
||||||
}
|
}
|
||||||
|
@ -452,6 +452,38 @@ static void test_execute_slice_lvalue(Device d)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T, int NumDims, typename Device, bool Vectorizable,
|
||||||
|
bool Tileable, int Layout>
|
||||||
|
static void test_execute_broadcasting_of_forced_eval(Device d)
|
||||||
|
{
|
||||||
|
static constexpr int Options = 0 | Layout;
|
||||||
|
|
||||||
|
auto dims = RandomDims<NumDims>(1, 10);
|
||||||
|
Tensor<T, NumDims, Options, Index> src(dims);
|
||||||
|
src.setRandom();
|
||||||
|
|
||||||
|
const auto broadcasts = RandomDims<NumDims>(1, 7);
|
||||||
|
const auto expr = src.square().eval().broadcast(broadcasts);
|
||||||
|
|
||||||
|
// We assume that broadcasting on a default device is tested and correct, so
|
||||||
|
// we can rely on it to verify correctness of tensor executor and tiling.
|
||||||
|
Tensor<T, NumDims, Options, Index> golden;
|
||||||
|
golden = expr;
|
||||||
|
|
||||||
|
// Now do the broadcasting using configured tensor executor.
|
||||||
|
Tensor<T, NumDims, Options, Index> dst(golden.dimensions());
|
||||||
|
|
||||||
|
using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
|
||||||
|
using Executor =
|
||||||
|
internal::TensorExecutor<const Assign, Device, Vectorizable, Tileable>;
|
||||||
|
|
||||||
|
Executor::run(Assign(dst, expr), d);
|
||||||
|
|
||||||
|
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
|
||||||
|
VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#define CALL_SUBTEST_PART(PART) \
|
#define CALL_SUBTEST_PART(PART) \
|
||||||
CALL_SUBTEST_##PART
|
CALL_SUBTEST_##PART
|
||||||
|
|
||||||
@ -528,8 +560,13 @@ EIGEN_DECLARE_TEST(cxx11_tensor_executor) {
|
|||||||
CALL_SUBTEST_COMBINATIONS(11, test_execute_slice_lvalue, float, 4);
|
CALL_SUBTEST_COMBINATIONS(11, test_execute_slice_lvalue, float, 4);
|
||||||
CALL_SUBTEST_COMBINATIONS(11, test_execute_slice_lvalue, float, 5);
|
CALL_SUBTEST_COMBINATIONS(11, test_execute_slice_lvalue, float, 5);
|
||||||
|
|
||||||
|
CALL_SUBTEST_COMBINATIONS(12, test_execute_broadcasting_of_forced_eval, float, 2);
|
||||||
|
CALL_SUBTEST_COMBINATIONS(12, test_execute_broadcasting_of_forced_eval, float, 3);
|
||||||
|
CALL_SUBTEST_COMBINATIONS(12, test_execute_broadcasting_of_forced_eval, float, 4);
|
||||||
|
CALL_SUBTEST_COMBINATIONS(12, test_execute_broadcasting_of_forced_eval, float, 5);
|
||||||
|
|
||||||
// Force CMake to split this test.
|
// Force CMake to split this test.
|
||||||
// EIGEN_SUFFIXES;1;2;3;4;5;6;7;8;9;10;11
|
// EIGEN_SUFFIXES;1;2;3;4;5;6;7;8;9;10;11;12
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef CALL_SUBTEST_COMBINATIONS
|
#undef CALL_SUBTEST_COMBINATIONS
|
||||||
|
Loading…
Reference in New Issue
Block a user