Fix broken tensor executor test, allow tensor packets of size 1.

This commit is contained in:
Antonio Sánchez 2022-03-07 20:30:37 +00:00 committed by Rasmus Munk Larsen
parent 28c7c1a629
commit 008ff3483a
13 changed files with 28 additions and 56 deletions

View File

@ -1847,8 +1847,3 @@ product of 2 1d tensors (through contractions) returns a 0d tensor.
* The IndexList class requires a cxx11 compliant compiler. You can use an
array of indices instead if you don't have access to a modern compiler.
* On GPUs only floating point values are properly tested and optimized for.
* Complex and integer values are known to be broken on GPUs. If you try to use
them you'll most likely end up triggering a static assertion failure such as
EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)

View File

@ -370,7 +370,6 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetOneByNByOne
(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
@ -416,7 +415,6 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
// Concatenates m_broadcast[dim] copies,
// [v0, ..., vN, v0, ..., vN, ... ]
// with dim == NumDims - 1 for col-major, dim == 0 for row-major.
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
// Size of flattened tensor.
@ -445,7 +443,6 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
// Interleaves m_broadcast[dim] copies,
// [v0, v0, ..., v1, v1, ..., vN, vN, ... ]
// with dim == 0 for col-major, dim == NumDims - 1 for row-major.
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index + PacketSize-1 < dimensions().TotalSize());
const Index M = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ?
@ -476,7 +473,6 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
const Index originalIndex = index;
@ -534,7 +530,6 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
const Index originalIndex = index;

View File

@ -219,7 +219,6 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
if (isInnerChipping()) {
@ -447,8 +446,6 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketReturnType& x)
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
if (this->isInnerChipping()) {
// m_stride is equal to 1, so let's avoid the integer division.
eigen_assert(this->m_stride == 1);

View File

@ -155,7 +155,6 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
const int packetSize = PacketType<CoeffReturnType, Device>::size;
EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+packetSize-1 < dimensions().TotalSize());
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];

View File

@ -449,7 +449,6 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1) {

View File

@ -551,7 +551,6 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
const Index initialIndex = index;
@ -609,7 +608,6 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
const Index initialIndex = index;

View File

@ -197,7 +197,6 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
Index output_stride_index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? NumDims - 1 : 0;

View File

@ -837,7 +837,6 @@ static const bool RunningOnGPU = false;
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index + PacketSize - 1 < Index(internal::array_prod(dimensions())));
if (RunningOnGPU && m_result) {

View File

@ -215,7 +215,6 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
PacketReturnType packet(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
// TODO(ndjaitly): write a better packing routine that uses
@ -448,7 +447,6 @@ struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device>
template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketReturnType& x) {
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
// This code is pilfered from TensorMorphing.h

View File

@ -227,8 +227,7 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
return PacketLoader<LoadMode, Self, TensorEvaluator<ArgType, Device>::PacketAccess>::Run(*this, index);
}
@ -399,8 +398,6 @@ struct TensorEvaluator<TensorShufflingOp<Shuffle, ArgType>, Device>
template <int StoreMode> EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketReturnType& x)
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
EIGEN_UNROLL_LOOP

View File

@ -245,8 +245,6 @@ struct TensorEvaluator<const TensorTraceOp<Dims, ArgType>, Device>
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const {
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];

View File

@ -421,7 +421,6 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1 ||

View File

@ -612,43 +612,42 @@ static void test_async_execute_binary_expr(Device d)
}
}
#ifdef EIGEN_DONT_VECTORIZE
#define VECTORIZABLE(VAL) !EIGEN_DONT_VECTORIZE && VAL
#else
#define VECTORIZABLE(VAL) VAL
#ifndef EIGEN_DONT_VECTORIZE
#define EIGEN_DONT_VECTORIZE 0
#endif
#define VECTORIZABLE(T, VAL) !EIGEN_DONT_VECTORIZE && Eigen::internal::packet_traits<T>::Vectorizable && VAL
#define CALL_SUBTEST_PART(PART) \
CALL_SUBTEST_##PART
#define CALL_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS) \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::Off, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::On, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::Off, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::On, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, RowMajor>(tp_device)))
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(T, true), TiledEvaluation::On, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(T, true), TiledEvaluation::On, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::On, RowMajor>(tp_device)))
// NOTE: Currently only ThreadPoolDevice supports async expression evaluation.
#define CALL_ASYNC_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS) \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, RowMajor>(tp_device)))
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::On, RowMajor>(tp_device)))
EIGEN_DECLARE_TEST(cxx11_tensor_executor) {
Eigen::DefaultDevice default_device;