mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-27 07:29:52 +08:00
Merge branch 'nan_prop' of https://gitlab.com/rmlarsen1/eigen into nan_prop
This commit is contained in:
commit
e67672024d
@ -454,6 +454,7 @@ template<typename Derived> class DenseBase
|
|||||||
template<int NaNPropagation>
|
template<int NaNPropagation>
|
||||||
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar maxCoeff() const;
|
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar maxCoeff() const;
|
||||||
|
|
||||||
|
|
||||||
// By default, the fastest version with undefined NaN propagation semantics is
|
// By default, the fastest version with undefined NaN propagation semantics is
|
||||||
// used.
|
// used.
|
||||||
// TODO(rmlarsen): Replace with default template argument when we move to
|
// TODO(rmlarsen): Replace with default template argument when we move to
|
||||||
@ -465,8 +466,6 @@ template<typename Derived> class DenseBase
|
|||||||
return maxCoeff<PropagateFast>();
|
return maxCoeff<PropagateFast>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template<typename IndexType> EIGEN_DEVICE_FUNC
|
template<typename IndexType> EIGEN_DEVICE_FUNC
|
||||||
typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const;
|
typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const;
|
||||||
template<typename IndexType> EIGEN_DEVICE_FUNC
|
template<typename IndexType> EIGEN_DEVICE_FUNC
|
||||||
|
@ -12,12 +12,21 @@
|
|||||||
|
|
||||||
#include "MatrixProductCommon.h"
|
#include "MatrixProductCommon.h"
|
||||||
|
|
||||||
#if __GNUC__ > 10 || __clang_major__ > 11 || \
|
#if EIGEN_COMP_LLVM
|
||||||
(__GNUC__ == 10 && (__GNUC_MINOR__ > 2 || \
|
#if !defined(EIGEN_ALTIVEC_DISABLE_MMA) && !defined(EIGEN_ALTIVEC_MMA_ONLY)
|
||||||
(__GNUC_MINOR__ == 2 && \
|
#ifdef __MMA__
|
||||||
__GNUC_PATCHLEVEL__ >= 1)))
|
#define EIGEN_ALTIVEC_MMA_ONLY
|
||||||
|
#else
|
||||||
|
#define EIGEN_ALTIVEC_DISABLE_MMA
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __has_builtin
|
||||||
|
#if __has_builtin(__builtin_mma_assemble_acc)
|
||||||
#define ALTIVEC_MMA_SUPPORT
|
#define ALTIVEC_MMA_SUPPORT
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)
|
#if defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)
|
||||||
#include "MatrixProductMMA.h"
|
#include "MatrixProductMMA.h"
|
||||||
|
@ -12,9 +12,11 @@
|
|||||||
|
|
||||||
#pragma GCC target("cpu=power10")
|
#pragma GCC target("cpu=power10")
|
||||||
|
|
||||||
|
#ifdef __has_builtin
|
||||||
#if !__has_builtin(__builtin_vsx_assemble_pair)
|
#if !__has_builtin(__builtin_vsx_assemble_pair)
|
||||||
#define __builtin_vsx_assemble_pair __builtin_mma_assemble_pair
|
#define __builtin_vsx_assemble_pair __builtin_mma_assemble_pair
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace Eigen {
|
namespace Eigen {
|
||||||
|
|
||||||
|
@ -409,6 +409,9 @@ EIGEN_DECLARE_TEST(gpu_basic)
|
|||||||
// (aka 'ArrayBase<Eigen::Replicate<Eigen::Array<float, 4, 1, 0, 4, 1>, -1, -1> >') has protected default constructor
|
// (aka 'ArrayBase<Eigen::Replicate<Eigen::Array<float, 4, 1, 0, 4, 1>, -1, -1> >') has protected default constructor
|
||||||
CALL_SUBTEST( run_and_compare_to_gpu(replicate<Array4f>(), nthreads, in, out) );
|
CALL_SUBTEST( run_and_compare_to_gpu(replicate<Array4f>(), nthreads, in, out) );
|
||||||
CALL_SUBTEST( run_and_compare_to_gpu(replicate<Array33f>(), nthreads, in, out) );
|
CALL_SUBTEST( run_and_compare_to_gpu(replicate<Array33f>(), nthreads, in, out) );
|
||||||
|
|
||||||
|
// HIP does not support new/delete on device.
|
||||||
|
CALL_SUBTEST( run_and_compare_to_gpu(alloc_new_delete<Vector3f>(), nthreads, in, out) );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
CALL_SUBTEST( run_and_compare_to_gpu(redux<Array4f>(), nthreads, in, out) );
|
CALL_SUBTEST( run_and_compare_to_gpu(redux<Array4f>(), nthreads, in, out) );
|
||||||
@ -438,5 +441,4 @@ EIGEN_DECLARE_TEST(gpu_basic)
|
|||||||
typedef Matrix<float,6,6> Matrix6f;
|
typedef Matrix<float,6,6> Matrix6f;
|
||||||
CALL_SUBTEST( run_and_compare_to_gpu(eigenvalues<Matrix6f>(), nthreads, in, out) );
|
CALL_SUBTEST( run_and_compare_to_gpu(eigenvalues<Matrix6f>(), nthreads, in, out) );
|
||||||
#endif
|
#endif
|
||||||
CALL_SUBTEST( run_and_compare_to_gpu(alloc_new_delete<Vector3f>(), nthreads, in, out) );
|
|
||||||
}
|
}
|
||||||
|
@ -451,6 +451,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
|
|||||||
}
|
}
|
||||||
|
|
||||||
m_is_identity = true;
|
m_is_identity = true;
|
||||||
|
bool degenerate = false;
|
||||||
for (int i = 0; i < internal::array_size<Dimensions>::value; ++i) {
|
for (int i = 0; i < internal::array_size<Dimensions>::value; ++i) {
|
||||||
eigen_assert(m_impl.dimensions()[i] >=
|
eigen_assert(m_impl.dimensions()[i] >=
|
||||||
op.sizes()[i] + op.startIndices()[i]);
|
op.sizes()[i] + op.startIndices()[i]);
|
||||||
@ -458,6 +459,9 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
|
|||||||
op.startIndices()[i] != 0) {
|
op.startIndices()[i] != 0) {
|
||||||
m_is_identity = false;
|
m_is_identity = false;
|
||||||
}
|
}
|
||||||
|
if (op.sizes()[i] == 0) { // we have an empty size
|
||||||
|
degenerate = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// No strides for scalars.
|
// No strides for scalars.
|
||||||
@ -475,8 +479,8 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
|
|||||||
m_outputStrides[0] = 1;
|
m_outputStrides[0] = 1;
|
||||||
for (int i = 1; i < NumDims; ++i) {
|
for (int i = 1; i < NumDims; ++i) {
|
||||||
m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
|
m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
|
||||||
m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
|
// NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash
|
||||||
}
|
m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]); }
|
||||||
} else {
|
} else {
|
||||||
m_inputStrides[NumDims-1] = 1;
|
m_inputStrides[NumDims-1] = 1;
|
||||||
for (int i = NumDims - 2; i >= 0; --i) {
|
for (int i = NumDims - 2; i >= 0; --i) {
|
||||||
@ -487,8 +491,8 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
|
|||||||
m_outputStrides[NumDims-1] = 1;
|
m_outputStrides[NumDims-1] = 1;
|
||||||
for (int i = NumDims - 2; i >= 0; --i) {
|
for (int i = NumDims - 2; i >= 0; --i) {
|
||||||
m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
|
m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
|
||||||
m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
|
// NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash
|
||||||
}
|
m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]); }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -479,6 +479,66 @@ static void test_composition()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename T, int DataLayout>
|
||||||
|
static void test_empty_slice()
|
||||||
|
{
|
||||||
|
Tensor<T, 3, DataLayout> tensor(2,3,5);
|
||||||
|
tensor.setRandom();
|
||||||
|
Tensor<T, 3, DataLayout> copy = tensor;
|
||||||
|
|
||||||
|
// empty size in first dimension
|
||||||
|
Eigen::DSizes<ptrdiff_t, 3> indices1(1,2,3);
|
||||||
|
Eigen::DSizes<ptrdiff_t, 3> sizes1(0,1,2);
|
||||||
|
Tensor<T, 3, DataLayout> slice1(0,1,2);
|
||||||
|
slice1.setRandom();
|
||||||
|
tensor.slice(indices1, sizes1) = slice1;
|
||||||
|
|
||||||
|
// empty size in second dimension
|
||||||
|
Eigen::DSizes<ptrdiff_t, 3> indices2(1,2,3);
|
||||||
|
Eigen::DSizes<ptrdiff_t, 3> sizes2(1,0,2);
|
||||||
|
Tensor<T, 3, DataLayout> slice2(1,0,2);
|
||||||
|
slice2.setRandom();
|
||||||
|
tensor.slice(indices2, sizes2) = slice2;
|
||||||
|
|
||||||
|
// empty size in third dimension
|
||||||
|
Eigen::DSizes<ptrdiff_t, 3> indices3(1,2,3);
|
||||||
|
Eigen::DSizes<ptrdiff_t, 3> sizes3(1,1,0);
|
||||||
|
Tensor<T, 3, DataLayout> slice3(1,1,0);
|
||||||
|
slice3.setRandom();
|
||||||
|
tensor.slice(indices3, sizes3) = slice3;
|
||||||
|
|
||||||
|
// empty size in first and second dimension
|
||||||
|
Eigen::DSizes<ptrdiff_t, 3> indices4(1,2,3);
|
||||||
|
Eigen::DSizes<ptrdiff_t, 3> sizes4(0,0,2);
|
||||||
|
Tensor<T, 3, DataLayout> slice4(0,0,2);
|
||||||
|
slice4.setRandom();
|
||||||
|
tensor.slice(indices4, sizes4) = slice4;
|
||||||
|
|
||||||
|
// empty size in second and third dimension
|
||||||
|
Eigen::DSizes<ptrdiff_t, 3> indices5(1,2,3);
|
||||||
|
Eigen::DSizes<ptrdiff_t, 3> sizes5(1,0,0);
|
||||||
|
Tensor<T, 3, DataLayout> slice5(1,0,0);
|
||||||
|
slice5.setRandom();
|
||||||
|
tensor.slice(indices5, sizes5) = slice5;
|
||||||
|
|
||||||
|
// empty size in all dimensions
|
||||||
|
Eigen::DSizes<ptrdiff_t, 3> indices6(1,2,3);
|
||||||
|
Eigen::DSizes<ptrdiff_t, 3> sizes6(0,0,0);
|
||||||
|
Tensor<T, 3, DataLayout> slice6(0,0,0);
|
||||||
|
slice6.setRandom();
|
||||||
|
tensor.slice(indices6, sizes6) = slice6;
|
||||||
|
|
||||||
|
// none of these operations should change the tensor's components
|
||||||
|
// because all of the rvalue slices have at least one zero dimension
|
||||||
|
for (int i = 0; i < 2; ++i) {
|
||||||
|
for (int j = 0; j < 3; ++j) {
|
||||||
|
for (int k = 0; k < 5; ++k) {
|
||||||
|
VERIFY_IS_EQUAL(tensor(i,j,k), copy(i,j,k));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#define CALL_SUBTEST_PART(PART) \
|
#define CALL_SUBTEST_PART(PART) \
|
||||||
CALL_SUBTEST_##PART
|
CALL_SUBTEST_##PART
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user