From 573b377110d488886bfc6b319c140a3375d5d91a Mon Sep 17 00:00:00 2001
From: Benoit Steiner
Date: Fri, 27 Feb 2015 08:46:04 -0800
Subject: [PATCH] Added support for vectorized type casting of tensors

---
 Eigen/Core                                        |   2 +
 Eigen/src/Core/GenericPacketMath.h                |  22 ++
 Eigen/src/Core/arch/AVX/TypeCasting.h             |  51 +++++
 Eigen/src/Core/arch/SSE/TypeCasting.h             |  77 +++++++
 unsupported/Eigen/CXX11/Tensor                    |   1 +
 .../Eigen/CXX11/src/Tensor/TensorBase.h           |   4 +-
 .../Eigen/CXX11/src/Tensor/TensorConversion.h     | 202 ++++++++++++++++++
 .../src/Tensor/TensorForwardDeclarations.h        |   1 +
 unsupported/test/cxx11_tensor_casts.cpp           |  56 ++++-
 9 files changed, 413 insertions(+), 3 deletions(-)
 create mode 100644 Eigen/src/Core/arch/AVX/TypeCasting.h
 create mode 100644 Eigen/src/Core/arch/SSE/TypeCasting.h
 create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h

diff --git a/Eigen/Core b/Eigen/Core
index 0b8eaa61c..b7205bda5 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -297,10 +297,12 @@ using std::ptrdiff_t;
   #include "src/Core/arch/AVX/PacketMath.h"
   #include "src/Core/arch/AVX/MathFunctions.h"
   #include "src/Core/arch/AVX/Complex.h"
+  #include "src/Core/arch/AVX/TypeCasting.h"
 #elif defined EIGEN_VECTORIZE_SSE
   #include "src/Core/arch/SSE/PacketMath.h"
   #include "src/Core/arch/SSE/MathFunctions.h"
   #include "src/Core/arch/SSE/Complex.h"
+  #include "src/Core/arch/SSE/TypeCasting.h"
 #elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX)
   #include "src/Core/arch/AltiVec/PacketMath.h"
   #include "src/Core/arch/AltiVec/Complex.h"
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 721280b2c..678938c6b 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -98,6 +98,28 @@ template<typename T> struct packet_traits : default_packet_traits
 
 template<typename T> struct packet_traits<const T> : packet_traits<T> { };
 
+template <typename SrcScalar, typename TgtScalar> struct type_casting_traits {
+  enum {
+    VectorizedCast = 0,
+    SrcCoeffRatio = 1,
+    TgtCoeffRatio = 1
+  };
+};
+
+
+/** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
+template <typename SrcPacket, typename TgtPacket>
+EIGEN_DEVICE_FUNC inline TgtPacket
+pcast(const SrcPacket& a) {
+  return static_cast<TgtPacket>(a);
+}
+template <typename SrcPacket, typename TgtPacket>
+EIGEN_DEVICE_FUNC inline TgtPacket
+pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
+  return static_cast<TgtPacket>(a);
+}
+
+
 /** \internal \returns a + b (coeff-wise) */
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 padd(const Packet& a,
diff --git a/Eigen/src/Core/arch/AVX/TypeCasting.h b/Eigen/src/Core/arch/AVX/TypeCasting.h
new file mode 100644
index 000000000..83bfdc604
--- /dev/null
+++ b/Eigen/src/Core/arch/AVX/TypeCasting.h
@@ -0,0 +1,51 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Benoit Steiner
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TYPE_CASTING_AVX_H
+#define EIGEN_TYPE_CASTING_AVX_H
+
+namespace Eigen {
+
+namespace internal {
+
+// For now we use SSE to handle integers, so we can't use AVX instructions to cast
+// from int to float
+template <>
+struct type_casting_traits<float, int> {
+  enum {
+    VectorizedCast = 0,
+    SrcCoeffRatio = 1,
+    TgtCoeffRatio = 1
+  };
+};
+
+template <>
+struct type_casting_traits<int, float> {
+  enum {
+    VectorizedCast = 0,
+    SrcCoeffRatio = 1,
+    TgtCoeffRatio = 1
+  };
+};
+
+
+
+template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) {
+  return _mm256_cvtps_epi32(a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8i, Packet8f>(const Packet8i& a) {
+  return _mm256_cvtepi32_ps(a);
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TYPE_CASTING_AVX_H
diff --git a/Eigen/src/Core/arch/SSE/TypeCasting.h b/Eigen/src/Core/arch/SSE/TypeCasting.h
new file mode 100644
index 000000000..454f4d38d
--- /dev/null
+++ b/Eigen/src/Core/arch/SSE/TypeCasting.h
@@ -0,0 +1,77 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Benoit Steiner
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TYPE_CASTING_SSE_H
+#define EIGEN_TYPE_CASTING_SSE_H
+
+namespace Eigen {
+
+namespace internal {
+
+template <>
+struct type_casting_traits<float, int> {
+  enum {
+    VectorizedCast = 1,
+    SrcCoeffRatio = 1,
+    TgtCoeffRatio = 1
+  };
+};
+
+template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
+  return _mm_cvtps_epi32(a);
+}
+
+
+template <>
+struct type_casting_traits<int, float> {
+  enum {
+    VectorizedCast = 1,
+    SrcCoeffRatio = 1,
+    TgtCoeffRatio = 1
+  };
+};
+
+template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
+  return _mm_cvtepi32_ps(a);
+}
+
+
+template <>
+struct type_casting_traits<double, float> {
+  enum {
+    VectorizedCast = 1,
+    SrcCoeffRatio = 2,
+    TgtCoeffRatio = 1
+  };
+};
+
+template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
+  return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
+}
+
+template <>
+struct type_casting_traits<float, double> {
+  enum {
+    VectorizedCast = 1,
+    SrcCoeffRatio = 1,
+    TgtCoeffRatio = 2
+  };
+};
+
+template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {
+  // Simply discard the second half of the input
+  return _mm_cvtps_pd(a);
+}
+
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TYPE_CASTING_SSE_H
diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor
index 34107ae71..7bd8cc9d4 100644
--- a/unsupported/Eigen/CXX11/Tensor
+++ b/unsupported/Eigen/CXX11/Tensor
@@ -65,6 +65,7 @@
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h"
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index 13709b504..e22dd4de0 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -164,9 +164,9 @@ class TensorBase<Derived, ReadOnlyAccessors>
     }
 
     template <typename NewType> EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_cast_op<Scalar, NewType>, const Derived>
+    EIGEN_STRONG_INLINE const TensorConversionOp<NewType, const Derived>
     cast() const {
-      return unaryExpr(internal::scalar_cast_op<Scalar, NewType>());
+      return TensorConversionOp<NewType, const Derived>(derived());
     }
 
     // Generic binary operation support.
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
new file mode 100644
index 000000000..29f536cf9
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
@@ -0,0 +1,202 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Benoit Steiner
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
+#define EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
+
+namespace Eigen {
+
+/** \class TensorConversionOp
+  * \ingroup CXX11_Tensor_Module
+  *
+  * \brief Tensor conversion class. This class makes it possible to vectorize
+  * type casting operations when the number of scalars per packet in the source
+  * and the destination type differ
+  */
+namespace internal {
+template<typename TargetType, typename XprType>
+struct traits<TensorConversionOp<TargetType, XprType> >
+{
+  // Type promotion to handle the case where the types of the lhs and the rhs are different.
+  typedef TargetType Scalar;
+  typedef typename packet_traits<Scalar>::type Packet;
+  typedef typename traits<XprType>::StorageKind StorageKind;
+  typedef typename traits<XprType>::Index Index;
+  typedef typename XprType::Nested Nested;
+  typedef typename remove_reference<Nested>::type _Nested;
+  static const int NumDimensions = traits<XprType>::NumDimensions;
+  static const int Layout = traits<XprType>::Layout;
+  enum { Flags = 0 };
+};
+
+template<typename TargetType, typename XprType>
+struct eval<TensorConversionOp<TargetType, XprType>, Eigen::Dense>
+{
+  typedef const TensorConversionOp<TargetType, XprType>& type;
+};
+
+template<typename TargetType, typename XprType>
+struct nested<TensorConversionOp<TargetType, XprType>, 1, typename eval<TensorConversionOp<TargetType, XprType> >::type>
+{
+  typedef TensorConversionOp<TargetType, XprType> type;
+};
+
+} // end namespace internal
+
+
+template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio>
+struct PacketConverter {
+  PacketConverter(const TensorEvaluator& impl)
+      : m_impl(impl) {}
+
+  template<int LoadMode, typename Index>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
+    return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<LoadMode>(index));
+  }
+
+ private:
+  const TensorEvaluator& m_impl;
+};
+
+
+template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
+struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 2, 1> {
+  PacketConverter(const TensorEvaluator& impl)
+      : m_impl(impl) {}
+
+  template<int LoadMode, typename Index>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
+    const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
+
+    SrcPacket src1 = m_impl.template packet<LoadMode>(index);
+    SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
+    TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2);
+    return result;
+  }
+
+ private:
+  const TensorEvaluator& m_impl;
+};
+
+
+template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
+struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> {
+  PacketConverter(const TensorEvaluator& impl)
+      : m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {}
+
+  template<int LoadMode, typename Index>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
+    const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
+    if (index + SrcPacketSize < m_maxIndex) {
+      return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<LoadMode>(index));
+    } else {
+      const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
+      EIGEN_ALIGN_DEFAULT typename internal::unpacket_traits<TgtPacket>::type values[TgtPacketSize];
+      for (int i = 0; i < TgtPacketSize; ++i) {
+        values[i] = m_impl.coeff(index+i);
+      }
+      TgtPacket rslt = internal::pload<TgtPacket>(values);
+      return rslt;
+    }
+  }
+
+ private:
+  const TensorEvaluator& m_impl;
+  const typename TensorEvaluator::Index m_maxIndex;
+};
+
+template<typename TargetType, typename XprType>
+class TensorConversionOp : public TensorBase<TensorConversionOp<TargetType, XprType>, ReadOnlyAccessors>
+{
+  public:
+    typedef typename internal::traits<TensorConversionOp>::Scalar Scalar;
+    typedef typename internal::traits<TensorConversionOp>::Packet Packet;
+    typedef typename internal::traits<TensorConversionOp>::StorageKind StorageKind;
+    typedef typename internal::traits<TensorConversionOp>::Index Index;
+    typedef typename internal::nested<TensorConversionOp>::type Nested;
+    typedef typename XprType::CoeffReturnType CoeffReturnType;
+    typedef typename XprType::PacketReturnType PacketReturnType;
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConversionOp(const XprType& xpr)
+        : m_xpr(xpr) {}
+
+    EIGEN_DEVICE_FUNC
+    const typename internal::remove_all<typename XprType::Nested>::type&
+    expression() const { return m_xpr; }
+
+  protected:
+    typename XprType::Nested m_xpr;
+};
+
+
+
+
+// Eval as rvalue
+template<typename TargetType, typename ArgType, typename Device>
+struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
+{
+  typedef TensorConversionOp<TargetType, ArgType> XprType;
+  typedef typename XprType::Index Index;
+  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
+  typedef TargetType Scalar;
+  typedef TargetType CoeffReturnType;
+  typedef typename internal::remove_all<typename internal::traits<ArgType>::Scalar>::type SrcType;
+  typedef typename internal::traits<XprType>::Packet PacketReturnType;
+  typedef typename internal::packet_traits<SrcType>::type PacketSourceType;
+
+  enum {
+    IsAligned = false,
+    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess && internal::type_casting_traits<SrcType, TargetType>::VectorizedCast,
+    Layout = TensorEvaluator<ArgType, Device>::Layout,
+  };
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+    : m_impl(op.expression(), device)
+  {
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/)
+  {
+    m_impl.evalSubExprsIfNeeded(NULL);
+    return true;
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup()
+  {
+    m_impl.cleanup();
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
+  {
+    internal::scalar_cast_op<SrcType, TargetType> converter;
+    return converter(m_impl.coeff(index));
+  }
+
+  template<int LoadMode>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
+  {
+    const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
+    const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
+    PacketConverter<TensorEvaluator<ArgType, Device>, PacketSourceType, PacketReturnType,
+                    SrcCoeffRatio, TgtCoeffRatio> converter(m_impl);
+    return converter.template packet<LoadMode>(index);
+  }
+
+  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
+
+  protected:
+    TensorEvaluator<ArgType, Device> m_impl;
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
index 7bec2b10a..3607fe3fe 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
@@ -25,6 +25,7 @@ template<typename IfXprType, typename ThenXprType, typename ElseXprType> class T
 template<typename Op, typename Dims, typename XprType> class TensorReductionOp;
 template<typename Axis, typename LhsXprType, typename RhsXprType> class TensorConcatenationOp;
 template<typename Indices, typename LhsXprType, typename RhsXprType> class TensorContractionOp;
+template<typename TargetType, typename XprType> class TensorConversionOp;
 template<typename Indices, typename InputXprType, typename KernelXprType> class TensorConvolutionOp;
 template<typename PatchDim, typename XprType> class TensorPatchOp;
 template<DenseIndex Rows, DenseIndex Cols, typename XprType> class TensorImagePatchOp;
diff --git a/unsupported/test/cxx11_tensor_casts.cpp b/unsupported/test/cxx11_tensor_casts.cpp
index 4f7ff7067..f53679d7b 100644
--- a/unsupported/test/cxx11_tensor_casts.cpp
+++ b/unsupported/test/cxx11_tensor_casts.cpp
@@ -17,7 +17,7 @@ using Eigen::array;
 static void test_simple_cast()
 {
   Tensor<float, 2> ftensor(20,30);
-  ftensor.setRandom();
+  ftensor = ftensor.random() * 100.f;
   Tensor<char, 2> chartensor(20,30);
   chartensor.setRandom();
   Tensor<std::complex<float>, 2> cplextensor(20,30);
@@ -35,7 +35,61 @@ static void test_simple_cast()
 }
 
 
+static void test_vectorized_cast()
+{
+  Tensor<int, 2> itensor(20,30);
+  itensor = itensor.random() / 1000;
+  Tensor<float, 2> ftensor(20,30);
+  ftensor.setRandom();
+  Tensor<double, 2> dtensor(20,30);
+  dtensor.setRandom();
+
+  ftensor = itensor.cast<float>();
+  dtensor = itensor.cast<double>();
+
+  for (int i = 0; i < 20; ++i) {
+    for (int j = 0; j < 30; ++j) {
+      VERIFY_IS_EQUAL(itensor(i,j), static_cast<int>(ftensor(i,j)));
+      VERIFY_IS_EQUAL(dtensor(i,j), static_cast<double>(ftensor(i,j)));
+    }
+  }
+}
+
+
+static void test_big_to_small_type_cast()
+{
+  Tensor<double, 2> dtensor(20, 30);
+  dtensor.setRandom();
+  Tensor<float, 2> ftensor(20, 30);
+  ftensor = dtensor.cast<float>();
+
+  for (int i = 0; i < 20; ++i) {
+    for (int j = 0; j < 30; ++j) {
+      VERIFY_IS_APPROX(dtensor(i,j), static_cast<double>(ftensor(i,j)));
+    }
+  }
+}
+
+
+static void test_small_to_big_type_cast()
+{
+  Tensor<float, 2> ftensor(20, 30);
+  ftensor.setRandom();
+  Tensor<double, 2> dtensor(20, 30);
+  dtensor = ftensor.cast<double>();
+
+  for (int i = 0; i < 20; ++i) {
+    for (int j = 0; j < 30; ++j) {
+      VERIFY_IS_APPROX(dtensor(i,j), static_cast<double>(ftensor(i,j)));
+    }
+  }
+}
+
+
 void test_cxx11_tensor_casts()
 {
   CALL_SUBTEST(test_simple_cast());
+  CALL_SUBTEST(test_vectorized_cast());
+  CALL_SUBTEST(test_big_to_small_type_cast());
+  CALL_SUBTEST(test_small_to_big_type_cast());
 }
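
A minimal usage sketch (not part of the patch itself): it assumes Eigen is built from a tree that contains this change, with SSE or AVX enabled so that type_casting_traits<int, float>::VectorizedCast selects the packet path; the tensor names and sizes below are illustrative only.

    // Illustrative only: exercises the vectorized cast() path introduced above.
    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
      Eigen::Tensor<int, 2> itensor(20, 30);
      itensor.setRandom();

      // cast<float>() now returns a TensorConversionOp; with SSE enabled its
      // evaluator converts whole packets via pcast<Packet4i, Packet4f> instead
      // of casting coefficient by coefficient.
      Eigen::Tensor<float, 2> ftensor = itensor.cast<float>();

      std::cout << ftensor(0, 0) << std::endl;
      return 0;
    }

Casts whose packet sizes differ go through the PacketConverter specializations instead: the 2:1 case (e.g. double to float) consumes two source packets per target packet, and the 1:2 case (e.g. float to double) checks m_maxIndex and falls back to scalar loads near the end of the buffer.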