Mirror of https://gitlab.com/libeigen/eigen.git
Hey, finally the copyCoeff stuff is not only used to implement swap anymore :)
Add an internal pseudo expression that allows operators like +=, -= and *= to be optimized using the copyCoeff mechanism. This makes it easy to enforce aligned loads for the destination matrix everywhere.
This commit is contained in:
parent e3d890bc5a
commit eb8f450071
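Before the diff itself, a minimal stand-alone sketch of the pseudo-expression idea (names like SelfOp and add_assign are mine, not Eigen's): the destination is wrapped, and assigning the source to the wrapper applies dst = f(dst, src) coefficient-wise, instead of materializing *this + other and copying it back. Because the wrapper is the destination of the assignment, a vectorized implementation can choose its load/store alignment relative to dst.

```cpp
#include <cstddef>
#include <functional>
#include <vector>

// dst[i] = f(dst[i], src[i]) for every coefficient of dst.
template <typename Scalar, typename Func>
struct SelfOp {
  std::vector<Scalar>& dst;
  Func f;
  void assign(const std::vector<Scalar>& src) {
    for (std::size_t i = 0; i < dst.size(); ++i)
      dst[i] = f(dst[i], src[i]);
  }
};

// Mirrors the new MatrixBase::operator+= in the diff below:
// build the wrapper, then "assign" the right-hand side to it.
template <typename Scalar>
void add_assign(std::vector<Scalar>& a, const std::vector<Scalar>& b) {
  SelfOp<Scalar, std::plus<Scalar> > tmp{a, std::plus<Scalar>()};
  tmp.assign(b);
}
```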
@@ -167,6 +167,7 @@ struct Dense {};
 #include "src/Core/ReturnByValue.h"
 #include "src/Core/NoAlias.h"
 #include "src/Core/Matrix.h"
+#include "src/Core/SelfCwiseBinaryOp.h"
 #include "src/Core/CwiseBinaryOp.h"
 #include "src/Core/CwiseUnaryOp.h"
 #include "src/Core/CwiseNullaryOp.h"
@@ -178,7 +178,9 @@ template<typename OtherDerived>
 EIGEN_STRONG_INLINE Derived &
 MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other)
 {
-  return *this = *this - other;
+  SelfCwiseBinaryOp<ei_scalar_difference_op<Scalar>, Derived> tmp(derived());
+  tmp = other;
+  return derived();
 }
 
 /** replaces \c *this by \c *this + \a other.
@@ -190,7 +192,9 @@ template<typename OtherDerived>
 EIGEN_STRONG_INLINE Derived &
 MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
 {
-  return *this = *this + other;
+  SelfCwiseBinaryOp<ei_scalar_sum_op<Scalar>, Derived> tmp(derived());
+  tmp = other;
+  return derived();
 }
 
 #endif // EIGEN_CWISE_BINARY_OP_H
@@ -33,9 +33,17 @@ EIGEN_STRONG_INLINE const CwiseUnaryOp<ei_scalar_opposite_op<typename ei_traits<
 operator-() const { return derived(); }
 
 EIGEN_STRONG_INLINE Derived& operator*=(const Scalar& other)
-{ return *this = *this * other; }
+{
+  SelfCwiseBinaryOp<ei_scalar_product_op<Scalar>, Derived> tmp(derived());
+  tmp = PlainMatrixType::Constant(rows(),cols(),other);
+  return derived();
+}
 EIGEN_STRONG_INLINE Derived& operator/=(const Scalar& other)
-{ return *this = *this / other; }
+{
+  SelfCwiseBinaryOp<typename ei_meta_if<NumTraits<Scalar>::HasFloatingPoint,ei_scalar_product_op<Scalar>,ei_scalar_quotient_op<Scalar> >::ret, Derived> tmp(derived());
+  tmp = PlainMatrixType::Constant(rows(),cols(), NumTraits<Scalar>::HasFloatingPoint ? Scalar(1)/other : other);
+  return derived();
+}
 
 /** \returns an expression of \c *this scaled by the scalar factor \a scalar */
 EIGEN_STRONG_INLINE const ScalarMultipleReturnType
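Note how the operator/= hunk selects multiplication by the reciprocal when the scalar type is floating point (via ei_meta_if and the Scalar(1)/other constant): one division total instead of one per coefficient, and the per-coefficient multiply vectorizes well. A stand-alone sketch of the same trade-off, assuming C++11 <type_traits> rather than Eigen's ei_meta_if:

```cpp
#include <type_traits>

// Floating point: one scalar division up front, then a cheap multiply per
// coefficient. Integer types keep exact per-coefficient division.
template <typename Scalar>
void scale_down(Scalar* x, int n, Scalar s)
{
  if (std::is_floating_point<Scalar>::value) {
    const Scalar inv = Scalar(1) / s;        // single division
    for (int i = 0; i < n; ++i) x[i] *= inv;
  } else {
    for (int i = 0; i < n; ++i) x[i] /= s;
  }
}
```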
@@ -34,6 +34,22 @@
  * of generic vectorized code.
  */
 
+#ifndef EIGEN_DEBUG_ALIGNED_LOAD
+#define EIGEN_DEBUG_ALIGNED_LOAD
+#endif
+
+#ifndef EIGEN_DEBUG_UNALIGNED_LOAD
+#define EIGEN_DEBUG_UNALIGNED_LOAD
+#endif
+
+#ifndef EIGEN_DEBUG_ALIGNED_STORE
+#define EIGEN_DEBUG_ALIGNED_STORE
+#endif
+
+#ifndef EIGEN_DEBUG_UNALIGNED_STORE
+#define EIGEN_DEBUG_UNALIGNED_STORE
+#endif
+
 struct ei_default_packet_traits
 {
   enum {
@@ -44,13 +60,13 @@ struct ei_default_packet_traits
     HasAbs = 1,
     HasMin = 1,
     HasMax = 1,
 
     HasDiv = 0,
     HasSqrt = 0,
     HasExp = 0,
     HasLog = 0,
     HasPow = 0,
 
     HasSin = 0,
     HasCos = 0,
     HasTan = 0,
@@ -128,7 +144,7 @@ ei_pxor(const Packet& a, const Packet& b) { return a ^ b; }
 /** \internal \returns the bitwise andnot of \a a and \a b */
 template<typename Packet> inline Packet
 ei_pandnot(const Packet& a, const Packet& b) { return a & (!b); }
 
 /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
 template<typename Scalar> inline typename ei_packet_traits<Scalar>::type
 ei_pload(const Scalar* from) { return *from; }
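These four hooks expand to nothing by default, so they cost nothing in normal builds. A translation unit can define them as statement blocks before including Eigen to count every packet access — which is exactly how the new test/unalignedcount.cpp at the end of this diff uses them. A minimal sketch:

```cpp
// Counters and hooks must be defined before any Eigen header is included.
static int nb_load = 0, nb_loadu = 0, nb_store = 0, nb_storeu = 0;
#define EIGEN_DEBUG_ALIGNED_LOAD    { nb_load++;   }
#define EIGEN_DEBUG_UNALIGNED_LOAD  { nb_loadu++;  }
#define EIGEN_DEBUG_ALIGNED_STORE   { nb_store++;  }
#define EIGEN_DEBUG_UNALIGNED_STORE { nb_storeu++; }
#include <Eigen/Core>
```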
@@ -34,13 +34,13 @@
  * and type \c AlignedDerivedType in their respective ei_traits<> specialization structure.
  * The value of \c PacketAccess can be either \b AsRequested, or set to \b EnforceAlignedAccess which
  * enforces both aligned loads and stores.
  *
  * \c EnforceAlignedAccess is automatically set in expressions such as
  * \code A += B; \endcode where A is either a Block or a Map. Here,
  * this expression is transformed into \code A = A_with_EnforceAlignedAccess + B; \endcode
  * avoiding unaligned loads from A. Indeed, since Eigen's packet evaluation mechanism
  * automatically aligns to the destination matrix, we know that loads from A will be aligned too.
  *
  * The type \c AlignedDerivedType should correspond to the equivalent expression type
  * with \c PacketAccess set to \c EnforceAlignedAccess.
  *
@@ -197,32 +197,6 @@ template<typename Derived> class MapBase
     using Base::operator=;
     using Base::operator*=;
 
-    // FIXME it seems VS does not allow to do "using Base::operator+="
-    // and to overload operator+= at the same time, therefore we have to
-    // explicitly add these two overloads.
-    // Maybe there exists a better solution though.
-    template<typename ProductDerived, typename Lhs,typename Rhs>
-    Derived& operator+=(const Flagged<ProductBase<ProductDerived,Lhs,Rhs>, 0, EvalBeforeAssigningBit>& other)
-    { return Base::operator+=(other); }
-
-    template<typename ProductDerived, typename Lhs,typename Rhs>
-    Derived& operator-=(const Flagged<ProductBase<ProductDerived,Lhs,Rhs>, 0, EvalBeforeAssigningBit>& other)
-    { return Base::operator-=(other); }
-
-    template<typename OtherDerived>
-    Derived& operator+=(const MatrixBase<OtherDerived>& other)
-    { return derived() = forceAligned() + other; }
-
-    template<typename OtherDerived>
-    Derived& operator-=(const MatrixBase<OtherDerived>& other)
-    { return derived() = forceAligned() - other; }
-
-    Derived& operator*=(const Scalar& other)
-    { return derived() = forceAligned() * other; }
-
-    Derived& operator/=(const Scalar& other)
-    { return derived() = forceAligned() / other; }
-
   protected:
 
     void checkDataAlignment() const
@@ -230,7 +204,7 @@ template<typename Derived> class MapBase
       ei_assert( ((!(ei_traits<Derived>::Flags&AlignedBit))
                  || ((std::size_t(m_data)&0xf)==0)) && "data is not aligned");
     }
 
     const Scalar* EIGEN_RESTRICT m_data;
     const ei_int_if_dynamic<RowsAtCompileTime> m_rows;
     const ei_int_if_dynamic<ColsAtCompileTime> m_cols;
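With those MapBase overloads removed, an in-place update of a Map goes through the same MatrixBase operators as any other expression, and hence through SelfCwiseBinaryOp. A usage sketch (buffer and function names are mine, not from the patch):

```cpp
#include <Eigen/Core>
using namespace Eigen;

void update_buffer(float* aligned_data, int size, const VectorXf& b)
{
  Map<VectorXf, Aligned> A(aligned_data, size);
  A += b;     // routed through MatrixBase::operator+= / SelfCwiseBinaryOp;
              // per the MapBase docs above, accesses to A stay aligned
  A *= 0.5f;  // scalar case: combined with a Constant() expression
}
```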
Eigen/src/Core/SelfCwiseBinaryOp.h (new file, 113 lines)
@@ -0,0 +1,113 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.

#ifndef EIGEN_SELFCWISEBINARYOP_H
#define EIGEN_SELFCWISEBINARYOP_H

/** \class SelfCwiseBinaryOp
  *
  * \internal
  *
  * \brief Internal helper class for optimizing operators like +=, -=
  */
template<typename BinaryOp, typename MatrixType>
struct ei_traits<SelfCwiseBinaryOp<BinaryOp,MatrixType> > : ei_traits<MatrixType> {};

template<typename BinaryOp, typename MatrixType> class SelfCwiseBinaryOp
  : public MatrixBase<SelfCwiseBinaryOp<BinaryOp,MatrixType> >
{
  public:

    EIGEN_GENERIC_PUBLIC_INTERFACE(SelfCwiseBinaryOp)
    typedef typename ei_packet_traits<Scalar>::type Packet;

    using Base::operator=;

    inline SelfCwiseBinaryOp(MatrixType& xpr, const BinaryOp& func = BinaryOp()) : m_matrix(xpr), m_functor(func) {}

    inline int rows() const { return m_matrix.rows(); }
    inline int cols() const { return m_matrix.cols(); }
    inline int stride() const { return m_matrix.stride(); }

    // note that this function is needed by assign to correctly align loads/stores
    // TODO make Assign use .data()
    inline Scalar& coeffRef(int row, int col)
    {
      return m_matrix.const_cast_derived().coeffRef(row, col);
    }

    // note that this function is needed by assign to correctly align loads/stores
    // TODO make Assign use .data()
    inline Scalar& coeffRef(int index)
    {
      return m_matrix.const_cast_derived().coeffRef(index);
    }

    template<typename OtherDerived>
    void copyCoeff(int row, int col, const MatrixBase<OtherDerived>& other)
    {
      OtherDerived& _other = other.const_cast_derived();
      ei_internal_assert(row >= 0 && row < rows()
                         && col >= 0 && col < cols());
      Scalar& tmp = m_matrix.coeffRef(row,col);
      tmp = m_functor(tmp, _other.coeff(row,col));
    }

    template<typename OtherDerived>
    void copyCoeff(int index, const MatrixBase<OtherDerived>& other)
    {
      OtherDerived& _other = other.const_cast_derived();
      ei_internal_assert(index >= 0 && index < m_matrix.size());
      Scalar& tmp = m_matrix.coeffRef(index);
      tmp = m_functor(tmp, _other.coeff(index));
    }

    template<typename OtherDerived, int StoreMode, int LoadMode>
    void copyPacket(int row, int col, const MatrixBase<OtherDerived>& other)
    {
      OtherDerived& _other = other.const_cast_derived();
      ei_internal_assert(row >= 0 && row < rows()
                         && col >= 0 && col < cols());
      m_matrix.template writePacket<StoreMode>(row, col,
        m_functor.packetOp(m_matrix.template packet<StoreMode>(row, col),_other.template packet<LoadMode>(row, col)) );
    }

    template<typename OtherDerived, int StoreMode, int LoadMode>
    void copyPacket(int index, const MatrixBase<OtherDerived>& other)
    {
      OtherDerived& _other = other.const_cast_derived();
      ei_internal_assert(index >= 0 && index < m_matrix.size());
      m_matrix.template writePacket<StoreMode>(index,
        m_functor.packetOp(m_matrix.template packet<StoreMode>(index),_other.template packet<LoadMode>(index)) );
    }

  protected:
    MatrixType& m_matrix;
    const BinaryOp& m_functor;

  private:
    SelfCwiseBinaryOp& operator=(const SelfCwiseBinaryOp&);
};

#endif // EIGEN_SELFCWISEBINARYOP_H
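At the packet level, the copyPacket members above boil down to a read-modify-write in which only the destination's alignment is guaranteed. A stand-alone SSE sketch of the float += case (loop structure mine, not Eigen's):

```cpp
#include <xmmintrin.h>

// dst is assumed 16-byte aligned; src alignment is unknown.
void add_inplace_sse(float* dst, const float* src, int n)
{
  int i = 0;
  for (; i + 4 <= n; i += 4) {
    __m128 d = _mm_load_ps(dst + i);           // aligned load from dst
    __m128 s = _mm_loadu_ps(src + i);          // possibly unaligned load
    _mm_store_ps(dst + i, _mm_add_ps(d, s));   // aligned store to dst
  }
  for (; i < n; ++i)                           // scalar tail, like copyCoeff
    dst[i] += src[i];
}
```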
@@ -172,14 +172,14 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pandnot<Packet4f>(const Packet4f& a,
 template<> EIGEN_STRONG_INLINE Packet2d ei_pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(a,b); }
 template<> EIGEN_STRONG_INLINE Packet4i ei_pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(a,b); }
 
-template<> EIGEN_STRONG_INLINE Packet4f ei_pload<float>(const float* from) { return _mm_load_ps(from); }
-template<> EIGEN_STRONG_INLINE Packet2d ei_pload<double>(const double* from) { return _mm_load_pd(from); }
-template<> EIGEN_STRONG_INLINE Packet4i ei_pload<int>(const int* from) { return _mm_load_si128(reinterpret_cast<const Packet4i*>(from)); }
+template<> EIGEN_STRONG_INLINE Packet4f ei_pload<float>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet2d ei_pload<double>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet4i ei_pload<int>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const Packet4i*>(from)); }
 
 #if (!defined __GNUC__) && (!defined __ICC)
-template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) { return _mm_loadu_ps(from); }
-template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu<double>(const double* from) { return _mm_loadu_pd(from); }
-template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu<int>(const int* from) { return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from)); }
+template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu<double>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu<int>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from)); }
 #else
 // Fast unaligned loads. Note that here we cannot directly use intrinsics: this would
 // require pointer casting to incompatible pointer types and leads to invalid code
@@ -188,6 +188,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu<int>(const int* from) { return
 // TODO: do the same for MSVC (ICC is compatible)
 template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from)
 {
+  EIGEN_DEBUG_UNALIGNED_LOAD
   __m128 res;
   asm volatile ("movsd %[from0], %[r]" : [r] "=x" (res) : [from0] "m" (*from), [dummy] "m" (*(from+1)) );
   asm volatile ("movhps %[from2], %[r]" : [r] "+x" (res) : [from2] "m" (*(from+2)), [dummy] "m" (*(from+3)) );
@@ -195,6 +196,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from)
 }
 template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu(const double* from)
 {
+  EIGEN_DEBUG_UNALIGNED_LOAD
   __m128d res;
   asm volatile ("movsd %[from0], %[r]" : [r] "=x" (res) : [from0] "m" (*from) );
   asm volatile ("movhpd %[from1], %[r]" : [r] "+x" (res) : [from1] "m" (*(from+1)) );
@@ -202,6 +204,7 @@ template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu(const double* from)
 }
 template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from)
 {
+  EIGEN_DEBUG_UNALIGNED_LOAD
   __m128i res;
   asm volatile ("movsd %[from0], %[r]" : [r] "=x" (res) : [from0] "m" (*from), [dummy] "m" (*(from+1)) );
   asm volatile ("movhps %[from2], %[r]" : [r] "+x" (res) : [from2] "m" (*(from+2)), [dummy] "m" (*(from+3)) );
@@ -209,16 +212,17 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from)
 }
 #endif
 
-template<> EIGEN_STRONG_INLINE void ei_pstore<float>(float* to, const Packet4f& from) { _mm_store_ps(to, from); }
-template<> EIGEN_STRONG_INLINE void ei_pstore<double>(double* to, const Packet2d& from) { _mm_store_pd(to, from); }
-template<> EIGEN_STRONG_INLINE void ei_pstore<int>(int* to, const Packet4i& from) { _mm_store_si128(reinterpret_cast<Packet4i*>(to), from); }
+template<> EIGEN_STRONG_INLINE void ei_pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void ei_pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void ei_pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<Packet4i*>(to), from); }
 
 template<> EIGEN_STRONG_INLINE void ei_pstoreu<double>(double* to, const Packet2d& from) {
+  EIGEN_DEBUG_UNALIGNED_STORE
   _mm_storel_pd((to), from);
   _mm_storeh_pd((to+1), from);
 }
-template<> EIGEN_STRONG_INLINE void ei_pstoreu<float>(float* to, const Packet4f& from) { ei_pstoreu((double*)to, _mm_castps_pd(from)); }
-template<> EIGEN_STRONG_INLINE void ei_pstoreu<int>(int* to, const Packet4i& from) { ei_pstoreu((double*)to, _mm_castsi128_pd(from)); }
+template<> EIGEN_STRONG_INLINE void ei_pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu((double*)to, _mm_castps_pd(from)); }
+template<> EIGEN_STRONG_INLINE void ei_pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu((double*)to, _mm_castsi128_pd(from)); }
 
 #if defined(_MSC_VER) && (_MSC_VER <= 1500) && defined(_WIN64)
 // The temporary variable fixes an internal compilation error.
@@ -48,6 +48,7 @@ template<typename NullaryOp, typename MatrixType> class CwiseNullaryOp;
 template<typename UnaryOp, typename MatrixType> class CwiseUnaryOp;
 template<typename ViewOp, typename MatrixType> class CwiseUnaryView;
 template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
+template<typename BinOp, typename MatrixType> class SelfCwiseBinaryOp;
 template<typename Derived, typename Lhs, typename Rhs> class ProductBase;
 
 template<typename Derived> class DiagonalBase;
@@ -99,6 +99,7 @@ ei_add_test(vectorization_logic)
 ei_add_test(basicstuff)
 ei_add_test(linearstructure)
 ei_add_test(cwiseop)
+ei_add_test(unalignedcount)
 ei_add_test(redux)
 ei_add_test(visitor)
 ei_add_test(product_small)
@@ -35,7 +35,7 @@ template<typename VectorType> void map_class(const VectorType& m)
   Scalar* array2 = ei_aligned_new<Scalar>(size);
   Scalar* array3 = new Scalar[size+1];
   Scalar* array3unaligned = size_t(array3)%16 == 0 ? array3+1 : array3;
 
   Map<VectorType, Aligned>(array1, size) = VectorType::Random(size);
   Map<VectorType, Aligned>(array2, size) = Map<VectorType,Aligned>(array1, size);
   Map<VectorType>(array3unaligned, size) = Map<VectorType>(array1, size);
@@ -62,7 +62,7 @@ template<typename VectorType> void map_static_methods(const VectorType& m)
   Scalar* array2 = ei_aligned_new<Scalar>(size);
   Scalar* array3 = new Scalar[size+1];
   Scalar* array3unaligned = size_t(array3)%16 == 0 ? array3+1 : array3;
 
   VectorType::MapAligned(array1, size) = VectorType::Random(size);
   VectorType::Map(array2, size) = VectorType::Map(array1, size);
   VectorType::Map(array3unaligned, size) = VectorType::Map(array1, size);
@@ -71,7 +71,7 @@ template<typename VectorType> void map_static_methods(const VectorType& m)
   VectorType ma3 = VectorType::Map(array3unaligned, size);
   VERIFY_IS_APPROX(ma1, ma2);
   VERIFY_IS_APPROX(ma1, ma3);
 
   ei_aligned_delete(array1, size);
   ei_aligned_delete(array2, size);
   delete[] array3;
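A side note on the test fixture above: array3unaligned is deliberately mis-aligned by over-allocating one element and offsetting when the fresh allocation happens to land on a 16-byte boundary. The same trick in isolation (function name is mine):

```cpp
#include <cstddef>

// raw points to at least size+1 elements; if it happens to be 16-byte
// aligned, skip one 4-byte element so the result is guaranteed NOT aligned.
float* make_unaligned(float* raw)
{
  return (reinterpret_cast<std::size_t>(raw) % 16 == 0) ? raw + 1 : raw;
}
```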
test/unalignedcount.cpp (new file, 56 lines)
@@ -0,0 +1,56 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.

static int nb_load;
static int nb_loadu;
static int nb_store;
static int nb_storeu;

#define EIGEN_DEBUG_ALIGNED_LOAD { nb_load++; }
#define EIGEN_DEBUG_UNALIGNED_LOAD { nb_loadu++; }
#define EIGEN_DEBUG_ALIGNED_STORE { nb_store++; }
#define EIGEN_DEBUG_UNALIGNED_STORE { nb_storeu++; }

#define VERIFY_ALIGNED_UNALIGNED_COUNT(XPR,AL,UL,AS,US) {\
    nb_load = nb_loadu = nb_store = nb_storeu = 0; \
    XPR; \
    if(!(nb_load==AL && nb_loadu==UL && nb_store==AS && nb_storeu==US)) \
      std::cerr << " >> " << nb_load << ", " << nb_loadu << ", " << nb_store << ", " << nb_storeu << "\n"; \
    VERIFY( (#XPR) && nb_load==AL && nb_loadu==UL && nb_store==AS && nb_storeu==US ); \
  }


#include "main.h"

void test_unalignedcount()
{
#ifdef EIGEN_VECTORIZE_SSE
  VectorXf a(40), b(40);
  VERIFY_ALIGNED_UNALIGNED_COUNT(a += b, 20, 0, 10, 0);
  VERIFY_ALIGNED_UNALIGNED_COUNT(a.segment(0,40) += b.segment(0,40), 10, 10, 10, 0);
  VERIFY_ALIGNED_UNALIGNED_COUNT(a.segment(0,40) -= b.segment(0,40), 10, 10, 10, 0);
  VERIFY_ALIGNED_UNALIGNED_COUNT(a.segment(0,40) *= 3.5, 10, 0, 10, 0);
  VERIFY_ALIGNED_UNALIGNED_COUNT(a.segment(0,40) /= 3.5, 10, 0, 10, 0);
#endif
}
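The expected counts follow directly from the packet arithmetic (assuming 4-float SSE packets, so 40 coefficients = 10 packets):

```cpp
// a += b                : 10 aligned loads from a + 10 from b -> (20, 0)
//                         10 aligned stores to a              -> (10, 0)
// a.segment(0,40) += ...: the destination is still read and written with
//                         aligned packets (10 loads, 10 stores), but a
//                         segment of b carries no compile-time alignment
//                         guarantee, hence 10 unaligned loads -> (10, 10, 10, 0)
// *= and /=             : only the destination is touched     -> (10, 0, 10, 0)
```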