Update to latest default branch

Ville Kallioniemi 2016-01-21 23:08:54 -07:00
commit 9b6c72958a
122 changed files with 1684 additions and 1127 deletions

View File

@ -12,7 +12,6 @@
#include "src/Core/util/DisableStupidWarnings.h"
#include <complex.h>
extern "C" {
#include <pastix_nompi.h>
#include <pastix.h>

View File

@ -28,8 +28,8 @@ namespace internal {
*
* \brief Robust Cholesky decomposition of a matrix with pivoting
*
* \param MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition
* \param UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
* \tparam _MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition
* \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
* The other triangular part won't be read.
*
* Perform a robust Cholesky decomposition of a positive semidefinite or negative semidefinite

View File

@ -22,8 +22,8 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
*
* \brief Standard Cholesky decomposition (LL^T) of a matrix and associated features
*
* \param MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
* \param UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
* \tparam _MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
* \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
* The other triangular part won't be read.
*
* This class performs a LL^T Cholesky decomposition of a symmetric, positive definite
@ -436,10 +436,7 @@ void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
*
* \param bAndX represents both the right-hand side matrix b and result x.
*
* \returns true always! If you need to check for existence of solutions, use another decomposition like LU, QR, or SVD.
*
* This version avoids a copy when the right hand side matrix b is not
* needed anymore.
* This version avoids a copy when the right hand side matrix b is not needed anymore.
*
* \sa LLT::solve(), MatrixBase::llt()
*/
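For illustration, a minimal sketch of the in-place solve this comment describes (assumes the public Eigen 3 LLT API and <Eigen/Dense>; not part of the diff):

#include <Eigen/Dense>
void demo_llt_solve_in_place() {
  Eigen::Matrix2d A;
  A << 4.0, 1.0,
       1.0, 3.0;                       // symmetric positive definite
  Eigen::Vector2d b(1.0, 2.0);
  Eigen::LLT<Eigen::Matrix2d> llt(A);
  Eigen::Vector2d x = llt.solve(b);    // out-of-place: b is preserved
  llt.solveInPlace(b);                 // b now holds the solution, avoiding the copy
}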

View File

@ -12,7 +12,16 @@
namespace Eigen {
/** \class Array
namespace internal {
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
{
typedef ArrayXpr XprKind;
typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase;
};
}
/** \class Array
* \ingroup Core_Module
*
* \brief General-purpose arrays with easy API for coefficient-wise operations
@ -26,21 +35,12 @@ namespace Eigen {
*
* See documentation of class Matrix for detailed information on the template parameters
* storage layout.
*
*
* This class can be extended with the help of the plugin mechanism described on the page
* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN.
*
* \sa \ref TutorialArrayClass, \ref TopicClassHierarchy
* \sa \blank \ref TutorialArrayClass, \ref TopicClassHierarchy
*/
namespace internal {
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
{
typedef ArrayXpr XprKind;
typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase;
};
}
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
class Array
: public PlainObjectBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >

View File

@ -682,9 +682,9 @@ template< typename DstXprType, typename SrcXprType, typename Functor,
struct Assignment;
// The only purpose of this call_assignment() function is to deal with noalias() / AssumeAliasing and automatic transposition.
// Indeed, I (Gael) think that this concept of AssumeAliasing was a mistake, and it makes thing quite complicated.
// So this intermediate function removes everything related to AssumeAliasing such that Assignment
// The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition.
// Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes thing quite complicated.
// So this intermediate function removes everything related to "assume-aliasing" such that Assignment
// does not has to bother about these annoying details.
template<typename Dst, typename Src>
@ -698,21 +698,21 @@ EIGEN_DEVICE_FUNC void call_assignment(const Dst& dst, const Src& src)
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
}
// Deal with AssumeAliasing
// Deal with "assume-aliasing"
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<evaluator_traits<Src>::AssumeAliasing==1, void*>::type = 0)
EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
{
typename plain_matrix_type<Src>::type tmp(src);
call_assignment_no_alias(dst, tmp, func);
}
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<evaluator_traits<Src>::AssumeAliasing==0, void*>::type = 0)
EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
{
call_assignment_no_alias(dst, src, func);
}
// by-pass AssumeAliasing
// by-pass "assume-aliasing"
// When there is no aliasing, we require that 'dst' has been properly resized
template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
EIGEN_DEVICE_FUNC void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
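A rough sketch of what "assume-aliasing" means from the user side (assumes the usual dense API; not part of this change):

#include <Eigen/Dense>
void demo_assume_aliasing() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(3, 3);
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(3, 3);
  Eigen::MatrixXd C(3, 3);
  C = A * B;            // products "assume aliasing": the result goes through a temporary first
  C.noalias() = A * B;  // the NoAlias overload above bypasses that temporary
}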

View File

@ -161,15 +161,15 @@ class BandMatrixBase : public EigenBase<Derived>
*
* \brief Represents a rectangular matrix with a banded storage
*
* \param _Scalar Numeric type, i.e. float, double, int
* \param Rows Number of rows, or \b Dynamic
* \param Cols Number of columns, or \b Dynamic
* \param Supers Number of super diagonal
* \param Subs Number of sub diagonal
* \param _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint
* The former controls \ref TopicStorageOrders "storage order", and defaults to
* column-major. The latter controls whether the matrix represents a selfadjoint
* matrix in which case either Supers of Subs have to be null.
* \tparam _Scalar Numeric type, i.e. float, double, int
* \tparam _Rows Number of rows, or \b Dynamic
* \tparam _Cols Number of columns, or \b Dynamic
* \tparam _Supers Number of super diagonal
* \tparam _Subs Number of sub diagonal
* \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint
* The former controls \ref TopicStorageOrders "storage order", and defaults to
* column-major. The latter controls whether the matrix represents a selfadjoint
* matrix in which case either Supers of Subs have to be null.
*
* \sa class TridiagonalMatrix
*/
@ -302,9 +302,9 @@ class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsT
*
* \brief Represents a tridiagonal matrix with a compact banded storage
*
* \param _Scalar Numeric type, i.e. float, double, int
* \param Size Number of rows and cols, or \b Dynamic
* \param _Options Can be 0 or \b SelfAdjoint
* \tparam Scalar Numeric type, i.e. float, double, int
* \tparam Size Number of rows and cols, or \b Dynamic
* \tparam Options Can be 0 or \b SelfAdjoint
*
* \sa class BandMatrix
*/

View File

@ -13,41 +13,6 @@
namespace Eigen {
/** \class Block
* \ingroup Core_Module
*
* \brief Expression of a fixed-size or dynamic-size block
*
* \param XprType the type of the expression in which we are taking a block
* \param BlockRows the number of rows of the block we are taking at compile time (optional)
* \param BlockCols the number of columns of the block we are taking at compile time (optional)
* \param InnerPanel is true, if the block maps to a set of rows of a row major matrix or
* to set of columns of a column major matrix (optional). The parameter allows to determine
* at compile time whether aligned access is possible on the block expression.
*
* This class represents an expression of either a fixed-size or dynamic-size block. It is the return
* type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block<int,int>(Index,Index) and
* most of the time this is the only way it is used.
*
* However, if you want to directly maniputate block expressions,
* for instance if you want to write a function returning such an expression, you
* will need to use this class.
*
* Here is an example illustrating the dynamic case:
* \include class_Block.cpp
* Output: \verbinclude class_Block.out
*
* \note Even though this expression has dynamic size, in the case where \a XprType
* has fixed size, this expression inherits a fixed maximal size which means that evaluating
* it does not cause a dynamic memory allocation.
*
* Here is an example illustrating the fixed-size case:
* \include class_FixedBlock.cpp
* Output: \verbinclude class_FixedBlock.out
*
* \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock
*/
namespace internal {
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprType>
@ -101,6 +66,40 @@ template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool In
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, typename StorageKind> class BlockImpl;
/** \class Block
* \ingroup Core_Module
*
* \brief Expression of a fixed-size or dynamic-size block
*
* \tparam XprType the type of the expression in which we are taking a block
* \tparam BlockRows the number of rows of the block we are taking at compile time (optional)
* \tparam BlockCols the number of columns of the block we are taking at compile time (optional)
* \tparam InnerPanel is true, if the block maps to a set of rows of a row major matrix or
* to set of columns of a column major matrix (optional). The parameter allows to determine
* at compile time whether aligned access is possible on the block expression.
*
* This class represents an expression of either a fixed-size or dynamic-size block. It is the return
* type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block<int,int>(Index,Index) and
* most of the time this is the only way it is used.
*
* However, if you want to directly maniputate block expressions,
* for instance if you want to write a function returning such an expression, you
* will need to use this class.
*
* Here is an example illustrating the dynamic case:
* \include class_Block.cpp
* Output: \verbinclude class_Block.out
*
* \note Even though this expression has dynamic size, in the case where \a XprType
* has fixed size, this expression inherits a fixed maximal size which means that evaluating
* it does not cause a dynamic memory allocation.
*
* Here is an example illustrating the fixed-size case:
* \include class_FixedBlock.cpp
* Output: \verbinclude class_FixedBlock.out
*
* \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock
*/
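A small usage sketch of the two cases described above (assuming <Eigen/Dense>):

#include <Eigen/Dense>
void demo_block() {
  Eigen::MatrixXd m = Eigen::MatrixXd::Random(5, 5);
  Eigen::MatrixXd dyn = m.block(1, 2, 2, 3);   // dynamic-size 2x3 block starting at (1,2)
  Eigen::Matrix2d fix = m.block<2, 2>(0, 0);   // fixed-size block, sizes as template arguments
}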
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class Block
: public BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, typename internal::traits<XprType>::StorageKind>
{

View File

@ -63,10 +63,6 @@ struct evaluator_traits_base
// by default, get evaluator kind and shape from storage
typedef typename storage_kind_to_evaluator_kind<typename traits<T>::StorageKind>::Kind Kind;
typedef typename storage_kind_to_shape<typename traits<T>::StorageKind>::Shape Shape;
// 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a
// temporary; 0 if not.
static const int AssumeAliasing = 0;
};
// Default evaluator traits
@ -75,6 +71,10 @@ struct evaluator_traits : public evaluator_traits_base<T>
{
};
template<typename T, typename Shape = typename evaluator_traits<T>::Shape >
struct evaluator_assume_aliasing {
static const bool value = false;
};
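Under the new scheme an expression type opts in by partially specializing this trait instead of overriding evaluator_traits; a hypothetical sketch (MyXpr is a placeholder, mirroring the real specializations added later in this commit):

namespace Eigen { namespace internal {
template<typename Lhs, typename Rhs> struct MyXpr;  // hypothetical expression type
template<typename Lhs, typename Rhs>
struct evaluator_assume_aliasing<MyXpr<Lhs, Rhs> > {
  static const bool value = true;  // plain assignment from MyXpr goes through a temporary
};
}}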
// By default, we assume a unary expression:
template<typename T>

View File

@ -13,26 +13,6 @@
namespace Eigen {
/** \class CwiseBinaryOp
* \ingroup Core_Module
*
* \brief Generic expression where a coefficient-wise binary operator is applied to two expressions
*
* \param BinaryOp template functor implementing the operator
* \param Lhs the type of the left-hand side
* \param Rhs the type of the right-hand side
*
* This class represents an expression where a coefficient-wise binary operator is applied to two expressions.
* It is the return type of binary operators, by which we mean only those binary operators where
* both the left-hand side and the right-hand side are Eigen expressions.
* For example, the return type of matrix1+matrix2 is a CwiseBinaryOp.
*
* Most of the time, this is the only way that it is used, so you typically don't have to name
* CwiseBinaryOp types explicitly.
*
* \sa MatrixBase::binaryExpr(const MatrixBase<OtherDerived> &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp
*/
namespace internal {
template<typename BinaryOp, typename Lhs, typename Rhs>
struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
@ -74,6 +54,25 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
class CwiseBinaryOpImpl;
/** \class CwiseBinaryOp
* \ingroup Core_Module
*
* \brief Generic expression where a coefficient-wise binary operator is applied to two expressions
*
* \tparam BinaryOp template functor implementing the operator
* \tparam LhsType the type of the left-hand side
* \tparam RhsType the type of the right-hand side
*
* This class represents an expression where a coefficient-wise binary operator is applied to two expressions.
* It is the return type of binary operators, by which we mean only those binary operators where
* both the left-hand side and the right-hand side are Eigen expressions.
* For example, the return type of matrix1+matrix2 is a CwiseBinaryOp.
*
* Most of the time, this is the only way that it is used, so you typically don't have to name
* CwiseBinaryOp types explicitly.
*
* \sa MatrixBase::binaryExpr(const MatrixBase<OtherDerived> &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp
*/
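A brief usage sketch (assuming <Eigen/Dense>; not part of the diff):

#include <Eigen/Dense>
void demo_cwise_binary() {
  Eigen::ArrayXd a = Eigen::ArrayXd::LinSpaced(4, 0.0, 3.0);
  Eigen::ArrayXd b = Eigen::ArrayXd::Constant(4, 2.0);
  Eigen::ArrayXd sum = a + b;     // a + b is a CwiseBinaryOp, evaluated on assignment
  Eigen::ArrayXd mn  = a.min(b);  // another coefficient-wise binary expression
}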
template<typename BinaryOp, typename LhsType, typename RhsType>
class CwiseBinaryOp :
public CwiseBinaryOpImpl<

View File

@ -12,24 +12,6 @@
namespace Eigen {
/** \class CwiseNullaryOp
* \ingroup Core_Module
*
* \brief Generic expression of a matrix where all coefficients are defined by a functor
*
* \param NullaryOp template functor implementing the operator
* \param PlainObjectType the underlying plain matrix/array type
*
* This class represents an expression of a generic nullary operator.
* It is the return type of the Ones(), Zero(), Constant(), Identity() and Random() methods,
* and most of the time this is the only way it is used.
*
* However, if you want to write a function returning such an expression, you
* will need to use this class.
*
* \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr()
*/
namespace internal {
template<typename NullaryOp, typename PlainObjectType>
struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType>
@ -40,6 +22,23 @@ struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectT
};
}
/** \class CwiseNullaryOp
* \ingroup Core_Module
*
* \brief Generic expression of a matrix where all coefficients are defined by a functor
*
* \tparam NullaryOp template functor implementing the operator
* \tparam PlainObjectType the underlying plain matrix/array type
*
* This class represents an expression of a generic nullary operator.
* It is the return type of the Ones(), Zero(), Constant(), Identity() and Random() methods,
* and most of the time this is the only way it is used.
*
* However, if you want to write a function returning such an expression, you
* will need to use this class.
*
* \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr()
*/
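For illustration, the nullary expressions this documents (assuming <Eigen/Dense>):

#include <Eigen/Dense>
void demo_cwise_nullary() {
  Eigen::MatrixXd z = Eigen::MatrixXd::Zero(3, 3);           // all of these are CwiseNullaryOp
  Eigen::MatrixXd c = Eigen::MatrixXd::Constant(2, 4, 7.0);  // expressions; nothing is stored
  Eigen::Matrix3d i = Eigen::Matrix3d::Identity();           // until they are assigned
  Eigen::VectorXd r = Eigen::VectorXd::Random(5);
}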
template<typename NullaryOp, typename PlainObjectType>
class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type, internal::no_assignment_operator
{

View File

@ -13,26 +13,6 @@
namespace Eigen {
/** \class CwiseUnaryOp
* \ingroup Core_Module
*
* \brief Generic expression where a coefficient-wise unary operator is applied to an expression
*
* \param UnaryOp template functor implementing the operator
* \param XprType the type of the expression to which we are applying the unary operator
*
* This class represents an expression where a unary operator is applied to an expression.
* It is the return type of all operations taking exactly 1 input expression, regardless of the
* presence of other inputs such as scalars. For example, the operator* in the expression 3*matrix
* is considered unary, because only the right-hand side is an expression, and its
* return type is a specialization of CwiseUnaryOp.
*
* Most of the time, this is the only way that it is used, so you typically don't have to name
* CwiseUnaryOp types explicitly.
*
* \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp
*/
namespace internal {
template<typename UnaryOp, typename XprType>
struct traits<CwiseUnaryOp<UnaryOp, XprType> >
@ -52,6 +32,25 @@ struct traits<CwiseUnaryOp<UnaryOp, XprType> >
template<typename UnaryOp, typename XprType, typename StorageKind>
class CwiseUnaryOpImpl;
/** \class CwiseUnaryOp
* \ingroup Core_Module
*
* \brief Generic expression where a coefficient-wise unary operator is applied to an expression
*
* \tparam UnaryOp template functor implementing the operator
* \tparam XprType the type of the expression to which we are applying the unary operator
*
* This class represents an expression where a unary operator is applied to an expression.
* It is the return type of all operations taking exactly 1 input expression, regardless of the
* presence of other inputs such as scalars. For example, the operator* in the expression 3*matrix
* is considered unary, because only the right-hand side is an expression, and its
* return type is a specialization of CwiseUnaryOp.
*
* Most of the time, this is the only way that it is used, so you typically don't have to name
* CwiseUnaryOp types explicitly.
*
* \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp
*/
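A short sketch of the unary case mentioned above (assuming <Eigen/Dense>):

#include <Eigen/Dense>
void demo_cwise_unary() {
  Eigen::MatrixXd m = Eigen::MatrixXd::Random(3, 3);
  Eigen::MatrixXd s = 3.0 * m;          // 3.0*m is a CwiseUnaryOp (scalar multiple of m)
  Eigen::MatrixXd n = (-m).cwiseAbs();  // unary expressions compose before evaluation
}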
template<typename UnaryOp, typename XprType>
class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind>, internal::no_assignment_operator
{

View File

@ -12,20 +12,6 @@
namespace Eigen {
/** \class CwiseUnaryView
* \ingroup Core_Module
*
* \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector
*
* \param ViewOp template functor implementing the view
* \param MatrixType the type of the matrix we are applying the unary operator
*
* This class represents a lvalue expression of a generic unary view operator of a matrix or a vector.
* It is the return type of real() and imag(), and most of the time this is the only way it is used.
*
* \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp
*/
namespace internal {
template<typename ViewOp, typename MatrixType>
struct traits<CwiseUnaryView<ViewOp, MatrixType> >
@ -55,6 +41,19 @@ struct traits<CwiseUnaryView<ViewOp, MatrixType> >
template<typename ViewOp, typename MatrixType, typename StorageKind>
class CwiseUnaryViewImpl;
/** \class CwiseUnaryView
* \ingroup Core_Module
*
* \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector
*
* \tparam ViewOp template functor implementing the view
* \tparam MatrixType the type of the matrix we are applying the unary operator
*
* This class represents a lvalue expression of a generic unary view operator of a matrix or a vector.
* It is the return type of real() and imag(), and most of the time this is the only way it is used.
*
* \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp
*/
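A minimal sketch of the lvalue behaviour of real()/imag() (assuming <Eigen/Dense>):

#include <Eigen/Dense>
void demo_cwise_unary_view() {
  Eigen::MatrixXcd m = Eigen::MatrixXcd::Random(2, 2);
  Eigen::MatrixXd re = m.real();            // read through the view
  m.imag() = Eigen::MatrixXd::Zero(2, 2);   // the view is an lvalue: writes back into m
}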
template<typename ViewOp, typename MatrixType>
class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename internal::traits<MatrixType>::StorageKind>
{

View File

@ -191,19 +191,31 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE CoeffReturnType
y() const { return (*this)[1]; }
y() const
{
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);
return (*this)[1];
}
/** equivalent to operator[](2). */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE CoeffReturnType
z() const { return (*this)[2]; }
z() const
{
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);
return (*this)[2];
}
/** equivalent to operator[](3). */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE CoeffReturnType
w() const { return (*this)[3]; }
w() const
{
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);
return (*this)[3];
}
/** \internal
* \returns the packet of coefficients starting at the given row and column. It is your responsibility
@ -424,19 +436,31 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar&
y() { return (*this)[1]; }
y()
{
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);
return (*this)[1];
}
/** equivalent to operator[](2). */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar&
z() { return (*this)[2]; }
z()
{
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);
return (*this)[2];
}
/** equivalent to operator[](3). */
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar&
w() { return (*this)[3]; }
w()
{
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);
return (*this)[3];
}
};
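With the added static asserts, out-of-range x()/y()/z()/w() accesses are rejected at compile time; a sketch (assuming <Eigen/Dense>):

#include <Eigen/Dense>
void demo_xyzw_access() {
  Eigen::Vector3f v(1.f, 2.f, 3.f);
  float zc = v.z();    // OK: Vector3f has at least 3 coefficients
  v.y() = 5.f;         // write access is checked the same way
  // Eigen::Vector2f u; u.z();  // would now fail with OUT_OF_RANGE_ACCESS
}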
/** \brief Base class providing direct read-only coefficient access to matrices and arrays.

View File

@ -99,11 +99,13 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
template<typename Derived>
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
{
EIGEN_USING_STD_MATH(sqrt)
return sqrt(squaredNorm());
return numext::sqrt(squaredNorm());
}
/** \returns an expression of the quotient of *this by its own norm.
/** \returns an expression of the quotient of \c *this by its own norm.
*
* \warning If the input vector is too small (i.e., this->norm()==0),
* then this function returns a copy of the input.
*
* \only_for_vectors
*
@ -115,19 +117,29 @@ MatrixBase<Derived>::normalized() const
{
typedef typename internal::nested_eval<Derived,2>::type _Nested;
_Nested n(derived());
return n / n.norm();
RealScalar z = n.squaredNorm();
// NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
if(z>RealScalar(0))
return n / numext::sqrt(z);
else
return n;
}
/** Normalizes the vector, i.e. divides it by its own norm.
*
* \only_for_vectors
*
* \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged.
*
* \sa norm(), normalized()
*/
template<typename Derived>
inline void MatrixBase<Derived>::normalize()
{
*this /= norm();
RealScalar z = squaredNorm();
// NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
if(z>RealScalar(0))
derived() /= numext::sqrt(z);
}
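A sketch of the new zero-norm behaviour described by the warnings above (assuming <Eigen/Dense>):

#include <Eigen/Dense>
void demo_normalize() {
  Eigen::Vector2d v(3.0, 4.0);
  Eigen::Vector2d u = v.normalized();      // (0.6, 0.8)
  v.normalize();                           // v itself becomes unit length
  Eigen::Vector2d zero = Eigen::Vector2d::Zero();
  Eigen::Vector2d w = zero.normalized();   // now returns the zero vector instead of NaNs
  zero.normalize();                        // and leaves *this unchanged
}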
//---------- implementation of other norms ----------

View File

@ -80,7 +80,7 @@ struct IOFormat
*
* \brief Pseudo expression providing matrix output with given format
*
* \param ExpressionType the type of the object on which IO stream operations are performed
* \tparam ExpressionType the type of the object on which IO stream operations are performed
*
* This class represents an expression with stream operators controlled by a given IOFormat.
* It is the return type of DenseBase::format()
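For reference, a minimal sketch of the format() usage this documents (assuming <iostream> and <Eigen/Dense>):

#include <iostream>
#include <Eigen/Dense>
void demo_ioformat() {
  Eigen::Matrix2d m;
  m << 1, 2,
       3, 4;
  Eigen::IOFormat fmt(4, 0, ", ", "\n", "[", "]");  // precision, flags, coeff/row separators, row delimiters
  std::cout << m.format(fmt) << std::endl;
}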

View File

@ -13,6 +13,28 @@
namespace Eigen {
namespace internal {
template<typename PlainObjectType, int MapOptions, typename StrideType>
struct traits<Map<PlainObjectType, MapOptions, StrideType> >
: public traits<PlainObjectType>
{
typedef traits<PlainObjectType> TraitsBase;
enum {
InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
? int(PlainObjectType::InnerStrideAtCompileTime)
: int(StrideType::InnerStrideAtCompileTime),
OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
? int(PlainObjectType::OuterStrideAtCompileTime)
: int(StrideType::OuterStrideAtCompileTime),
Alignment = int(MapOptions)&int(AlignedMask),
Flags0 = TraitsBase::Flags & (~NestByRefBit),
Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
};
private:
enum { Options }; // Expressions don't have Options
};
}
/** \class Map
* \ingroup Core_Module
*
@ -63,29 +85,6 @@ namespace Eigen {
*
* \sa PlainObjectBase::Map(), \ref TopicStorageOrders
*/
namespace internal {
template<typename PlainObjectType, int MapOptions, typename StrideType>
struct traits<Map<PlainObjectType, MapOptions, StrideType> >
: public traits<PlainObjectType>
{
typedef traits<PlainObjectType> TraitsBase;
enum {
InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
? int(PlainObjectType::InnerStrideAtCompileTime)
: int(StrideType::InnerStrideAtCompileTime),
OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
? int(PlainObjectType::OuterStrideAtCompileTime)
: int(StrideType::OuterStrideAtCompileTime),
Alignment = int(MapOptions)&int(AlignedMask),
Flags0 = TraitsBase::Flags & (~NestByRefBit),
Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
};
private:
enum { Options }; // Expressions don't have Options
};
}
template<typename PlainObjectType, int MapOptions, typename StrideType> class Map
: public MapBase<Map<PlainObjectType, MapOptions, StrideType> >
{

View File

@ -26,7 +26,7 @@ long double abs(long double x) { return (fabsl(x)); }
namespace internal {
/** \internal \struct global_math_functions_filtering_base
/** \internal \class global_math_functions_filtering_base
*
* What it does:
* Defines a typedef 'type' as follows:
@ -954,8 +954,8 @@ T (ceil)(const T& x)
return ceil(x);
}
// Log base 2 for 32 bits positive integers.
// Conveniently returns 0 for x==0.
/** Log base 2 for 32 bits positive integers.
* Conveniently returns 0 for x==0. */
inline int log2(int x)
{
eigen_assert(x>=0);
@ -969,6 +969,22 @@ inline int log2(int x)
return table[(v * 0x07C4ACDDU) >> 27];
}
/** \returns the square root of \a x.
*
* It is essentially equivalent to \code using std::sqrt; return sqrt(x); \endcode,
* but slightly faster for float/double and some compilers (e.g., gcc), thanks to
* specializations when SSE is enabled.
*
* It's usage is justified in performance critical functions, like norm/normalize.
*/
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T sqrt(const T &x)
{
EIGEN_USING_STD_MATH(sqrt);
return sqrt(x);
}
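Callers can use it as a drop-in for std::sqrt; a sketch (assuming <Eigen/Dense> is included):

#include <Eigen/Dense>
void demo_numext_sqrt() {
  float  a = Eigen::numext::sqrt(2.0f);  // may hit the SSE specialization added later in this commit
  double b = Eigen::numext::sqrt(2.0);   // otherwise falls back to the std/ADL sqrt
}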
} // end namespace numext
namespace internal {

View File

@ -13,6 +13,45 @@
namespace Eigen {
namespace internal {
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
{
private:
enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret };
typedef typename find_best_packet<_Scalar,size>::type PacketScalar;
enum {
row_major_bit = _Options&RowMajor ? RowMajorBit : 0,
is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,
max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,
default_alignment = compute_default_alignment<_Scalar,max_size>::value,
actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
required_alignment = unpacket_traits<PacketScalar>::alignment,
packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0
};
public:
typedef _Scalar Scalar;
typedef Dense StorageKind;
typedef Eigen::Index StorageIndex;
typedef MatrixXpr XprKind;
enum {
RowsAtCompileTime = _Rows,
ColsAtCompileTime = _Cols,
MaxRowsAtCompileTime = _MaxRows,
MaxColsAtCompileTime = _MaxCols,
Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
Options = _Options,
InnerStrideAtCompileTime = 1,
OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
// FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase
EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
Alignment = actual_alignment
};
};
}
/** \class Matrix
* \ingroup Core_Module
*
@ -98,7 +137,7 @@ namespace Eigen {
* </dl>
*
* <i><b>ABI and storage layout</b></i>
*
*
* The table below summarizes the ABI of some possible Matrix instances which is fixed thorough the lifetime of Eigen 3.
* <table class="manual">
* <tr><th>Matrix type</th><th>Equivalent C structure</th></tr>
@ -130,50 +169,11 @@ namespace Eigen {
* </table>
* Note that in this table Rows, Cols, MaxRows and MaxCols are all positive integers. A(S) is defined to the largest possible power-of-two
* smaller to EIGEN_MAX_STATIC_ALIGN_BYTES.
*
* \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy,
* \ref TopicStorageOrders
*
* \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy,
* \ref TopicStorageOrders
*/
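For illustration, some common spellings of the template parameters discussed above (a sketch, assuming <Eigen/Dense>):

#include <Eigen/Dense>
typedef Eigen::Matrix<float, 4, 4> Mat4f;                         // fixed 4x4, column-major by default
typedef Eigen::Matrix<double, Eigen::Dynamic, 1> VecXd;           // dynamic-size column vector
typedef Eigen::Matrix<int, Eigen::Dynamic, Eigen::Dynamic,
                      Eigen::RowMajor> RowMajorXi;                // dynamic, row-major storage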
namespace internal {
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
{
private:
enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret };
typedef typename find_best_packet<_Scalar,size>::type PacketScalar;
enum {
row_major_bit = _Options&RowMajor ? RowMajorBit : 0,
is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,
max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,
default_alignment = compute_default_alignment<_Scalar,max_size>::value,
actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
required_alignment = unpacket_traits<PacketScalar>::alignment,
packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0
};
public:
typedef _Scalar Scalar;
typedef Dense StorageKind;
typedef Eigen::Index StorageIndex;
typedef MatrixXpr XprKind;
enum {
RowsAtCompileTime = _Rows,
ColsAtCompileTime = _Cols,
MaxRowsAtCompileTime = _MaxRows,
MaxColsAtCompileTime = _MaxCols,
Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
Options = _Options,
InnerStrideAtCompileTime = 1,
OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
// FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase
EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
Alignment = actual_alignment
};
};
}
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
class Matrix
: public PlainObjectBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >

View File

@ -13,25 +13,24 @@
namespace Eigen {
/** \class NestByValue
* \ingroup Core_Module
*
* \brief Expression which must be nested by value
*
* \param ExpressionType the type of the object of which we are requiring nesting-by-value
*
* This class is the return type of MatrixBase::nestByValue()
* and most of the time this is the only way it is used.
*
* \sa MatrixBase::nestByValue()
*/
namespace internal {
template<typename ExpressionType>
struct traits<NestByValue<ExpressionType> > : public traits<ExpressionType>
{};
}
/** \class NestByValue
* \ingroup Core_Module
*
* \brief Expression which must be nested by value
*
* \tparam ExpressionType the type of the object of which we are requiring nesting-by-value
*
* This class is the return type of MatrixBase::nestByValue()
* and most of the time this is the only way it is used.
*
* \sa MatrixBase::nestByValue()
*/
template<typename ExpressionType> class NestByValue
: public internal::dense_xpr_base< NestByValue<ExpressionType> >::type
{

View File

@ -17,7 +17,7 @@ namespace Eigen {
*
* \brief Pseudo expression providing an operator = assuming no aliasing
*
* \param ExpressionType the type of the object on which to do the lazy assignment
* \tparam ExpressionType the type of the object on which to do the lazy assignment
*
* This class represents an expression with special assignment operators
* assuming no aliasing between the target expression and the source expression.

View File

@ -17,7 +17,7 @@ namespace Eigen {
*
* \brief Holds information about the various numeric (i.e. scalar) types allowed by Eigen.
*
* \param T the numeric type at hand
* \tparam T the numeric type at hand
*
* This class stores enums, typedefs and static methods giving information about a numeric type.
*
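A short sketch of the kind of information NumTraits exposes (assuming <Eigen/Dense> and <complex>):

#include <complex>
#include <Eigen/Dense>
void demo_numtraits() {
  double eps  = Eigen::NumTraits<double>::epsilon();          // machine epsilon
  double prec = Eigen::NumTraits<double>::dummy_precision();  // default comparison tolerance
  bool   cplx = Eigen::NumTraits<std::complex<float> >::IsComplex;
  typedef Eigen::NumTraits<std::complex<double> >::Real Real; // double
}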

View File

@ -13,12 +13,18 @@
namespace Eigen {
namespace internal {
enum PermPermProduct_t {PermPermProduct};
} // end namespace internal
/** \class PermutationBase
* \ingroup Core_Module
*
* \brief Base class for permutations
*
* \param Derived the derived class
* \tparam Derived the derived class
*
* This class is the base class for all expressions representing a permutation matrix,
* internally stored as a vector of integers.
@ -36,13 +42,6 @@ namespace Eigen {
*
* \sa class PermutationMatrix, class PermutationWrapper
*/
namespace internal {
enum PermPermProduct_t {PermPermProduct};
} // end namespace internal
template<typename Derived>
class PermutationBase : public EigenBase<Derived>
{
@ -280,20 +279,6 @@ class PermutationBase : public EigenBase<Derived>
};
/** \class PermutationMatrix
* \ingroup Core_Module
*
* \brief Permutation matrix
*
* \param SizeAtCompileTime the number of rows/cols, or Dynamic
* \param MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
* \param StorageIndex the integer type of the indices
*
* This class represents a permutation matrix, internally stored as a vector of integers.
*
* \sa class PermutationBase, class PermutationWrapper, class DiagonalMatrix
*/
namespace internal {
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex> >
@ -306,6 +291,19 @@ struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _Storag
};
}
/** \class PermutationMatrix
* \ingroup Core_Module
*
* \brief Permutation matrix
*
* \tparam SizeAtCompileTime the number of rows/cols, or Dynamic
* \tparam MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
* \tparam _StorageIndex the integer type of the indices
*
* This class represents a permutation matrix, internally stored as a vector of integers.
*
* \sa class PermutationBase, class PermutationWrapper, class DiagonalMatrix
*/
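A small usage sketch of the permutation classes documented here (assuming <Eigen/Dense>):

#include <Eigen/Dense>
void demo_permutation() {
  Eigen::PermutationMatrix<3> P;
  P.setIdentity();
  P.applyTranspositionOnTheRight(0, 2);        // swap two indices
  Eigen::Matrix3d M = Eigen::Matrix3d::Random();
  Eigen::Matrix3d byRows = P * M;              // applied on the left: permutes rows
  Eigen::Matrix3d byCols = M * P;              // applied on the right: permutes columns
}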
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex> >
{
@ -482,18 +480,6 @@ class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageInd
IndicesType m_indices;
};
/** \class PermutationWrapper
* \ingroup Core_Module
*
* \brief Class to view a vector of integers as a permutation matrix
*
* \param _IndicesType the type of the vector of integer (can be any compatible expression)
*
* This class allows to view any vector expression of integers as a permutation matrix.
*
* \sa class PermutationBase, class PermutationMatrix
*/
template<typename _IndicesType> class TranspositionsWrapper;
namespace internal {
template<typename _IndicesType>
@ -513,6 +499,17 @@ struct traits<PermutationWrapper<_IndicesType> >
};
}
/** \class PermutationWrapper
* \ingroup Core_Module
*
* \brief Class to view a vector of integers as a permutation matrix
*
* \tparam _IndicesType the type of the vector of integer (can be any compatible expression)
*
* This class allows to view any vector expression of integers as a permutation matrix.
*
* \sa class PermutationBase, class PermutationMatrix
*/
template<typename _IndicesType>
class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesType> >
{

View File

@ -14,22 +14,6 @@ namespace Eigen {
template<typename Lhs, typename Rhs, int Option, typename StorageKind> class ProductImpl;
/** \class Product
* \ingroup Core_Module
*
* \brief Expression of the product of two arbitrary matrices or vectors
*
* \param Lhs the type of the left-hand side expression
* \param Rhs the type of the right-hand side expression
*
* This class represents an expression of the product of two arbitrary matrices.
*
* The other template parameters are:
* \tparam Option can be DefaultProduct, AliasFreeProduct, or LazyProduct
*
*/
namespace internal {
// Determine the scalar of Product<Lhs, Rhs>. This is normally the same as Lhs::Scalar times
@ -102,7 +86,20 @@ struct traits<Product<Lhs, Rhs, Option> >
} // end namespace internal
/** \class Product
* \ingroup Core_Module
*
* \brief Expression of the product of two arbitrary matrices or vectors
*
* \tparam _Lhs the type of the left-hand side expression
* \tparam _Rhs the type of the right-hand side expression
*
* This class represents an expression of the product of two arbitrary matrices.
*
* The other template parameters are:
* \tparam Option can be DefaultProduct, AliasFreeProduct, or LazyProduct
*
*/
template<typename _Lhs, typename _Rhs, int Option>
class Product : public ProductImpl<_Lhs,_Rhs,Option,
typename internal::product_promote_storage_type<typename internal::traits<_Lhs>::StorageKind,

View File

@ -38,10 +38,9 @@ struct evaluator<Product<Lhs, Rhs, Options> >
// Catch scalar * ( A * B ) and transform it to (A*scalar) * B
// TODO we should apply that rule only if that's really helpful
template<typename Lhs, typename Rhs, typename Scalar>
struct evaluator_traits<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
: evaluator_traits_base<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
struct evaluator_assume_aliasing<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
{
enum { AssumeAliasing = 1 };
static const bool value = true;
};
template<typename Lhs, typename Rhs, typename Scalar>
struct evaluator<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
@ -81,17 +80,8 @@ template< typename Lhs, typename Rhs,
struct generic_product_impl;
template<typename Lhs, typename Rhs>
struct evaluator_traits<Product<Lhs, Rhs, DefaultProduct> >
: evaluator_traits_base<Product<Lhs, Rhs, DefaultProduct> >
{
enum { AssumeAliasing = 1 };
};
template<typename Lhs, typename Rhs>
struct evaluator_traits<Product<Lhs, Rhs, AliasFreeProduct> >
: evaluator_traits_base<Product<Lhs, Rhs, AliasFreeProduct> >
{
enum { AssumeAliasing = 0 };
struct evaluator_assume_aliasing<Product<Lhs, Rhs, DefaultProduct> > {
static const bool value = true;
};
// This is the default evaluator implementation for products:
@ -189,6 +179,13 @@ struct Assignment<DstXprType, CwiseUnaryOp<internal::scalar_multiple_op<ScalarBi
//----------------------------------------
// Catch "Dense ?= xpr + Product<>" expression to save one temporary
// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct
// TODO enable it for "Dense ?= xpr - Product<>" as well.
template<typename OtherXpr, typename Lhs, typename Rhs>
struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar>, const OtherXpr,
const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
static const bool value = true;
};
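In user code this corresponds to expressions of the form below, where the specialization can save one temporary for the product (a sketch, assuming <Eigen/Dense>):

#include <Eigen/Dense>
void demo_xpr_plus_product() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 4);
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(4, 4);
  Eigen::MatrixXd C = Eigen::MatrixXd::Random(4, 4);
  Eigen::MatrixXd D(4, 4);
  D = C + A * B;   // "Dense = xpr + Product<>": the product need not be materialized separately
  D += A * B;      // plain product assignment, handled by the product evaluators as before
}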
template<typename DstXprType, typename OtherXpr, typename ProductType, typename Scalar, typename Func1, typename Func2>
struct assignment_from_xpr_plus_product

View File

@ -12,76 +12,6 @@
namespace Eigen {
/** \class Ref
* \ingroup Core_Module
*
* \brief A matrix or vector expression mapping an existing expression
*
* \tparam PlainObjectType the equivalent matrix type of the mapped data
* \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
* The default is \c #Unaligned.
* \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1),
* but accepts a variable outer stride (leading dimension).
* This can be overridden by specifying strides.
* The type passed here must be a specialization of the Stride template, see examples below.
*
* This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies.
* A Ref<> object can represent either a const expression or a l-value:
* \code
* // in-out argument:
* void foo1(Ref<VectorXf> x);
*
* // read-only const argument:
* void foo2(const Ref<const VectorXf>& x);
* \endcode
*
* In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered.
* By default, a Ref<VectorXf> can reference any dense vector expression of float having a contiguous memory layout.
* Likewise, a Ref<MatrixXf> can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with
* the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension)
* can be greater than the number of rows.
*
* In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function.
* Here are some examples:
* \code
* MatrixXf A;
* VectorXf a;
* foo1(a.head()); // OK
* foo1(A.col()); // OK
* foo1(A.row()); // Compilation error because here innerstride!=1
* foo2(A.row()); // Compilation error because A.row() is a 1xN object while foo2 is expecting a Nx1 object
* foo2(A.row().transpose()); // The row is copied into a contiguous temporary
* foo2(2*a); // The expression is evaluated into a temporary
* foo2(A.col().segment(2,4)); // No temporary
* \endcode
*
* The range of inputs that can be referenced without temporary can be enlarged using the last two template parameters.
* Here is an example accepting an innerstride!=1:
* \code
* // in-out argument:
* void foo3(Ref<VectorXf,0,InnerStride<> > x);
* foo3(A.row()); // OK
* \endcode
* The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more
* expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overload internally calling a
* template function, e.g.:
* \code
* // in the .h:
* void foo(const Ref<MatrixXf>& A);
* void foo(const Ref<MatrixXf,0,Stride<> >& A);
*
* // in the .cpp:
* template<typename TypeOfA> void foo_impl(const TypeOfA& A) {
* ... // crazy code goes here
* }
* void foo(const Ref<MatrixXf>& A) { foo_impl(A); }
* void foo(const Ref<MatrixXf,0,Stride<> >& A) { foo_impl(A); }
* \endcode
*
*
* \sa PlainObjectBase::Map(), \ref TopicStorageOrders
*/
namespace internal {
template<typename _PlainObjectType, int _Options, typename _StrideType>
@ -182,7 +112,75 @@ protected:
StrideBase m_stride;
};
/** \class Ref
* \ingroup Core_Module
*
* \brief A matrix or vector expression mapping an existing expression
*
* \tparam PlainObjectType the equivalent matrix type of the mapped data
* \tparam Options specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
* The default is \c #Unaligned.
* \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1),
* but accepts a variable outer stride (leading dimension).
* This can be overridden by specifying strides.
* The type passed here must be a specialization of the Stride template, see examples below.
*
* This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies.
* A Ref<> object can represent either a const expression or a l-value:
* \code
* // in-out argument:
* void foo1(Ref<VectorXf> x);
*
* // read-only const argument:
* void foo2(const Ref<const VectorXf>& x);
* \endcode
*
* In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered.
* By default, a Ref<VectorXf> can reference any dense vector expression of float having a contiguous memory layout.
* Likewise, a Ref<MatrixXf> can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with
* the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension)
* can be greater than the number of rows.
*
* In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function.
* Here are some examples:
* \code
* MatrixXf A;
* VectorXf a;
* foo1(a.head()); // OK
* foo1(A.col()); // OK
* foo1(A.row()); // Compilation error because here innerstride!=1
* foo2(A.row()); // Compilation error because A.row() is a 1xN object while foo2 is expecting a Nx1 object
* foo2(A.row().transpose()); // The row is copied into a contiguous temporary
* foo2(2*a); // The expression is evaluated into a temporary
* foo2(A.col().segment(2,4)); // No temporary
* \endcode
*
* The range of inputs that can be referenced without temporary can be enlarged using the last two template parameters.
* Here is an example accepting an innerstride!=1:
* \code
* // in-out argument:
* void foo3(Ref<VectorXf,0,InnerStride<> > x);
* foo3(A.row()); // OK
* \endcode
* The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more
* expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overload internally calling a
* template function, e.g.:
* \code
* // in the .h:
* void foo(const Ref<MatrixXf>& A);
* void foo(const Ref<MatrixXf,0,Stride<> >& A);
*
* // in the .cpp:
* template<typename TypeOfA> void foo_impl(const TypeOfA& A) {
* ... // crazy code goes here
* }
* void foo(const Ref<MatrixXf>& A) { foo_impl(A); }
* void foo(const Ref<MatrixXf,0,Stride<> >& A) { foo_impl(A); }
* \endcode
*
*
* \sa PlainObjectBase::Map(), \ref TopicStorageOrders
*/
template<typename PlainObjectType, int Options, typename StrideType> class Ref
: public RefBase<Ref<PlainObjectType, Options, StrideType> >
{
@ -209,6 +207,7 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref
EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
#else
/** Implicit constructor from any dense expression */
template<typename Derived>
inline Ref(DenseBase<Derived>& expr)
#endif

View File

@ -12,21 +12,6 @@
namespace Eigen {
/**
* \class Replicate
* \ingroup Core_Module
*
* \brief Expression of the multiple replication of a matrix or vector
*
* \param MatrixType the type of the object we are replicating
*
* This class represents an expression of the multiple replication of a matrix or vector.
* It is the return type of DenseBase::replicate() and most of the time
* this is the only way it is used.
*
* \sa DenseBase::replicate()
*/
namespace internal {
template<typename MatrixType,int RowFactor,int ColFactor>
struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
@ -57,6 +42,22 @@ struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
};
}
/**
* \class Replicate
* \ingroup Core_Module
*
* \brief Expression of the multiple replication of a matrix or vector
*
* \tparam MatrixType the type of the object we are replicating
* \tparam RowFactor number of repetitions at compile time along the vertical direction, can be Dynamic.
* \tparam ColFactor number of repetitions at compile time along the horizontal direction, can be Dynamic.
*
* This class represents an expression of the multiple replication of a matrix or vector.
* It is the return type of DenseBase::replicate() and most of the time
* this is the only way it is used.
*
* \sa DenseBase::replicate()
*/
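A brief replicate() sketch matching the factors described above (assuming <Eigen/Dense>):

#include <Eigen/Dense>
void demo_replicate() {
  Eigen::RowVector3i v(1, 2, 3);
  Eigen::MatrixXi tiled = v.replicate(2, 2);    // 2x6: v repeated 2x vertically, 2x horizontally
  Eigen::MatrixXi fixed = v.replicate<3, 1>();  // 3x3: compile-time replication factors
}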
template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
: public internal::dense_xpr_base< Replicate<MatrixType,RowFactor,ColFactor> >::type
{

View File

@ -13,11 +13,6 @@
namespace Eigen {
/** \class ReturnByValue
* \ingroup Core_Module
*
*/
namespace internal {
template<typename Derived>
@ -48,6 +43,10 @@ struct nested_eval<ReturnByValue<Derived>, n, PlainObject>
} // end namespace internal
/** \class ReturnByValue
* \ingroup Core_Module
*
*/
template<typename Derived> class ReturnByValue
: public internal::dense_xpr_base< ReturnByValue<Derived> >::type, internal::no_assignment_operator
{

View File

@ -14,20 +14,6 @@
namespace Eigen {
/** \class Reverse
* \ingroup Core_Module
*
* \brief Expression of the reverse of a vector or matrix
*
* \param MatrixType the type of the object of which we are taking the reverse
*
* This class represents an expression of the reverse of a vector.
* It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse()
* and most of the time this is the only way it is used.
*
* \sa MatrixBase::reverse(), VectorwiseOp::reverse()
*/
namespace internal {
template<typename MatrixType, int Direction>
@ -60,6 +46,20 @@ template<typename PacketType> struct reverse_packet_cond<PacketType,false>
} // end namespace internal
/** \class Reverse
* \ingroup Core_Module
*
* \brief Expression of the reverse of a vector or matrix
*
* \tparam MatrixType the type of the object of which we are taking the reverse
* \tparam Direction defines the direction of the reverse operation, can be Vertical, Horizontal, or BothDirections
*
* This class represents an expression of the reverse of a vector.
* It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse()
* and most of the time this is the only way it is used.
*
* \sa MatrixBase::reverse(), VectorwiseOp::reverse()
*/
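A short sketch of the reverse expressions mentioned above (assuming <Eigen/Dense>):

#include <Eigen/Dense>
void demo_reverse() {
  Eigen::Vector4i v(1, 2, 3, 4);
  Eigen::Vector4i r = v.reverse();            // 4, 3, 2, 1
  Eigen::MatrixXi m = Eigen::MatrixXi::Random(3, 4);
  Eigen::MatrixXi w = m.rowwise().reverse();  // reverses the coefficients within each row
}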
template<typename MatrixType, int Direction> class Reverse
: public internal::dense_xpr_base< Reverse<MatrixType, Direction> >::type
{

View File

@ -203,8 +203,6 @@ struct evaluator_traits<SelfAdjointView<MatrixType,Mode> >
{
typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
typedef SelfAdjointShape Shape;
static const int AssumeAliasing = 0;
};
template<int UpLo, int SetOpposite, typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version>

View File

@ -31,8 +31,8 @@ namespace Eigen {
* arguments to the constructor.
*
* Indeed, this class takes two template parameters:
* \param _OuterStrideAtCompileTime the outer stride, or Dynamic if you want to specify it at runtime.
* \param _InnerStrideAtCompileTime the inner stride, or Dynamic if you want to specify it at runtime.
* \tparam _OuterStrideAtCompileTime the outer stride, or Dynamic if you want to specify it at runtime.
* \tparam _InnerStrideAtCompileTime the inner stride, or Dynamic if you want to specify it at runtime.
*
* Here is an example:
* \include Map_general_stride.cpp

View File

@ -13,20 +13,6 @@
namespace Eigen {
/** \class Transpose
* \ingroup Core_Module
*
* \brief Expression of the transpose of a matrix
*
* \param MatrixType the type of the object of which we are taking the transpose
*
* This class represents an expression of the transpose of a matrix.
* It is the return type of MatrixBase::transpose() and MatrixBase::adjoint()
* and most of the time this is the only way it is used.
*
* \sa MatrixBase::transpose(), MatrixBase::adjoint()
*/
namespace internal {
template<typename MatrixType>
struct traits<Transpose<MatrixType> > : public traits<MatrixType>
@ -50,6 +36,19 @@ struct traits<Transpose<MatrixType> > : public traits<MatrixType>
template<typename MatrixType, typename StorageKind> class TransposeImpl;
/** \class Transpose
* \ingroup Core_Module
*
* \brief Expression of the transpose of a matrix
*
* \tparam MatrixType the type of the object of which we are taking the transpose
*
* This class represents an expression of the transpose of a matrix.
* It is the return type of MatrixBase::transpose() and MatrixBase::adjoint()
* and most of the time this is the only way it is used.
*
* \sa MatrixBase::transpose(), MatrixBase::adjoint()
*/
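A minimal sketch of the transpose/adjoint expressions this documents (assuming <Eigen/Dense>):

#include <Eigen/Dense>
void demo_transpose() {
  Eigen::MatrixXcd m = Eigen::MatrixXcd::Random(2, 3);
  Eigen::MatrixXcd t = m.transpose();   // 3x2 expression, evaluated on assignment
  Eigen::MatrixXcd a = m.adjoint();     // conjugate transpose
  Eigen::MatrixXcd s = m;
  s.transposeInPlace();                 // safe alternative to s = s.transpose()
}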
template<typename MatrixType> class Transpose
: public TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>
{

View File

@ -12,35 +12,6 @@
namespace Eigen {
/** \class Transpositions
* \ingroup Core_Module
*
* \brief Represents a sequence of transpositions (row/column interchange)
*
* \param SizeAtCompileTime the number of transpositions, or Dynamic
* \param MaxSizeAtCompileTime the maximum number of transpositions, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
*
* This class represents a permutation transformation as a sequence of \em n transpositions
* \f$[T_{n-1} \ldots T_{i} \ldots T_{0}]\f$. It is internally stored as a vector of integers \c indices.
* Each transposition \f$ T_{i} \f$ applied on the left of a matrix (\f$ T_{i} M\f$) interchanges
* the rows \c i and \c indices[i] of the matrix \c M.
* A transposition applied on the right (e.g., \f$ M T_{i}\f$) yields a column interchange.
*
* Compared to the class PermutationMatrix, such a sequence of transpositions is what is
* computed during a decomposition with pivoting, and it is faster when applying the permutation in-place.
*
* To apply a sequence of transpositions to a matrix, simply use the operator * as in the following example:
* \code
* Transpositions tr;
* MatrixXf mat;
* mat = tr * mat;
* \endcode
* In this example, we detect that the matrix appears on both side, and so the transpositions
* are applied in-place without any temporary or extra copy.
*
* \sa class PermutationMatrix
*/
template<typename Derived>
class TranspositionsBase
{
@ -154,6 +125,35 @@ struct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageInde
};
}
/** \class Transpositions
* \ingroup Core_Module
*
* \brief Represents a sequence of transpositions (row/column interchange)
*
* \tparam SizeAtCompileTime the number of transpositions, or Dynamic
* \tparam MaxSizeAtCompileTime the maximum number of transpositions, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
*
* This class represents a permutation transformation as a sequence of \em n transpositions
* \f$[T_{n-1} \ldots T_{i} \ldots T_{0}]\f$. It is internally stored as a vector of integers \c indices.
* Each transposition \f$ T_{i} \f$ applied on the left of a matrix (\f$ T_{i} M\f$) interchanges
* the rows \c i and \c indices[i] of the matrix \c M.
* A transposition applied on the right (e.g., \f$ M T_{i}\f$) yields a column interchange.
*
* Compared to the class PermutationMatrix, such a sequence of transpositions is what is
* computed during a decomposition with pivoting, and it is faster when applying the permutation in-place.
*
* To apply a sequence of transpositions to a matrix, simply use the operator * as in the following example:
* \code
* Transpositions tr;
* MatrixXf mat;
* mat = tr * mat;
* \endcode
* In this example, we detect that the matrix appears on both side, and so the transpositions
* are applied in-place without any temporary or extra copy.
*
* \sa class PermutationMatrix
*/
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
{
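A minimal usage sketch of the in-place interchanges described above, assuming a dynamically sized Transpositions object (the chosen indices are illustrative):

#include <Eigen/Dense>
using namespace Eigen;

int main()
{
  Matrix3f mat = Matrix3f::Random();
  Transpositions<Dynamic> tr(3);
  tr.setIdentity();
  tr[0] = 2;                  // T_0 interchanges rows 0 and 2
  mat = tr * mat;             // applied on the left: rows swapped in place, no temporary
  Matrix3f c = mat * tr;      // applied on the right: column interchange
  return 0;
}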


@ -704,10 +704,6 @@ struct evaluator_traits<TriangularView<MatrixType,Mode> >
{
typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
typedef typename glue_shapes<typename evaluator_traits<MatrixType>::Shape, TriangularShape>::type Shape;
// 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a
// temporary; 0 if not.
static const int AssumeAliasing = 0;
};
template<typename MatrixType, unsigned int Mode>


@ -13,13 +13,23 @@
namespace Eigen {
namespace internal {
template<typename VectorType, int Size>
struct traits<VectorBlock<VectorType, Size> >
: public traits<Block<VectorType,
traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
traits<VectorType>::Flags & RowMajorBit ? Size : 1> >
{
};
}
/** \class VectorBlock
* \ingroup Core_Module
*
* \brief Expression of a fixed-size or dynamic-size sub-vector
*
* \param VectorType the type of the object in which we are taking a sub-vector
* \param Size size of the sub-vector we are taking at compile time (optional)
* \tparam VectorType the type of the object in which we are taking a sub-vector
* \tparam Size size of the sub-vector we are taking at compile time (optional)
*
* This class represents an expression of either a fixed-size or dynamic-size sub-vector.
* It is the return type of DenseBase::segment(Index,Index) and DenseBase::segment<int>(Index) and
@ -43,17 +53,6 @@ namespace Eigen {
*
* \sa class Block, DenseBase::segment(Index,Index,Index,Index), DenseBase::segment(Index,Index)
*/
namespace internal {
template<typename VectorType, int Size>
struct traits<VectorBlock<VectorType, Size> >
: public traits<Block<VectorType,
traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
traits<VectorType>::Flags & RowMajorBit ? Size : 1> >
{
};
}
template<typename VectorType, int Size> class VectorBlock
: public Block<VectorType,
internal::traits<VectorType>::Flags & RowMajorBit ? 1 : Size,


@ -141,8 +141,8 @@ struct member_redux {
*
* \brief Pseudo expression providing partial reduction operations
*
* \param ExpressionType the type of the object on which to do partial reductions
* \param Direction indicates the direction of the redux (#Vertical or #Horizontal)
* \tparam ExpressionType the type of the object on which to do partial reductions
* \tparam Direction indicates the direction of the redux (#Vertical or #Horizontal)
*
* This class represents a pseudo expression with partial reduction features.
* It is the return type of DenseBase::colwise() and DenseBase::rowwise()
@ -187,11 +187,11 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
protected:
/** \internal
* \returns the i-th subvector according to the \c Direction */
typedef typename internal::conditional<isVertical,
typename ExpressionType::ColXpr,
typename ExpressionType::RowXpr>::type SubVector;
/** \internal
* \returns the i-th subvector according to the \c Direction */
EIGEN_DEVICE_FUNC
SubVector subVector(Index i)
{
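A minimal sketch of the partial reductions this pseudo expression provides (expected results in the comments):

#include <Eigen/Dense>
#include <iostream>
using namespace Eigen;

int main()
{
  Matrix3f m;
  m << 1, 2, 3,
       4, 5, 6,
       7, 8, 9;
  std::cout << m.colwise().sum()      << "\n";   // 1x3 row vector: 12 15 18
  std::cout << m.rowwise().maxCoeff() << "\n";   // 3x1 column vector: 3 6 9
  return 0;
}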


@ -518,6 +518,28 @@ Packet2d prsqrt<Packet2d>(const Packet2d& x) {
} // end namespace internal
namespace numext {
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float sqrt(const float &x)
{
return internal::pfirst(internal::Packet4f(_mm_sqrt_ss(_mm_set_ss(x))));
}
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double sqrt(const double &x)
{
#if EIGEN_COMP_GNUC
return internal::pfirst(internal::Packet2d(__builtin_ia32_sqrtsd(_mm_set_sd(x))));
#else
return internal::pfirst(internal::Packet2d(_mm_sqrt_pd(_mm_set_sd(x))));
#endif
}
} // end namespace numext
} // end namespace Eigen
#endif // EIGEN_MATH_FUNCTIONS_SSE_H


@ -145,12 +145,9 @@ static void run(Index rows, Index cols, Index depth,
// Release all the sub blocks A'_i of A' for the current thread,
// i.e., we simply decrement the number of users by 1
#pragma omp critical
{
for(Index i=0; i<threads; ++i)
#pragma omp atomic
info[i].users -= 1;
}
}
}
else


@ -123,18 +123,18 @@ template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::R
template<typename Scalar, typename Index>
class BlasVectorMapper {
public:
EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {}
EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {
return m_data[i];
}
template <typename Packet, int AlignmentType>
EIGEN_ALWAYS_INLINE Packet load(Index i) const {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet load(Index i) const {
return ploadt<Packet, AlignmentType>(m_data + i);
}
template <typename Packet>
bool aligned(Index i) const {
EIGEN_DEVICE_FUNC bool aligned(Index i) const {
return (size_t(m_data+i)%sizeof(Packet))==0;
}
@ -148,25 +148,25 @@ class BlasLinearMapper {
typedef typename packet_traits<Scalar>::type Packet;
typedef typename packet_traits<Scalar>::half HalfPacket;
EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {}
EIGEN_ALWAYS_INLINE void prefetch(int i) const {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {
internal::prefetch(&operator()(i));
}
EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
return m_data[i];
}
EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
return ploadt<Packet, AlignmentType>(m_data + i);
}
EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const {
return ploadt<HalfPacket, AlignmentType>(m_data + i);
}
EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const {
pstoret<Scalar, Packet, AlignmentType>(m_data + i, p);
}
@ -184,18 +184,18 @@ class blas_data_mapper {
typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper;
typedef BlasVectorMapper<Scalar, Index> VectorMapper;
EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {}
EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>
getSubMapper(Index i, Index j) const {
return blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>(&operator()(i, j), m_stride);
}
EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
return LinearMapper(&operator()(i, j));
}
EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {
return VectorMapper(&operator()(i, j));
}
@ -205,28 +205,28 @@ class blas_data_mapper {
return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride];
}
EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
return ploadt<Packet, AlignmentType>(&operator()(i, j));
}
EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const {
return ploadt<HalfPacket, AlignmentType>(&operator()(i, j));
}
template<typename SubPacket>
EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
}
template<typename SubPacket>
EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);
}
const Index stride() const { return m_stride; }
const Scalar* data() const { return m_data; }
EIGEN_DEVICE_FUNC const Index stride() const { return m_stride; }
EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }
Index firstAligned(Index size) const {
EIGEN_DEVICE_FUNC Index firstAligned(Index size) const {
if (size_t(m_data)%sizeof(Scalar)) {
return -1;
}


@ -224,7 +224,7 @@ enum {
/** \ingroup enums
* Enum for indicating whether a buffer is aligned or not. */
enum Foo {
enum {
Unaligned=0, /**< Data pointer has no specific alignment. */
Aligned8=8, /**< Data pointer is aligned on a 8 bytes boundary. */
Aligned16=16, /**< Data pointer is aligned on a 16 bytes boundary. */


@ -524,7 +524,7 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_align
* \sa first_default_aligned()
*/
template<int Alignment, typename Scalar, typename Index>
inline Index first_aligned(const Scalar* array, Index size)
EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size)
{
static const Index ScalarSize = sizeof(Scalar);
static const Index AlignmentSize = Alignment / ScalarSize;
@ -544,14 +544,15 @@ inline Index first_aligned(const Scalar* array, Index size)
}
else
{
return std::min<Index>( (AlignmentSize - (Index((std::size_t(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask, size);
Index first = (AlignmentSize - (Index((std::size_t(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask;
return (first < size) ? first : size;
}
}
/** \internal Returns the index of the first element of the array that is well aligned with respect to the largest packet requirement.
* \sa first_aligned(Scalar*,Index) and first_default_aligned(DenseBase<Derived>) */
template<typename Scalar, typename Index>
inline Index first_default_aligned(const Scalar* array, Index size)
EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size)
{
typedef typename packet_traits<Scalar>::type DefaultPacketType;
return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size);
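A small sketch of what this computation yields, assuming 4-byte floats and a 16-byte alignment requirement (first_aligned is internal API, so this is illustrative only):

#include <Eigen/Core>
using namespace Eigen;

int main()
{
  alignas(16) float buffer[32];
  float* p = buffer + 1;                               // 4 bytes past a 16-byte boundary
  // AlignmentSize = 16/4 = 4 and AlignmentMask = 3, so the first aligned element is
  // at index (4 - 1) & 3 == 3, unless the array is shorter than that.
  Index i = internal::first_aligned<16>(p, Index(8));  // i == 3
  Index j = internal::first_aligned<16>(p, Index(2));  // j == 2 (clamped to the size)
  (void)i; (void)j;
  return 0;
}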


@ -50,6 +50,7 @@
THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE,
THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE,
THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE,
OUT_OF_RANGE_ACCESS,
YOU_MADE_A_PROGRAMMING_MISTAKE,
EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT,
EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE,


@ -390,9 +390,9 @@ struct transfer_constness
* a*d. Evaluating can be beneficial for example if every coefficient access in the resulting expression causes
* many coefficient accesses in the nested expressions -- as is the case with matrix product for example.
*
* \param T the type of the expression being nested.
* \param n the number of coefficient accesses in the nested expression for each coefficient access in the bigger expression.
* \param PlainObject the type of the temporary if needed.
* \tparam T the type of the expression being nested.
* \tparam n the number of coefficient accesses in the nested expression for each coefficient access in the bigger expression.
* \tparam PlainObject the type of the temporary if needed.
*/
template<typename T, int n, typename PlainObject = typename plain_object_eval<T>::type> struct nested_eval
{
@ -575,7 +575,7 @@ template <int ProductTag> struct product_promote_storage_type<Dense,
template <int ProductTag> struct product_promote_storage_type<PermutationStorage, Dense, ProductTag> { typedef Dense ret; };
/** \internal gives the plain matrix or array type to store a row/column/diagonal of a matrix type.
* \param Scalar optional parameter allowing to pass a different scalar type than the one of the MatrixType.
* \tparam Scalar optional parameter allowing to pass a different scalar type than the one of the MatrixType.
*/
template<typename ExpressionType, typename Scalar = typename ExpressionType::Scalar>
struct plain_row_type


@ -375,8 +375,12 @@ namespace internal {
* Performs a QR step on a tridiagonal symmetric matrix represented as a
* pair of two vectors \a diag and \a subdiag.
*
* \param matA the input selfadjoint matrix
* \param hCoeffs returned Householder coefficients
* \param diag the diagonal part of the input selfadjoint tridiagonal matrix
* \param subdiag the sub-diagonal part of the input selfadjoint tridiagonal matrix
* \param start starting index of the submatrix to work on
* \param end last+1 index of the submatrix to work on
* \param matrixQ pointer to the column-major matrix holding the eigenvectors, can be 0
* \param n size of the input matrix
*
* For compilation efficiency reasons, this procedure does not use eigen expression
* for its arguments.
@ -467,9 +471,10 @@ namespace internal {
* \brief Compute the eigendecomposition from a tridiagonal matrix
*
* \param[in,out] diag : On input, the diagonal of the matrix, on output the eigenvalues
* \param[in] subdiag : The subdiagonal part of the matrix.
* \param[in,out] : On input, the maximum number of iterations, on output, the effective number of iterations.
* \param[out] eivec : The matrix to store the eigenvectors... if needed. allocated on input
* \param[in,out] subdiag : The subdiagonal part of the matrix (entries are modified during the decomposition)
* \param[in] maxIterations : the maximum number of iterations
* \param[in] computeEigenvectors : whether the eigenvectors have to be computed or not
* \param[out] eivec : The matrix to store the eigenvectors if computeEigenvectors==true. Must be allocated on input.
* \returns \c Success or \c NoConvergence
*/
template<typename MatrixType, typename DiagType, typename SubDiagType>
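A sketch of the public entry point that forwards to this routine, assuming the SelfAdjointEigenSolver::computeFromTridiagonal() interface:

#include <Eigen/Eigenvalues>
using namespace Eigen;

int main()
{
  VectorXd diag    = VectorXd::Random(6);   // main diagonal of the tridiagonal matrix
  VectorXd subdiag = VectorXd::Random(5);   // sub-diagonal, one entry shorter
  SelfAdjointEigenSolver<MatrixXd> eig;
  eig.computeFromTridiagonal(diag, subdiag, ComputeEigenvectors);
  if (eig.info() == Success) {
    VectorXd evals = eig.eigenvalues();     // sorted eigenvalues
    MatrixXd evecs = eig.eigenvectors();    // matching eigenvectors
  }
  return 0;
}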


@ -304,7 +304,6 @@ struct evaluator_traits<Homogeneous<ArgType,Direction> >
{
typedef typename storage_kind_to_evaluator_kind<typename ArgType::StorageKind>::Kind Kind;
typedef HomogeneousShape Shape;
static const int AssumeAliasing = 0;
};
template<> struct AssignmentKind<DenseShape,HomogeneousShape> { typedef Dense2Dense Kind; };


@ -22,8 +22,8 @@ namespace Eigen {
* A hyperplane is an affine subspace of dimension n-1 in a space of dimension n.
* For example, a hyperplane in a plane is a line; a hyperplane in 3-space is a plane.
*
* \param _Scalar the scalar type, i.e., the type of the coefficients
* \param _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic.
* \tparam _Scalar the scalar type, i.e., the type of the coefficients
* \tparam _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic.
* Notice that the dimension of the hyperplane is _AmbientDim-1.
*
 * This class represents a hyperplane as the zero set of the implicit equation
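A minimal sketch, assuming a plane in 3-space built from a unit normal and a point on the plane (expected values in the comments):

#include <Eigen/Geometry>
using namespace Eigen;

int main()
{
  // The plane z = 2: normal (0,0,1) through the point (0,0,2).
  Hyperplane<float,3> plane(Vector3f::UnitZ(), Vector3f(0.f, 0.f, 2.f));
  float    d = plane.signedDistance(Vector3f(1.f, 1.f, 5.f));   // 3
  Vector3f q = plane.projection(Vector3f(1.f, 1.f, 5.f));       // (1, 1, 2)
  return 0;
}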


@ -23,8 +23,8 @@ namespace Eigen {
* direction vector \f$ \mathbf{d} \f$ such that the line corresponds to
* the set \f$ l(t) = \mathbf{o} + t \mathbf{d} \f$, \f$ t \in \mathbf{R} \f$.
*
* \param _Scalar the scalar type, i.e., the type of the coefficients
* \param _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic.
* \tparam _Scalar the scalar type, i.e., the type of the coefficients
* \tparam _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic.
*/
template <typename _Scalar, int _AmbientDim, int _Options>
class ParametrizedLine
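A minimal sketch, assuming the Through() factory for a 2D line (expected values in the comments):

#include <Eigen/Geometry>
using namespace Eigen;

int main()
{
  // Line through (0,0) and (2,0); Through() normalizes the direction.
  ParametrizedLine<float,2> line =
      ParametrizedLine<float,2>::Through(Vector2f(0.f, 0.f), Vector2f(2.f, 0.f));
  Vector2f p = line.pointAt(3.f);                  // origin + 3 * direction == (3, 0)
  float    d = line.distance(Vector2f(1.f, 2.f));  // 2
  return 0;
}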


@ -18,7 +18,7 @@ namespace Eigen {
*
* \brief Represents a rotation/orientation in a 2 dimensional space.
*
* \param _Scalar the scalar type, i.e., the type of the coefficients
* \tparam _Scalar the scalar type, i.e., the type of the coefficients
*
 * This class is equivalent to a single scalar representing a counterclockwise rotation
 * as a single angle in radians. It provides some additional features such as the automatic
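A minimal sketch, composing two planar rotations and applying the result to a vector:

#include <Eigen/Geometry>
using namespace Eigen;

int main()
{
  Rotation2D<float> r1(0.25f * 3.14159265f);   // 45 degrees
  Rotation2D<float> r2(0.50f * 3.14159265f);   // 90 degrees
  Rotation2Df r = r2 * r1;                     // composition: 135 degrees
  Vector2f v = r * Vector2f::UnitX();          // rotated unit vector
  Matrix2f R = r.toRotationMatrix();           // explicit 2x2 rotation matrix
  return 0;
}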


@ -22,8 +22,8 @@ struct rotation_base_generic_product_selector;
*
* \brief Common base class for compact rotation representations
*
* \param Derived is the derived type, i.e., a rotation type
* \param _Dim the dimension of the space
* \tparam Derived is the derived type, i.e., a rotation type
* \tparam _Dim the dimension of the space
*/
template<typename Derived, int _Dim>
class RotationBase
@ -164,8 +164,8 @@ namespace internal {
*
* Helper function to return an arbitrary rotation object to a rotation matrix.
*
* \param Scalar the numeric type of the matrix coefficients
* \param Dim the dimension of the current space
* \tparam Scalar the numeric type of the matrix coefficients
* \tparam Dim the dimension of the current space
*
* It returns a Dim x Dim fixed size matrix.
*


@ -18,7 +18,7 @@ namespace Eigen {
*
* \brief Represents a generic uniform scaling transformation
*
* \param _Scalar the scalar type, i.e., the type of the coefficients.
* \tparam _Scalar the scalar type, i.e., the type of the coefficients.
*
 * This class represents a uniform scaling transformation. It is the return
* type of Scaling(Scalar), and most of the time this is the only way it


@ -18,8 +18,8 @@ namespace Eigen {
*
* \brief Represents a translation transformation
*
* \param _Scalar the scalar type, i.e., the type of the coefficients.
* \param _Dim the dimension of the space, can be a compile time value or Dynamic
* \tparam _Scalar the scalar type, i.e., the type of the coefficients.
* \tparam _Dim the dimension of the space, can be a compile time value or Dynamic
*
 * \note This class is not meant to be used to store a translation transformation,
 * but rather to simplify the construction and update of Transform objects.
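A minimal sketch of that intended use, composing a translation with a rotation into an affine Transform:

#include <Eigen/Geometry>
using namespace Eigen;

int main()
{
  Translation3f shift(1.f, 2.f, 3.f);
  AngleAxisf    rot(0.5f, Vector3f::UnitZ());
  Affine3f      T = shift * rot;             // rotate first, then translate
  Vector3f      p = T * Vector3f::Zero();    // == (1, 2, 3)
  return 0;
}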


@ -21,7 +21,7 @@ namespace Eigen {
* References : C-J. Lin and J. J. Moré, Incomplete Cholesky Factorizations with
* Limited memory, SIAM J. Sci. Comput. 21(1), pp. 24-45, 1999
*
* \tparam _MatrixType The type of the sparse matrix. It is advised to give a row-oriented sparse matrix
* \tparam Scalar the scalar type of the input matrices
* \tparam _UpLo The triangular part that will be used for the computations. It can be Lower
* or Upper. Default is Lower.
* \tparam _OrderingType The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering<int>,


@ -29,7 +29,7 @@ template<typename _MatrixType> struct traits<FullPivLU<_MatrixType> >
*
* \brief LU decomposition of a matrix with complete pivoting, and related features
*
* \param MatrixType the type of the matrix of which we are computing the LU decomposition
* \tparam _MatrixType the type of the matrix of which we are computing the LU decomposition
*
* This class represents a LU decomposition of any matrix, with complete pivoting: the matrix A is
* decomposed as \f$ A = P^{-1} L U Q^{-1} \f$ where L is unit-lower-triangular, U is
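A minimal usage sketch, assuming the usual solve/rank/kernel members of this decomposition:

#include <Eigen/Dense>
using namespace Eigen;

int main()
{
  MatrixXd A = MatrixXd::Random(4, 4);
  VectorXd b = VectorXd::Random(4);
  FullPivLU<MatrixXd> lu(A);
  VectorXd x = lu.solve(b);    // a solution of A x = b, if one exists
  Index    r = lu.rank();      // numerical rank revealed by the complete pivoting
  MatrixXd K = lu.kernel();    // basis of the null space of A
  return 0;
}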


@ -34,7 +34,7 @@ template<typename _MatrixType> struct traits<PartialPivLU<_MatrixType> >
*
* \brief LU decomposition of a matrix with partial pivoting, and related features
*
* \param MatrixType the type of the matrix of which we are computing the LU decomposition
* \tparam _MatrixType the type of the matrix of which we are computing the LU decomposition
*
* This class represents a LU decomposition of a \b square \b invertible matrix, with partial pivoting: the matrix A
* is decomposed as A = PLU where L is unit-lower-triangular, U is upper-triangular, and P


@ -84,8 +84,11 @@ StorageIndex cs_tdfs(StorageIndex j, StorageIndex k, StorageIndex *head, const S
/** \internal
* \ingroup OrderingMethods_Module
* Approximate minimum degree ordering algorithm.
* \returns the permutation P reducing the fill-in of the input matrix \a C
* The input matrix \a C must be a selfadjoint compressed column major SparseMatrix object. Both the upper and lower parts have to be stored, but the diagonal entries are optional.
*
* \param[in] C the input selfadjoint matrix stored in compressed column major format.
* \param[out] perm the permutation P reducing the fill-in of the input matrix \a C
*
* Note that the input matrix \a C must be complete, that is both the upper and lower parts have to be stored, as well as the diagonal entries.
* On exit the values of C are destroyed */
template<typename Scalar, typename StorageIndex>
void minimum_degree_ordering(SparseMatrix<Scalar,ColMajor,StorageIndex>& C, PermutationMatrix<Dynamic,Dynamic,StorageIndex>& perm)


@ -516,7 +516,7 @@ static IndexType init_rows_cols /* returns true if OK, or false otherwise */
Col [col].start = p [col] ;
Col [col].length = p [col+1] - p [col] ;
if (Col [col].length < 0)
if ((Col [col].length) < 0) // extra parentheses to work-around gcc bug 10200
{
/* column pointers must be non-decreasing */
stats [COLAMD_STATUS] = COLAMD_ERROR_col_length_negative ;


@ -19,20 +19,21 @@ namespace internal {
/** \internal
* \ingroup OrderingMethods_Module
* \returns the symmetric pattern A^T+A from the input matrix A.
* \param[in] A the input non-symmetric matrix
* \param[out] symmat the symmetric pattern A^T+A from the input matrix \a A.
* FIXME: The values should not be considered here
*/
template<typename MatrixType>
void ordering_helper_at_plus_a(const MatrixType& mat, MatrixType& symmat)
void ordering_helper_at_plus_a(const MatrixType& A, MatrixType& symmat)
{
MatrixType C;
C = mat.transpose(); // NOTE: Could be costly
C = A.transpose(); // NOTE: Could be costly
for (int i = 0; i < C.rows(); i++)
{
for (typename MatrixType::InnerIterator it(C, i); it; ++it)
it.valueRef() = 0.0;
}
symmat = C + mat;
symmat = C + A;
}
}


@ -184,7 +184,7 @@ class PastixBase : public SparseSolverBase<Derived>
* The statistics related to the different phases of factorization and solve are saved here as well
* \sa analyzePattern() factorize()
*/
Array<RealScalar,IPARM_SIZE,1>& dparm()
Array<double,DPARM_SIZE,1>& dparm()
{
return m_dparm;
}
@ -244,8 +244,8 @@ class PastixBase : public SparseSolverBase<Derived>
mutable ComputationInfo m_info;
mutable pastix_data_t *m_pastixdata; // Data structure for pastix
mutable int m_comm; // The MPI communicator identifier
mutable Matrix<int,IPARM_SIZE,1> m_iparm; // integer vector for the input parameters
mutable Matrix<double,DPARM_SIZE,1> m_dparm; // Scalar vector for the input parameters
mutable Array<int,IPARM_SIZE,1> m_iparm; // integer vector for the input parameters
mutable Array<double,DPARM_SIZE,1> m_dparm; // Scalar vector for the input parameters
mutable Matrix<StorageIndex,Dynamic,1> m_perm; // Permutation vector
mutable Matrix<StorageIndex,Dynamic,1> m_invp; // Inverse permutation vector
mutable int m_size; // Size of the matrix
@ -268,7 +268,7 @@ void PastixBase<Derived>::init()
0, 0, 0, 1, m_iparm.data(), m_dparm.data());
m_iparm[IPARM_MATRIX_VERIFICATION] = API_NO;
m_iparm[IPARM_VERBOSE] = 2;
m_iparm[IPARM_VERBOSE] = API_VERBOSE_NOT;
m_iparm[IPARM_ORDERING] = API_ORDER_SCOTCH;
m_iparm[IPARM_INCOMPLETE] = API_NO;
m_iparm[IPARM_OOC_LIMIT] = 2000;
@ -581,6 +581,7 @@ class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> >
void grabMatrix(const MatrixType& matrix, ColSpMatrix& out)
{
out.resize(matrix.rows(), matrix.cols());
// Pastix supports only lower, column-major matrices
out.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>();
internal::c_to_fortran_numbering(out);
@ -666,6 +667,7 @@ class PastixLDLT : public PastixBase< PastixLDLT<_MatrixType, _UpLo> >
void grabMatrix(const MatrixType& matrix, ColSpMatrix& out)
{
// Pastix supports only lower, column-major matrices
out.resize(matrix.rows(), matrix.cols());
out.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>();
internal::c_to_fortran_numbering(out);
}


@ -28,7 +28,7 @@ template<typename _MatrixType> struct traits<ColPivHouseholderQR<_MatrixType> >
*
* \brief Householder rank-revealing QR decomposition of a matrix with column-pivoting
*
* \param MatrixType the type of the matrix of which we are computing the QR decomposition
* \tparam _MatrixType the type of the matrix of which we are computing the QR decomposition
*
* This class performs a rank-revealing QR decomposition of a matrix \b A into matrices \b P, \b Q and \b R
* such that
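A minimal usage sketch, assuming a tall, possibly rank-deficient matrix and the usual solve/rank members:

#include <Eigen/Dense>
using namespace Eigen;

int main()
{
  MatrixXd A = MatrixXd::Random(6, 4);
  VectorXd b = VectorXd::Random(6);
  ColPivHouseholderQR<MatrixXd> qr(A);
  VectorXd x = qr.solve(b);    // least-squares solution of A x = b
  Index    r = qr.rank();      // numerical rank revealed by the column pivoting
  return 0;
}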


@ -37,7 +37,7 @@ struct traits<FullPivHouseholderQRMatrixQReturnType<MatrixType> >
*
* \brief Householder rank-revealing QR decomposition of a matrix with full pivoting
*
* \param MatrixType the type of the matrix of which we are computing the QR decomposition
* \tparam _MatrixType the type of the matrix of which we are computing the QR decomposition
*
* This class performs a rank-revealing QR decomposition of a matrix \b A into matrices \b P, \b P', \b Q and \b R
* such that


@ -21,7 +21,7 @@ namespace Eigen {
*
* \brief Householder QR decomposition of a matrix
*
* \param MatrixType the type of the matrix of which we are computing the QR decomposition
* \tparam _MatrixType the type of the matrix of which we are computing the QR decomposition
*
* This class performs a QR decomposition of a matrix \b A into matrices \b Q and \b R
* such that


@ -47,9 +47,8 @@ struct traits<BDCSVD<_MatrixType> >
*
* \brief class Bidiagonal Divide and Conquer SVD
*
* \param MatrixType the type of the matrix of which we are computing the SVD decomposition
* We plan to have a very similar interface to JacobiSVD on this class.
* It should be used to speed up the calcul of SVD for big matrices.
* \tparam _MatrixType the type of the matrix of which we are computing the SVD decomposition
*
*/
template<typename _MatrixType>
class BDCSVD : public SVDBase<BDCSVD<_MatrixType> >


@ -449,8 +449,8 @@ struct traits<JacobiSVD<_MatrixType,QRPreconditioner> >
*
* \brief Two-sided Jacobi SVD decomposition of a rectangular matrix
*
* \param MatrixType the type of the matrix of which we are computing the SVD decomposition
* \param QRPreconditioner this optional parameter allows to specify the type of QR decomposition that will be used internally
* \tparam _MatrixType the type of the matrix of which we are computing the SVD decomposition
 * \tparam QRPreconditioner this optional parameter allows specifying the type of QR decomposition that will be used internally
* for the R-SVD step for non-square matrices. See discussion of possible values below.
*
* SVD decomposition consists in decomposing any n-by-p matrix \a A as a product
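A minimal usage sketch, assuming thin unitaries and the usual singularValues()/solve() members:

#include <Eigen/Dense>
using namespace Eigen;

int main()
{
  MatrixXf A = MatrixXf::Random(5, 3);
  JacobiSVD<MatrixXf> svd(A, ComputeThinU | ComputeThinV);
  VectorXf s = svd.singularValues();                // sorted in decreasing order
  VectorXf b = VectorXf::Random(5);
  VectorXf x = svd.solve(b);                        // least-squares solution of A x = b
  MatrixXf Ar = svd.matrixU() * s.asDiagonal() * svd.matrixV().transpose();  // ~= A
  return 0;
}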


@ -39,18 +39,16 @@ namespace internal {
} // end namespace internal
/** \ingroup SparseCholesky_Module
* \brief A direct sparse Cholesky factorizations
* \brief A base class for direct sparse Cholesky factorizations
*
* These classes provide LL^T and LDL^T Cholesky factorizations of sparse matrices that are
* selfadjoint and positive definite. The factorization allows for solving A.X = B where
* This is a base class for LL^T and LDL^T Cholesky factorizations of sparse matrices that are
* selfadjoint and positive definite. These factorizations allow for solving A.X = B where
* X and B can be either dense or sparse.
*
* In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization
* such that the factorized matrix is P A P^-1.
*
* \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
* \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
* or Upper. Default is Lower.
* \tparam Derived the type of the derived class, that is the actual factorization type.
*
*/
template<typename Derived>
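A minimal sketch using one of the derived factorizations (SimplicialLDLT is assumed here) on a small SPD tridiagonal matrix:

#include <Eigen/SparseCore>
#include <Eigen/SparseCholesky>
using namespace Eigen;

int main()
{
  const int n = 5;
  SparseMatrix<double> A(n, n);                   // 2,-1 tridiagonal stencil: SPD
  for (int i = 0; i < n; ++i) {
    A.insert(i, i) = 2.0;
    if (i + 1 < n) { A.insert(i, i + 1) = -1.0; A.insert(i + 1, i) = -1.0; }
  }
  A.makeCompressed();
  VectorXd b = VectorXd::Ones(n);
  SimplicialLDLT<SparseMatrix<double> > ldlt(A);  // analyzePattern + factorize
  if (ldlt.info() == Success) {
    VectorXd x = ldlt.solve(b);
  }
  return 0;
}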


@ -22,6 +22,16 @@ struct traits<SparseCompressedBase<Derived> > : traits<Derived>
} // end namespace internal
/** \ingroup SparseCore_Module
* \class SparseCompressedBase
* \brief Common base class for sparse [compressed]-{row|column}-storage format.
*
* This class defines the common interface for all derived classes implementing the compressed sparse storage format, such as:
* - SparseMatrix
* - Ref<SparseMatrixType,Options>
* - Map<SparseMatrixType>
*
*/
template<typename Derived>
class SparseCompressedBase
: public SparseMatrixBase<Derived>


@ -37,11 +37,15 @@ struct traits<Map<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, St
};
} // end namespace internal
template<typename Derived,
int Level = internal::accessors_level<Derived>::has_write_access ? WriteAccessors : ReadOnlyAccessors
> class SparseMapBase;
/** \ingroup SparseCore_Module
 * \class SparseMapBase
 * \brief Common base class for Map and Ref instances of sparse matrices and vectors.
*/
template<typename Derived>
class SparseMapBase<Derived,ReadOnlyAccessors>
: public SparseCompressedBase<Derived>
@ -71,22 +75,33 @@ class SparseMapBase<Derived,ReadOnlyAccessors>
public:
/** \copydoc SparseMatrixBase::rows() */
inline Index rows() const { return IsRowMajor ? m_outerSize : m_innerSize; }
/** \copydoc SparseMatrixBase::cols() */
inline Index cols() const { return IsRowMajor ? m_innerSize : m_outerSize; }
/** \copydoc SparseMatrixBase::innerSize() */
inline Index innerSize() const { return m_innerSize; }
/** \copydoc SparseMatrixBase::outerSize() */
inline Index outerSize() const { return m_outerSize; }
/** \copydoc SparseCompressedBase::nonZeros */
inline Index nonZeros() const { return m_zero_nnz[1]; }
/** \copydoc SparseCompressedBase::isCompressed */
bool isCompressed() const { return m_innerNonZeros==0; }
//----------------------------------------
// direct access interface
/** \copydoc SparseMatrix::valuePtr */
inline const Scalar* valuePtr() const { return m_values; }
/** \copydoc SparseMatrix::innerIndexPtr */
inline const StorageIndex* innerIndexPtr() const { return m_innerIndices; }
/** \copydoc SparseMatrix::outerIndexPtr */
inline const StorageIndex* outerIndexPtr() const { return m_outerIndex; }
/** \copydoc SparseMatrix::innerNonZeroPtr */
inline const StorageIndex* innerNonZeroPtr() const { return m_innerNonZeros; }
//----------------------------------------
/** \copydoc SparseMatrix::coeff */
inline Scalar coeff(Index row, Index col) const
{
const Index outer = IsRowMajor ? row : col;
@ -125,6 +140,10 @@ class SparseMapBase<Derived,ReadOnlyAccessors>
inline SparseMapBase() {}
};
/** \ingroup SparseCore_Module
 * \class SparseMapBase
 * \brief Common base class for writable Map and Ref instances of sparse matrices and vectors.
*/
template<typename Derived>
class SparseMapBase<Derived,WriteAccessors>
: public SparseMapBase<Derived,ReadOnlyAccessors>
@ -185,9 +204,23 @@ class SparseMapBase<Derived,WriteAccessors>
inline SparseMapBase() {}
};
/** \ingroup SparseCore_Module
*
* \brief Specialization of class Map for SparseMatrix-like storage.
*
* \tparam SparseMatrixType the equivalent sparse matrix type of the referenced data, it must be a template instance of class SparseMatrix.
*
* \sa class Map, class SparseMatrix, class Ref<SparseMatrixType,Options>
*/
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType>
class Map<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType>
: public SparseMapBase<Map<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> >
#else
template<typename SparseMatrixType>
class Map<SparseMatrixType>
: public SparseMapBase<Derived,WriteAccessors>
#endif
{
public:
typedef SparseMapBase<Map> Base;
@ -196,6 +229,12 @@ class Map<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType>
public:
/** Constructs a read-write Map to a sparse matrix of size \a rows x \a cols, containing \a nnz non-zero coefficients,
* stored as a sparse format as defined by the pointers \a outerIndexPtr, \a innerIndexPtr, and \a valuePtr.
* If the optional parameter \a innerNonZerosPtr is the null pointer, then a standard compressed format is assumed.
*
* More details on the expected storage schemes are given in the \ref TutorialSparse "manual pages".
*/
inline Map(Index rows, Index cols, Index nnz, StorageIndex* outerIndexPtr,
StorageIndex* innerIndexPtr, Scalar* valuePtr, StorageIndex* innerNonZerosPtr = 0)
: Base(rows, cols, nnz, outerIndexPtr, innerIndexPtr, valuePtr, innerNonZerosPtr)
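A minimal sketch of this constructor, assuming hand-built compressed column storage arrays describing the 3x3 matrix [[1,0,2],[0,3,0],[0,0,4]]:

#include <Eigen/SparseCore>
using namespace Eigen;

int main()
{
  int    outer[4]  = {0, 1, 2, 4};       // start of each column in the arrays below
  int    inner[4]  = {0, 1, 0, 2};       // row index of each stored value
  double values[4] = {1.0, 3.0, 2.0, 4.0};
  Map<SparseMatrix<double> > view(3, 3, 4, outer, inner, values);  // no copy is made
  double v = view.coeff(0, 2);           // == 2
  return 0;
}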


@ -108,20 +108,25 @@ protected:
/**
* \ingroup Sparse_Module
* \ingroup SparseCore_Module
*
* \brief A sparse matrix expression referencing an existing sparse expression
*
* \tparam PlainObjectType the equivalent sparse matrix type of the referenced data
* \tparam SparseMatrixType the equivalent sparse matrix type of the referenced data, it must be a template instance of class SparseMatrix.
 * \tparam Options specifies whether a standard compressed format is required; \c Options can be \c #StandardCompressedFormat or \c 0.
* The default is \c 0.
* \tparam StrideType Only used for dense Ref
*
* \sa class Ref
*/
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType>
class Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType >
: public internal::SparseRefBase<Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType > >
#else
template<typename SparseMatrixType, int Options>
class Ref<SparseMatrixType, Options>
: public SparseMapBase<Derived,WriteAccessors> // yes, that's weird to use Derived here, but that works!
#endif
{
typedef SparseMatrix<MatScalar,MatOptions,MatIndex> PlainObjectType;
typedef internal::traits<Ref> Traits;
@ -155,6 +160,7 @@ class Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType >
template<typename Derived>
inline Ref(const SparseCompressedBase<Derived>& expr)
#else
/** Implicit constructor from any sparse expression (2D matrix or 1D vector) */
template<typename Derived>
inline Ref(SparseCompressedBase<Derived>& expr)
#endif
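A minimal sketch of how such a Ref is typically consumed (the helper name sumOfValues is illustrative): a function taking Ref<const SparseMatrix<double> > binds to any compatible sparse object without copying when it is already stored in the expected compressed format:

#include <Eigen/SparseCore>
using namespace Eigen;

double sumOfValues(const Ref<const SparseMatrix<double> >& m)
{
  double s = 0;
  for (Index k = 0; k < m.nonZeros(); ++k)
    s += m.valuePtr()[k];
  return s;
}

int main()
{
  SparseMatrix<double> A(2, 2);
  A.insert(0, 0) = 1.0;
  A.insert(1, 1) = 2.0;
  A.makeCompressed();
  double s = sumOfValues(A);   // binds to A's arrays directly; s == 3
  return 0;
}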
@ -225,19 +231,23 @@ class Ref<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType
/**
* \ingroup Sparse_Module
* \ingroup SparseCore_Module
*
* \brief A sparse vector expression referencing an existing sparse vector expression
*
* \tparam PlainObjectType the equivalent sparse matrix type of the referenced data
* \tparam Options Not used for SparseVector.
* \tparam StrideType Only used for dense Ref
* \tparam SparseVectorType the equivalent sparse vector type of the referenced data, it must be a template instance of class SparseVector.
*
* \sa class Ref
*/
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType>
class Ref<SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType >
: public internal::SparseRefBase<Ref<SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType > >
#else
template<typename SparseVectorType>
class Ref<SparseVectorType>
: public SparseMapBase<Derived,WriteAccessors>
#endif
{
typedef SparseVector<MatScalar,MatOptions,MatIndex> PlainObjectType;
typedef internal::traits<Ref> Traits;
@ -259,6 +269,7 @@ class Ref<SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType >
template<typename Derived>
inline Ref(const SparseCompressedBase<Derived>& expr)
#else
/** Implicit constructor from any 1D sparse vector expression */
template<typename Derived>
inline Ref(SparseCompressedBase<Derived>& expr)
#endif


@ -211,8 +211,6 @@ struct evaluator_traits<SparseSelfAdjointView<MatrixType,Mode> >
{
typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
typedef SparseSelfAdjointShape Shape;
static const int AssumeAliasing = 0;
};
struct SparseSelfAdjoint2Sparse {};


@ -14,22 +14,21 @@
namespace Eigen {
namespace internal {
/**
* \brief Performs numeric block updates from a given supernode to a single column
*
* \param segsize Size of the segment (and blocks ) to use for updates
* \param[in,out] dense Packed values of the original matrix
* \param tempv temporary vector to use for updates
* \param lusup array containing the supernodes
* \param lda Leading dimension in the supernode
* \param nrow Number of rows in the rectangular part of the supernode
* \param lsub compressed row subscripts of supernodes
* \param lptr pointer to the first column of the current supernode in lsub
* \param no_zeros Number of nonzeros elements before the diagonal part of the supernode
* \return 0 on success
*/
template <int SegSizeAtCompileTime> struct LU_kernel_bmod
{
/** \internal
* \brief Performs numeric block updates from a given supernode to a single column
*
* \param segsize Size of the segment (and blocks ) to use for updates
* \param[in,out] dense Packed values of the original matrix
* \param tempv temporary vector to use for updates
* \param lusup array containing the supernodes
* \param lda Leading dimension in the supernode
* \param nrow Number of rows in the rectangular part of the supernode
* \param lsub compressed row subscripts of supernodes
* \param lptr pointer to the first column of the current supernode in lsub
* \param no_zeros Number of nonzeros elements before the diagonal part of the supernode
*/
template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
static EIGEN_DONT_INLINE void run(const Index segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros);


@ -691,7 +691,6 @@ struct evaluator_traits<SparseQRMatrixQReturnType<SparseQRType> >
typedef typename SparseQRType::MatrixType MatrixType;
typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
typedef SparseShape Shape;
static const int AssumeAliasing = 0;
};
template< typename DstXprType, typename SparseQRType>


@ -32,17 +32,19 @@ A << 1, 2, 3, // Initialize A. The elements can also be
B << A, A, A; // B is three horizontally stacked A's.
A.fill(10); // Fill A with all 10's.
// Eigen // Matlab
MatrixXd::Identity(rows,cols) // eye(rows,cols)
C.setIdentity(rows,cols) // C = eye(rows,cols)
MatrixXd::Zero(rows,cols) // zeros(rows,cols)
C.setZero(rows,cols) // C = ones(rows,cols)
MatrixXd::Ones(rows,cols) // ones(rows,cols)
C.setOnes(rows,cols) // C = ones(rows,cols)
MatrixXd::Random(rows,cols) // rand(rows,cols)*2-1 // MatrixXd::Random returns uniform random numbers in (-1, 1).
C.setRandom(rows,cols) // C = rand(rows,cols)*2-1
VectorXd::LinSpaced(size,low,high) // linspace(low,high,size)'
v.setLinSpaced(size,low,high) // v = linspace(low,high,size)'
// Eigen // Matlab
MatrixXd::Identity(rows,cols) // eye(rows,cols)
C.setIdentity(rows,cols) // C = eye(rows,cols)
MatrixXd::Zero(rows,cols) // zeros(rows,cols)
C.setZero(rows,cols) // C = zeros(rows,cols)
MatrixXd::Ones(rows,cols) // ones(rows,cols)
C.setOnes(rows,cols) // C = ones(rows,cols)
MatrixXd::Random(rows,cols) // rand(rows,cols)*2-1 // MatrixXd::Random returns uniform random numbers in (-1, 1).
C.setRandom(rows,cols) // C = rand(rows,cols)*2-1
VectorXd::LinSpaced(size,low,high) // linspace(low,high,size)'
v.setLinSpaced(size,low,high) // v = linspace(low,high,size)'
VectorXi::LinSpaced(((hi-low)/step)+1, // low:step:hi
low,low+step*(size-1))
// Matrix slicing and blocks. All expressions listed here are read/write.
@ -85,13 +87,15 @@ P.bottomRightCorner<rows,cols>() // P(end-rows+1:end, end-cols+1:end)
R.row(i) = P.col(j); // R(i, :) = P(:, j)'
R.col(j1).swap(mat1.col(j2)); // R(:, [j1 j2]) = R(:, [j2, j1])
// Views, transpose, etc; all read-write except for .adjoint().
// Views, transpose, etc;
// Eigen // Matlab
R.adjoint() // R'
R.transpose() // R.' or conj(R')
R.diagonal() // diag(R)
R.transpose() // R.' or conj(R') // Read-write
R.diagonal() // diag(R) // Read-write
x.asDiagonal() // diag(x)
R.transpose().colwise().reverse(); // rot90(R)
R.transpose().colwise().reverse() // rot90(R) // Read-write
R.replicate(i,j) // repmat(R,i,j)
// All the same as Matlab, but matlab doesn't have *= style operators.
// Matrix-vector. Matrix-matrix. Matrix-scalar.
@ -103,37 +107,39 @@ a *= M; R = P + Q; R = P/s;
R -= Q; R /= s;
// Vectorized operations on each element independently
// Eigen // Matlab
R = P.cwiseProduct(Q); // R = P .* Q
R = P.array() * s.array();// R = P .* s
R = P.cwiseQuotient(Q); // R = P ./ Q
R = P.array() / Q.array();// R = P ./ Q
R = P.array() + s.array();// R = P + s
R = P.array() - s.array();// R = P - s
R.array() += s; // R = R + s
R.array() -= s; // R = R - s
R.array() < Q.array(); // R < Q
R.array() <= Q.array(); // R <= Q
R.cwiseInverse(); // 1 ./ P
R.array().inverse(); // 1 ./ P
R.array().sin() // sin(P)
R.array().cos() // cos(P)
R.array().pow(s) // P .^ s
R.array().square() // P .^ 2
R.array().cube() // P .^ 3
R.cwiseSqrt() // sqrt(P)
R.array().sqrt() // sqrt(P)
R.array().exp() // exp(P)
R.array().log() // log(P)
R.cwiseMax(P) // max(R, P)
R.array().max(P.array()) // max(R, P)
R.cwiseMin(P) // min(R, P)
R.array().min(P.array()) // min(R, P)
R.cwiseAbs() // abs(P)
R.array().abs() // abs(P)
R.cwiseAbs2() // abs(P.^2)
R.array().abs2() // abs(P.^2)
(R.array() < s).select(P,Q); // (R < s ? P : Q)
// Eigen // Matlab
R = P.cwiseProduct(Q); // R = P .* Q
R = P.array() * s.array(); // R = P .* s
R = P.cwiseQuotient(Q); // R = P ./ Q
R = P.array() / Q.array(); // R = P ./ Q
R = P.array() + s.array(); // R = P + s
R = P.array() - s.array(); // R = P - s
R.array() += s; // R = R + s
R.array() -= s; // R = R - s
R.array() < Q.array(); // R < Q
R.array() <= Q.array(); // R <= Q
R.cwiseInverse(); // 1 ./ P
R.array().inverse(); // 1 ./ P
R.array().sin() // sin(P)
R.array().cos() // cos(P)
R.array().pow(s) // P .^ s
R.array().square() // P .^ 2
R.array().cube() // P .^ 3
R.cwiseSqrt() // sqrt(P)
R.array().sqrt() // sqrt(P)
R.array().exp() // exp(P)
R.array().log() // log(P)
R.cwiseMax(P) // max(R, P)
R.array().max(P.array()) // max(R, P)
R.cwiseMin(P) // min(R, P)
R.array().min(P.array()) // min(R, P)
R.cwiseAbs() // abs(P)
R.array().abs() // abs(P)
R.cwiseAbs2() // abs(P.^2)
R.array().abs2() // abs(P.^2)
(R.array() < s).select(P,Q ); // (R < s ? P : Q)
R = (Q.array()==0).select(P,A) // R(Q==0) = P(Q==0)
// Reductions.
int r, c;
@ -164,12 +170,12 @@ x.dot(y) // dot(x, y)
x.cross(y) // cross(x, y) Requires #include <Eigen/Geometry>
//// Type conversion
// Eigen // Matlab
A.cast<double>(); // double(A)
A.cast<float>(); // single(A)
A.cast<int>(); // int32(A)
A.real(); // real(A)
A.imag(); // imag(A)
// Eigen // Matlab
A.cast<double>(); // double(A)
A.cast<float>(); // single(A)
A.cast<int>(); // int32(A)
A.real(); // real(A)
A.imag(); // imag(A)
// if the original type equals destination type, no work is done
// Note that for most operations Eigen requires all operands to have the same type:


@ -42,6 +42,15 @@ template<> struct adjoint_specific<false> {
VERIFY_IS_APPROX(v1, v1.norm() * v3);
VERIFY_IS_APPROX(v3, v1.normalized());
VERIFY_IS_APPROX(v3.norm(), RealScalar(1));
// check null inputs
VERIFY_IS_APPROX((v1*0).normalized(), (v1*0));
RealScalar very_small = (std::numeric_limits<RealScalar>::min)();
VERIFY( (v1*very_small).norm() == 0 );
VERIFY_IS_APPROX((v1*very_small).normalized(), (v1*very_small));
v3 = v1*very_small;
v3.normalize();
VERIFY_IS_APPROX(v3, (v1*very_small));
// check compatibility of dot and adjoint
ref = NumTraits<Scalar>::IsInteger ? 0 : (std::max)((std::max)(v1.norm(),v2.norm()),(std::max)((square * v2).norm(),(square.adjoint() * v1).norm()));


@ -15,16 +15,18 @@
template<typename T, typename I> void test_incomplete_cholesky_T()
{
typedef SparseMatrix<T,0,I> SparseMatrixType;
ConjugateGradient<SparseMatrixType, Lower, IncompleteCholesky<T, Lower, AMDOrdering<I> > > cg_illt_lower_amd;
ConjugateGradient<SparseMatrixType, Lower, IncompleteCholesky<T, Lower, NaturalOrdering<I> > > cg_illt_lower_nat;
ConjugateGradient<SparseMatrixType, Upper, IncompleteCholesky<T, Upper, AMDOrdering<I> > > cg_illt_upper_amd;
ConjugateGradient<SparseMatrixType, Upper, IncompleteCholesky<T, Upper, NaturalOrdering<I> > > cg_illt_upper_nat;
ConjugateGradient<SparseMatrixType, Lower, IncompleteCholesky<T, Lower, AMDOrdering<I> > > cg_illt_lower_amd;
ConjugateGradient<SparseMatrixType, Lower, IncompleteCholesky<T, Lower, NaturalOrdering<I> > > cg_illt_lower_nat;
ConjugateGradient<SparseMatrixType, Upper, IncompleteCholesky<T, Upper, AMDOrdering<I> > > cg_illt_upper_amd;
ConjugateGradient<SparseMatrixType, Upper, IncompleteCholesky<T, Upper, NaturalOrdering<I> > > cg_illt_upper_nat;
ConjugateGradient<SparseMatrixType, Upper|Lower, IncompleteCholesky<T, Lower, AMDOrdering<I> > > cg_illt_uplo_amd;
CALL_SUBTEST( check_sparse_spd_solving(cg_illt_lower_amd) );
CALL_SUBTEST( check_sparse_spd_solving(cg_illt_lower_nat) );
CALL_SUBTEST( check_sparse_spd_solving(cg_illt_upper_amd) );
CALL_SUBTEST( check_sparse_spd_solving(cg_illt_upper_nat) );
CALL_SUBTEST( check_sparse_spd_solving(cg_illt_uplo_amd) );
}
void test_incomplete_cholesky()


@ -27,6 +27,14 @@ template<typename T> void test_pastix_T()
check_sparse_spd_solving(pastix_llt_upper);
check_sparse_spd_solving(pastix_ldlt_upper);
check_sparse_square_solving(pastix_lu);
// Some compilation check:
pastix_llt_lower.iparm();
pastix_llt_lower.dparm();
pastix_ldlt_lower.iparm();
pastix_ldlt_lower.dparm();
pastix_lu.iparm();
pastix_lu.dparm();
}
// There is no support for selfadjoint matrices with PaStiX.


@ -145,14 +145,31 @@ template<typename MatrixType> void product(const MatrixType& m)
VERIFY_IS_APPROX(res.col(r).noalias() = square * square.col(r), (square * square.col(r)).eval());
// inner product
Scalar x = square2.row(c) * square2.col(c2);
VERIFY_IS_APPROX(x, square2.row(c).transpose().cwiseProduct(square2.col(c2)).sum());
{
Scalar x = square2.row(c) * square2.col(c2);
VERIFY_IS_APPROX(x, square2.row(c).transpose().cwiseProduct(square2.col(c2)).sum());
}
// outer product
VERIFY_IS_APPROX(m1.col(c) * m1.row(r), m1.block(0,c,rows,1) * m1.block(r,0,1,cols));
VERIFY_IS_APPROX(m1.row(r).transpose() * m1.col(c).transpose(), m1.block(r,0,1,cols).transpose() * m1.block(0,c,rows,1).transpose());
VERIFY_IS_APPROX(m1.block(0,c,rows,1) * m1.row(r), m1.block(0,c,rows,1) * m1.block(r,0,1,cols));
VERIFY_IS_APPROX(m1.col(c) * m1.block(r,0,1,cols), m1.block(0,c,rows,1) * m1.block(r,0,1,cols));
VERIFY_IS_APPROX(m1.leftCols(1) * m1.row(r), m1.block(0,0,rows,1) * m1.block(r,0,1,cols));
VERIFY_IS_APPROX(m1.col(c) * m1.topRows(1), m1.block(0,c,rows,1) * m1.block(0,0,1,cols));
{
VERIFY_IS_APPROX(m1.col(c) * m1.row(r), m1.block(0,c,rows,1) * m1.block(r,0,1,cols));
VERIFY_IS_APPROX(m1.row(r).transpose() * m1.col(c).transpose(), m1.block(r,0,1,cols).transpose() * m1.block(0,c,rows,1).transpose());
VERIFY_IS_APPROX(m1.block(0,c,rows,1) * m1.row(r), m1.block(0,c,rows,1) * m1.block(r,0,1,cols));
VERIFY_IS_APPROX(m1.col(c) * m1.block(r,0,1,cols), m1.block(0,c,rows,1) * m1.block(r,0,1,cols));
VERIFY_IS_APPROX(m1.leftCols(1) * m1.row(r), m1.block(0,0,rows,1) * m1.block(r,0,1,cols));
VERIFY_IS_APPROX(m1.col(c) * m1.topRows(1), m1.block(0,c,rows,1) * m1.block(0,0,1,cols));
}
// Aliasing
{
ColVectorType x(cols); x.setRandom();
ColVectorType z(x);
ColVectorType y(cols); y.setZero();
ColSquareMatrixType A(cols,cols); A.setRandom();
// CwiseBinaryOp
VERIFY_IS_APPROX(x = y + A*x, A*z);
x = z;
// CwiseUnaryOp
VERIFY_IS_APPROX(x = Scalar(1.)*(A*x), A*z);
}
}


@ -9,6 +9,27 @@
#include "product.h"
template<typename T>
void test_aliasing()
{
int rows = internal::random<int>(1,12);
int cols = internal::random<int>(1,12);
typedef Matrix<T,Dynamic,Dynamic> MatrixType;
typedef Matrix<T,Dynamic,1> VectorType;
VectorType x(cols); x.setRandom();
VectorType z(x);
VectorType y(rows); y.setZero();
MatrixType A(rows,cols); A.setRandom();
// CwiseBinaryOp
VERIFY_IS_APPROX(x = y + A*x, A*z); // OK because "y + A*x" is marked as "assume-aliasing"
x = z;
// CwiseUnaryOp
VERIFY_IS_APPROX(x = T(1.)*(A*x), A*z); // OK because 1*(A*x) is replaced by (1*A*x) which is a Product<> expression
x = z;
// VERIFY_IS_APPROX(x = y-A*x, -A*z); // Not OK in 3.3 because x is resized before A*x gets evaluated
x = z;
}
void test_product_large()
{
for(int i = 0; i < g_repeat; i++) {
@ -17,6 +38,8 @@ void test_product_large()
CALL_SUBTEST_3( product(MatrixXi(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
CALL_SUBTEST_4( product(MatrixXcf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2), internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2))) );
CALL_SUBTEST_5( product(Matrix<float,Dynamic,Dynamic,RowMajor>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
CALL_SUBTEST_1( test_aliasing<float>() );
}
#if defined EIGEN_TEST_PART_6


@ -43,10 +43,16 @@ template<typename MatrixType> void product_notemporary(const MatrixType& m)
r1 = internal::random<Index>(8,rows-r0);
VERIFY_EVALUATION_COUNT( m3 = (m1 * m2.adjoint()), 1);
VERIFY_EVALUATION_COUNT( m3 = (m1 * m2.adjoint()).transpose(), 1);
VERIFY_EVALUATION_COUNT( m3.noalias() = m1 * m2.adjoint(), 0);
VERIFY_EVALUATION_COUNT( m3 = s1 * (m1 * m2.transpose()), 1);
// VERIFY_EVALUATION_COUNT( m3 = m3 + s1 * (m1 * m2.transpose()), 1);
VERIFY_EVALUATION_COUNT( m3.noalias() = s1 * (m1 * m2.transpose()), 0);
VERIFY_EVALUATION_COUNT( m3 = m3 + (m1 * m2.adjoint()), 1);
VERIFY_EVALUATION_COUNT( m3 = m3 + (m1 * m2.adjoint()).transpose(), 1);
VERIFY_EVALUATION_COUNT( m3.noalias() = m3 + m1 * m2.transpose(), 0);
VERIFY_EVALUATION_COUNT( m3.noalias() += m3 + m1 * m2.transpose(), 0);
VERIFY_EVALUATION_COUNT( m3.noalias() -= m3 + m1 * m2.transpose(), 0);


@ -88,6 +88,7 @@ typedef unsigned __int64 uint64_t;
#include "src/Tensor/TensorReductionCuda.h"
#include "src/Tensor/TensorArgMax.h"
#include "src/Tensor/TensorConcatenation.h"
#include "src/Tensor/TensorContractionMapper.h"
#include "src/Tensor/TensorContraction.h"
#include "src/Tensor/TensorContractionThreadPool.h"
#include "src/Tensor/TensorContractionCuda.h"


@ -109,11 +109,9 @@ template<int n, typename x> struct get;
template<int n, typename a, typename... as> struct get<n, type_list<a, as...>> : get<n-1, type_list<as...>> {};
template<typename a, typename... as> struct get<0, type_list<a, as...>> { typedef a type; };
template<int n EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, as)> struct get<n, type_list<EIGEN_TPL_PP_SPEC_HACK_USE(as)>> { static_assert((n - n) < 0, "meta-template get: The element to extract from a list must be smaller than the size of the list."); };
template<typename T, int n, T a, T... as> struct get<n, numeric_list<T, a, as...>> : get<n-1, numeric_list<T, as...>> {};
template<typename T, T a, T... as> struct get<0, numeric_list<T, a, as...>> { constexpr static T value = a; };
template<typename T, int n EIGEN_TPL_PP_SPEC_HACK_DEFC(T, as)> struct get<n, numeric_list<T EIGEN_TPL_PP_SPEC_HACK_USEC(as)>> { static_assert((n - n) < 0, "meta-template get: The element to extract from a list must be smaller than the size of the list."); };
/* always get type, regardless of dummy; good for parameter pack expansion */
@ -406,7 +404,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const std::vector<t>& a) {
template<typename Op, typename A, typename B, std::size_t N, int... n>
constexpr inline array<decltype(Op::run(A(), B())),N> h_array_zip(array<A, N> a, array<B, N> b, numeric_list<int, n...>)
{
return array<decltype(Op::run(A(), B())),N>{{ Op::run(array_get<n>(a), array_get<n>(b))... }};
return array<decltype(Op::run(A(), B())),N>{ Op::run(array_get<n>(a), array_get<n>(b))... };
}
template<typename Op, typename A, typename B, std::size_t N>
@ -434,7 +432,7 @@ constexpr inline auto array_zip_and_reduce(array<A, N> a, array<B, N> b) -> decl
template<typename Op, typename A, std::size_t N, int... n>
constexpr inline array<decltype(Op::run(A())),N> h_array_apply(array<A, N> a, numeric_list<int, n...>)
{
return array<decltype(Op::run(A())),N>{{ Op::run(array_get<n>(a))... }};
return array<decltype(Op::run(A())),N>{ Op::run(array_get<n>(a))... };
}
template<typename Op, typename A, std::size_t N>


@ -132,13 +132,13 @@ template <typename T> class array<T, 0> {
return *static_cast<const T*>(NULL);
}
static EIGEN_ALWAYS_INLINE std::size_t size() { return 0; }
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::size_t size() { return 0; }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE array() { }
#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
array(std::initializer_list<T> l) {
EIGEN_DEVICE_FUNC array(std::initializer_list<T> l) {
eigen_assert(l.size() == 0);
}
#endif


@ -78,7 +78,8 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) & !(Options_&DontAlign),
PacketAccess = (internal::packet_traits<Scalar>::size > 1),
Layout = Options_ & RowMajor ? RowMajor : ColMajor,
CoordAccess = true
CoordAccess = true,
RawAccess = true
};
static const int Options = Options_;
@ -118,7 +119,7 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
{
// The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
return coeff(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
return coeff(array<Index, NumIndices>{firstIndex, secondIndex, otherIndices...});
}
#endif
@ -158,7 +159,7 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
{
// The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
return coeffRef(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
return coeffRef(array<Index, NumIndices>{firstIndex, secondIndex, otherIndices...});
}
#endif
@ -198,7 +199,7 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
{
// The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
return this->operator()(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
return this->operator()(array<Index, NumIndices>{firstIndex, secondIndex, otherIndices...});
}
#else
EIGEN_DEVICE_FUNC
@ -265,7 +266,7 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
{
// The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
return operator()(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
return operator()(array<Index, NumIndices>{firstIndex, secondIndex, otherIndices...});
}
#else
EIGEN_DEVICE_FUNC
@ -341,7 +342,7 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
template<typename... IndexTypes>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index firstDimension, IndexTypes... otherDimensions)
: m_storage(internal::array_prod(array<Index, NumIndices>{{firstDimension, otherDimensions...}}), array<Index, NumIndices>{{firstDimension, otherDimensions...}})
: m_storage(internal::array_prod(array<Index, NumIndices>{firstDimension, otherDimensions...}), array<Index, NumIndices>{firstDimension, otherDimensions...})
{
// The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
@ -426,7 +427,7 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
{
// The number of dimensions used to resize a tensor must be equal to the rank of the tensor.
EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
resize(array<Index, NumIndices>{{firstDimension, otherDimensions...}});
resize(array<Index, NumIndices>{firstDimension, otherDimensions...});
}
#endif


@ -89,6 +89,7 @@ struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device>
BlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@ -134,7 +135,7 @@ struct traits<TensorTupleReducerOp<ReduceOp, Dims, XprType> > : public traits<Xp
typedef Index Scalar;
typedef typename XprType::Nested Nested;
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int NumDimensions = XprTraits::NumDimensions - array_size<Dims>::value;
static const int Layout = XprTraits::Layout;
};
@ -210,6 +211,7 @@ struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Devi
BlockAccess = false,
Layout = TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)

View File

@ -97,6 +97,7 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned & TensorEvaluator<RightArgType, Device>::IsAligned,
PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess,
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
RawAccess = TensorEvaluator<LeftArgType, Device>::RawAccess,
};
EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) :
@ -152,6 +153,8 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
return m_leftImpl.template packet<LoadMode>(index);
}
EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_leftImpl.data(); }
private:
TensorEvaluator<LeftArgType, Device> m_leftImpl;
TensorEvaluator<RightArgType, Device> m_rightImpl;

View File

@ -46,6 +46,21 @@ struct nested<TensorBroadcastingOp<Broadcast, XprType>, 1, typename eval<TensorB
typedef TensorBroadcastingOp<Broadcast, XprType> type;
};
template <typename Dims>
struct is_input_scalar {
static const bool value = false;
};
template <>
struct is_input_scalar<Sizes<> > {
static const bool value = true;
};
#ifndef EIGEN_EMULATE_CXX11_META_H
template <typename std::size_t... Indices>
struct is_input_scalar<Sizes<Indices...> > {
static const bool value = (Sizes<Indices...>::total_size == 1);
};
#endif
} // end namespace internal
@ -94,6 +109,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
IsAligned = false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
RawAccess = false
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@ -103,7 +119,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
// and store the result in a scalar. Instead one should reshape the scalar into an N-D
// tensor (N >= 1) with a single element first and then broadcast.
EIGEN_STATIC_ASSERT(NumDims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
const InputDimensions& input_dims = m_impl.dimensions();
const Broadcast& broadcast = op.broadcast();
for (int i = 0; i < NumDims; ++i) {
eigen_assert(input_dims[i] > 0);
@ -143,6 +159,10 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const
{
if (internal::is_input_scalar<typename internal::remove_all<InputDimensions>::type>::value) {
return m_impl.coeff(0);
}
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
return coeffColMajor(index);
} else {
@ -214,6 +234,10 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const
{
if (internal::is_input_scalar<typename internal::remove_all<InputDimensions>::type>::value) {
return internal::pset1<PacketReturnType>(m_impl.coeff(0));
}
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
return packetColMajor<LoadMode>(index);
} else {
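The static assertion and the new is_input_scalar handling above reflect the restriction spelled out in the comment: a scalar has to be reshaped into a one-element tensor of rank >= 1 before it can be broadcast. A minimal sketch (names and sizes are illustrative):

Eigen::Tensor<float, 1> s(1);
s(0) = 3.0f;
Eigen::array<Eigen::DenseIndex, 2> shape{{1, 1}};
Eigen::array<Eigen::DenseIndex, 2> bcast{{4, 5}};
Eigen::Tensor<float, 2> t = s.reshape(shape).broadcast(bcast);   // 4x5 tensor filled with 3.0f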

View File

@ -145,6 +145,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@ -304,6 +305,7 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
enum {
IsAligned = false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
RawAccess = false
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)

View File

@ -125,6 +125,7 @@ struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgTy
IsAligned = false,
PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess,
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
RawAccess = false
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@ -287,6 +288,7 @@ template<typename Axis, typename LeftArgType, typename RightArgType, typename De
IsAligned = false,
PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess,
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
RawAccess = false
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(XprType& op, const Device& device)

View File

@ -21,358 +21,6 @@ namespace Eigen {
*/
namespace internal {
enum {
Rhs = 0,
Lhs = 1,
};
/*
* Implementation of the Eigen blas_data_mapper class for tensors.
*/
template<typename Scalar, typename Index, int side,
typename Tensor,
typename nocontract_t, typename contract_t,
int packet_size, bool inner_dim_contiguous>
class SimpleTensorContractionMapper {
public:
EIGEN_DEVICE_FUNC
SimpleTensorContractionMapper(const Tensor& tensor,
const nocontract_t& nocontract_strides,
const nocontract_t& ij_strides,
const contract_t& contract_strides,
const contract_t& k_strides) :
m_tensor(tensor),
m_nocontract_strides(nocontract_strides),
m_ij_strides(ij_strides),
m_contract_strides(contract_strides),
m_k_strides(k_strides) { }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void prefetch(Index /*i*/) { }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar operator()(Index row) const {
// column major assumption
return operator()(row, 0);
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar operator()(Index row, Index col) const {
return m_tensor.coeff(computeIndex(row, col));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Index computeIndex(Index row, Index col) const {
const bool left = (side == Lhs);
Index nocontract_val = left ? row : col;
Index linidx = 0;
for (int i = static_cast<int>(array_size<nocontract_t>::value) - 1; i > 0; i--) {
const Index idx = nocontract_val / m_ij_strides[i];
linidx += idx * m_nocontract_strides[i];
nocontract_val -= idx * m_ij_strides[i];
}
if (array_size<typename Tensor::Dimensions>::value > array_size<contract_t>::value) {
if (side == Lhs && inner_dim_contiguous) {
eigen_assert(m_nocontract_strides[0] == 1);
linidx += nocontract_val;
} else {
linidx += nocontract_val * m_nocontract_strides[0];
}
}
Index contract_val = left ? col : row;
for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) {
const Index idx = contract_val / m_k_strides[i];
linidx += idx * m_contract_strides[i];
contract_val -= idx * m_k_strides[i];
}
if(array_size<contract_t>::value > 0) {
if (side == Rhs && inner_dim_contiguous) {
eigen_assert(m_contract_strides[0] == 1);
linidx += contract_val;
} else {
linidx += contract_val * m_contract_strides[0];
}
}
return linidx;
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE IndexPair<Index> computeIndexPair(Index row, Index col, const Index distance) const {
const bool left = (side == Lhs);
Index nocontract_val[2] = {left ? row : col, left ? row + distance : col};
Index linidx[2] = {0, 0};
for (int i = static_cast<int>(array_size<nocontract_t>::value) - 1; i > 0; i--) {
const Index idx0 = nocontract_val[0] / m_ij_strides[i];
const Index idx1 = nocontract_val[1] / m_ij_strides[i];
linidx[0] += idx0 * m_nocontract_strides[i];
linidx[1] += idx1 * m_nocontract_strides[i];
nocontract_val[0] -= idx0 * m_ij_strides[i];
nocontract_val[1] -= idx1 * m_ij_strides[i];
}
if (array_size<typename Tensor::Dimensions>::value > array_size<contract_t>::value) {
if (side == Lhs && inner_dim_contiguous) {
eigen_assert(m_nocontract_strides[0] == 1);
linidx[0] += nocontract_val[0];
linidx[1] += nocontract_val[1];
} else {
linidx[0] += nocontract_val[0] * m_nocontract_strides[0];
linidx[1] += nocontract_val[1] * m_nocontract_strides[0];
}
}
Index contract_val[2] = {left ? col : row, left ? col : row + distance};
for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) {
const Index idx0 = contract_val[0] / m_k_strides[i];
const Index idx1 = contract_val[1] / m_k_strides[i];
linidx[0] += idx0 * m_contract_strides[i];
linidx[1] += idx1 * m_contract_strides[i];
contract_val[0] -= idx0 * m_k_strides[i];
contract_val[1] -= idx1 * m_k_strides[i];
}
if (side == Rhs && inner_dim_contiguous) {
eigen_assert(m_contract_strides[0] == 1);
linidx[0] += contract_val[0];
linidx[1] += contract_val[1];
} else {
linidx[0] += contract_val[0] * m_contract_strides[0];
linidx[1] += contract_val[1] * m_contract_strides[0];
}
return IndexPair<Index>(linidx[0], linidx[1]);
}
Index firstAligned(Index size) const {
return size;
}
Index stride() const {
return 1;
}
protected:
const Tensor m_tensor;
const nocontract_t m_nocontract_strides;
const nocontract_t m_ij_strides;
const contract_t m_contract_strides;
const contract_t m_k_strides;
};
template<typename Scalar, typename Index, int side,
typename Tensor,
typename nocontract_t, typename contract_t,
int packet_size, bool inner_dim_contiguous,
bool inner_dim_reordered, int Alignment>
class BaseTensorContractionMapper : public SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous>
{
public:
typedef SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous> ParentMapper;
EIGEN_DEVICE_FUNC
BaseTensorContractionMapper(const Tensor& tensor,
const nocontract_t& nocontract_strides,
const nocontract_t& ij_strides,
const contract_t& contract_strides,
const contract_t& k_strides) :
ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { }
typedef typename packet_traits<Scalar>::type Packet;
typedef typename packet_traits<Scalar>::half HalfPacket;
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const {
// whole method makes column major assumption
// don't need to add offsets for now (because operator handles that)
// current code assumes packet size must be a multiple of 2
EIGEN_STATIC_ASSERT(packet_size % 2 == 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
if (Tensor::PacketAccess && inner_dim_contiguous && !inner_dim_reordered) {
const Index index = this->computeIndex(i, j);
eigen_assert(this->computeIndex(i+packet_size-1, j) == index + packet_size-1);
return this->m_tensor.template packet<Alignment>(index);
}
const IndexPair<Index> indexPair = this->computeIndexPair(i, j, packet_size - 1);
const Index first = indexPair.first;
const Index last = indexPair.second;
// We can always do optimized packet reads from left hand side right now, because
// the vertical matrix dimension on the left hand side is never contracting.
// On the right hand side we need to check if the contracting dimensions may have
// been shuffled first.
if (Tensor::PacketAccess &&
(side == Lhs || internal::array_size<contract_t>::value <= 1 || !inner_dim_reordered) &&
(last - first) == (packet_size - 1)) {
return this->m_tensor.template packet<Alignment>(first);
}
EIGEN_ALIGN_MAX Scalar data[packet_size];
data[0] = this->m_tensor.coeff(first);
for (Index k = 1; k < packet_size - 1; k += 2) {
const IndexPair<Index> internal_pair = this->computeIndexPair(i + k, j, 1);
data[k] = this->m_tensor.coeff(internal_pair.first);
data[k + 1] = this->m_tensor.coeff(internal_pair.second);
}
data[packet_size - 1] = this->m_tensor.coeff(last);
return pload<Packet>(data);
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE HalfPacket loadHalfPacket(Index i, Index j) const {
// whole method makes column major assumption
// don't need to add offsets for now (because operator handles that)
const Index half_packet_size = unpacket_traits<HalfPacket>::size;
if (half_packet_size == packet_size) {
return loadPacket(i, j);
}
EIGEN_ALIGN_MAX Scalar data[half_packet_size];
for (Index k = 0; k < half_packet_size; k++) {
data[k] = operator()(i + k, j);
}
return pload<HalfPacket>(data);
}
};
template<typename Scalar, typename Index, int side,
typename Tensor,
typename nocontract_t, typename contract_t,
bool inner_dim_contiguous,
bool inner_dim_reordered, int Alignment>
class BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment> : public SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous>
{
public:
typedef SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous> ParentMapper;
EIGEN_DEVICE_FUNC
BaseTensorContractionMapper(const Tensor& tensor,
const nocontract_t& nocontract_strides,
const nocontract_t& ij_strides,
const contract_t& contract_strides,
const contract_t& k_strides) :
ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { }
typedef typename packet_traits<Scalar>::type Packet;
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const {
EIGEN_ALIGN_MAX Scalar data[1];
data[0] = this->m_tensor.coeff(this->computeIndex(i, j));
return pload<typename packet_traits<Scalar>::type>(data);
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Packet loadHalfPacket(Index i, Index j) const {
return loadPacket(i, j);
}
};
template<typename Scalar, typename Index, int side,
typename Tensor,
typename nocontract_t, typename contract_t,
int packet_size,
bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
class TensorContractionInputMapper;
template<typename Scalar, typename Index, int side,
typename Tensor,
typename nocontract_t, typename contract_t,
int packet_size,
bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
class TensorContractionSubMapper {
public:
typedef typename packet_traits<Scalar>::type Packet;
typedef typename packet_traits<Scalar>::half HalfPacket;
typedef TensorContractionInputMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> ParentMapper;
typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Self;
typedef Self LinearMapper;
EIGEN_DEVICE_FUNC TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset)
: m_base_mapper(base_mapper), m_vert_offset(vert_offset), m_horiz_offset(horiz_offset) { }
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {
return m_base_mapper(i + m_vert_offset, m_horiz_offset);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i, Index j) const {
return m_base_mapper(i + m_vert_offset, j + m_horiz_offset);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
return m_base_mapper.loadPacket(i + m_vert_offset, m_horiz_offset);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
return m_base_mapper.loadPacket(i + m_vert_offset, j + m_horiz_offset);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const {
return m_base_mapper.loadHalfPacket(i + m_vert_offset, m_horiz_offset);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, Packet p) const {
m_base_mapper.storePacket(i + m_vert_offset, m_horiz_offset, p);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
return LinearMapper(m_base_mapper, i + m_vert_offset, j + m_horiz_offset);
}
template <typename PacketT, int AlignmentType>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i) const {
EIGEN_STATIC_ASSERT((internal::is_same<PacketT, Packet>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((AlignmentType == Aligned || Alignment == Unaligned), YOU_MADE_A_PROGRAMMING_MISTAKE);
return loadPacket(i);
}
template <typename Packet>
EIGEN_DEVICE_FUNC bool aligned(Index) const {
return false;
}
private:
const ParentMapper& m_base_mapper;
const Index m_vert_offset;
const Index m_horiz_offset;
};
template<typename Scalar, typename Index, int side,
typename Tensor,
typename nocontract_t, typename contract_t,
int packet_size,
bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
class TensorContractionInputMapper
: public BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> {
public:
typedef BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Base;
typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> SubMapper;
typedef SubMapper VectorMapper;
EIGEN_DEVICE_FUNC TensorContractionInputMapper(const Tensor& tensor,
const nocontract_t& nocontract_strides,
const nocontract_t& ij_strides,
const contract_t& contract_strides,
const contract_t& k_strides)
: Base(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE SubMapper getSubMapper(Index i, Index j) const {
return SubMapper(*this, i, j);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {
return VectorMapper(*this, i, j);
}
};
template<typename Dimensions, typename LhsXprType, typename RhsXprType>
struct traits<TensorContractionOp<Dimensions, LhsXprType, RhsXprType> >
{
@ -480,6 +128,7 @@ struct TensorContractionEvaluatorBase
PacketAccess = (internal::packet_traits<Scalar>::size > 1),
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = true
};
// Most of the code is assuming that both input tensors are ColMajor. If the
@ -498,8 +147,6 @@ struct TensorContractionEvaluatorBase
static const int ContractDims = internal::array_size<Indices>::value;
static const int NumDims = max_n_1<LDims + RDims - 2 * ContractDims>::size;
typedef array<Index, LDims> left_dim_mapper_t;
typedef array<Index, RDims> right_dim_mapper_t;
typedef array<Index, ContractDims> contract_t;
typedef array<Index, max_n_1<LDims - ContractDims>::size> left_nocontract_t;
typedef array<Index, max_n_1<RDims - ContractDims>::size> right_nocontract_t;
@ -546,8 +193,8 @@ struct TensorContractionEvaluatorBase
// We need to flip all the pairs of contracting indices as well as
// reversing the dimensions.
for (int i = 0; i < ContractDims; i++) {
eval_op_indices[i].first = LDims - 1 - op.indices()[i].second;
eval_op_indices[i].second = RDims - 1 - op.indices()[i].first;
eval_op_indices[i].first = LDims - 1 - op.indices()[ContractDims - 1 - i].second;
eval_op_indices[i].second = RDims - 1 - op.indices()[ContractDims - 1 - i].first;
}
}
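The flipped remapping above is internal bookkeeping for the non-ColMajor case; at the API level a contraction is still specified through IndexPair dimensions. A minimal sketch (sizes are illustrative):

Eigen::Tensor<float, 2> a(2, 3), b(3, 4);
a.setRandom();
b.setRandom();
Eigen::array<Eigen::IndexPair<int>, 1> dims{{ Eigen::IndexPair<int>(1, 0) }};
Eigen::Tensor<float, 2> c = a.contract(b, dims);   // 2x4 result, i.e. the matrix product a * b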
@ -741,17 +388,19 @@ struct TensorContractionEvaluatorBase
typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator;
const Index lhs_packet_size = internal::packet_traits<LhsScalar>::size;
const Index rhs_packet_size = internal::packet_traits<RhsScalar>::size;
const int lhs_alignment = LeftEvaluator::IsAligned ? Aligned : Unaligned;
const int rhs_alignment = RightEvaluator::IsAligned ? Aligned : Unaligned;
typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs,
LeftEvaluator, left_nocontract_t,
contract_t, lhs_packet_size,
lhs_inner_dim_contiguous,
false, Unaligned> LhsMapper;
false, lhs_alignment> LhsMapper;
typedef internal::TensorContractionInputMapper<RhsScalar, Index, internal::Rhs,
RightEvaluator, right_nocontract_t,
contract_t, rhs_packet_size,
rhs_inner_dim_contiguous,
rhs_inner_dim_reordered, Unaligned> RhsMapper;
rhs_inner_dim_reordered, rhs_alignment> RhsMapper;
LhsMapper lhs(m_leftImpl, m_left_nocontract_strides, m_i_strides,
m_left_contracting_strides, m_k_strides);
@ -784,11 +433,11 @@ struct TensorContractionEvaluatorBase
}
template<int LoadMode>
EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const {
return internal::ploadt<Packet, LoadMode>(m_result + index);
}
EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const { return m_result; }
protected:
// Prevent assignment
@ -853,9 +502,6 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value;
static const int ContractDims = internal::array_size<Indices>::value;
typedef array<Index, LDims> left_dim_mapper_t;
typedef array<Index, RDims> right_dim_mapper_t;
typedef array<Index, ContractDims> contract_t;
typedef array<Index, max_n_1<LDims - ContractDims>::size> left_nocontract_t;
typedef array<Index, max_n_1<RDims - ContractDims>::size> right_nocontract_t;

View File

@ -1261,7 +1261,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
Base(op, device) {}
// We need to redefine this method to make nvcc happy
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
this->m_leftImpl.evalSubExprsIfNeeded(NULL);
this->m_rightImpl.evalSubExprsIfNeeded(NULL);
if (data) {
@ -1317,6 +1317,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
void evalTyped(Scalar* buffer) const {
// columns in left side, rows in right side
const Index k = this->m_k_size;
EIGEN_UNUSED_VARIABLE(k)
// rows in left side
const Index m = this->m_i_size;
@ -1361,7 +1362,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
const dim3 block_size(16, 16, 1);
LAUNCH_CUDA_KERNEL((EigenFloatContractionKernel16x16<Index, LhsMapper, RhsMapper, OutputMapper>), num_blocks, block_size, 0, this->m_device, lhs, rhs, output, m, n, k);
} else {
const Index m_blocks = (m + 127) / 128;
const Index m_blocks = (m + 127) / 128;
const Index n_blocks = (n + 63) / 64;
const dim3 num_blocks(m_blocks, n_blocks, 1);
const dim3 block_size(8, 32, 1);

View File

@ -0,0 +1,464 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H
#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H
namespace Eigen {
namespace internal {
enum {
Rhs = 0,
Lhs = 1,
};
/*
* Implementation of the Eigen blas_data_mapper class for tensors.
*/
template <typename Tensor, bool HasRawAccess> struct CoeffLoader {
enum {
DirectOffsets = false
};
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffLoader(const Tensor& tensor) : m_tensor(tensor) { }
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void offsetBuffer(typename Tensor::Index) {
eigen_assert(false && "unsupported");
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename Tensor::Scalar coeff(typename Tensor::Index index) const { return m_tensor.coeff(index); }
template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
typename Tensor::PacketReturnType packet(typename Tensor::Index index) const
{
return m_tensor.template packet<LoadMode>(index);
}
private:
const Tensor m_tensor;
};
template <typename Tensor> struct CoeffLoader<Tensor, true> {
enum {
DirectOffsets = true
};
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffLoader(const Tensor& tensor) : m_data(tensor.data()) {}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void offsetBuffer(typename Tensor::Index offset) {
m_data += offset;
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename Tensor::Scalar coeff(typename Tensor::Index index) const { return loadConstant(m_data+index); }
template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
typename Tensor::PacketReturnType packet(typename Tensor::Index index) const
{
return internal::ploadt_ro<typename Tensor::PacketReturnType, LoadMode>(m_data + index);
}
private:
typedef typename Tensor::Scalar Scalar;
const Scalar* m_data;
};
template<typename Scalar, typename Index, int side,
typename Tensor,
typename nocontract_t, typename contract_t,
int packet_size, bool inner_dim_contiguous, int Alignment>
class SimpleTensorContractionMapper {
public:
EIGEN_DEVICE_FUNC
SimpleTensorContractionMapper(const Tensor& tensor,
const nocontract_t& nocontract_strides,
const nocontract_t& ij_strides,
const contract_t& contract_strides,
const contract_t& k_strides) :
m_tensor(tensor),
m_nocontract_strides(nocontract_strides),
m_ij_strides(ij_strides),
m_contract_strides(contract_strides),
m_k_strides(k_strides) { }
enum {
DirectOffsets = CoeffLoader<Tensor, Tensor::RawAccess>::DirectOffsets
};
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void offsetBuffer(typename Tensor::Index offset) {
m_tensor.offsetBuffer(offset);
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void prefetch(Index /*i*/) { }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar operator()(Index row) const {
// column major assumption
return operator()(row, 0);
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar operator()(Index row, Index col) const {
return m_tensor.coeff(computeIndex(row, col));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Index computeIndex(Index row, Index col) const {
const bool left = (side == Lhs);
Index nocontract_val = left ? row : col;
Index linidx = 0;
for (int i = static_cast<int>(array_size<nocontract_t>::value) - 1; i > 0; i--) {
const Index idx = nocontract_val / m_ij_strides[i];
linidx += idx * m_nocontract_strides[i];
nocontract_val -= idx * m_ij_strides[i];
}
if (array_size<typename Tensor::Dimensions>::value > array_size<contract_t>::value) {
if (side == Lhs && inner_dim_contiguous) {
eigen_assert(m_nocontract_strides[0] == 1);
linidx += nocontract_val;
} else {
linidx += nocontract_val * m_nocontract_strides[0];
}
}
Index contract_val = left ? col : row;
for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) {
const Index idx = contract_val / m_k_strides[i];
linidx += idx * m_contract_strides[i];
contract_val -= idx * m_k_strides[i];
}
if(array_size<contract_t>::value > 0) {
if (side == Rhs && inner_dim_contiguous) {
eigen_assert(m_contract_strides[0] == 1);
linidx += contract_val;
} else {
linidx += contract_val * m_contract_strides[0];
}
}
return linidx;
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE IndexPair<Index> computeIndexPair(Index row, Index col, const Index distance) const {
const bool left = (side == Lhs);
Index nocontract_val[2] = {left ? row : col, left ? row + distance : col};
Index linidx[2] = {0, 0};
for (int i = static_cast<int>(array_size<nocontract_t>::value) - 1; i > 0; i--) {
const Index idx0 = nocontract_val[0] / m_ij_strides[i];
const Index idx1 = nocontract_val[1] / m_ij_strides[i];
linidx[0] += idx0 * m_nocontract_strides[i];
linidx[1] += idx1 * m_nocontract_strides[i];
nocontract_val[0] -= idx0 * m_ij_strides[i];
nocontract_val[1] -= idx1 * m_ij_strides[i];
}
if (array_size<typename Tensor::Dimensions>::value > array_size<contract_t>::value) {
if (side == Lhs && inner_dim_contiguous) {
eigen_assert(m_nocontract_strides[0] == 1);
linidx[0] += nocontract_val[0];
linidx[1] += nocontract_val[1];
} else {
linidx[0] += nocontract_val[0] * m_nocontract_strides[0];
linidx[1] += nocontract_val[1] * m_nocontract_strides[0];
}
}
Index contract_val[2] = {left ? col : row, left ? col : row + distance};
for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) {
const Index idx0 = contract_val[0] / m_k_strides[i];
const Index idx1 = contract_val[1] / m_k_strides[i];
linidx[0] += idx0 * m_contract_strides[i];
linidx[1] += idx1 * m_contract_strides[i];
contract_val[0] -= idx0 * m_k_strides[i];
contract_val[1] -= idx1 * m_k_strides[i];
}
if (side == Rhs && inner_dim_contiguous) {
eigen_assert(m_contract_strides[0] == 1);
linidx[0] += contract_val[0];
linidx[1] += contract_val[1];
} else {
linidx[0] += contract_val[0] * m_contract_strides[0];
linidx[1] += contract_val[1] * m_contract_strides[0];
}
return IndexPair<Index>(linidx[0], linidx[1]);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index firstAligned(Index size) const {
// Only claim alignment when we can compute the actual stride (i.e. when we're
// dealing with the lhs with inner_dim_contiguous). This is because the
// matrix-vector product relies on the stride when dealing with aligned inputs.
return (Alignment == Aligned) && (side == Lhs) && inner_dim_contiguous ? 0 : size;
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index stride() const {
return ((side == Lhs) && inner_dim_contiguous) ? m_contract_strides[0] : 1;
}
protected:
CoeffLoader<Tensor, Tensor::RawAccess> m_tensor;
const nocontract_t m_nocontract_strides;
const nocontract_t m_ij_strides;
const contract_t m_contract_strides;
const contract_t m_k_strides;
};
template<typename Scalar, typename Index, int side,
typename Tensor,
typename nocontract_t, typename contract_t,
int packet_size, bool inner_dim_contiguous,
bool inner_dim_reordered, int Alignment>
class BaseTensorContractionMapper : public SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, Alignment>
{
public:
typedef SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, Alignment> ParentMapper;
EIGEN_DEVICE_FUNC
BaseTensorContractionMapper(const Tensor& tensor,
const nocontract_t& nocontract_strides,
const nocontract_t& ij_strides,
const contract_t& contract_strides,
const contract_t& k_strides) :
ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { }
typedef typename packet_traits<Scalar>::type Packet;
typedef typename packet_traits<Scalar>::half HalfPacket;
template <int AlignmentType = Alignment>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const {
// whole method makes column major assumption
// don't need to add offsets for now (because operator handles that)
// current code assumes packet size must be a multiple of 2
EIGEN_STATIC_ASSERT(packet_size % 2 == 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
if (Tensor::PacketAccess && inner_dim_contiguous && !inner_dim_reordered) {
const Index index = this->computeIndex(i, j);
eigen_assert(this->computeIndex(i+packet_size-1, j) == index + packet_size-1);
return this->m_tensor.template packet<AlignmentType>(index);
}
const IndexPair<Index> indexPair = this->computeIndexPair(i, j, packet_size - 1);
const Index first = indexPair.first;
const Index last = indexPair.second;
// We can always do optimized packet reads from left hand side right now, because
// the vertical matrix dimension on the left hand side is never contracting.
// On the right hand side we need to check if the contracting dimensions may have
// been shuffled first.
if (Tensor::PacketAccess &&
(side == Lhs || internal::array_size<contract_t>::value <= 1 || !inner_dim_reordered) &&
(last - first) == (packet_size - 1)) {
return this->m_tensor.template packet<AlignmentType>(first);
}
EIGEN_ALIGN_MAX Scalar data[packet_size];
data[0] = this->m_tensor.coeff(first);
for (Index k = 1; k < packet_size - 1; k += 2) {
const IndexPair<Index> internal_pair = this->computeIndexPair(i + k, j, 1);
data[k] = this->m_tensor.coeff(internal_pair.first);
data[k + 1] = this->m_tensor.coeff(internal_pair.second);
}
data[packet_size - 1] = this->m_tensor.coeff(last);
return pload<Packet>(data);
}
template <int AlignmentType = Alignment>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE HalfPacket loadHalfPacket(Index i, Index j) const {
// whole method makes column major assumption
// don't need to add offsets for now (because operator handles that)
const Index half_packet_size = unpacket_traits<HalfPacket>::size;
if (half_packet_size == packet_size) {
return loadPacket<AlignmentType>(i, j);
}
EIGEN_ALIGN_MAX Scalar data[half_packet_size];
for (Index k = 0; k < half_packet_size; k++) {
data[k] = operator()(i + k, j);
}
return pload<HalfPacket>(data);
}
};
template<typename Scalar, typename Index, int side,
typename Tensor,
typename nocontract_t, typename contract_t,
bool inner_dim_contiguous,
bool inner_dim_reordered, int Alignment>
class BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment> : public SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous, Alignment>
{
public:
typedef SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous, Alignment> ParentMapper;
EIGEN_DEVICE_FUNC
BaseTensorContractionMapper(const Tensor& tensor,
const nocontract_t& nocontract_strides,
const nocontract_t& ij_strides,
const contract_t& contract_strides,
const contract_t& k_strides) :
ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { }
typedef typename packet_traits<Scalar>::type Packet;
template <int> EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const {
EIGEN_ALIGN_MAX Scalar data[1];
data[0] = this->m_tensor.coeff(this->computeIndex(i, j));
return pload<typename packet_traits<Scalar>::type>(data);
}
template <int> EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Packet loadHalfPacket(Index i, Index j) const {
return loadPacket(i, j);
}
};
template<typename Scalar, typename Index, int side,
typename Tensor,
typename nocontract_t, typename contract_t,
int packet_size,
bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
class TensorContractionSubMapper {
public:
typedef typename packet_traits<Scalar>::type Packet;
typedef typename packet_traits<Scalar>::half HalfPacket;
typedef BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> ParentMapper;
typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Self;
typedef Self LinearMapper;
enum {
// We can use direct offsets iff the parent mapper supports them and we can compute the strides.
// TODO: we should also enable direct offsets for the Rhs case.
UseDirectOffsets = (side == Lhs) && inner_dim_contiguous && ParentMapper::DirectOffsets
};
EIGEN_DEVICE_FUNC TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset)
: m_base_mapper(base_mapper), m_vert_offset(vert_offset), m_horiz_offset(horiz_offset) {
// Bake the offsets into the buffer used by the base mapper whenever possible. This avoids the need to recompute
// this offset every time we attempt to access a coefficient.
if (UseDirectOffsets) {
Index stride = m_base_mapper.stride();
m_base_mapper.offsetBuffer(vert_offset + horiz_offset * stride);
}
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {
if (UseDirectOffsets) {
return m_base_mapper(i, 0);
}
return m_base_mapper(i + m_vert_offset, m_horiz_offset);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i, Index j) const {
if (UseDirectOffsets) {
return m_base_mapper(i, j);
}
return m_base_mapper(i + m_vert_offset, j + m_horiz_offset);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
if (UseDirectOffsets) {
return m_base_mapper.template loadPacket<Alignment>(i, 0);
}
return m_base_mapper.template loadPacket<Alignment>(i + m_vert_offset, m_horiz_offset);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
if (UseDirectOffsets) {
return m_base_mapper.template loadPacket<Alignment>(i, j);
}
return m_base_mapper.template loadPacket<Alignment>(i + m_vert_offset, j + m_horiz_offset);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const {
if (UseDirectOffsets) {
return m_base_mapper.template loadHalfPacket<Alignment>(i, 0);
}
return m_base_mapper.template loadHalfPacket<Alignment>(i + m_vert_offset, m_horiz_offset);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, Packet p) const {
if (UseDirectOffsets) {
m_base_mapper.storePacket(i, 0, p);
return;
}
m_base_mapper.storePacket(i + m_vert_offset, m_horiz_offset, p);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
if (UseDirectOffsets) {
return LinearMapper(m_base_mapper, i, j);
}
return LinearMapper(m_base_mapper, i + m_vert_offset, j + m_horiz_offset);
}
template <typename PacketT, int AlignmentType>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i) const {
EIGEN_STATIC_ASSERT((internal::is_same<PacketT, Packet>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
const int ActualAlignment = (AlignmentType == Aligned) && (Alignment == Aligned) ? Aligned : Unaligned;
if (UseDirectOffsets) {
return m_base_mapper.template loadPacket<ActualAlignment>(i, 0);
}
return m_base_mapper.template loadPacket<ActualAlignment>(i + m_vert_offset, m_horiz_offset);
}
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool aligned(Index) const {
return false;
}
private:
ParentMapper m_base_mapper;
const Index m_vert_offset;
const Index m_horiz_offset;
};
template<typename Scalar, typename Index, int side,
typename Tensor,
typename nocontract_t, typename contract_t,
int packet_size,
bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
class TensorContractionInputMapper
: public BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> {
public:
typedef BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Base;
typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> SubMapper;
typedef SubMapper VectorMapper;
EIGEN_DEVICE_FUNC TensorContractionInputMapper(const Tensor& tensor,
const nocontract_t& nocontract_strides,
const nocontract_t& ij_strides,
const contract_t& contract_strides,
const contract_t& k_strides)
: Base(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE SubMapper getSubMapper(Index i, Index j) const {
return SubMapper(*this, i, j);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {
return VectorMapper(*this, i, j);
}
};
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H
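The UseDirectOffsets path above bakes the sub-mapper's row/column offsets into the raw buffer pointer once at construction instead of re-adding them on every access. A schematic of the idea (a hypothetical illustration, not the Eigen internals verbatim):

// With raw access to a contiguous buffer, a sub-view can advance the base pointer once...
struct RawView {
  const float* data;
  RawView(const float* base, long offset) : data(base + offset) {}
  float operator()(long i) const { return data[i]; }          // no per-access offset add
};
// ...whereas without raw access every read must recombine the offsets:
struct IndirectView {
  const float* data;
  long offset;
  float operator()(long i) const { return data[i + offset]; } // offset re-added on each call
};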

View File

@ -162,6 +162,7 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
IsAligned = false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess && internal::type_casting_traits<SrcType, TargetType>::VectorizedCast,
Layout = TensorEvaluator<ArgType, Device>::Layout,
RawAccess = false
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)

View File

@ -306,6 +306,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
PacketAccess = TensorEvaluator<InputArgType, Device>::PacketAccess & TensorEvaluator<KernelArgType, Device>::PacketAccess,
Layout = TensorEvaluator<InputArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@ -752,6 +753,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
PacketAccess = false,
Layout = TensorEvaluator<InputArgType, GpuDevice>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const GpuDevice& device)

View File

@ -95,6 +95,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
BlockAccess = false,
Layout = TensorEvaluator<XprType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const ArgType& op, const Device& device)
@ -250,6 +251,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
BlockAccess = false,
Layout = TensorEvaluator<LhsXprType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)

View File

@ -10,7 +10,6 @@
#if defined(EIGEN_USE_GPU) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H)
#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H
namespace Eigen {
// This defines an interface that GPUDevice can take to use
@ -206,20 +205,45 @@ struct GpuDevice {
#endif
}
inline int getNumCudaMultiProcessors() const {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int getNumCudaMultiProcessors() const {
#ifndef __CUDA_ARCH__
return stream_->deviceProperties().multiProcessorCount;
#else
eigen_assert(false && "The default device should be used instead to generate kernel code");
return 0;
#endif
}
inline int maxCudaThreadsPerBlock() const {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxCudaThreadsPerBlock() const {
#ifndef __CUDA_ARCH__
return stream_->deviceProperties().maxThreadsPerBlock;
#else
eigen_assert(false && "The default device should be used instead to generate kernel code");
return 0;
#endif
}
inline int maxCudaThreadsPerMultiProcessor() const {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxCudaThreadsPerMultiProcessor() const {
#ifndef __CUDA_ARCH__
return stream_->deviceProperties().maxThreadsPerMultiProcessor;
#else
eigen_assert(false && "The default device should be used instead to generate kernel code");
return 0;
#endif
}
inline int sharedMemPerBlock() const {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int sharedMemPerBlock() const {
#ifndef __CUDA_ARCH__
return stream_->deviceProperties().sharedMemPerBlock;
#else
eigen_assert(false && "The default device should be used instead to generate kernel code");
return 0;
#endif
}
inline int majorDeviceVersion() const {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
#ifndef __CUDA_ARCH__
return stream_->deviceProperties().major;
#else
eigen_assert(false && "The default device should be used instead to generate kernel code");
return 0;
#endif
}
// This function checks if the CUDA runtime recorded an error for the
@ -239,23 +263,29 @@ struct GpuDevice {
};
#ifndef __CUDA_ARCH__
#define LAUNCH_CUDA_KERNEL(kernel, gridsize, blocksize, sharedmem, device, ...) \
(kernel) <<< (gridsize), (blocksize), (sharedmem), (device).stream() >>> (__VA_ARGS__); \
#define LAUNCH_CUDA_KERNEL(kernel, gridsize, blocksize, sharedmem, device, ...) \
(kernel) <<< (gridsize), (blocksize), (sharedmem), (device).stream() >>> (__VA_ARGS__); \
assert(cudaGetLastError() == cudaSuccess);
#else
#define LAUNCH_CUDA_KERNEL(...) \
eigen_assert(false && "Cannot launch a kernel from another kernel");
#define LAUNCH_CUDA_KERNEL(kernel, ...) \
{ const auto __attribute__((__unused__)) __makeTheKernelInstantiate = &(kernel); } \
eigen_assert(false && "Cannot launch a kernel from another kernel" __CUDA_ARCH__);
#endif
// FIXME: Should be device and kernel specific.
#ifdef __CUDACC__
static inline void setCudaSharedMemConfig(cudaSharedMemConfig config) {
static EIGEN_DEVICE_FUNC inline void setCudaSharedMemConfig(cudaSharedMemConfig config) {
#ifndef __CUDA_ARCH__
cudaError_t status = cudaDeviceSetSharedMemConfig(config);
EIGEN_UNUSED_VARIABLE(status)
assert(status == cudaSuccess);
#else
EIGEN_UNUSED_VARIABLE(config)
#endif
}
#endif
} // end namespace Eigen
#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H
#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H
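The device-query methods above become callable from device code, and the device-side expansion of the kernel-launch macro now forces instantiation of the kernel. At the user level a GPU evaluation is driven through a GpuDevice, roughly as follows (a hedged sketch; buffer allocation and copies are elided, and the names are illustrative):

Eigen::CudaStreamDevice stream;                 // wraps a cudaStream_t
Eigen::GpuDevice gpu_device(&stream);

float* d_in;                                    // device buffers allocated and filled elsewhere (elided)
float* d_out;
Eigen::TensorMap<Eigen::Tensor<float, 2> > in(d_in, 64, 64);
Eigen::TensorMap<Eigen::Tensor<float, 2> > out(d_out, 64, 64);

out.device(gpu_device) = in * in.constant(2.0f);  // evaluated by the CUDA executor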

View File

@ -110,14 +110,14 @@ struct Sizes : internal::numeric_list<std::ptrdiff_t, Indices...> {
return internal::arg_prod(Indices...);
}
Sizes() { }
EIGEN_DEVICE_FUNC Sizes() { }
template <typename DenseIndex>
explicit Sizes(const array<DenseIndex, Base::count>& /*indices*/) {
explicit EIGEN_DEVICE_FUNC Sizes(const array<DenseIndex, Base::count>& /*indices*/) {
// todo: add assertion
}
#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
template <typename... DenseIndex> Sizes(DenseIndex...) { }
explicit Sizes(std::initializer_list<std::ptrdiff_t> /*l*/) {
template <typename... DenseIndex> EIGEN_DEVICE_FUNC Sizes(DenseIndex...) { }
explicit EIGEN_DEVICE_FUNC Sizes(std::initializer_list<std::ptrdiff_t> /*l*/) {
// todo: add assertion
}
#endif
@ -289,7 +289,7 @@ struct DSizes : array<DenseIndex, NumDims> {
template<typename... IndexTypes> EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE explicit DSizes(DenseIndex firstDimension, IndexTypes... otherDimensions) {
EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumDims, YOU_MADE_A_PROGRAMMING_MISTAKE)
(*this) = array<DenseIndex, NumDims>{{firstDimension, otherDimensions...}};
(*this) = array<DenseIndex, NumDims>{firstDimension, otherDimensions...};
}
#else
EIGEN_DEVICE_FUNC explicit DSizes(const DenseIndex i0) {
@ -405,20 +405,20 @@ template <std::size_t n, std::size_t V1, std::size_t V2, std::size_t V3, std::si
template <typename Dims1, typename Dims2, size_t n, size_t m>
struct sizes_match_below_dim {
static inline bool run(Dims1&, Dims2&) {
static EIGEN_DEVICE_FUNC inline bool run(Dims1&, Dims2&) {
return false;
}
};
template <typename Dims1, typename Dims2, size_t n>
struct sizes_match_below_dim<Dims1, Dims2, n, n> {
static inline bool run(Dims1& dims1, Dims2& dims2) {
static EIGEN_DEVICE_FUNC inline bool run(Dims1& dims1, Dims2& dims2) {
return (array_get<n-1>(dims1) == array_get<n-1>(dims2)) &
sizes_match_below_dim<Dims1, Dims2, n-1, n-1>::run(dims1, dims2);
}
};
template <typename Dims1, typename Dims2>
struct sizes_match_below_dim<Dims1, Dims2, 0, 0> {
static inline bool run(Dims1&, Dims2&) {
static EIGEN_DEVICE_FUNC inline bool run(Dims1&, Dims2&) {
return true;
}
};
@ -427,7 +427,7 @@ struct sizes_match_below_dim<Dims1, Dims2, 0, 0> {
template <typename Dims1, typename Dims2>
bool dimensions_match(Dims1& dims1, Dims2& dims2) {
EIGEN_DEVICE_FUNC bool dimensions_match(Dims1& dims1, Dims2& dims2) {
return internal::sizes_match_below_dim<Dims1, Dims2, internal::array_size<Dims1>::value, internal::array_size<Dims2>::value>::run(dims1, dims2);
}
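dimensions_match compares two dimension lists extent by extent (and is now usable from device code). A small sketch:

Eigen::Tensor<float, 3> a(2, 3, 4);
Eigen::Tensor<float, 3> b(2, 3, 4);
Eigen::Tensor<float, 3> c(2, 3, 5);
bool same = Eigen::dimensions_match(a.dimensions(), b.dimensions());   // true
bool diff = Eigen::dimensions_match(a.dimensions(), c.dimensions());   // false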

View File

@ -98,6 +98,7 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device>
PacketAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = true
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@ -140,7 +141,7 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device>
return internal::ploadt<Packet, LoadMode>(m_buffer + index);
}
EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_buffer; }
private:
TensorEvaluator<ArgType, Device> m_impl;

View File

@ -43,6 +43,7 @@ struct TensorEvaluator
PacketAccess = Derived::PacketAccess,
Layout = Derived::Layout,
CoordAccess = NumCoords > 0,
RawAccess = true
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
@ -148,6 +149,7 @@ struct TensorEvaluator<const Derived, Device>
PacketAccess = Derived::PacketAccess,
Layout = Derived::Layout,
CoordAccess = NumCoords > 0,
RawAccess = true
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
@ -207,6 +209,7 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
EIGEN_DEVICE_FUNC
@ -257,6 +260,7 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess & internal::functor_traits<UnaryOp>::PacketAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
@ -312,6 +316,7 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
internal::functor_traits<BinaryOp>::PacketAccess,
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
@ -378,6 +383,7 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
internal::packet_traits<Scalar>::HasBlend,
Layout = TensorEvaluator<IfArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
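The recurring change in this commit adds a RawAccess entry to each evaluator's flags enum: evaluators backed by a plain contiguous buffer (materialized tensors, eval-to and forced-eval buffers, the contraction result) report RawAccess = true and expose the buffer through data(), while expression evaluators report false. A schematic consumer of the flag (a hypothetical helper, not Eigen API):

// Use the raw buffer when the evaluator advertises it, otherwise fall back to coeff().
template <typename Evaluator, typename Index>
typename Evaluator::CoeffReturnType load(const Evaluator& eval, Index i) {
  return Evaluator::RawAccess ? eval.data()[i] : eval.coeff(i);
}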

View File

@ -156,14 +156,14 @@ template <typename Expression>
class TensorExecutor<Expression, GpuDevice, false> {
public:
typedef typename Expression::Index Index;
EIGEN_DEVICE_FUNC static void run(const Expression& expr, const GpuDevice& device);
static EIGEN_DEVICE_FUNC void run(const Expression& expr, const GpuDevice& device);
};
template <typename Expression>
class TensorExecutor<Expression, GpuDevice, true> {
public:
typedef typename Expression::Index Index;
EIGEN_DEVICE_FUNC static void run(const Expression& expr, const GpuDevice& device);
static EIGEN_DEVICE_FUNC void run(const Expression& expr, const GpuDevice& device);
};
#if defined(__CUDACC__)
@ -215,7 +215,6 @@ EigenMetaKernel_Vectorizable(Evaluator memcopied_eval, Index size) {
template <typename Expression>
EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, false>::run(const Expression& expr, const GpuDevice& device)
{
#ifndef __CUDA_ARCH__
TensorEvaluator<Expression, GpuDevice> evaluator(expr, device);
const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
if (needs_assign)
@ -228,9 +227,6 @@ EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, false>::run(
LAUNCH_CUDA_KERNEL((EigenMetaKernel_NonVectorizable<TensorEvaluator<Expression, GpuDevice>, Index>), num_blocks, block_size, 0, device, evaluator, size);
}
evaluator.cleanup();
#else
eigen_assert(false && "Cannot launch a kernel from another kernel");
#endif
}
@ -238,7 +234,6 @@ EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, false>::run(
template<typename Expression>
EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, true>::run(const Expression& expr, const GpuDevice& device)
{
#ifndef __CUDA_ARCH__
TensorEvaluator<Expression, GpuDevice> evaluator(expr, device);
const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
if (needs_assign)
@ -251,9 +246,6 @@ EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, true>::run(c
LAUNCH_CUDA_KERNEL((EigenMetaKernel_Vectorizable<TensorEvaluator<Expression, GpuDevice>, Index>), num_blocks, block_size, 0, device, evaluator, size);
}
evaluator.cleanup();
#else
eigen_assert(false && "Cannot launch a kernel from another kernel");
#endif
}
#endif // __CUDACC__
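With the __CUDA_ARCH__ guards removed, the run() bodies above are compiled for both host and device, and the guard against nested kernel launches now lives in LAUNCH_CUDA_KERNEL itself. The launch geometry (elided in the hunks above) is derived from the device-query methods made EIGEN_DEVICE_FUNC earlier, along these lines (an approximate sketch, not the exact executor code):

const int block_size = device.maxCudaThreadsPerBlock();
const int max_blocks = device.getNumCudaMultiProcessors() *
                       device.maxCudaThreadsPerMultiProcessor() / block_size;
const Index size = array_prod(evaluator.dimensions());
// Enough blocks to cover the output, but no more than the device can keep resident.
const int num_blocks = numext::mini<int>(max_blocks, (size + block_size - 1) / block_size);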

View File

@ -135,6 +135,7 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
BlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false,
RawAccess = false
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_fft(op.fft()), m_impl(op.expression(), device), m_data(NULL), m_device(device) {

View File

@ -44,7 +44,8 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
PacketAccess = (internal::packet_traits<Scalar>::size > 1),
Layout = Options_ & RowMajor ? RowMajor : ColMajor,
CoordAccess = true,
};
RawAccess = true
};
typedef Dimensions_ Dimensions;
static const std::size_t NumIndices = Dimensions::count;
@ -53,7 +54,7 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
TensorStorage<Scalar, Dimensions, Options> m_storage;
public:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return NumIndices; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return NumIndices; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index dimension(std::size_t n) const { return m_storage.dimensions()[n]; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_storage.dimensions(); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_storage.size(); }
@ -72,7 +73,7 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
{
// The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
return coeff(array<Index, NumIndices>{{firstIndex, otherIndices...}});
return coeff(array<Index, NumIndices>{firstIndex, otherIndices...});
}
#endif
@ -104,7 +105,7 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
{
// The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
return coeffRef(array<Index, NumIndices>{{firstIndex, otherIndices...}});
return coeffRef(array<Index, NumIndices>{firstIndex, otherIndices...});
}
#endif
@ -136,7 +137,7 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
{
// The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
return this->operator()(array<Index, NumIndices>{{firstIndex, otherIndices...}});
return this->operator()(array<Index, NumIndices>{firstIndex, otherIndices...});
}
#endif
@ -175,7 +176,7 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
{
// The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
return operator()(array<Index, NumIndices>{{firstIndex, otherIndices...}});
return operator()(array<Index, NumIndices>{firstIndex, otherIndices...});
}
#endif
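TensorFixedSize carries its dimensions in the type via Sizes<>, so the variadic accessors above resolve against compile-time extents; with RawAccess = true its storage is also exposed directly. A minimal usage sketch:

Eigen::TensorFixedSize<float, Eigen::Sizes<2, 3> > t;
t.setZero();
t(1, 2) = 4.0f;                 // variadic operator(), checked against the static rank
float* buf = t.data();          // contiguous storage, hence RawAccess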

View File

@ -92,6 +92,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
IsAligned = true,
PacketAccess = (internal::packet_traits<Scalar>::size > 1),
Layout = TensorEvaluator<ArgType, Device>::Layout,
RawAccess = true
};
EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
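TensorForcedEvalOp materializes its argument into a temporary buffer, which is why its evaluator can report RawAccess = true. At the API level the node is created by calling .eval() on an expression (a minimal sketch):

Eigen::Tensor<float, 2> a(3, 3), b(3, 3);
a.setRandom();
b.setRandom();
// .eval() inserts a TensorForcedEvalOp: the sum is computed once into a buffer
// that downstream expressions can then read through raw access.
Eigen::Tensor<float, 2> c = (a + b).eval() * a;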

View File

@ -95,6 +95,7 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
BlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)

Some files were not shown because too many files have changed in this diff