Mirror of https://gitlab.com/libeigen/eigen.git (synced 2025-01-30 17:40:05 +08:00)

commit 9b6c72958a: Update to latest default branch
@@ -12,7 +12,6 @@
 
 #include "src/Core/util/DisableStupidWarnings.h"
 
-#include <complex.h>
 extern "C" {
 #include <pastix_nompi.h>
 #include <pastix.h>
@@ -28,8 +28,8 @@ namespace internal {
  *
  * \brief Robust Cholesky decomposition of a matrix with pivoting
  *
- * \param MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition
- * \param UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
+ * \tparam _MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition
+ * \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
  * The other triangular part won't be read.
  *
  * Perform a robust Cholesky decomposition of a positive semidefinite or negative semidefinite
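For context, a minimal usage sketch of the pivoting LDL^T decomposition documented in this hunk (standard Eigen API; matrix values are illustrative):

```cpp
#include <Eigen/Dense>
#include <iostream>

int main()
{
  // A symmetric positive semidefinite matrix.
  Eigen::Matrix2d A;
  A << 4, 2,
       2, 3;
  Eigen::Vector2d b(1, 2);

  // Robust Cholesky with pivoting: also works for semidefinite matrices.
  Eigen::LDLT<Eigen::Matrix2d> ldlt(A);
  Eigen::Vector2d x = ldlt.solve(b);
  std::cout << "x = " << x.transpose() << std::endl;
}
```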
@@ -22,8 +22,8 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
  *
  * \brief Standard Cholesky decomposition (LL^T) of a matrix and associated features
  *
- * \param MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
- * \param UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
+ * \tparam _MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
+ * \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
  * The other triangular part won't be read.
  *
  * This class performs a LL^T Cholesky decomposition of a symmetric, positive definite
@@ -436,10 +436,7 @@ void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
  *
  * \param bAndX represents both the right-hand side matrix b and result x.
  *
- * \returns true always! If you need to check for existence of solutions, use another decomposition like LU, QR, or SVD.
- *
- * This version avoids a copy when the right hand side matrix b is not
- * needed anymore.
+ * This version avoids a copy when the right hand side matrix b is not needed anymore.
  *
  * \sa LLT::solve(), MatrixBase::llt()
  */
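A short sketch of the solve() and in-place solve discussed above (standard Eigen LLT API; values illustrative):

```cpp
#include <Eigen/Dense>

int main()
{
  Eigen::Matrix2d A;
  A << 4, 1,
       1, 3;                            // symmetric positive definite
  Eigen::Vector2d b(1, 2);

  Eigen::LLT<Eigen::Matrix2d> llt(A);   // standard Cholesky A = L*L^T
  Eigen::Vector2d x = llt.solve(b);     // out-of-place solve

  // In-place variant mentioned above: b is overwritten with the solution,
  // avoiding a copy when b is not needed anymore.
  llt.solveInPlace(b);
  (void)x;
}
```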
@@ -12,7 +12,16 @@
 
 namespace Eigen {
 
-/** \class Array
+namespace internal {
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+{
+  typedef ArrayXpr XprKind;
+  typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase;
+};
+}
+
+/** \class Array
   * \ingroup Core_Module
   *
   * \brief General-purpose arrays with easy API for coefficient-wise operations
@@ -26,21 +35,12 @@ namespace Eigen {
   *
   * See documentation of class Matrix for detailed information on the template parameters
   * storage layout.
   *
   *
   * This class can be extended with the help of the plugin mechanism described on the page
   * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN.
   *
-  * \sa \ref TutorialArrayClass, \ref TopicClassHierarchy
+  * \sa \blank \ref TutorialArrayClass, \ref TopicClassHierarchy
   */
-namespace internal {
-template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
-struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
-{
-  typedef ArrayXpr XprKind;
-  typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase;
-};
-}
-
 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
 class Array
   : public PlainObjectBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
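For readers unfamiliar with Array, a minimal sketch of the coefficient-wise semantics the class provides (standard Eigen API):

```cpp
#include <Eigen/Dense>

int main()
{
  Eigen::ArrayXXf a(2, 2), b(2, 2);
  a << 1, 2,
       3, 4;
  b << 5, 6,
       7, 8;

  Eigen::ArrayXXf c = a * b;        // coefficient-wise product, not matrix product
  Eigen::ArrayXXf d = a + 1.0f;     // scalar added to every coefficient
  Eigen::MatrixXf m = c.matrix();   // switch to linear-algebra semantics when needed
  (void)d; (void)m;
}
```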
@@ -682,9 +682,9 @@ template< typename DstXprType, typename SrcXprType, typename Functor,
 struct Assignment;
 
 
-// The only purpose of this call_assignment() function is to deal with noalias() / AssumeAliasing and automatic transposition.
-// Indeed, I (Gael) think that this concept of AssumeAliasing was a mistake, and it makes thing quite complicated.
-// So this intermediate function removes everything related to AssumeAliasing such that Assignment
+// The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition.
+// Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes thing quite complicated.
+// So this intermediate function removes everything related to "assume-aliasing" such that Assignment
 // does not has to bother about these annoying details.
 
 template<typename Dst, typename Src>
@@ -698,21 +698,21 @@ EIGEN_DEVICE_FUNC void call_assignment(const Dst& dst, const Src& src)
   call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
 }
 
-// Deal with AssumeAliasing
+// Deal with "assume-aliasing"
 template<typename Dst, typename Src, typename Func>
-EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<evaluator_traits<Src>::AssumeAliasing==1, void*>::type = 0)
+EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
 {
   typename plain_matrix_type<Src>::type tmp(src);
   call_assignment_no_alias(dst, tmp, func);
 }
 
 template<typename Dst, typename Src, typename Func>
-EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<evaluator_traits<Src>::AssumeAliasing==0, void*>::type = 0)
+EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
 {
   call_assignment_no_alias(dst, src, func);
 }
 
-// by-pass AssumeAliasing
+// by-pass "assume-aliasing"
 // When there is no aliasing, we require that 'dst' has been properly resized
 template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
 EIGEN_DEVICE_FUNC void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
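The user-visible effect of this dispatch is the noalias() fast path; a minimal sketch (standard Eigen API):

```cpp
#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(3, 3);
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(3, 3);
  Eigen::MatrixXd C(3, 3);

  // Products "assume aliasing": the right-hand side is evaluated into a
  // temporary before being copied into C, in case C appears on both sides.
  C = A * B;

  // noalias() takes the by-pass overload above: no temporary, direct
  // evaluation into C. Only valid when C does not alias A or B.
  C.noalias() = A * B;
}
```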
@@ -161,15 +161,15 @@ class BandMatrixBase : public EigenBase<Derived>
  *
  * \brief Represents a rectangular matrix with a banded storage
  *
- * \param _Scalar Numeric type, i.e. float, double, int
- * \param Rows Number of rows, or \b Dynamic
- * \param Cols Number of columns, or \b Dynamic
- * \param Supers Number of super diagonal
- * \param Subs Number of sub diagonal
- * \param _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint
- * The former controls \ref TopicStorageOrders "storage order", and defaults to
- * column-major. The latter controls whether the matrix represents a selfadjoint
- * matrix in which case either Supers of Subs have to be null.
+ * \tparam _Scalar Numeric type, i.e. float, double, int
+ * \tparam _Rows Number of rows, or \b Dynamic
+ * \tparam _Cols Number of columns, or \b Dynamic
+ * \tparam _Supers Number of super diagonal
+ * \tparam _Subs Number of sub diagonal
+ * \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint
+ * The former controls \ref TopicStorageOrders "storage order", and defaults to
+ * column-major. The latter controls whether the matrix represents a selfadjoint
+ * matrix in which case either Supers of Subs have to be null.
  *
  * \sa class TridiagonalMatrix
  */
@@ -302,9 +302,9 @@ class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsT
  *
  * \brief Represents a tridiagonal matrix with a compact banded storage
  *
- * \param _Scalar Numeric type, i.e. float, double, int
- * \param Size Number of rows and cols, or \b Dynamic
- * \param _Options Can be 0 or \b SelfAdjoint
+ * \tparam Scalar Numeric type, i.e. float, double, int
+ * \tparam Size Number of rows and cols, or \b Dynamic
+ * \tparam Options Can be 0 or \b SelfAdjoint
  *
  * \sa class BandMatrix
  */
@@ -13,41 +13,6 @@
 
 namespace Eigen {
 
-/** \class Block
-  * \ingroup Core_Module
-  *
-  * \brief Expression of a fixed-size or dynamic-size block
-  *
-  * \param XprType the type of the expression in which we are taking a block
-  * \param BlockRows the number of rows of the block we are taking at compile time (optional)
-  * \param BlockCols the number of columns of the block we are taking at compile time (optional)
-  * \param InnerPanel is true, if the block maps to a set of rows of a row major matrix or
-  * to set of columns of a column major matrix (optional). The parameter allows to determine
-  * at compile time whether aligned access is possible on the block expression.
-  *
-  * This class represents an expression of either a fixed-size or dynamic-size block. It is the return
-  * type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block<int,int>(Index,Index) and
-  * most of the time this is the only way it is used.
-  *
-  * However, if you want to directly maniputate block expressions,
-  * for instance if you want to write a function returning such an expression, you
-  * will need to use this class.
-  *
-  * Here is an example illustrating the dynamic case:
-  * \include class_Block.cpp
-  * Output: \verbinclude class_Block.out
-  *
-  * \note Even though this expression has dynamic size, in the case where \a XprType
-  * has fixed size, this expression inherits a fixed maximal size which means that evaluating
-  * it does not cause a dynamic memory allocation.
-  *
-  * Here is an example illustrating the fixed-size case:
-  * \include class_FixedBlock.cpp
-  * Output: \verbinclude class_FixedBlock.out
-  *
-  * \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock
-  */
-
 namespace internal {
 template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
 struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprType>
@@ -101,6 +66,40 @@ template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool In
 
 template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, typename StorageKind> class BlockImpl;
 
+/** \class Block
+  * \ingroup Core_Module
+  *
+  * \brief Expression of a fixed-size or dynamic-size block
+  *
+  * \tparam XprType the type of the expression in which we are taking a block
+  * \tparam BlockRows the number of rows of the block we are taking at compile time (optional)
+  * \tparam BlockCols the number of columns of the block we are taking at compile time (optional)
+  * \tparam InnerPanel is true, if the block maps to a set of rows of a row major matrix or
+  * to set of columns of a column major matrix (optional). The parameter allows to determine
+  * at compile time whether aligned access is possible on the block expression.
+  *
+  * This class represents an expression of either a fixed-size or dynamic-size block. It is the return
+  * type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block<int,int>(Index,Index) and
+  * most of the time this is the only way it is used.
+  *
+  * However, if you want to directly maniputate block expressions,
+  * for instance if you want to write a function returning such an expression, you
+  * will need to use this class.
+  *
+  * Here is an example illustrating the dynamic case:
+  * \include class_Block.cpp
+  * Output: \verbinclude class_Block.out
+  *
+  * \note Even though this expression has dynamic size, in the case where \a XprType
+  * has fixed size, this expression inherits a fixed maximal size which means that evaluating
+  * it does not cause a dynamic memory allocation.
+  *
+  * Here is an example illustrating the fixed-size case:
+  * \include class_FixedBlock.cpp
+  * Output: \verbinclude class_FixedBlock.out
+  *
+  * \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock
+  */
 template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class Block
   : public BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, typename internal::traits<XprType>::StorageKind>
 {
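A minimal sketch of the two block() forms named in the docs above (standard Eigen API):

```cpp
#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::MatrixXf m = Eigen::MatrixXf::Random(4, 4);

  // Dynamic-size block: position and dimensions given at run time.
  std::cout << m.block(1, 1, 2, 2) << std::endl;

  // Fixed-size block: dimensions given at compile time, so evaluating it
  // never allocates dynamically (see the \note above).
  std::cout << m.block<2, 2>(1, 1) << std::endl;
}
```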
@@ -63,10 +63,6 @@ struct evaluator_traits_base
   // by default, get evaluator kind and shape from storage
   typedef typename storage_kind_to_evaluator_kind<typename traits<T>::StorageKind>::Kind Kind;
   typedef typename storage_kind_to_shape<typename traits<T>::StorageKind>::Shape Shape;
-
-  // 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a
-  // temporary; 0 if not.
-  static const int AssumeAliasing = 0;
 };
 
 // Default evaluator traits
@@ -75,6 +71,10 @@ struct evaluator_traits : public evaluator_traits_base<T>
 {
 };
 
+template<typename T, typename Shape = typename evaluator_traits<T>::Shape >
+struct evaluator_assume_aliasing {
+  static const bool value = false;
+};
 
 // By default, we assume a unary expression:
 template<typename T>
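The boolean trait introduced here drives the enable_if dispatch shown in the AssignEvaluator.h hunk above. A standalone sketch of the same idiom, with illustrative names that are not Eigen's:

```cpp
#include <type_traits>

// A boolean trait, false by default; types that must be evaluated into a
// temporary on assignment specialize it to true.
template<typename T>
struct assume_aliasing { static const bool value = false; };

struct PlainType {};
struct ProductLike {};   // stands in for an aliasing-prone expression
template<> struct assume_aliasing<ProductLike> { static const bool value = true; };

// Two overloads selected via SFINAE on the trait, mirroring call_assignment().
template<typename Src>
typename std::enable_if<assume_aliasing<Src>::value, void>::type
assign(const Src&) { /* evaluate Src into a temporary first */ }

template<typename Src>
typename std::enable_if<!assume_aliasing<Src>::value, void>::type
assign(const Src&) { /* assign directly, no temporary */ }

int main()
{
  assign(PlainType());    // direct path
  assign(ProductLike());  // temporary path
}
```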
@@ -13,26 +13,6 @@
 
 namespace Eigen {
 
-/** \class CwiseBinaryOp
-  * \ingroup Core_Module
-  *
-  * \brief Generic expression where a coefficient-wise binary operator is applied to two expressions
-  *
-  * \param BinaryOp template functor implementing the operator
-  * \param Lhs the type of the left-hand side
-  * \param Rhs the type of the right-hand side
-  *
-  * This class represents an expression where a coefficient-wise binary operator is applied to two expressions.
-  * It is the return type of binary operators, by which we mean only those binary operators where
-  * both the left-hand side and the right-hand side are Eigen expressions.
-  * For example, the return type of matrix1+matrix2 is a CwiseBinaryOp.
-  *
-  * Most of the time, this is the only way that it is used, so you typically don't have to name
-  * CwiseBinaryOp types explicitly.
-  *
-  * \sa MatrixBase::binaryExpr(const MatrixBase<OtherDerived> &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp
-  */
-
 namespace internal {
 template<typename BinaryOp, typename Lhs, typename Rhs>
 struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
@@ -74,6 +54,25 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
 
 template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
 class CwiseBinaryOpImpl;
 
+/** \class CwiseBinaryOp
+  * \ingroup Core_Module
+  *
+  * \brief Generic expression where a coefficient-wise binary operator is applied to two expressions
+  *
+  * \tparam BinaryOp template functor implementing the operator
+  * \tparam LhsType the type of the left-hand side
+  * \tparam RhsType the type of the right-hand side
+  *
+  * This class represents an expression where a coefficient-wise binary operator is applied to two expressions.
+  * It is the return type of binary operators, by which we mean only those binary operators where
+  * both the left-hand side and the right-hand side are Eigen expressions.
+  * For example, the return type of matrix1+matrix2 is a CwiseBinaryOp.
+  *
+  * Most of the time, this is the only way that it is used, so you typically don't have to name
+  * CwiseBinaryOp types explicitly.
+  *
+  * \sa MatrixBase::binaryExpr(const MatrixBase<OtherDerived> &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp
+  */
template<typename BinaryOp, typename LhsType, typename RhsType>
 class CwiseBinaryOp :
   public CwiseBinaryOpImpl<
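A minimal sketch of binaryExpr() with a custom functor, per the \sa reference above (standard Eigen API; MaxOp is illustrative):

```cpp
#include <Eigen/Dense>

// Example functor for binaryExpr(): coefficient-wise maximum.
struct MaxOp {
  float operator()(float a, float b) const { return a > b ? a : b; }
};

int main()
{
  Eigen::MatrixXf m1 = Eigen::MatrixXf::Random(2, 2);
  Eigen::MatrixXf m2 = Eigen::MatrixXf::Random(2, 2);

  // Both of these build CwiseBinaryOp expressions; coefficients are only
  // computed when the expression is assigned.
  Eigen::MatrixXf sum = m1 + m2;
  Eigen::MatrixXf mx  = m1.binaryExpr(m2, MaxOp());
  (void)sum; (void)mx;
}
```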
@@ -12,24 +12,6 @@
 
 namespace Eigen {
 
-/** \class CwiseNullaryOp
-  * \ingroup Core_Module
-  *
-  * \brief Generic expression of a matrix where all coefficients are defined by a functor
-  *
-  * \param NullaryOp template functor implementing the operator
-  * \param PlainObjectType the underlying plain matrix/array type
-  *
-  * This class represents an expression of a generic nullary operator.
-  * It is the return type of the Ones(), Zero(), Constant(), Identity() and Random() methods,
-  * and most of the time this is the only way it is used.
-  *
-  * However, if you want to write a function returning such an expression, you
-  * will need to use this class.
-  *
-  * \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr()
-  */
-
 namespace internal {
 template<typename NullaryOp, typename PlainObjectType>
 struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType>
@@ -40,6 +22,23 @@ struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectT
 };
 }
 
+/** \class CwiseNullaryOp
+  * \ingroup Core_Module
+  *
+  * \brief Generic expression of a matrix where all coefficients are defined by a functor
+  *
+  * \tparam NullaryOp template functor implementing the operator
+  * \tparam PlainObjectType the underlying plain matrix/array type
+  *
+  * This class represents an expression of a generic nullary operator.
+  * It is the return type of the Ones(), Zero(), Constant(), Identity() and Random() methods,
+  * and most of the time this is the only way it is used.
+  *
+  * However, if you want to write a function returning such an expression, you
+  * will need to use this class.
+  *
+  * \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr()
+  */
 template<typename NullaryOp, typename PlainObjectType>
 class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type, internal::no_assignment_operator
 {
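A minimal sketch of the nullary-expression factories named above (standard Eigen API):

```cpp
#include <Eigen/Dense>

int main()
{
  // All of these return CwiseNullaryOp expressions: no coefficients are
  // computed until the expression is assigned or otherwise evaluated.
  Eigen::MatrixXd z = Eigen::MatrixXd::Zero(3, 3);
  Eigen::MatrixXd c = Eigen::MatrixXd::Constant(3, 3, 1.5);
  Eigen::MatrixXd i = Eigen::MatrixXd::Identity(3, 3);
  Eigen::MatrixXd r = Eigen::MatrixXd::Random(3, 3);
  (void)z; (void)c; (void)i; (void)r;
}
```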
@@ -13,26 +13,6 @@
 
 namespace Eigen {
 
-/** \class CwiseUnaryOp
-  * \ingroup Core_Module
-  *
-  * \brief Generic expression where a coefficient-wise unary operator is applied to an expression
-  *
-  * \param UnaryOp template functor implementing the operator
-  * \param XprType the type of the expression to which we are applying the unary operator
-  *
-  * This class represents an expression where a unary operator is applied to an expression.
-  * It is the return type of all operations taking exactly 1 input expression, regardless of the
-  * presence of other inputs such as scalars. For example, the operator* in the expression 3*matrix
-  * is considered unary, because only the right-hand side is an expression, and its
-  * return type is a specialization of CwiseUnaryOp.
-  *
-  * Most of the time, this is the only way that it is used, so you typically don't have to name
-  * CwiseUnaryOp types explicitly.
-  *
-  * \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp
-  */
-
 namespace internal {
 template<typename UnaryOp, typename XprType>
 struct traits<CwiseUnaryOp<UnaryOp, XprType> >
@@ -52,6 +32,25 @@ struct traits<CwiseUnaryOp<UnaryOp, XprType> >
 
 template<typename UnaryOp, typename XprType, typename StorageKind>
 class CwiseUnaryOpImpl;
 
+/** \class CwiseUnaryOp
+  * \ingroup Core_Module
+  *
+  * \brief Generic expression where a coefficient-wise unary operator is applied to an expression
+  *
+  * \tparam UnaryOp template functor implementing the operator
+  * \tparam XprType the type of the expression to which we are applying the unary operator
+  *
+  * This class represents an expression where a unary operator is applied to an expression.
+  * It is the return type of all operations taking exactly 1 input expression, regardless of the
+  * presence of other inputs such as scalars. For example, the operator* in the expression 3*matrix
+  * is considered unary, because only the right-hand side is an expression, and its
+  * return type is a specialization of CwiseUnaryOp.
+  *
+  * Most of the time, this is the only way that it is used, so you typically don't have to name
+  * CwiseUnaryOp types explicitly.
+  *
+  * \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp
+  */
 template<typename UnaryOp, typename XprType>
 class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind>, internal::no_assignment_operator
 {
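A minimal sketch matching the docs above: 3*matrix is a unary expression, and unaryExpr() builds one from a custom functor (Clamp01 is illustrative):

```cpp
#include <Eigen/Dense>

// Example functor for unaryExpr().
struct Clamp01 {
  double operator()(double x) const { return x < 0.0 ? 0.0 : (x > 1.0 ? 1.0 : x); }
};

int main()
{
  Eigen::MatrixXd m = Eigen::MatrixXd::Random(2, 2);

  // 3*m has one expression input and one scalar, so it is a CwiseUnaryOp;
  // unaryExpr() builds one from a custom functor.
  Eigen::MatrixXd scaled  = 3 * m;
  Eigen::MatrixXd clamped = m.unaryExpr(Clamp01());
  (void)scaled; (void)clamped;
}
```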
@@ -12,20 +12,6 @@
 
 namespace Eigen {
 
-/** \class CwiseUnaryView
-  * \ingroup Core_Module
-  *
-  * \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector
-  *
-  * \param ViewOp template functor implementing the view
-  * \param MatrixType the type of the matrix we are applying the unary operator
-  *
-  * This class represents a lvalue expression of a generic unary view operator of a matrix or a vector.
-  * It is the return type of real() and imag(), and most of the time this is the only way it is used.
-  *
-  * \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp
-  */
-
 namespace internal {
 template<typename ViewOp, typename MatrixType>
 struct traits<CwiseUnaryView<ViewOp, MatrixType> >
@@ -55,6 +41,19 @@ struct traits<CwiseUnaryView<ViewOp, MatrixType> >
 
 template<typename ViewOp, typename MatrixType, typename StorageKind>
 class CwiseUnaryViewImpl;
 
+/** \class CwiseUnaryView
+  * \ingroup Core_Module
+  *
+  * \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector
+  *
+  * \tparam ViewOp template functor implementing the view
+  * \tparam MatrixType the type of the matrix we are applying the unary operator
+  *
+  * This class represents a lvalue expression of a generic unary view operator of a matrix or a vector.
+  * It is the return type of real() and imag(), and most of the time this is the only way it is used.
+  *
+  * \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp
+  */
 template<typename ViewOp, typename MatrixType>
 class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename internal::traits<MatrixType>::StorageKind>
 {
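A minimal sketch of the lvalue property described above (standard Eigen API):

```cpp
#include <Eigen/Dense>
#include <complex>

int main()
{
  Eigen::MatrixXcd m = Eigen::MatrixXcd::Random(2, 2);

  // real() and imag() return CwiseUnaryView expressions, which are
  // lvalues: they can appear on the left-hand side of an assignment.
  m.real() = Eigen::MatrixXd::Ones(2, 2);
  m.imag().setZero();
}
```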
@@ -191,19 +191,31 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
 
   EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE CoeffReturnType
-  y() const { return (*this)[1]; }
+  y() const
+  {
+    EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);
+    return (*this)[1];
+  }
 
   /** equivalent to operator[](2). */
 
   EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE CoeffReturnType
-  z() const { return (*this)[2]; }
+  z() const
+  {
+    EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);
+    return (*this)[2];
+  }
 
   /** equivalent to operator[](3). */
 
   EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE CoeffReturnType
-  w() const { return (*this)[3]; }
+  w() const
+  {
+    EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);
+    return (*this)[3];
+  }
 
   /** \internal
    * \returns the packet of coefficients starting at the given row and column. It is your responsibility
@@ -424,19 +436,31 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
 
   EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE Scalar&
-  y() { return (*this)[1]; }
+  y()
+  {
+    EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);
+    return (*this)[1];
+  }
 
   /** equivalent to operator[](2). */
 
   EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE Scalar&
-  z() { return (*this)[2]; }
+  z()
+  {
+    EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);
+    return (*this)[2];
+  }
 
   /** equivalent to operator[](3). */
 
   EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE Scalar&
-  w() { return (*this)[3]; }
+  w()
+  {
+    EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);
+    return (*this)[3];
+  }
 };
 
 /** \brief Base class providing direct read-only coefficient access to matrices and arrays.
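With the static assertions added in these two hunks, out-of-range use of x()/y()/z()/w() now fails at compile time; a sketch:

```cpp
#include <Eigen/Dense>

int main()
{
  Eigen::Vector4f v4(1.f, 2.f, 3.f, 4.f);
  float w = v4.w();       // fine: size 4 >= 4

  Eigen::Vector3f v3(1.f, 2.f, 3.f);
  float z = v3.z();       // fine: size 3 >= 3
  // float bad = v3.w();  // now fails to compile: OUT_OF_RANGE_ACCESS
  (void)w; (void)z;
}
```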
@@ -99,11 +99,13 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
 template<typename Derived>
 inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
 {
-  EIGEN_USING_STD_MATH(sqrt)
-  return sqrt(squaredNorm());
+  return numext::sqrt(squaredNorm());
 }
 
-/** \returns an expression of the quotient of *this by its own norm.
+/** \returns an expression of the quotient of \c *this by its own norm.
+  *
+  * \warning If the input vector is too small (i.e., this->norm()==0),
+  * then this function returns a copy of the input.
   *
   * \only_for_vectors
   *
@@ -115,19 +117,29 @@ MatrixBase<Derived>::normalized() const
 {
   typedef typename internal::nested_eval<Derived,2>::type _Nested;
   _Nested n(derived());
-  return n / n.norm();
+  RealScalar z = n.squaredNorm();
+  // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
+  if(z>RealScalar(0))
+    return n / numext::sqrt(z);
+  else
+    return n;
 }
 
 /** Normalizes the vector, i.e. divides it by its own norm.
   *
   * \only_for_vectors
   *
+  * \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged.
+  *
   * \sa norm(), normalized()
   */
 template<typename Derived>
 inline void MatrixBase<Derived>::normalize()
 {
-  *this /= norm();
+  RealScalar z = squaredNorm();
+  // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
+  if(z>RealScalar(0))
+    derived() /= numext::sqrt(z);
 }
 
 //---------- implementation of other norms ----------
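A sketch of the user-visible behavior after this change (standard Eigen API):

```cpp
#include <Eigen/Dense>

int main()
{
  Eigen::Vector3d v(3.0, 0.0, 4.0);
  Eigen::Vector3d u = v.normalized();   // (0.6, 0, 0.8); v itself unchanged
  v.normalize();                        // v itself becomes unit length

  // With the guard added above, a zero vector is no longer divided by 0:
  Eigen::Vector3d zero = Eigen::Vector3d::Zero();
  zero.normalize();                     // zero is left unchanged (no NaNs)
  (void)u;
}
```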
@@ -80,7 +80,7 @@ struct IOFormat
  *
  * \brief Pseudo expression providing matrix output with given format
  *
- * \param ExpressionType the type of the object on which IO stream operations are performed
+ * \tparam ExpressionType the type of the object on which IO stream operations are performed
  *
  * This class represents an expression with stream operators controlled by a given IOFormat.
  * It is the return type of DenseBase::format()
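A minimal sketch of format() and IOFormat, which produce the WithFormat expression documented above (standard Eigen API; the format arguments are illustrative):

```cpp
#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::Matrix2d m;
  m << 1, 2,
       3, 4;

  // DenseBase::format() returns the WithFormat pseudo expression above.
  Eigen::IOFormat fmt(Eigen::StreamPrecision, 0, ", ", ";\n", "[", "]");
  std::cout << m.format(fmt) << std::endl;
}
```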
@@ -13,6 +13,28 @@
 
 namespace Eigen {
 
+namespace internal {
+template<typename PlainObjectType, int MapOptions, typename StrideType>
+struct traits<Map<PlainObjectType, MapOptions, StrideType> >
+  : public traits<PlainObjectType>
+{
+  typedef traits<PlainObjectType> TraitsBase;
+  enum {
+    InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
+                             ? int(PlainObjectType::InnerStrideAtCompileTime)
+                             : int(StrideType::InnerStrideAtCompileTime),
+    OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
+                             ? int(PlainObjectType::OuterStrideAtCompileTime)
+                             : int(StrideType::OuterStrideAtCompileTime),
+    Alignment = int(MapOptions)&int(AlignedMask),
+    Flags0 = TraitsBase::Flags & (~NestByRefBit),
+    Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
+  };
+private:
+  enum { Options }; // Expressions don't have Options
+};
+}
+
 /** \class Map
   * \ingroup Core_Module
   *
@@ -63,29 +85,6 @@ namespace Eigen {
   *
   * \sa PlainObjectBase::Map(), \ref TopicStorageOrders
   */
-
-namespace internal {
-template<typename PlainObjectType, int MapOptions, typename StrideType>
-struct traits<Map<PlainObjectType, MapOptions, StrideType> >
-  : public traits<PlainObjectType>
-{
-  typedef traits<PlainObjectType> TraitsBase;
-  enum {
-    InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
-                             ? int(PlainObjectType::InnerStrideAtCompileTime)
-                             : int(StrideType::InnerStrideAtCompileTime),
-    OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
-                             ? int(PlainObjectType::OuterStrideAtCompileTime)
-                             : int(StrideType::OuterStrideAtCompileTime),
-    Alignment = int(MapOptions)&int(AlignedMask),
-    Flags0 = TraitsBase::Flags & (~NestByRefBit),
-    Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
-  };
-private:
-  enum { Options }; // Expressions don't have Options
-};
-}
-
 template<typename PlainObjectType, int MapOptions, typename StrideType> class Map
   : public MapBase<Map<PlainObjectType, MapOptions, StrideType> >
 {
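A minimal usage sketch of Map with and without a custom stride (standard Eigen API; buffer contents illustrative):

```cpp
#include <Eigen/Dense>

int main()
{
  float raw[8] = {1, 2, 3, 4, 5, 6, 7, 8};

  // View existing memory as a 2x4 column-major matrix, no copy.
  Eigen::Map<Eigen::MatrixXf> m(raw, 2, 4);
  m(0, 0) = 10.f;                  // writes through to raw[0]

  // Strided view: every other element of raw as a vector of size 4.
  Eigen::Map<Eigen::VectorXf, 0, Eigen::InnerStride<2> > v(raw, 4);
  (void)v;
}
```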
@@ -26,7 +26,7 @@ long double abs(long double x) { return (fabsl(x)); }
 
 namespace internal {
 
-/** \internal \struct global_math_functions_filtering_base
+/** \internal \class global_math_functions_filtering_base
   *
   * What it does:
   * Defines a typedef 'type' as follows:
@@ -954,8 +954,8 @@ T (ceil)(const T& x)
   return ceil(x);
 }
 
-// Log base 2 for 32 bits positive integers.
-// Conveniently returns 0 for x==0.
+/** Log base 2 for 32 bits positive integers.
+  * Conveniently returns 0 for x==0. */
 inline int log2(int x)
 {
   eigen_assert(x>=0);
@@ -969,6 +969,22 @@ inline int log2(int x)
   return table[(v * 0x07C4ACDDU) >> 27];
 }
 
+/** \returns the square root of \a x.
+  *
+  * It is essentially equivalent to \code using std::sqrt; return sqrt(x); \endcode,
+  * but slightly faster for float/double and some compilers (e.g., gcc), thanks to
+  * specializations when SSE is enabled.
+  *
+  * It's usage is justified in performance critical functions, like norm/normalize.
+  */
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T sqrt(const T &x)
+{
+  EIGEN_USING_STD_MATH(sqrt);
+  return sqrt(x);
+}
+
 } // end namespace numext
 
 namespace internal {
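The numext::sqrt helper added above relies on the usual "using std::sqrt" plus unqualified-call idiom, so user-defined scalar types keep working via argument-dependent lookup. A standalone sketch of that idiom, with illustrative names:

```cpp
#include <cmath>

namespace mylib {
  struct Dual { double a, b; };                        // toy user-defined scalar
  Dual sqrt(Dual x) { return Dual{std::sqrt(x.a), 0.0}; }
}

// Same pattern as the numext::sqrt added above: pull std::sqrt into scope,
// then make an unqualified call so ADL can pick up overloads for
// user-defined scalar types.
template<typename T>
T generic_sqrt(const T& x)
{
  using std::sqrt;
  return sqrt(x);
}

int main()
{
  double d = generic_sqrt(2.0);                         // resolves to std::sqrt
  mylib::Dual q = generic_sqrt(mylib::Dual{2.0, 0.0});  // resolves via ADL
  (void)d; (void)q;
}
```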
@@ -13,6 +13,45 @@
 
 namespace Eigen {
 
+namespace internal {
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+{
+private:
+  enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret };
+  typedef typename find_best_packet<_Scalar,size>::type PacketScalar;
+  enum {
+    row_major_bit = _Options&RowMajor ? RowMajorBit : 0,
+    is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,
+    max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,
+    default_alignment = compute_default_alignment<_Scalar,max_size>::value,
+    actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
+    required_alignment = unpacket_traits<PacketScalar>::alignment,
+    packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0
+  };
+
+public:
+  typedef _Scalar Scalar;
+  typedef Dense StorageKind;
+  typedef Eigen::Index StorageIndex;
+  typedef MatrixXpr XprKind;
+  enum {
+    RowsAtCompileTime = _Rows,
+    ColsAtCompileTime = _Cols,
+    MaxRowsAtCompileTime = _MaxRows,
+    MaxColsAtCompileTime = _MaxCols,
+    Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
+    Options = _Options,
+    InnerStrideAtCompileTime = 1,
+    OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
+
+    // FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase
+    EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
+    Alignment = actual_alignment
+  };
+};
+}
+
 /** \class Matrix
   * \ingroup Core_Module
   *
@@ -98,7 +137,7 @@ namespace Eigen {
   * </dl>
   *
   * <i><b>ABI and storage layout</b></i>
-  *
+  *
   * The table below summarizes the ABI of some possible Matrix instances which is fixed thorough the lifetime of Eigen 3.
   * <table class="manual">
   * <tr><th>Matrix type</th><th>Equivalent C structure</th></tr>
@@ -130,50 +169,11 @@ namespace Eigen {
   * </table>
   * Note that in this table Rows, Cols, MaxRows and MaxCols are all positive integers. A(S) is defined to the largest possible power-of-two
   * smaller to EIGEN_MAX_STATIC_ALIGN_BYTES.
   *
   * \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy,
   * \ref TopicStorageOrders
-  *
-  * \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy,
-  * \ref TopicStorageOrders
   */
-
-namespace internal {
-template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
-struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
-{
-private:
-  enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret };
-  typedef typename find_best_packet<_Scalar,size>::type PacketScalar;
-  enum {
-    row_major_bit = _Options&RowMajor ? RowMajorBit : 0,
-    is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,
-    max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,
-    default_alignment = compute_default_alignment<_Scalar,max_size>::value,
-    actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
-    required_alignment = unpacket_traits<PacketScalar>::alignment,
-    packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0
-  };
-
-public:
-  typedef _Scalar Scalar;
-  typedef Dense StorageKind;
-  typedef Eigen::Index StorageIndex;
-  typedef MatrixXpr XprKind;
-  enum {
-    RowsAtCompileTime = _Rows,
-    ColsAtCompileTime = _Cols,
-    MaxRowsAtCompileTime = _MaxRows,
-    MaxColsAtCompileTime = _MaxCols,
-    Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
-    Options = _Options,
-    InnerStrideAtCompileTime = 1,
-    OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
-
-    // FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase
-    EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
-    Alignment = actual_alignment
-  };
-};
-}
-
 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
 class Matrix
   : public PlainObjectBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
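The ABI table mentioned above maps each fixed-size Matrix to a plain C structure; a hedged sketch based on that description (the alignment A(S) noted there is omitted, and the struct name is illustrative):

```cpp
#include <Eigen/Dense>

// Per the ABI table referenced above, a fixed-size matrix stores only its
// coefficients, so it is size-compatible with a plain array struct.
struct equivalent_matrix4f { float data[16]; };

int main()
{
  Eigen::Matrix4f m = Eigen::Matrix4f::Identity();
  static_assert(sizeof(Eigen::Matrix4f) == sizeof(equivalent_matrix4f),
                "fixed-size Matrix stores only its coefficients");
  // Column-major by default: data()[1] is the (1,0) coefficient.
  float m10 = m.data()[1];
  (void)m10;
}
```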
@@ -13,25 +13,24 @@
 
 namespace Eigen {
 
-/** \class NestByValue
-  * \ingroup Core_Module
-  *
-  * \brief Expression which must be nested by value
-  *
-  * \param ExpressionType the type of the object of which we are requiring nesting-by-value
-  *
-  * This class is the return type of MatrixBase::nestByValue()
-  * and most of the time this is the only way it is used.
-  *
-  * \sa MatrixBase::nestByValue()
-  */
-
 namespace internal {
 template<typename ExpressionType>
 struct traits<NestByValue<ExpressionType> > : public traits<ExpressionType>
 {};
 }
 
+/** \class NestByValue
+  * \ingroup Core_Module
+  *
+  * \brief Expression which must be nested by value
+  *
+  * \tparam ExpressionType the type of the object of which we are requiring nesting-by-value
+  *
+  * This class is the return type of MatrixBase::nestByValue()
+  * and most of the time this is the only way it is used.
+  *
+  * \sa MatrixBase::nestByValue()
+  */
 template<typename ExpressionType> class NestByValue
   : public internal::dense_xpr_base< NestByValue<ExpressionType> >::type
 {
@@ -17,7 +17,7 @@ namespace Eigen {
  *
  * \brief Pseudo expression providing an operator = assuming no aliasing
  *
- * \param ExpressionType the type of the object on which to do the lazy assignment
+ * \tparam ExpressionType the type of the object on which to do the lazy assignment
  *
  * This class represents an expression with special assignment operators
  * assuming no aliasing between the target expression and the source expression.
@@ -17,7 +17,7 @@ namespace Eigen {
  *
  * \brief Holds information about the various numeric (i.e. scalar) types allowed by Eigen.
  *
- * \param T the numeric type at hand
+ * \tparam T the numeric type at hand
  *
  * This class stores enums, typedefs and static methods giving information about a numeric type.
 *
@@ -13,12 +13,18 @@
 
 namespace Eigen {
 
+namespace internal {
+
+enum PermPermProduct_t {PermPermProduct};
+
+} // end namespace internal
+
 /** \class PermutationBase
   * \ingroup Core_Module
   *
   * \brief Base class for permutations
   *
-  * \param Derived the derived class
+  * \tparam Derived the derived class
   *
   * This class is the base class for all expressions representing a permutation matrix,
   * internally stored as a vector of integers.
@@ -36,13 +42,6 @@ namespace Eigen {
   *
   * \sa class PermutationMatrix, class PermutationWrapper
   */
-
-namespace internal {
-
-enum PermPermProduct_t {PermPermProduct};
-
-} // end namespace internal
-
 template<typename Derived>
 class PermutationBase : public EigenBase<Derived>
 {
@@ -280,20 +279,6 @@ class PermutationBase : public EigenBase<Derived>
 
 };
 
-/** \class PermutationMatrix
-  * \ingroup Core_Module
-  *
-  * \brief Permutation matrix
-  *
-  * \param SizeAtCompileTime the number of rows/cols, or Dynamic
-  * \param MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
-  * \param StorageIndex the integer type of the indices
-  *
-  * This class represents a permutation matrix, internally stored as a vector of integers.
-  *
-  * \sa class PermutationBase, class PermutationWrapper, class DiagonalMatrix
-  */
-
 namespace internal {
 template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
 struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex> >
@@ -306,6 +291,19 @@ struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _Storag
 };
 }
 
+/** \class PermutationMatrix
+  * \ingroup Core_Module
+  *
+  * \brief Permutation matrix
+  *
+  * \tparam SizeAtCompileTime the number of rows/cols, or Dynamic
+  * \tparam MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
+  * \tparam _StorageIndex the integer type of the indices
+  *
+  * This class represents a permutation matrix, internally stored as a vector of integers.
+  *
+  * \sa class PermutationBase, class PermutationWrapper, class DiagonalMatrix
+  */
 template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
 class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex> >
 {
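A minimal usage sketch of PermutationMatrix (standard Eigen API):

```cpp
#include <Eigen/Dense>

int main()
{
  Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic> P(3);
  P.setIdentity();
  P.applyTranspositionOnTheRight(0, 1);   // swap indices 0 and 1

  Eigen::Matrix3d M = Eigen::Matrix3d::Random();
  Eigen::Matrix3d rowsPermuted = P * M;   // permutes rows
  Eigen::Matrix3d colsPermuted = M * P;   // permutes columns
  (void)rowsPermuted; (void)colsPermuted;
}
```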
@@ -482,18 +480,6 @@ class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageInd
   IndicesType m_indices;
 };
 
-/** \class PermutationWrapper
-  * \ingroup Core_Module
-  *
-  * \brief Class to view a vector of integers as a permutation matrix
-  *
-  * \param _IndicesType the type of the vector of integer (can be any compatible expression)
-  *
-  * This class allows to view any vector expression of integers as a permutation matrix.
-  *
-  * \sa class PermutationBase, class PermutationMatrix
-  */
-
 template<typename _IndicesType> class TranspositionsWrapper;
 namespace internal {
 template<typename _IndicesType>
@@ -513,6 +499,17 @@ struct traits<PermutationWrapper<_IndicesType> >
 };
 }
 
+/** \class PermutationWrapper
+  * \ingroup Core_Module
+  *
+  * \brief Class to view a vector of integers as a permutation matrix
+  *
+  * \tparam _IndicesType the type of the vector of integer (can be any compatible expression)
+  *
+  * This class allows to view any vector expression of integers as a permutation matrix.
+  *
+  * \sa class PermutationBase, class PermutationMatrix
+  */
 template<typename _IndicesType>
 class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesType> >
 {
@@ -14,22 +14,6 @@ namespace Eigen {
 
 template<typename Lhs, typename Rhs, int Option, typename StorageKind> class ProductImpl;
 
-/** \class Product
-  * \ingroup Core_Module
-  *
-  * \brief Expression of the product of two arbitrary matrices or vectors
-  *
-  * \param Lhs the type of the left-hand side expression
-  * \param Rhs the type of the right-hand side expression
-  *
-  * This class represents an expression of the product of two arbitrary matrices.
-  *
-  * The other template parameters are:
-  * \tparam Option can be DefaultProduct, AliasFreeProduct, or LazyProduct
-  *
-  */
-
 namespace internal {
 
 // Determine the scalar of Product<Lhs, Rhs>. This is normally the same as Lhs::Scalar times
@@ -102,7 +86,20 @@ struct traits<Product<Lhs, Rhs, Option> >
 
 } // end namespace internal
 
+/** \class Product
+  * \ingroup Core_Module
+  *
+  * \brief Expression of the product of two arbitrary matrices or vectors
+  *
+  * \tparam _Lhs the type of the left-hand side expression
+  * \tparam _Rhs the type of the right-hand side expression
+  *
+  * This class represents an expression of the product of two arbitrary matrices.
+  *
+  * The other template parameters are:
+  * \tparam Option can be DefaultProduct, AliasFreeProduct, or LazyProduct
+  *
+  */
 template<typename _Lhs, typename _Rhs, int Option>
 class Product : public ProductImpl<_Lhs,_Rhs,Option,
                                    typename internal::product_promote_storage_type<typename internal::traits<_Lhs>::StorageKind,
@@ -38,10 +38,9 @@ struct evaluator<Product<Lhs, Rhs, Options> >
 // Catch scalar * ( A * B ) and transform it to (A*scalar) * B
 // TODO we should apply that rule only if that's really helpful
 template<typename Lhs, typename Rhs, typename Scalar>
-struct evaluator_traits<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
-  : evaluator_traits_base<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
+struct evaluator_assume_aliasing<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
 {
-  enum { AssumeAliasing = 1 };
+  static const bool value = true;
 };
 template<typename Lhs, typename Rhs, typename Scalar>
 struct evaluator<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
@@ -81,17 +80,8 @@ template< typename Lhs, typename Rhs,
 struct generic_product_impl;
 
 template<typename Lhs, typename Rhs>
-struct evaluator_traits<Product<Lhs, Rhs, DefaultProduct> >
-  : evaluator_traits_base<Product<Lhs, Rhs, DefaultProduct> >
-{
-  enum { AssumeAliasing = 1 };
-};
-
-template<typename Lhs, typename Rhs>
-struct evaluator_traits<Product<Lhs, Rhs, AliasFreeProduct> >
-  : evaluator_traits_base<Product<Lhs, Rhs, AliasFreeProduct> >
-{
-  enum { AssumeAliasing = 0 };
+struct evaluator_assume_aliasing<Product<Lhs, Rhs, DefaultProduct> > {
+  static const bool value = true;
 };
 
 // This is the default evaluator implementation for products:
@@ -189,6 +179,13 @@ struct Assignment<DstXprType, CwiseUnaryOp<internal::scalar_multiple_op<ScalarBi
 //----------------------------------------
 // Catch "Dense ?= xpr + Product<>" expression to save one temporary
 // FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct
 // TODO enable it for "Dense ?= xpr - Product<>" as well.
 
+template<typename OtherXpr, typename Lhs, typename Rhs>
+struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar>, const OtherXpr,
+                                               const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
+  static const bool value = true;
+};
+
 template<typename DstXprType, typename OtherXpr, typename ProductType, typename Scalar, typename Func1, typename Func2>
 struct assignment_from_xpr_plus_product
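The specialization above targets the exact pattern it names; a sketch of the expression shape involved (standard Eigen API):

```cpp
#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(8, 8);
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(8, 8);
  Eigen::MatrixXd C = Eigen::MatrixXd::Random(8, 8);
  Eigen::MatrixXd D(8, 8);

  // "dense = xpr + Product<>": the rule above lets the product be
  // evaluated as part of this assignment, saving one temporary.
  D = C + A * B;
}
```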
@@ -12,76 +12,6 @@
 
 namespace Eigen {
 
-/** \class Ref
-  * \ingroup Core_Module
-  *
-  * \brief A matrix or vector expression mapping an existing expression
-  *
-  * \tparam PlainObjectType the equivalent matrix type of the mapped data
-  * \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
-  * The default is \c #Unaligned.
-  * \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1),
-  * but accepts a variable outer stride (leading dimension).
-  * This can be overridden by specifying strides.
-  * The type passed here must be a specialization of the Stride template, see examples below.
-  *
-  * This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies.
-  * A Ref<> object can represent either a const expression or a l-value:
-  * \code
-  * // in-out argument:
-  * void foo1(Ref<VectorXf> x);
-  *
-  * // read-only const argument:
-  * void foo2(const Ref<const VectorXf>& x);
-  * \endcode
-  *
-  * In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered.
-  * By default, a Ref<VectorXf> can reference any dense vector expression of float having a contiguous memory layout.
-  * Likewise, a Ref<MatrixXf> can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with
-  * the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension)
-  * can be greater than the number of rows.
-  *
-  * In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function.
-  * Here are some examples:
-  * \code
-  * MatrixXf A;
-  * VectorXf a;
-  * foo1(a.head());             // OK
-  * foo1(A.col());              // OK
-  * foo1(A.row());              // Compilation error because here innerstride!=1
-  * foo2(A.row());              // Compilation error because A.row() is a 1xN object while foo2 is expecting a Nx1 object
-  * foo2(A.row().transpose());  // The row is copied into a contiguous temporary
-  * foo2(2*a);                  // The expression is evaluated into a temporary
-  * foo2(A.col().segment(2,4)); // No temporary
-  * \endcode
-  *
-  * The range of inputs that can be referenced without temporary can be enlarged using the last two template parameters.
-  * Here is an example accepting an innerstride!=1:
-  * \code
-  * // in-out argument:
-  * void foo3(Ref<VectorXf,0,InnerStride<> > x);
-  * foo3(A.row());              // OK
-  * \endcode
-  * The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more
-  * expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overload internally calling a
-  * template function, e.g.:
-  * \code
-  * // in the .h:
-  * void foo(const Ref<MatrixXf>& A);
-  * void foo(const Ref<MatrixXf,0,Stride<> >& A);
-  *
-  * // in the .cpp:
-  * template<typename TypeOfA> void foo_impl(const TypeOfA& A) {
-  *   ... // crazy code goes here
-  * }
-  * void foo(const Ref<MatrixXf>& A) { foo_impl(A); }
-  * void foo(const Ref<MatrixXf,0,Stride<> >& A) { foo_impl(A); }
-  * \endcode
-  *
-  *
-  * \sa PlainObjectBase::Map(), \ref TopicStorageOrders
-  */
-
 namespace internal {
 
 template<typename _PlainObjectType, int _Options, typename _StrideType>
@@ -182,7 +112,75 @@ protected:
   StrideBase m_stride;
 };
 
 
+/** \class Ref
+  * \ingroup Core_Module
+  *
+  * \brief A matrix or vector expression mapping an existing expression
+  *
+  * \tparam PlainObjectType the equivalent matrix type of the mapped data
+  * \tparam Options specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
+  * The default is \c #Unaligned.
+  * \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1),
+  * but accepts a variable outer stride (leading dimension).
+  * This can be overridden by specifying strides.
+  * The type passed here must be a specialization of the Stride template, see examples below.
+  *
+  * This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies.
+  * A Ref<> object can represent either a const expression or a l-value:
+  * \code
+  * // in-out argument:
+  * void foo1(Ref<VectorXf> x);
+  *
+  * // read-only const argument:
+  * void foo2(const Ref<const VectorXf>& x);
+  * \endcode
+  *
+  * In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered.
+  * By default, a Ref<VectorXf> can reference any dense vector expression of float having a contiguous memory layout.
+  * Likewise, a Ref<MatrixXf> can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with
+  * the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension)
+  * can be greater than the number of rows.
+  *
+  * In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function.
+  * Here are some examples:
+  * \code
+  * MatrixXf A;
+  * VectorXf a;
+  * foo1(a.head());             // OK
+  * foo1(A.col());              // OK
+  * foo1(A.row());              // Compilation error because here innerstride!=1
+  * foo2(A.row());              // Compilation error because A.row() is a 1xN object while foo2 is expecting a Nx1 object
+  * foo2(A.row().transpose());  // The row is copied into a contiguous temporary
+  * foo2(2*a);                  // The expression is evaluated into a temporary
+  * foo2(A.col().segment(2,4)); // No temporary
+  * \endcode
+  *
+  * The range of inputs that can be referenced without temporary can be enlarged using the last two template parameters.
+  * Here is an example accepting an innerstride!=1:
+  * \code
+  * // in-out argument:
+  * void foo3(Ref<VectorXf,0,InnerStride<> > x);
+  * foo3(A.row());              // OK
+  * \endcode
+  * The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more
+  * expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overload internally calling a
+  * template function, e.g.:
+  * \code
+  * // in the .h:
+  * void foo(const Ref<MatrixXf>& A);
+  * void foo(const Ref<MatrixXf,0,Stride<> >& A);
+  *
+  * // in the .cpp:
+  * template<typename TypeOfA> void foo_impl(const TypeOfA& A) {
+  *   ... // crazy code goes here
+  * }
+  * void foo(const Ref<MatrixXf>& A) { foo_impl(A); }
+  * void foo(const Ref<MatrixXf,0,Stride<> >& A) { foo_impl(A); }
+  * \endcode
+  *
+  *
+  * \sa PlainObjectBase::Map(), \ref TopicStorageOrders
+  */
 template<typename PlainObjectType, int Options, typename StrideType> class Ref
   : public RefBase<Ref<PlainObjectType, Options, StrideType> >
 {
@@ -209,6 +207,7 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref
     EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
                                  typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
     #else
+    /** Implicit constructor from any dense expression */
     template<typename Derived>
     inline Ref(DenseBase<Derived>& expr)
     #endif
@@ -12,21 +12,6 @@
 
 namespace Eigen {
 
-/**
-  * \class Replicate
-  * \ingroup Core_Module
-  *
-  * \brief Expression of the multiple replication of a matrix or vector
-  *
-  * \param MatrixType the type of the object we are replicating
-  *
-  * This class represents an expression of the multiple replication of a matrix or vector.
-  * It is the return type of DenseBase::replicate() and most of the time
-  * this is the only way it is used.
-  *
-  * \sa DenseBase::replicate()
-  */
-
 namespace internal {
 template<typename MatrixType,int RowFactor,int ColFactor>
 struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
@ -57,6 +42,22 @@ struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* \class Replicate
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of the multiple replication of a matrix or vector
|
||||
*
|
||||
* \tparam MatrixType the type of the object we are replicating
|
||||
* \tparam RowFactor number of repetitions at compile time along the vertical direction, can be Dynamic.
|
||||
* \tparam ColFactor number of repetitions at compile time along the horizontal direction, can be Dynamic.
|
||||
*
|
||||
* This class represents an expression of the multiple replication of a matrix or vector.
|
||||
* It is the return type of DenseBase::replicate() and most of the time
|
||||
* this is the only way it is used.
|
||||
*
|
||||
* \sa DenseBase::replicate()
|
||||
*/
|
||||
template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
|
||||
: public internal::dense_xpr_base< Replicate<MatrixType,RowFactor,ColFactor> >::type
|
||||
{
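
A minimal usage sketch of DenseBase::replicate(), the entry point named above, assuming only the public Eigen/Dense API:

#include <Eigen/Dense>
int main() {
  Eigen::Vector3f v(1.f, 2.f, 3.f);
  Eigen::MatrixXf tiled = v.replicate(1, 4);    // 3x4: four copies of v side by side
  Eigen::MatrixXf fixed = v.replicate<2, 1>();  // 6x1: RowFactor/ColFactor fixed at compile time
  return 0;
}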
|
@ -13,11 +13,6 @@

namespace Eigen {

/** \class ReturnByValue
* \ingroup Core_Module
*
*/

namespace internal {

template<typename Derived>
|
@ -48,6 +43,10 @@ struct nested_eval<ReturnByValue<Derived>, n, PlainObject>

} // end namespace internal

/** \class ReturnByValue
* \ingroup Core_Module
*
*/
template<typename Derived> class ReturnByValue
  : public internal::dense_xpr_base< ReturnByValue<Derived> >::type, internal::no_assignment_operator
{
|
@ -14,20 +14,6 @@

namespace Eigen {

/** \class Reverse
* \ingroup Core_Module
*
* \brief Expression of the reverse of a vector or matrix
*
* \param MatrixType the type of the object of which we are taking the reverse
*
* This class represents an expression of the reverse of a vector.
* It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse()
* and most of the time this is the only way it is used.
*
* \sa MatrixBase::reverse(), VectorwiseOp::reverse()
*/

namespace internal {

template<typename MatrixType, int Direction>
|
@ -60,6 +46,20 @@ template<typename PacketType> struct reverse_packet_cond<PacketType,false>

} // end namespace internal

/** \class Reverse
* \ingroup Core_Module
*
* \brief Expression of the reverse of a vector or matrix
*
* \tparam MatrixType the type of the object of which we are taking the reverse
* \tparam Direction defines the direction of the reverse operation, can be Vertical, Horizontal, or BothDirections
*
* This class represents an expression of the reverse of a vector.
* It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse()
* and most of the time this is the only way it is used.
*
* \sa MatrixBase::reverse(), VectorwiseOp::reverse()
*/
template<typename MatrixType, int Direction> class Reverse
  : public internal::dense_xpr_base< Reverse<MatrixType, Direction> >::type
{
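
A minimal sketch of the two entry points mentioned above, MatrixBase::reverse() and VectorwiseOp::reverse(), assuming the public Eigen/Dense API:

#include <Eigen/Dense>
int main() {
  Eigen::Vector4f v(0.f, 1.f, 2.f, 3.f);
  Eigen::Vector4f r = v.reverse();                  // [3 2 1 0]
  Eigen::MatrixXf m = Eigen::MatrixXf::Random(3, 3);
  Eigen::MatrixXf flipped = m.colwise().reverse();  // reverse each column (Vertical direction)
  return 0;
}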
|
@ -203,8 +203,6 @@ struct evaluator_traits<SelfAdjointView<MatrixType,Mode> >
{
  typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
  typedef SelfAdjointShape Shape;

  static const int AssumeAliasing = 0;
};

template<int UpLo, int SetOpposite, typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version>
|
@ -31,8 +31,8 @@ namespace Eigen {
* arguments to the constructor.
*
* Indeed, this class takes two template parameters:
* \param _OuterStrideAtCompileTime the outer stride, or Dynamic if you want to specify it at runtime.
* \param _InnerStrideAtCompileTime the inner stride, or Dynamic if you want to specify it at runtime.
* \tparam _OuterStrideAtCompileTime the outer stride, or Dynamic if you want to specify it at runtime.
* \tparam _InnerStrideAtCompileTime the inner stride, or Dynamic if you want to specify it at runtime.
*
* Here is an example:
* \include Map_general_stride.cpp
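
In the spirit of the Map_general_stride.cpp example referenced above, a hedged sketch mapping every other element of a raw buffer with a runtime inner stride:

#include <Eigen/Dense>
int main() {
  float data[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  Eigen::Map<Eigen::VectorXf, 0, Eigen::InnerStride<> >
      v(data, 4, Eigen::InnerStride<>(2));  // refers to data[0], data[2], data[4], data[6]
  float x = v(3);                           // 6.0f, read in place, no copy
  return 0;
}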
|
@ -13,20 +13,6 @@

namespace Eigen {

/** \class Transpose
* \ingroup Core_Module
*
* \brief Expression of the transpose of a matrix
*
* \param MatrixType the type of the object of which we are taking the transpose
*
* This class represents an expression of the transpose of a matrix.
* It is the return type of MatrixBase::transpose() and MatrixBase::adjoint()
* and most of the time this is the only way it is used.
*
* \sa MatrixBase::transpose(), MatrixBase::adjoint()
*/

namespace internal {
template<typename MatrixType>
struct traits<Transpose<MatrixType> > : public traits<MatrixType>
|
@ -50,6 +36,19 @@ struct traits<Transpose<MatrixType> > : public traits<MatrixType>

template<typename MatrixType, typename StorageKind> class TransposeImpl;

/** \class Transpose
* \ingroup Core_Module
*
* \brief Expression of the transpose of a matrix
*
* \tparam MatrixType the type of the object of which we are taking the transpose
*
* This class represents an expression of the transpose of a matrix.
* It is the return type of MatrixBase::transpose() and MatrixBase::adjoint()
* and most of the time this is the only way it is used.
*
* \sa MatrixBase::transpose(), MatrixBase::adjoint()
*/
template<typename MatrixType> class Transpose
  : public TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>
{
|
@ -12,35 +12,6 @@

namespace Eigen {

/** \class Transpositions
* \ingroup Core_Module
*
* \brief Represents a sequence of transpositions (row/column interchange)
*
* \param SizeAtCompileTime the number of transpositions, or Dynamic
* \param MaxSizeAtCompileTime the maximum number of transpositions, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
*
* This class represents a permutation transformation as a sequence of \em n transpositions
* \f$[T_{n-1} \ldots T_{i} \ldots T_{0}]\f$. It is internally stored as a vector of integers \c indices.
* Each transposition \f$ T_{i} \f$ applied on the left of a matrix (\f$ T_{i} M\f$) interchanges
* the rows \c i and \c indices[i] of the matrix \c M.
* A transposition applied on the right (e.g., \f$ M T_{i}\f$) yields a column interchange.
*
* Compared to the class PermutationMatrix, such a sequence of transpositions is what is
* computed during a decomposition with pivoting, and it is faster when applying the permutation in-place.
*
* To apply a sequence of transpositions to a matrix, simply use the operator * as in the following example:
* \code
* Transpositions tr;
* MatrixXf mat;
* mat = tr * mat;
* \endcode
* In this example, we detect that the matrix appears on both sides, and so the transpositions
* are applied in-place without any temporary or extra copy.
*
* \sa class PermutationMatrix
*/

template<typename Derived>
class TranspositionsBase
{
|
@ -154,6 +125,35 @@ struct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageInde
};
}

/** \class Transpositions
* \ingroup Core_Module
*
* \brief Represents a sequence of transpositions (row/column interchange)
*
* \tparam SizeAtCompileTime the number of transpositions, or Dynamic
* \tparam MaxSizeAtCompileTime the maximum number of transpositions, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
*
* This class represents a permutation transformation as a sequence of \em n transpositions
* \f$[T_{n-1} \ldots T_{i} \ldots T_{0}]\f$. It is internally stored as a vector of integers \c indices.
* Each transposition \f$ T_{i} \f$ applied on the left of a matrix (\f$ T_{i} M\f$) interchanges
* the rows \c i and \c indices[i] of the matrix \c M.
* A transposition applied on the right (e.g., \f$ M T_{i}\f$) yields a column interchange.
*
* Compared to the class PermutationMatrix, such a sequence of transpositions is what is
* computed during a decomposition with pivoting, and it is faster when applying the permutation in-place.
*
* To apply a sequence of transpositions to a matrix, simply use the operator * as in the following example:
* \code
* Transpositions tr;
* MatrixXf mat;
* mat = tr * mat;
* \endcode
* In this example, we detect that the matrix appears on both sides, and so the transpositions
* are applied in-place without any temporary or extra copy.
*
* \sa class PermutationMatrix
*/

template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
{
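
A compilable variant of the documentation snippet above; the size and the indices are illustrative, and coeffRef() is used to set each transposition:

#include <Eigen/Dense>
int main() {
  Eigen::Transpositions<Eigen::Dynamic> tr(2);
  tr.coeffRef(0) = 1;  // T_0 interchanges rows 0 and 1
  tr.coeffRef(1) = 1;  // T_1 leaves row 1 where it is
  Eigen::MatrixXf mat = Eigen::MatrixXf::Random(2, 3);
  mat = tr * mat;      // applied in place, without a temporary
  return 0;
}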
|
@ -704,10 +704,6 @@ struct evaluator_traits<TriangularView<MatrixType,Mode> >
{
  typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
  typedef typename glue_shapes<typename evaluator_traits<MatrixType>::Shape, TriangularShape>::type Shape;

  // 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a
  // temporary; 0 if not.
  static const int AssumeAliasing = 0;
};

template<typename MatrixType, unsigned int Mode>
|
@ -13,11 +13,6 @@

namespace Eigen {

namespace internal {
template<typename VectorType, int Size>
struct traits<VectorBlock<VectorType, Size> >
  : public traits<Block<VectorType,
                        traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
                        traits<VectorType>::Flags & RowMajorBit ? Size : 1> >
{
};
}

/** \class VectorBlock
* \ingroup Core_Module
*
* \brief Expression of a fixed-size or dynamic-size sub-vector
*
* \param VectorType the type of the object in which we are taking a sub-vector
* \param Size size of the sub-vector we are taking at compile time (optional)
* \tparam VectorType the type of the object in which we are taking a sub-vector
* \tparam Size size of the sub-vector we are taking at compile time (optional)
*
* This class represents an expression of either a fixed-size or dynamic-size sub-vector.
* It is the return type of DenseBase::segment(Index,Index) and DenseBase::segment<int>(Index) and
|
@ -43,17 +53,6 @@ namespace Eigen {
*
* \sa class Block, DenseBase::segment(Index,Index,Index,Index), DenseBase::segment(Index,Index)
*/

namespace internal {
template<typename VectorType, int Size>
struct traits<VectorBlock<VectorType, Size> >
  : public traits<Block<VectorType,
                        traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
                        traits<VectorType>::Flags & RowMajorBit ? Size : 1> >
{
};
}

template<typename VectorType, int Size> class VectorBlock
  : public Block<VectorType,
                 internal::traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
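
A minimal sketch of the two segment() flavors whose return type is VectorBlock, using the public Eigen/Dense API:

#include <Eigen/Dense>
int main() {
  Eigen::VectorXf v = Eigen::VectorXf::LinSpaced(8, 0.f, 7.f);
  v.segment(2, 3).setZero();         // dynamic-size sub-vector: elements 2..4
  v.segment<3>(5).setConstant(1.f);  // fixed-size sub-vector: elements 5..7
  return 0;
}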
|
@ -141,8 +141,8 @@ struct member_redux {
*
* \brief Pseudo expression providing partial reduction operations
*
* \param ExpressionType the type of the object on which to do partial reductions
* \param Direction indicates the direction of the redux (#Vertical or #Horizontal)
* \tparam ExpressionType the type of the object on which to do partial reductions
* \tparam Direction indicates the direction of the redux (#Vertical or #Horizontal)
*
* This class represents a pseudo expression with partial reduction features.
* It is the return type of DenseBase::colwise() and DenseBase::rowwise()
|
@ -187,11 +187,11 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
  protected:

    /** \internal
      * \returns the i-th subvector according to the \c Direction */
    typedef typename internal::conditional<isVertical,
                                           typename ExpressionType::ColXpr,
                                           typename ExpressionType::RowXpr>::type SubVector;
    /** \internal
      * \returns the i-th subvector according to the \c Direction */
    EIGEN_DEVICE_FUNC
    SubVector subVector(Index i)
    {
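
A minimal sketch of the colwise()/rowwise() pseudo expressions described above:

#include <Eigen/Dense>
int main() {
  Eigen::MatrixXf m(2, 3);
  m << 1, 2, 3,
       4, 5, 6;
  Eigen::RowVectorXf colSums = m.colwise().sum();  // [5 7 9]
  Eigen::Index i;
  m.rowwise().sum().maxCoeff(&i);                  // i == 1: the second row has the largest sum
  return 0;
}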
|
@ -518,6 +518,28 @@ Packet2d prsqrt<Packet2d>(const Packet2d& x) {

} // end namespace internal

namespace numext {

template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float sqrt(const float &x)
{
  return internal::pfirst(internal::Packet4f(_mm_sqrt_ss(_mm_set_ss(x))));
}

template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double sqrt(const double &x)
{
#if EIGEN_COMP_GNUC
  return internal::pfirst(internal::Packet2d(__builtin_ia32_sqrtsd(_mm_set_sd(x))));
#else
  return internal::pfirst(internal::Packet2d(_mm_sqrt_pd(_mm_set_sd(x))));
#endif
}

} // end namespace numext

} // end namespace Eigen

#endif // EIGEN_MATH_FUNCTIONS_SSE_H
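
A standalone sketch of the same idea outside Eigen: routing a scalar square root through the SSE scalar-sqrt instruction, as the specializations above do:

#include <xmmintrin.h>
float sse_sqrt(float x) {
  // _mm_set_ss places x in the low lane; sqrtss computes the root of that lane only
  return _mm_cvtss_f32(_mm_sqrt_ss(_mm_set_ss(x)));
}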
|
@ -145,12 +145,9 @@ static void run(Index rows, Index cols, Index depth,

        // Release all the sub blocks A'_i of A' for the current thread,
        // i.e., we simply decrement the number of users by 1
        #pragma omp critical
        {
          for(Index i=0; i<threads; ++i)
            #pragma omp atomic
            info[i].users -= 1;
        }
      }
    }
    else
|
@ -123,18 +123,18 @@ template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::R
template<typename Scalar, typename Index>
class BlasVectorMapper {
  public:
  EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {}
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {}

  EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {
    return m_data[i];
  }
  template <typename Packet, int AlignmentType>
  EIGEN_ALWAYS_INLINE Packet load(Index i) const {
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet load(Index i) const {
    return ploadt<Packet, AlignmentType>(m_data + i);
  }

  template <typename Packet>
  bool aligned(Index i) const {
  EIGEN_DEVICE_FUNC bool aligned(Index i) const {
    return (size_t(m_data+i)%sizeof(Packet))==0;
  }
|
@ -148,25 +148,25 @@ class BlasLinearMapper {
  typedef typename packet_traits<Scalar>::type Packet;
  typedef typename packet_traits<Scalar>::half HalfPacket;

  EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {}
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {}

  EIGEN_ALWAYS_INLINE void prefetch(int i) const {
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {
    internal::prefetch(&operator()(i));
  }

  EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
    return m_data[i];
  }

  EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
    return ploadt<Packet, AlignmentType>(m_data + i);
  }

  EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const {
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const {
    return ploadt<HalfPacket, AlignmentType>(m_data + i);
  }

  EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const {
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const {
    pstoret<Scalar, Packet, AlignmentType>(m_data + i, p);
  }
|
@ -184,18 +184,18 @@ class blas_data_mapper {
  typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper;
  typedef BlasVectorMapper<Scalar, Index> VectorMapper;

  EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {}
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {}

  EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>
  getSubMapper(Index i, Index j) const {
    return blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>(&operator()(i, j), m_stride);
  }

  EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
    return LinearMapper(&operator()(i, j));
  }

  EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {
    return VectorMapper(&operator()(i, j));
  }
|
@ -205,28 +205,28 @@ class blas_data_mapper {
    return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride];
  }

  EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
    return ploadt<Packet, AlignmentType>(&operator()(i, j));
  }

  EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const {
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const {
    return ploadt<HalfPacket, AlignmentType>(&operator()(i, j));
  }

  template<typename SubPacket>
  EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
    pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
  }

  template<typename SubPacket>
  EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
    return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);
  }

  const Index stride() const { return m_stride; }
  const Scalar* data() const { return m_data; }
  EIGEN_DEVICE_FUNC const Index stride() const { return m_stride; }
  EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }

  Index firstAligned(Index size) const {
  EIGEN_DEVICE_FUNC Index firstAligned(Index size) const {
    if (size_t(m_data)%sizeof(Scalar)) {
      return -1;
    }
|
@ -224,7 +224,7 @@ enum {

/** \ingroup enums
  * Enum for indicating whether a buffer is aligned or not. */
enum Foo {
enum {
  Unaligned=0,   /**< Data pointer has no specific alignment. */
  Aligned8=8,    /**< Data pointer is aligned on a 8 bytes boundary. */
  Aligned16=16,  /**< Data pointer is aligned on a 16 bytes boundary. */
|
@ -524,7 +524,7 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_align
  * \sa first_default_aligned()
  */
template<int Alignment, typename Scalar, typename Index>
inline Index first_aligned(const Scalar* array, Index size)
EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size)
{
  static const Index ScalarSize = sizeof(Scalar);
  static const Index AlignmentSize = Alignment / ScalarSize;
|
@ -544,14 +544,15 @@ inline Index first_aligned(const Scalar* array, Index size)
  }
  else
  {
    return std::min<Index>( (AlignmentSize - (Index((std::size_t(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask, size);
    Index first = (AlignmentSize - (Index((std::size_t(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask;
    return (first < size) ? first : size;
  }
}

/** \internal Returns the index of the first element of the array that is well aligned with respect to the largest packet requirement.
  * \sa first_aligned(Scalar*,Index) and first_default_aligned(DenseBase<Derived>) */
template<typename Scalar, typename Index>
inline Index first_default_aligned(const Scalar* array, Index size)
EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size)
{
  typedef typename packet_traits<Scalar>::type DefaultPacketType;
  return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size);
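
A hedged illustration of the index arithmetic used above: for floats and a 16-byte alignment target, a pointer 8 bytes past a boundary yields (4 - 2) & 3 = 2, so the third element is the first aligned one. The function name is hypothetical:

#include <cstdint>
#include <cstddef>
std::ptrdiff_t first_aligned_sketch(const float* array, std::ptrdiff_t size) {
  const std::ptrdiff_t AlignmentSize = 16 / sizeof(float);  // 4 floats per 16-byte block
  const std::ptrdiff_t AlignmentMask = AlignmentSize - 1;
  std::ptrdiff_t first =
      (AlignmentSize - (std::ptrdiff_t(std::uintptr_t(array) / sizeof(float)) & AlignmentMask))
      & AlignmentMask;
  return first < size ? first : size;  // same clamping as the patched code above
}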
|
@ -50,6 +50,7 @@
        THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE,
        THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE,
        THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE,
        OUT_OF_RANGE_ACCESS,
        YOU_MADE_A_PROGRAMMING_MISTAKE,
        EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT,
        EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE,
|
@ -390,9 +390,9 @@ struct transfer_constness
* a*d. Evaluating can be beneficial for example if every coefficient access in the resulting expression causes
* many coefficient accesses in the nested expressions -- as is the case with matrix product for example.
*
* \param T the type of the expression being nested.
* \param n the number of coefficient accesses in the nested expression for each coefficient access in the bigger expression.
* \param PlainObject the type of the temporary if needed.
* \tparam T the type of the expression being nested.
* \tparam n the number of coefficient accesses in the nested expression for each coefficient access in the bigger expression.
* \tparam PlainObject the type of the temporary if needed.
*/
template<typename T, int n, typename PlainObject = typename plain_object_eval<T>::type> struct nested_eval
{
|
@ -575,7 +575,7 @@ template <int ProductTag> struct product_promote_storage_type<Dense,
template <int ProductTag> struct product_promote_storage_type<PermutationStorage, Dense, ProductTag> { typedef Dense ret; };

/** \internal gives the plain matrix or array type to store a row/column/diagonal of a matrix type.
* \param Scalar optional parameter allowing to pass a different scalar type than the one of the MatrixType.
* \tparam Scalar optional parameter allowing to pass a different scalar type than the one of the MatrixType.
*/
template<typename ExpressionType, typename Scalar = typename ExpressionType::Scalar>
struct plain_row_type
|
@ -375,8 +375,12 @@ namespace internal {
* Performs a QR step on a tridiagonal symmetric matrix represented as a
* pair of two vectors \a diag and \a subdiag.
*
* \param matA the input selfadjoint matrix
* \param hCoeffs returned Householder coefficients
* \param diag the diagonal part of the input selfadjoint tridiagonal matrix
* \param subdiag the sub-diagonal part of the input selfadjoint tridiagonal matrix
* \param start starting index of the submatrix to work on
* \param end last+1 index of the submatrix to work on
* \param matrixQ pointer to the column-major matrix holding the eigenvectors, can be 0
* \param n size of the input matrix
*
* For compilation efficiency reasons, this procedure does not use eigen expression
* for its arguments.
|
@ -467,9 +471,10 @@ namespace internal {
* \brief Compute the eigendecomposition from a tridiagonal matrix
*
* \param[in,out] diag : On input, the diagonal of the matrix, on output the eigenvalues
* \param[in] subdiag : The subdiagonal part of the matrix.
* \param[in,out] : On input, the maximum number of iterations, on output, the effective number of iterations.
* \param[out] eivec : The matrix to store the eigenvectors... if needed. allocated on input
* \param[in,out] subdiag : The subdiagonal part of the matrix (entries are modified during the decomposition)
* \param[in] maxIterations : the maximum number of iterations
* \param[in] computeEigenvectors : whether the eigenvectors have to be computed or not
* \param[out] eivec : The matrix to store the eigenvectors if computeEigenvectors==true. Must be allocated on input.
* \returns \c Success or \c NoConvergence
*/
template<typename MatrixType, typename DiagType, typename SubDiagType>
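
The public entry point for this code path is SelfAdjointEigenSolver::computeFromTridiagonal(); a hedged sketch assuming the Eigen 3.3 API:

#include <Eigen/Eigenvalues>
int main() {
  Eigen::VectorXd diag = Eigen::VectorXd::Random(5);
  Eigen::VectorXd subdiag = Eigen::VectorXd::Random(4);
  Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> es;
  es.computeFromTridiagonal(diag, subdiag, Eigen::ComputeEigenvectors);
  if (es.info() == Eigen::Success) {
    Eigen::VectorXd eivals = es.eigenvalues();  // sorted in increasing order
  }
  return 0;
}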
|
@ -304,7 +304,6 @@ struct evaluator_traits<Homogeneous<ArgType,Direction> >
{
  typedef typename storage_kind_to_evaluator_kind<typename ArgType::StorageKind>::Kind Kind;
  typedef HomogeneousShape Shape;
  static const int AssumeAliasing = 0;
};

template<> struct AssignmentKind<DenseShape,HomogeneousShape> { typedef Dense2Dense Kind; };
|
@ -22,8 +22,8 @@ namespace Eigen {
* A hyperplane is an affine subspace of dimension n-1 in a space of dimension n.
* For example, a hyperplane in a plane is a line; a hyperplane in 3-space is a plane.
*
* \param _Scalar the scalar type, i.e., the type of the coefficients
* \param _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic.
* \tparam _Scalar the scalar type, i.e., the type of the coefficients
* \tparam _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic.
* Notice that the dimension of the hyperplane is _AmbientDim-1.
*
* This class represents a hyperplane as the zero set of the implicit equation
|
@ -23,8 +23,8 @@ namespace Eigen {
* direction vector \f$ \mathbf{d} \f$ such that the line corresponds to
* the set \f$ l(t) = \mathbf{o} + t \mathbf{d} \f$, \f$ t \in \mathbf{R} \f$.
*
* \param _Scalar the scalar type, i.e., the type of the coefficients
* \param _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic.
* \tparam _Scalar the scalar type, i.e., the type of the coefficients
* \tparam _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic.
*/
template <typename _Scalar, int _AmbientDim, int _Options>
class ParametrizedLine
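
A minimal sketch: a line through two points and the projection of a query point onto it, assuming Eigen/Geometry:

#include <Eigen/Geometry>
int main() {
  Eigen::Vector2f p0(0.f, 0.f), p1(1.f, 0.f);
  Eigen::ParametrizedLine<float, 2> line =
      Eigen::ParametrizedLine<float, 2>::Through(p0, p1);
  Eigen::Vector2f proj = line.projection(Eigen::Vector2f(0.5f, 2.f));  // (0.5, 0)
  return 0;
}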
|
@ -18,7 +18,7 @@ namespace Eigen {
*
* \brief Represents a rotation/orientation in a 2 dimensional space.
*
* \param _Scalar the scalar type, i.e., the type of the coefficients
* \tparam _Scalar the scalar type, i.e., the type of the coefficients
*
* This class is equivalent to a single scalar representing a counter-clockwise rotation
* as a single angle in radian. It provides some additional features such as the automatic
|
@ -22,8 +22,8 @@ struct rotation_base_generic_product_selector;
*
* \brief Common base class for compact rotation representations
*
* \param Derived is the derived type, i.e., a rotation type
* \param _Dim the dimension of the space
* \tparam Derived is the derived type, i.e., a rotation type
* \tparam _Dim the dimension of the space
*/
template<typename Derived, int _Dim>
class RotationBase
|
@ -164,8 +164,8 @@ namespace internal {
*
* Helper function to convert an arbitrary rotation object to a rotation matrix.
*
* \param Scalar the numeric type of the matrix coefficients
* \param Dim the dimension of the current space
* \tparam Scalar the numeric type of the matrix coefficients
* \tparam Dim the dimension of the current space
*
* It returns a Dim x Dim fixed size matrix.
*
|
@ -18,7 +18,7 @@ namespace Eigen {
*
* \brief Represents a generic uniform scaling transformation
*
* \param _Scalar the scalar type, i.e., the type of the coefficients.
* \tparam _Scalar the scalar type, i.e., the type of the coefficients.
*
* This class represents a uniform scaling transformation. It is the return
* type of Scaling(Scalar), and most of the time this is the only way it
|
@ -18,8 +18,8 @@ namespace Eigen {
*
* \brief Represents a translation transformation
*
* \param _Scalar the scalar type, i.e., the type of the coefficients.
* \param _Dim the dimension of the space, can be a compile time value or Dynamic
* \tparam _Scalar the scalar type, i.e., the type of the coefficients.
* \tparam _Dim the dimension of the space, can be a compile time value or Dynamic
*
* \note This class is not aimed to be used to store a translation transformation,
* but rather to simplify the construction and updating of Transform objects.
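
A minimal sketch of that intended use, composing a Transform from a Translation, assuming Eigen/Geometry:

#include <Eigen/Geometry>
int main() {
  Eigen::Translation3f shift(1.f, 2.f, 3.f);
  Eigen::Affine3f T = shift * Eigen::AngleAxisf(0.5f, Eigen::Vector3f::UnitZ());
  Eigen::Vector3f p = T * Eigen::Vector3f::Zero();  // rotate first, then shift: p == (1,2,3)
  return 0;
}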
|
@ -21,7 +21,7 @@ namespace Eigen {
* References : C-J. Lin and J. J. Moré, Incomplete Cholesky Factorizations with
*              Limited memory, SIAM J. Sci. Comput. 21(1), pp. 24-45, 1999
*
* \tparam _MatrixType The type of the sparse matrix. It is advised to give a row-oriented sparse matrix
* \tparam Scalar the scalar type of the input matrices
* \tparam _UpLo The triangular part that will be used for the computations. It can be Lower
*               or Upper. Default is Lower.
* \tparam _OrderingType The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering<int>,
|
@ -29,7 +29,7 @@ template<typename _MatrixType> struct traits<FullPivLU<_MatrixType> >
*
* \brief LU decomposition of a matrix with complete pivoting, and related features
*
* \param MatrixType the type of the matrix of which we are computing the LU decomposition
* \tparam _MatrixType the type of the matrix of which we are computing the LU decomposition
*
* This class represents a LU decomposition of any matrix, with complete pivoting: the matrix A is
* decomposed as \f$ A = P^{-1} L U Q^{-1} \f$ where L is unit-lower-triangular, U is
|
@ -34,7 +34,7 @@ template<typename _MatrixType> struct traits<PartialPivLU<_MatrixType> >
*
* \brief LU decomposition of a matrix with partial pivoting, and related features
*
* \param MatrixType the type of the matrix of which we are computing the LU decomposition
* \tparam _MatrixType the type of the matrix of which we are computing the LU decomposition
*
* This class represents a LU decomposition of a \b square \b invertible matrix, with partial pivoting: the matrix A
* is decomposed as A = PLU where L is unit-lower-triangular, U is upper-triangular, and P
|
@ -84,8 +84,11 @@ StorageIndex cs_tdfs(StorageIndex j, StorageIndex k, StorageIndex *head, const S
/** \internal
* \ingroup OrderingMethods_Module
* Approximate minimum degree ordering algorithm.
* \returns the permutation P reducing the fill-in of the input matrix \a C
* The input matrix \a C must be a selfadjoint compressed column major SparseMatrix object. Both the upper and lower parts have to be stored, but the diagonal entries are optional.
*
* \param[in] C the input selfadjoint matrix stored in compressed column major format.
* \param[out] perm the permutation P reducing the fill-in of the input matrix \a C
*
* Note that the input matrix \a C must be complete, that is both the upper and lower parts have to be stored, as well as the diagonal entries.
* On exit the values of C are destroyed */
template<typename Scalar, typename StorageIndex>
void minimum_degree_ordering(SparseMatrix<Scalar,ColMajor,StorageIndex>& C, PermutationMatrix<Dynamic,Dynamic,StorageIndex>& perm)
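
A hedged sketch of the public wrapper around this internal routine, AMDOrdering from Eigen/OrderingMethods:

#include <Eigen/SparseCore>
#include <Eigen/OrderingMethods>
void amd_permutation(const Eigen::SparseMatrix<double>& A) {
  Eigen::AMDOrdering<int> ordering;
  Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic, int> perm;
  // The selfadjoint view supplies the complete (upper and lower) pattern required above
  ordering(A.selfadjointView<Eigen::Lower>(), perm);
}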
|
@ -516,7 +516,7 @@ static IndexType init_rows_cols /* returns true if OK, or false otherwise */
    Col [col].start = p [col] ;
    Col [col].length = p [col+1] - p [col] ;

    if (Col [col].length < 0)
    if ((Col [col].length) < 0) // extra parentheses to work-around gcc bug 10200
    {
      /* column pointers must be non-decreasing */
      stats [COLAMD_STATUS] = COLAMD_ERROR_col_length_negative ;
|
@ -19,20 +19,21 @@ namespace internal {

/** \internal
* \ingroup OrderingMethods_Module
* \returns the symmetric pattern A^T+A from the input matrix A.
* \param[in] A the input non-symmetric matrix
* \param[out] symmat the symmetric pattern A^T+A from the input matrix \a A.
* FIXME: The values should not be considered here
*/
template<typename MatrixType>
void ordering_helper_at_plus_a(const MatrixType& mat, MatrixType& symmat)
void ordering_helper_at_plus_a(const MatrixType& A, MatrixType& symmat)
{
  MatrixType C;
  C = mat.transpose(); // NOTE: Could be costly
  C = A.transpose(); // NOTE: Could be costly
  for (int i = 0; i < C.rows(); i++)
  {
    for (typename MatrixType::InnerIterator it(C, i); it; ++it)
      it.valueRef() = 0.0;
  }
  symmat = C + mat;
  symmat = C + A;
}

}
|
@ -184,7 +184,7 @@ class PastixBase : public SparseSolverBase<Derived>
      * The statistics related to the different phases of factorization and solve are saved here as well
      * \sa analyzePattern() factorize()
      */
    Array<RealScalar,IPARM_SIZE,1>& dparm()
    Array<double,DPARM_SIZE,1>& dparm()
    {
      return m_dparm;
    }
|
@ -244,8 +244,8 @@ class PastixBase : public SparseSolverBase<Derived>
    mutable ComputationInfo m_info;
    mutable pastix_data_t *m_pastixdata; // Data structure for pastix
    mutable int m_comm; // The MPI communicator identifier
    mutable Matrix<int,IPARM_SIZE,1> m_iparm; // integer vector for the input parameters
    mutable Matrix<double,DPARM_SIZE,1> m_dparm; // Scalar vector for the input parameters
    mutable Array<int,IPARM_SIZE,1> m_iparm; // integer vector for the input parameters
    mutable Array<double,DPARM_SIZE,1> m_dparm; // Scalar vector for the input parameters
    mutable Matrix<StorageIndex,Dynamic,1> m_perm; // Permutation vector
    mutable Matrix<StorageIndex,Dynamic,1> m_invp; // Inverse permutation vector
    mutable int m_size; // Size of the matrix
|
@ -268,7 +268,7 @@ void PastixBase<Derived>::init()
             0, 0, 0, 1, m_iparm.data(), m_dparm.data());

  m_iparm[IPARM_MATRIX_VERIFICATION] = API_NO;
  m_iparm[IPARM_VERBOSE] = 2;
  m_iparm[IPARM_VERBOSE] = API_VERBOSE_NOT;
  m_iparm[IPARM_ORDERING] = API_ORDER_SCOTCH;
  m_iparm[IPARM_INCOMPLETE] = API_NO;
  m_iparm[IPARM_OOC_LIMIT] = 2000;
|
@ -581,6 +581,7 @@ class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> >

    void grabMatrix(const MatrixType& matrix, ColSpMatrix& out)
    {
      out.resize(matrix.rows(), matrix.cols());
      // Pastix supports only lower, column-major matrices
      out.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>();
      internal::c_to_fortran_numbering(out);
|
@ -666,6 +667,7 @@ class PastixLDLT : public PastixBase< PastixLDLT<_MatrixType, _UpLo> >
    void grabMatrix(const MatrixType& matrix, ColSpMatrix& out)
    {
      // Pastix supports only lower, column-major matrices
      out.resize(matrix.rows(), matrix.cols());
      out.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>();
      internal::c_to_fortran_numbering(out);
    }
|
@ -28,7 +28,7 @@ template<typename _MatrixType> struct traits<ColPivHouseholderQR<_MatrixType> >
*
* \brief Householder rank-revealing QR decomposition of a matrix with column-pivoting
*
* \param MatrixType the type of the matrix of which we are computing the QR decomposition
* \tparam _MatrixType the type of the matrix of which we are computing the QR decomposition
*
* This class performs a rank-revealing QR decomposition of a matrix \b A into matrices \b P, \b Q and \b R
* such that
|
@ -37,7 +37,7 @@ struct traits<FullPivHouseholderQRMatrixQReturnType<MatrixType> >
*
* \brief Householder rank-revealing QR decomposition of a matrix with full pivoting
*
* \param MatrixType the type of the matrix of which we are computing the QR decomposition
* \tparam _MatrixType the type of the matrix of which we are computing the QR decomposition
*
* This class performs a rank-revealing QR decomposition of a matrix \b A into matrices \b P, \b P', \b Q and \b R
* such that
|
@ -21,7 +21,7 @@ namespace Eigen {
*
* \brief Householder QR decomposition of a matrix
*
* \param MatrixType the type of the matrix of which we are computing the QR decomposition
* \tparam _MatrixType the type of the matrix of which we are computing the QR decomposition
*
* This class performs a QR decomposition of a matrix \b A into matrices \b Q and \b R
* such that
|
@ -47,9 +47,8 @@ struct traits<BDCSVD<_MatrixType> >
*
* \brief class Bidiagonal Divide and Conquer SVD
*
* \param MatrixType the type of the matrix of which we are computing the SVD decomposition
* We plan to have a very similar interface to JacobiSVD on this class.
* It should be used to speed up the computation of the SVD for big matrices.
* \tparam _MatrixType the type of the matrix of which we are computing the SVD decomposition
*
*/
template<typename _MatrixType>
class BDCSVD : public SVDBase<BDCSVD<_MatrixType> >
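
A hedged usage sketch, given the JacobiSVD-like interface announced above (assumes BDCSVD is available through Eigen/SVD):

#include <Eigen/SVD>
int main() {
  Eigen::MatrixXf m = Eigen::MatrixXf::Random(1000, 300);
  Eigen::BDCSVD<Eigen::MatrixXf> svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV);
  Eigen::VectorXf sigma = svd.singularValues();  // decreasing order, as with JacobiSVD
  return 0;
}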
|
@ -449,8 +449,8 @@ struct traits<JacobiSVD<_MatrixType,QRPreconditioner> >
*
* \brief Two-sided Jacobi SVD decomposition of a rectangular matrix
*
* \param MatrixType the type of the matrix of which we are computing the SVD decomposition
* \param QRPreconditioner this optional parameter allows to specify the type of QR decomposition that will be used internally
* \tparam _MatrixType the type of the matrix of which we are computing the SVD decomposition
* \tparam QRPreconditioner this optional parameter allows to specify the type of QR decomposition that will be used internally
*        for the R-SVD step for non-square matrices. See discussion of possible values below.
*
* SVD decomposition consists in decomposing any n-by-p matrix \a A as a product
|
@ -39,18 +39,16 @@ namespace internal {
} // end namespace internal

/** \ingroup SparseCholesky_Module
* \brief A direct sparse Cholesky factorizations
* \brief A base class for direct sparse Cholesky factorizations
*
* These classes provide LL^T and LDL^T Cholesky factorizations of sparse matrices that are
* selfadjoint and positive definite. The factorization allows for solving A.X = B where
* This is a base class for LL^T and LDL^T Cholesky factorizations of sparse matrices that are
* selfadjoint and positive definite. These factorizations allow for solving A.X = B where
* X and B can be either dense or sparse.
*
* In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization
* such that the factorized matrix is P A P^-1.
*
* \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
* \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
*               or Upper. Default is Lower.
* \tparam Derived the type of the derived class, that is the actual factorization type.
*
*/
template<typename Derived>
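
A minimal sketch with one of the derived factorizations, SimplicialLLT, solving A.X = B for a dense right-hand side:

#include <Eigen/SparseCholesky>
#include <Eigen/SparseCore>
void solve_spd(const Eigen::SparseMatrix<double>& A, const Eigen::VectorXd& b) {
  Eigen::SimplicialLLT<Eigen::SparseMatrix<double>, Eigen::Lower> llt(A);
  if (llt.info() == Eigen::Success) {
    Eigen::VectorXd x = llt.solve(b);  // uses the fill-in reducing permutation internally
  }
}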
|
@ -22,6 +22,16 @@ struct traits<SparseCompressedBase<Derived> > : traits<Derived>

} // end namespace internal

/** \ingroup SparseCore_Module
* \class SparseCompressedBase
* \brief Common base class for sparse [compressed]-{row|column}-storage format.
*
* This class defines the common interface for all derived classes implementing the compressed sparse storage format, such as:
*  - SparseMatrix
*  - Ref<SparseMatrixType,Options>
*  - Map<SparseMatrixType>
*
*/
template<typename Derived>
class SparseCompressedBase
  : public SparseMatrixBase<Derived>
|
@ -37,11 +37,15 @@ struct traits<Map<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, St
};

} // end namespace internal

template<typename Derived,
         int Level = internal::accessors_level<Derived>::has_write_access ? WriteAccessors : ReadOnlyAccessors
> class SparseMapBase;

/** \ingroup SparseCore_Module
* \class SparseMapBase
* \brief Common base class for Map and Ref instance of sparse matrix and vector.
*/
template<typename Derived>
class SparseMapBase<Derived,ReadOnlyAccessors>
  : public SparseCompressedBase<Derived>
|
@ -71,22 +75,33 @@ class SparseMapBase<Derived,ReadOnlyAccessors>

  public:

    /** \copydoc SparseMatrixBase::rows() */
    inline Index rows() const { return IsRowMajor ? m_outerSize : m_innerSize; }
    /** \copydoc SparseMatrixBase::cols() */
    inline Index cols() const { return IsRowMajor ? m_innerSize : m_outerSize; }
    /** \copydoc SparseMatrixBase::innerSize() */
    inline Index innerSize() const { return m_innerSize; }
    /** \copydoc SparseMatrixBase::outerSize() */
    inline Index outerSize() const { return m_outerSize; }
    /** \copydoc SparseCompressedBase::nonZeros */
    inline Index nonZeros() const { return m_zero_nnz[1]; }

    /** \copydoc SparseCompressedBase::isCompressed */
    bool isCompressed() const { return m_innerNonZeros==0; }

    //----------------------------------------
    // direct access interface
    /** \copydoc SparseMatrix::valuePtr */
    inline const Scalar* valuePtr() const { return m_values; }
    /** \copydoc SparseMatrix::innerIndexPtr */
    inline const StorageIndex* innerIndexPtr() const { return m_innerIndices; }
    /** \copydoc SparseMatrix::outerIndexPtr */
    inline const StorageIndex* outerIndexPtr() const { return m_outerIndex; }
    /** \copydoc SparseMatrix::innerNonZeroPtr */
    inline const StorageIndex* innerNonZeroPtr() const { return m_innerNonZeros; }
    //----------------------------------------

    /** \copydoc SparseMatrix::coeff */
    inline Scalar coeff(Index row, Index col) const
    {
      const Index outer = IsRowMajor ? row : col;
|
@ -125,6 +140,10 @@ class SparseMapBase<Derived,ReadOnlyAccessors>
    inline SparseMapBase() {}
};

/** \ingroup SparseCore_Module
* \class SparseMapBase
* \brief Common base class for writable Map and Ref instance of sparse matrix and vector.
*/
template<typename Derived>
class SparseMapBase<Derived,WriteAccessors>
  : public SparseMapBase<Derived,ReadOnlyAccessors>
|
@ -185,9 +204,23 @@ class SparseMapBase<Derived,WriteAccessors>
    inline SparseMapBase() {}
};

/** \ingroup SparseCore_Module
*
* \brief Specialization of class Map for SparseMatrix-like storage.
*
* \tparam SparseMatrixType the equivalent sparse matrix type of the referenced data, it must be a template instance of class SparseMatrix.
*
* \sa class Map, class SparseMatrix, class Ref<SparseMatrixType,Options>
*/
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType>
class Map<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType>
  : public SparseMapBase<Map<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> >
#else
template<typename SparseMatrixType>
class Map<SparseMatrixType>
  : public SparseMapBase<Derived,WriteAccessors>
#endif
{
  public:
    typedef SparseMapBase<Map> Base;
|
@ -196,6 +229,12 @@ class Map<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType>

  public:

    /** Constructs a read-write Map to a sparse matrix of size \a rows x \a cols, containing \a nnz non-zero coefficients,
      * stored in a sparse format defined by the pointers \a outerIndexPtr, \a innerIndexPtr, and \a valuePtr.
      * If the optional parameter \a innerNonZerosPtr is the null pointer, then a standard compressed format is assumed.
      *
      * More details on the expected storage schemes are given in the \ref TutorialSparse "manual pages".
      */
    inline Map(Index rows, Index cols, Index nnz, StorageIndex* outerIndexPtr,
               StorageIndex* innerIndexPtr, Scalar* valuePtr, StorageIndex* innerNonZerosPtr = 0)
      : Base(rows, cols, nnz, outerIndexPtr, innerIndexPtr, valuePtr, innerNonZerosPtr)
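
A hedged sketch of that constructor, wrapping raw compressed-column (CSC) arrays without copying; the data is illustrative:

#include <Eigen/SparseCore>
int main() {
  int outer[4]     = {0, 1, 2, 3};     // column start offsets (cols + 1 entries)
  int inner[3]     = {0, 1, 2};        // row index of each nonzero
  double values[3] = {1.0, 2.0, 3.0};  // a 3x3 diagonal matrix in CSC form
  Eigen::Map<Eigen::SparseMatrix<double> > m(3, 3, 3, outer, inner, values);
  double d = m.coeff(1, 1);            // 2.0, read through the map
  return 0;
}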
|
@ -108,20 +108,25 @@ protected:

/**
* \ingroup Sparse_Module
* \ingroup SparseCore_Module
*
* \brief A sparse matrix expression referencing an existing sparse expression
*
* \tparam PlainObjectType the equivalent sparse matrix type of the referenced data
* \tparam SparseMatrixType the equivalent sparse matrix type of the referenced data, it must be a template instance of class SparseMatrix.
* \tparam Options specifies whether a standard compressed format is required; \c Options can be \c #StandardCompressedFormat or \c 0.
*                 The default is \c 0.
* \tparam StrideType Only used for dense Ref
*
* \sa class Ref
*/
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType>
class Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType >
  : public internal::SparseRefBase<Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType > >
#else
template<typename SparseMatrixType, int Options>
class Ref<SparseMatrixType, Options>
  : public SparseMapBase<Derived,WriteAccessors> // yes, that's weird to use Derived here, but that works!
#endif
{
    typedef SparseMatrix<MatScalar,MatOptions,MatIndex> PlainObjectType;
    typedef internal::traits<Ref> Traits;
|
@ -155,6 +160,7 @@ class Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType >
    template<typename Derived>
    inline Ref(const SparseCompressedBase<Derived>& expr)
    #else
    /** Implicit constructor from any sparse expression (2D matrix or 1D vector) */
    template<typename Derived>
    inline Ref(SparseCompressedBase<Derived>& expr)
    #endif
|
@ -225,19 +231,23 @@ class Ref<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType

/**
* \ingroup Sparse_Module
* \ingroup SparseCore_Module
*
* \brief A sparse vector expression referencing an existing sparse vector expression
*
* \tparam PlainObjectType the equivalent sparse matrix type of the referenced data
* \tparam Options Not used for SparseVector.
* \tparam StrideType Only used for dense Ref
* \tparam SparseVectorType the equivalent sparse vector type of the referenced data, it must be a template instance of class SparseVector.
*
* \sa class Ref
*/
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType>
class Ref<SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType >
  : public internal::SparseRefBase<Ref<SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType > >
#else
template<typename SparseVectorType>
class Ref<SparseVectorType>
  : public SparseMapBase<Derived,WriteAccessors>
#endif
{
    typedef SparseVector<MatScalar,MatOptions,MatIndex> PlainObjectType;
    typedef internal::traits<Ref> Traits;
|
@ -259,6 +269,7 @@ class Ref<SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType >
    template<typename Derived>
    inline Ref(const SparseCompressedBase<Derived>& expr)
    #else
    /** Implicit constructor from any 1D sparse vector expression */
    template<typename Derived>
    inline Ref(SparseCompressedBase<Derived>& expr)
    #endif
|
@ -211,8 +211,6 @@ struct evaluator_traits<SparseSelfAdjointView<MatrixType,Mode> >
{
  typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
  typedef SparseSelfAdjointShape Shape;

  static const int AssumeAliasing = 0;
};

struct SparseSelfAdjoint2Sparse {};
|
@ -14,22 +14,21 @@
namespace Eigen {
namespace internal {

/**
* \brief Performs numeric block updates from a given supernode to a single column
*
* \param segsize Size of the segment (and blocks) to use for updates
* \param[in,out] dense Packed values of the original matrix
* \param tempv temporary vector to use for updates
* \param lusup array containing the supernodes
* \param lda Leading dimension in the supernode
* \param nrow Number of rows in the rectangular part of the supernode
* \param lsub compressed row subscripts of supernodes
* \param lptr pointer to the first column of the current supernode in lsub
* \param no_zeros Number of nonzero elements before the diagonal part of the supernode
* \return 0 on success
*/
template <int SegSizeAtCompileTime> struct LU_kernel_bmod
{
  /** \internal
    * \brief Performs numeric block updates from a given supernode to a single column
    *
    * \param segsize Size of the segment (and blocks) to use for updates
    * \param[in,out] dense Packed values of the original matrix
    * \param tempv temporary vector to use for updates
    * \param lusup array containing the supernodes
    * \param lda Leading dimension in the supernode
    * \param nrow Number of rows in the rectangular part of the supernode
    * \param lsub compressed row subscripts of supernodes
    * \param lptr pointer to the first column of the current supernode in lsub
    * \param no_zeros Number of nonzero elements before the diagonal part of the supernode
    */
  template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
  static EIGEN_DONT_INLINE void run(const Index segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
                                    const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros);
|
@ -691,7 +691,6 @@ struct evaluator_traits<SparseQRMatrixQReturnType<SparseQRType> >
  typedef typename SparseQRType::MatrixType MatrixType;
  typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
  typedef SparseShape Shape;
  static const int AssumeAliasing = 0;
};

template< typename DstXprType, typename SparseQRType>
|
@ -32,17 +32,19 @@ A << 1, 2, 3, // Initialize A. The elements can also be
B << A, A, A; // B is three horizontally stacked A's.
A.fill(10); // Fill A with all 10's.

// Eigen // Matlab
MatrixXd::Identity(rows,cols) // eye(rows,cols)
C.setIdentity(rows,cols) // C = eye(rows,cols)
MatrixXd::Zero(rows,cols) // zeros(rows,cols)
C.setZero(rows,cols) // C = zeros(rows,cols)
MatrixXd::Ones(rows,cols) // ones(rows,cols)
C.setOnes(rows,cols) // C = ones(rows,cols)
MatrixXd::Random(rows,cols) // rand(rows,cols)*2-1 // MatrixXd::Random returns uniform random numbers in (-1, 1).
C.setRandom(rows,cols) // C = rand(rows,cols)*2-1
VectorXd::LinSpaced(size,low,high) // linspace(low,high,size)'
v.setLinSpaced(size,low,high) // v = linspace(low,high,size)'
// Eigen                            // Matlab
MatrixXd::Identity(rows,cols)       // eye(rows,cols)
C.setIdentity(rows,cols)            // C = eye(rows,cols)
MatrixXd::Zero(rows,cols)           // zeros(rows,cols)
C.setZero(rows,cols)                // C = zeros(rows,cols)
MatrixXd::Ones(rows,cols)           // ones(rows,cols)
C.setOnes(rows,cols)                // C = ones(rows,cols)
MatrixXd::Random(rows,cols)         // rand(rows,cols)*2-1        // MatrixXd::Random returns uniform random numbers in (-1, 1).
C.setRandom(rows,cols)              // C = rand(rows,cols)*2-1
VectorXd::LinSpaced(size,low,high)  // linspace(low,high,size)'
v.setLinSpaced(size,low,high)       // v = linspace(low,high,size)'
VectorXi::LinSpaced(((hi-low)/step)+1, // low:step:hi
                    low,low+step*(size-1))


// Matrix slicing and blocks. All expressions listed here are read/write.
|
@ -85,13 +87,15 @@ P.bottomRightCorner<rows,cols>() // P(end-rows+1:end, end-cols+1:end)
R.row(i) = P.col(j);              // R(i, :) = P(:, j)'
R.col(j1).swap(mat1.col(j2));     // R(:, [j1 j2]) = R(:, [j2, j1])

// Views, transpose, etc; all read-write except for .adjoint().
// Views, transpose, etc;
// Eigen                           // Matlab
R.adjoint()                        // R'
R.transpose()                      // R.' or conj(R')
R.diagonal()                       // diag(R)
R.transpose()                      // R.' or conj(R')   // Read-write
R.diagonal()                       // diag(R)           // Read-write
x.asDiagonal()                     // diag(x)
R.transpose().colwise().reverse(); // rot90(R)
R.transpose().colwise().reverse()  // rot90(R)          // Read-write
R.replicate(i,j)                   // repmat(P,i,j)


// All the same as Matlab, but matlab doesn't have *= style operators.
// Matrix-vector.  Matrix-matrix.   Matrix-scalar.
|
||||
@ -103,37 +107,39 @@ a *= M; R = P + Q; R = P/s;
|
||||
R -= Q; R /= s;
|
||||
|
||||
// Vectorized operations on each element independently
// Eigen                       // Matlab
R = P.cwiseProduct(Q);         // R = P .* Q
R = P.array() * s.array();     // R = P .* s
R = P.cwiseQuotient(Q);        // R = P ./ Q
R = P.array() / Q.array();     // R = P ./ Q
R = P.array() + s.array();     // R = P + s
R = P.array() - s.array();     // R = P - s
R.array() += s;                // R = R + s
R.array() -= s;                // R = R - s
R.array() < Q.array();         // R < Q
R.array() <= Q.array();        // R <= Q
R.cwiseInverse();              // 1 ./ P
R.array().inverse();           // 1 ./ P
R.array().sin()                // sin(P)
R.array().cos()                // cos(P)
R.array().pow(s)               // P .^ s
R.array().square()             // P .^ 2
R.array().cube()               // P .^ 3
R.cwiseSqrt()                  // sqrt(P)
R.array().sqrt()               // sqrt(P)
R.array().exp()                // exp(P)
R.array().log()                // log(P)
R.cwiseMax(P)                  // max(R, P)
R.array().max(P.array())       // max(R, P)
R.cwiseMin(P)                  // min(R, P)
R.array().min(P.array())       // min(R, P)
R.cwiseAbs()                   // abs(P)
R.array().abs()                // abs(P)
R.cwiseAbs2()                  // abs(P.^2)
R.array().abs2()               // abs(P.^2)
(R.array() < s).select(P,Q);   // (R < s ? P : Q)
R = (Q.array()==0).select(P,A) // R(Q==0) = P(Q==0)

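A hedged sketch of the select() idiom from the table above (the helper name
and types are illustrative, not from the diff):

#include <Eigen/Dense>
using namespace Eigen;

// Coefficient-wise choice: take P where R < s, otherwise take Q.
MatrixXd chooseBelow(const MatrixXd& R, const MatrixXd& P, const MatrixXd& Q, double s) {
  return (R.array() < s).select(P, Q);
}
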
// Reductions.
int r, c;
@ -164,12 +170,12 @@ x.dot(y)    // dot(x, y)
x.cross(y)  // cross(x, y) Requires #include <Eigen/Geometry>

//// Type conversion
// Eigen           // Matlab
A.cast<double>();  // double(A)
A.cast<float>();   // single(A)
A.cast<int>();     // int32(A)
A.real();          // real(A)
A.imag();          // imag(A)
// if the original type equals destination type, no work is done

// Note that for most operations Eigen requires all operands to have the same type:

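A short sketch of that same-type rule (the helper name is illustrative):

#include <Eigen/Dense>
using namespace Eigen;

// float + double does not compile in Eigen; promote explicitly with cast<>().
VectorXd mixedSum(const VectorXf& a, const VectorXd& b) {
  return a.cast<double>() + b;
}
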
@ -42,6 +42,15 @@ template<> struct adjoint_specific<false> {
    VERIFY_IS_APPROX(v1, v1.norm() * v3);
    VERIFY_IS_APPROX(v3, v1.normalized());
    VERIFY_IS_APPROX(v3.norm(), RealScalar(1));

    // check null inputs
    VERIFY_IS_APPROX((v1*0).normalized(), (v1*0));
    RealScalar very_small = (std::numeric_limits<RealScalar>::min)();
    VERIFY( (v1*very_small).norm() == 0 );
    VERIFY_IS_APPROX((v1*very_small).normalized(), (v1*very_small));
    v3 = v1*very_small;
    v3.normalize();
    VERIFY_IS_APPROX(v3, (v1*very_small));

    // check compatibility of dot and adjoint
    ref = NumTraits<Scalar>::IsInteger ? 0 : (std::max)((std::max)(v1.norm(),v2.norm()),(std::max)((square * v2).norm(),(square.adjoint() * v1).norm()));

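An illustrative sketch of the property these checks pin down (concrete types
are assumptions; the behavior is the one asserted by the test above):

#include <Eigen/Dense>
#include <limits>
using namespace Eigen;

int main() {
  Vector3d z = Vector3d::Zero();
  // normalizing a zero vector returns it unchanged instead of dividing by 0
  Vector3d n = z.normalized();
  // a vector whose squared norm underflows to 0 is likewise left as-is
  Vector3d tiny = Vector3d::Ones() * std::numeric_limits<double>::min();
  Vector3d m = tiny.normalized();
  return (n == z && m == tiny) ? 0 : 1;
}
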
@ -15,16 +15,18 @@
template<typename T, typename I> void test_incomplete_cholesky_T()
{
  typedef SparseMatrix<T,0,I> SparseMatrixType;
  ConjugateGradient<SparseMatrixType, Lower, IncompleteCholesky<T, Lower, AMDOrdering<I> > >       cg_illt_lower_amd;
  ConjugateGradient<SparseMatrixType, Lower, IncompleteCholesky<T, Lower, NaturalOrdering<I> > >   cg_illt_lower_nat;
  ConjugateGradient<SparseMatrixType, Upper, IncompleteCholesky<T, Upper, AMDOrdering<I> > >       cg_illt_upper_amd;
  ConjugateGradient<SparseMatrixType, Upper, IncompleteCholesky<T, Upper, NaturalOrdering<I> > >   cg_illt_upper_nat;
  ConjugateGradient<SparseMatrixType, Upper|Lower, IncompleteCholesky<T, Lower, AMDOrdering<I> > > cg_illt_uplo_amd;


  CALL_SUBTEST( check_sparse_spd_solving(cg_illt_lower_amd) );
  CALL_SUBTEST( check_sparse_spd_solving(cg_illt_lower_nat) );
  CALL_SUBTEST( check_sparse_spd_solving(cg_illt_upper_amd) );
  CALL_SUBTEST( check_sparse_spd_solving(cg_illt_upper_nat) );
  CALL_SUBTEST( check_sparse_spd_solving(cg_illt_uplo_amd) );
}

void test_incomplete_cholesky()

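A hedged usage sketch for the solver/preconditioner pairs exercised above
(the function name is illustrative; A is assumed sparse SPD):

#include <Eigen/Sparse>
#include <Eigen/IterativeLinearSolvers>
using namespace Eigen;

VectorXd solveSPD(const SparseMatrix<double>& A, const VectorXd& b) {
  // Lower|Upper lets the solver use both triangles of a selfadjoint matrix,
  // matching the cg_illt_uplo_amd configuration in the test.
  ConjugateGradient<SparseMatrix<double>, Lower|Upper,
                    IncompleteCholesky<double> > cg(A);
  return cg.solve(b);  // in real code, also check cg.info() == Success
}
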
@ -27,6 +27,14 @@ template<typename T> void test_pastix_T()
  check_sparse_spd_solving(pastix_llt_upper);
  check_sparse_spd_solving(pastix_ldlt_upper);
  check_sparse_square_solving(pastix_lu);

  // Some compilation check:
  pastix_llt_lower.iparm();
  pastix_llt_lower.dparm();
  pastix_ldlt_lower.iparm();
  pastix_ldlt_lower.dparm();
  pastix_lu.iparm();
  pastix_lu.dparm();
}

// There is no support for selfadjoint matrices with PaStiX.

@ -145,14 +145,31 @@ template<typename MatrixType> void product(const MatrixType& m)
  VERIFY_IS_APPROX(res.col(r).noalias() = square * square.col(r), (square * square.col(r)).eval());

  // inner product
  {
    Scalar x = square2.row(c) * square2.col(c2);
    VERIFY_IS_APPROX(x, square2.row(c).transpose().cwiseProduct(square2.col(c2)).sum());
  }

  // outer product
  {
    VERIFY_IS_APPROX(m1.col(c) * m1.row(r), m1.block(0,c,rows,1) * m1.block(r,0,1,cols));
    VERIFY_IS_APPROX(m1.row(r).transpose() * m1.col(c).transpose(), m1.block(r,0,1,cols).transpose() * m1.block(0,c,rows,1).transpose());
    VERIFY_IS_APPROX(m1.block(0,c,rows,1) * m1.row(r), m1.block(0,c,rows,1) * m1.block(r,0,1,cols));
    VERIFY_IS_APPROX(m1.col(c) * m1.block(r,0,1,cols), m1.block(0,c,rows,1) * m1.block(r,0,1,cols));
    VERIFY_IS_APPROX(m1.leftCols(1) * m1.row(r), m1.block(0,0,rows,1) * m1.block(r,0,1,cols));
    VERIFY_IS_APPROX(m1.col(c) * m1.topRows(1), m1.block(0,c,rows,1) * m1.block(0,0,1,cols));
  }

  // Aliasing
  {
    ColVectorType x(cols); x.setRandom();
    ColVectorType z(x);
    ColVectorType y(cols); y.setZero();
    ColSquareMatrixType A(cols,cols); A.setRandom();
    // CwiseBinaryOp
    VERIFY_IS_APPROX(x = y + A*x, A*z);
    x = z;
    // CwiseUnaryOp
    VERIFY_IS_APPROX(x = Scalar(1.)*(A*x), A*z);
  }
}

@ -9,6 +9,27 @@

#include "product.h"

template<typename T>
void test_aliasing()
{
  int rows = internal::random<int>(1,12);
  int cols = internal::random<int>(1,12);
  typedef Matrix<T,Dynamic,Dynamic> MatrixType;
  typedef Matrix<T,Dynamic,1> VectorType;
  VectorType x(cols); x.setRandom();
  VectorType z(x);
  VectorType y(rows); y.setZero();
  MatrixType A(rows,cols); A.setRandom();
  // CwiseBinaryOp
  VERIFY_IS_APPROX(x = y + A*x, A*z);     // OK because "y + A*x" is marked as "assume-aliasing"
  x = z;
  // CwiseUnaryOp
  VERIFY_IS_APPROX(x = T(1.)*(A*x), A*z); // OK because 1*(A*x) is replaced by (1*A*x) which is a Product<> expression
  x = z;
  // VERIFY_IS_APPROX(x = y-A*x, -A*z);   // Not OK in 3.3 because x is resized before A*x gets evaluated
  x = z;
}

void test_product_large()
{
  for(int i = 0; i < g_repeat; i++) {
@ -17,6 +38,8 @@ void test_product_large()
    CALL_SUBTEST_3( product(MatrixXi(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
    CALL_SUBTEST_4( product(MatrixXcf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2), internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2))) );
    CALL_SUBTEST_5( product(Matrix<float,Dynamic,Dynamic,RowMajor>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );

    CALL_SUBTEST_1( test_aliasing<float>() );
  }

#if defined EIGEN_TEST_PART_6

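A compact sketch of the aliasing rules this test relies on (standalone, with
illustrative sizes):

#include <Eigen/Dense>
using namespace Eigen;

int main() {
  MatrixXd A = MatrixXd::Random(4,4);
  VectorXd x = VectorXd::Random(4);
  VectorXd z = x;
  x = A * x;            // safe: plain products "assume aliasing" and go through a temporary
  x = z;
  x.noalias() = A * z;  // opting out of the temporary is fine when the operands don't overlap
  return 0;
}
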
@ -43,10 +43,16 @@ template<typename MatrixType> void product_notemporary(const MatrixType& m)
  r1 = internal::random<Index>(8,rows-r0);

  VERIFY_EVALUATION_COUNT( m3 = (m1 * m2.adjoint()), 1);
  VERIFY_EVALUATION_COUNT( m3 = (m1 * m2.adjoint()).transpose(), 1);
  VERIFY_EVALUATION_COUNT( m3.noalias() = m1 * m2.adjoint(), 0);

  VERIFY_EVALUATION_COUNT( m3 = s1 * (m1 * m2.transpose()), 1);
//  VERIFY_EVALUATION_COUNT( m3 = m3 + s1 * (m1 * m2.transpose()), 1);
  VERIFY_EVALUATION_COUNT( m3.noalias() = s1 * (m1 * m2.transpose()), 0);

  VERIFY_EVALUATION_COUNT( m3 = m3 + (m1 * m2.adjoint()), 1);

  VERIFY_EVALUATION_COUNT( m3 = m3 + (m1 * m2.adjoint()).transpose(), 1);
  VERIFY_EVALUATION_COUNT( m3.noalias() = m3 + m1 * m2.transpose(), 0);
  VERIFY_EVALUATION_COUNT( m3.noalias() += m3 + m1 * m2.transpose(), 0);
  VERIFY_EVALUATION_COUNT( m3.noalias() -= m3 + m1 * m2.transpose(), 0);

@ -88,6 +88,7 @@ typedef unsigned __int64 uint64_t;
#include "src/Tensor/TensorReductionCuda.h"
#include "src/Tensor/TensorArgMax.h"
#include "src/Tensor/TensorConcatenation.h"
#include "src/Tensor/TensorContractionMapper.h"
#include "src/Tensor/TensorContraction.h"
#include "src/Tensor/TensorContractionThreadPool.h"
#include "src/Tensor/TensorContractionCuda.h"

@ -109,11 +109,9 @@ template<int n, typename x> struct get;

template<int n, typename a, typename... as> struct get<n, type_list<a, as...>> : get<n-1, type_list<as...>> {};
template<typename a, typename... as> struct get<0, type_list<a, as...>> { typedef a type; };
template<int n EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, as)> struct get<n, type_list<EIGEN_TPL_PP_SPEC_HACK_USE(as)>> { static_assert((n - n) < 0, "meta-template get: The element to extract from a list must be smaller than the size of the list."); };

template<typename T, int n, T a, T... as> struct get<n, numeric_list<T, a, as...>> : get<n-1, numeric_list<T, as...>> {};
template<typename T, T a, T... as> struct get<0, numeric_list<T, a, as...>> { constexpr static T value = a; };
template<typename T, int n EIGEN_TPL_PP_SPEC_HACK_DEFC(T, as)> struct get<n, numeric_list<T EIGEN_TPL_PP_SPEC_HACK_USEC(as)>> { static_assert((n - n) < 0, "meta-template get: The element to extract from a list must be smaller than the size of the list."); };

/* always get type, regardless of dummy; good for parameter pack expansion */

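A self-contained re-creation (an illustrative sketch, not Eigen's header) of
the recursion above, to make the peel-one-head-per-step behavior concrete:

template<typename T, T... ns> struct numeric_list {};
template<int n, typename List> struct get;
// recursive case: drop the head, decrement n
template<typename T, int n, T a, T... as>
struct get<n, numeric_list<T, a, as...>> : get<n-1, numeric_list<T, as...>> {};
// base case: n == 0 yields the current head
template<typename T, T a, T... as>
struct get<0, numeric_list<T, a, as...>> { static constexpr T value = a; };

static_assert(get<1, numeric_list<int, 1, 2, 3>>::value == 2, "second element");
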
@ -406,7 +404,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const std::vector<t>& a) {
template<typename Op, typename A, typename B, std::size_t N, int... n>
constexpr inline array<decltype(Op::run(A(), B())),N> h_array_zip(array<A, N> a, array<B, N> b, numeric_list<int, n...>)
{
  return array<decltype(Op::run(A(), B())),N>{{ Op::run(array_get<n>(a), array_get<n>(b))... }};
  return array<decltype(Op::run(A(), B())),N>{ Op::run(array_get<n>(a), array_get<n>(b))... };
}

template<typename Op, typename A, typename B, std::size_t N>
@ -434,7 +432,7 @@ constexpr inline auto array_zip_and_reduce(array<A, N> a, array<B, N> b) -> decl
template<typename Op, typename A, std::size_t N, int... n>
constexpr inline array<decltype(Op::run(A())),N> h_array_apply(array<A, N> a, numeric_list<int, n...>)
{
  return array<decltype(Op::run(A())),N>{{ Op::run(array_get<n>(a))... }};
  return array<decltype(Op::run(A())),N>{ Op::run(array_get<n>(a))... };
}

template<typename Op, typename A, std::size_t N>

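The same pack-expansion idiom in standalone C++14, using std::array and
std::index_sequence in place of Eigen's array/numeric_list (illustrative only):

#include <array>
#include <cstddef>
#include <utility>

struct sum_op { static constexpr int run(int a, int b) { return a + b; } };

template<typename Op, std::size_t N, std::size_t... I>
constexpr std::array<int, N> zip_impl(const std::array<int, N>& a, const std::array<int, N>& b,
                                      std::index_sequence<I...>) {
  return {{ Op::run(a[I], b[I])... }};  // one Op::run per expanded index
}

template<typename Op, std::size_t N>
constexpr std::array<int, N> zip(const std::array<int, N>& a, const std::array<int, N>& b) {
  return zip_impl<Op>(a, b, std::make_index_sequence<N>{});
}

constexpr std::array<int, 2> zipped = zip<sum_op>({{1, 2}}, {{3, 4}});
static_assert(zipped[1] == 6, "2 + 4 == 6");
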
@ -132,13 +132,13 @@ template <typename T> class array<T, 0> {
    return *static_cast<const T*>(NULL);
  }

  static EIGEN_ALWAYS_INLINE std::size_t size() { return 0; }
  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::size_t size() { return 0; }

  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE array() { }

#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
  array(std::initializer_list<T> l) {
  EIGEN_DEVICE_FUNC array(std::initializer_list<T> l) {
    eigen_assert(l.size() == 0);
  }
#endif

@ -78,7 +78,8 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
    IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) & !(Options_&DontAlign),
    PacketAccess = (internal::packet_traits<Scalar>::size > 1),
    Layout = Options_ & RowMajor ? RowMajor : ColMajor,
    CoordAccess = true
    CoordAccess = true,
    RawAccess = true
  };

  static const int Options = Options_;
@ -118,7 +119,7 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
    {
      // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
      EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
      return coeff(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
      return coeff(array<Index, NumIndices>{firstIndex, secondIndex, otherIndices...});
    }
#endif

@ -158,7 +159,7 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
    {
      // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
      EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
      return coeffRef(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
      return coeffRef(array<Index, NumIndices>{firstIndex, secondIndex, otherIndices...});
    }
#endif

@ -198,7 +199,7 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
    {
      // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
      EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
      return this->operator()(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
      return this->operator()(array<Index, NumIndices>{firstIndex, secondIndex, otherIndices...});
    }
#else
    EIGEN_DEVICE_FUNC
@ -265,7 +266,7 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
    {
      // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
      EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
      return operator()(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
      return operator()(array<Index, NumIndices>{firstIndex, secondIndex, otherIndices...});
    }
#else
    EIGEN_DEVICE_FUNC
@ -341,7 +342,7 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
    template<typename... IndexTypes>
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index firstDimension, IndexTypes... otherDimensions)
        : m_storage(internal::array_prod(array<Index, NumIndices>{{firstDimension, otherDimensions...}}), array<Index, NumIndices>{{firstDimension, otherDimensions...}})
        : m_storage(internal::array_prod(array<Index, NumIndices>{firstDimension, otherDimensions...}), array<Index, NumIndices>{firstDimension, otherDimensions...})
    {
      // The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
      EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
@ -426,7 +427,7 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
    {
      // The number of dimensions used to resize a tensor must be equal to the rank of the tensor.
      EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
      resize(array<Index, NumIndices>{{firstDimension, otherDimensions...}});
      resize(array<Index, NumIndices>{firstDimension, otherDimensions...});
    }
#endif

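A hedged usage sketch of the variadic accessors being touched here (sizes are
illustrative; requires the unsupported Tensor module):

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 3> t(2, 3, 4);  // index count must equal the rank
  t.setZero();
  t(1, 2, 3) = 7.0f;                   // forwarded as array<Index,3>{1,2,3}
  return t(1, 2, 3) == 7.0f ? 0 : 1;
}
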
@ -89,6 +89,7 @@ struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device>
    BlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@ -134,7 +135,7 @@ struct traits<TensorTupleReducerOp<ReduceOp, Dims, XprType> > : public traits<Xp
  typedef Index Scalar;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int NumDimensions = XprTraits::NumDimensions - array_size<Dims>::value;
  static const int Layout = XprTraits::Layout;
};

@ -210,6 +211,7 @@ struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Devi
    BlockAccess = false,
    Layout = TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)

@ -97,6 +97,7 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
    IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned & TensorEvaluator<RightArgType, Device>::IsAligned,
    PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<LeftArgType, Device>::Layout,
    RawAccess = TensorEvaluator<LeftArgType, Device>::RawAccess,
  };

  EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) :
@ -152,6 +153,8 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
    return m_leftImpl.template packet<LoadMode>(index);
  }

  EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_leftImpl.data(); }

 private:
  TensorEvaluator<LeftArgType, Device> m_leftImpl;
  TensorEvaluator<RightArgType, Device> m_rightImpl;

@ -46,6 +46,21 @@ struct nested<TensorBroadcastingOp<Broadcast, XprType>, 1, typename eval<TensorB
  typedef TensorBroadcastingOp<Broadcast, XprType> type;
};

template <typename Dims>
struct is_input_scalar {
  static const bool value = false;
};
template <>
struct is_input_scalar<Sizes<> > {
  static const bool value = true;
};
#ifndef EIGEN_EMULATE_CXX11_META_H
template <typename std::size_t... Indices>
struct is_input_scalar<Sizes<Indices...> > {
  static const bool value = (Sizes<Indices...>::total_size == 1);
};
#endif

}  // end namespace internal


@ -94,6 +109,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@ -103,7 +119,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
    // and store the result in a scalar. Instead one should reshape the scalar into an N-D
    // tensor with N >= 1 of 1 element first and then broadcast.
    EIGEN_STATIC_ASSERT(NumDims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    const InputDimensions& input_dims = m_impl.dimensions();
    const Broadcast& broadcast = op.broadcast();
    for (int i = 0; i < NumDims; ++i) {
      eigen_assert(input_dims[i] > 0);
@ -143,6 +159,10 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const
  {
    if (internal::is_input_scalar<typename internal::remove_all<InputDimensions>::type>::value) {
      return m_impl.coeff(0);
    }

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return coeffColMajor(index);
    } else {
@ -214,6 +234,10 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const
  {
    if (internal::is_input_scalar<typename internal::remove_all<InputDimensions>::type>::value) {
      return internal::pset1<PacketReturnType>(m_impl.coeff(0));
    }

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return packetColMajor<LoadMode>(index);
    } else {

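A hedged sketch of the rule stated above: a 1-element input is broadcast by
reading coefficient 0, so reshape scalars to rank >= 1 first (sizes are
illustrative):

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 1> one(1);                        // 1-element, rank-1 "scalar"
  one(0) = 3.14f;
  Eigen::array<Eigen::Index, 1> bcast{{4}};
  Eigen::Tensor<float, 1> four = one.broadcast(bcast);   // four copies of one(0)
  return four.dimension(0) == 4 ? 0 : 1;
}
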
@ -145,6 +145,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@ -304,6 +305,7 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)

@ -125,6 +125,7 @@ struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgTy
    IsAligned = false,
    PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<LeftArgType, Device>::Layout,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@ -287,6 +288,7 @@ template<typename Axis, typename LeftArgType, typename RightArgType, typename De
    IsAligned = false,
    PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<LeftArgType, Device>::Layout,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(XprType& op, const Device& device)

@ -21,358 +21,6 @@ namespace Eigen {
*/
namespace internal {

enum {
  Rhs = 0,
  Lhs = 1,
};

/*
 * Implementation of the Eigen blas_data_mapper class for tensors.
 */
template<typename Scalar, typename Index, int side,
         typename Tensor,
         typename nocontract_t, typename contract_t,
         int packet_size, bool inner_dim_contiguous>
class SimpleTensorContractionMapper {
 public:
  EIGEN_DEVICE_FUNC
  SimpleTensorContractionMapper(const Tensor& tensor,
                                const nocontract_t& nocontract_strides,
                                const nocontract_t& ij_strides,
                                const contract_t& contract_strides,
                                const contract_t& k_strides) :
      m_tensor(tensor),
      m_nocontract_strides(nocontract_strides),
      m_ij_strides(ij_strides),
      m_contract_strides(contract_strides),
      m_k_strides(k_strides) { }

  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE void prefetch(Index /*i*/) { }

  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE Scalar operator()(Index row) const {
    // column major assumption
    return operator()(row, 0);
  }

  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE Scalar operator()(Index row, Index col) const {
    return m_tensor.coeff(computeIndex(row, col));
  }

  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE Index computeIndex(Index row, Index col) const {
    const bool left = (side == Lhs);
    Index nocontract_val = left ? row : col;
    Index linidx = 0;
    for (int i = static_cast<int>(array_size<nocontract_t>::value) - 1; i > 0; i--) {
      const Index idx = nocontract_val / m_ij_strides[i];
      linidx += idx * m_nocontract_strides[i];
      nocontract_val -= idx * m_ij_strides[i];
    }
    if (array_size<typename Tensor::Dimensions>::value > array_size<contract_t>::value) {
      if (side == Lhs && inner_dim_contiguous) {
        eigen_assert(m_nocontract_strides[0] == 1);
        linidx += nocontract_val;
      } else {
        linidx += nocontract_val * m_nocontract_strides[0];
      }
    }

    Index contract_val = left ? col : row;
    for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) {
      const Index idx = contract_val / m_k_strides[i];
      linidx += idx * m_contract_strides[i];
      contract_val -= idx * m_k_strides[i];
    }

    if(array_size<contract_t>::value > 0) {
      if (side == Rhs && inner_dim_contiguous) {
        eigen_assert(m_contract_strides[0] == 1);
        linidx += contract_val;
      } else {
        linidx += contract_val * m_contract_strides[0];
      }
    }

    return linidx;
  }

  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE IndexPair<Index> computeIndexPair(Index row, Index col, const Index distance) const {
    const bool left = (side == Lhs);
    Index nocontract_val[2] = {left ? row : col, left ? row + distance : col};
    Index linidx[2] = {0, 0};
    for (int i = static_cast<int>(array_size<nocontract_t>::value) - 1; i > 0; i--) {
      const Index idx0 = nocontract_val[0] / m_ij_strides[i];
      const Index idx1 = nocontract_val[1] / m_ij_strides[i];
      linidx[0] += idx0 * m_nocontract_strides[i];
      linidx[1] += idx1 * m_nocontract_strides[i];
      nocontract_val[0] -= idx0 * m_ij_strides[i];
      nocontract_val[1] -= idx1 * m_ij_strides[i];
    }
    if (array_size<typename Tensor::Dimensions>::value > array_size<contract_t>::value) {
      if (side == Lhs && inner_dim_contiguous) {
        eigen_assert(m_nocontract_strides[0] == 1);
        linidx[0] += nocontract_val[0];
        linidx[1] += nocontract_val[1];
      } else {
        linidx[0] += nocontract_val[0] * m_nocontract_strides[0];
        linidx[1] += nocontract_val[1] * m_nocontract_strides[0];
      }
    }

    Index contract_val[2] = {left ? col : row, left ? col : row + distance};
    for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) {
      const Index idx0 = contract_val[0] / m_k_strides[i];
      const Index idx1 = contract_val[1] / m_k_strides[i];
      linidx[0] += idx0 * m_contract_strides[i];
      linidx[1] += idx1 * m_contract_strides[i];
      contract_val[0] -= idx0 * m_k_strides[i];
      contract_val[1] -= idx1 * m_k_strides[i];
    }

    if (side == Rhs && inner_dim_contiguous) {
      eigen_assert(m_contract_strides[0] == 1);
      linidx[0] += contract_val[0];
      linidx[1] += contract_val[1];
    } else {
      linidx[0] += contract_val[0] * m_contract_strides[0];
      linidx[1] += contract_val[1] * m_contract_strides[0];
    }
    return IndexPair<Index>(linidx[0], linidx[1]);
  }

  Index firstAligned(Index size) const {
    return size;
  }
  Index stride() const {
    return 1;
  }

 protected:
  const Tensor m_tensor;
  const nocontract_t m_nocontract_strides;
  const nocontract_t m_ij_strides;
  const contract_t m_contract_strides;
  const contract_t m_k_strides;
};


template<typename Scalar, typename Index, int side,
         typename Tensor,
         typename nocontract_t, typename contract_t,
         int packet_size, bool inner_dim_contiguous,
         bool inner_dim_reordered, int Alignment>
class BaseTensorContractionMapper : public SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous>
{
 public:
  typedef SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous> ParentMapper;

  EIGEN_DEVICE_FUNC
  BaseTensorContractionMapper(const Tensor& tensor,
                              const nocontract_t& nocontract_strides,
                              const nocontract_t& ij_strides,
                              const contract_t& contract_strides,
                              const contract_t& k_strides) :
      ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { }

  typedef typename packet_traits<Scalar>::type Packet;
  typedef typename packet_traits<Scalar>::half HalfPacket;

  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const {
    // whole method makes column major assumption

    // don't need to add offsets for now (because operator handles that)
    // current code assumes packet size must be a multiple of 2
    EIGEN_STATIC_ASSERT(packet_size % 2 == 0, YOU_MADE_A_PROGRAMMING_MISTAKE);

    if (Tensor::PacketAccess && inner_dim_contiguous && !inner_dim_reordered) {
      const Index index = this->computeIndex(i, j);
      eigen_assert(this->computeIndex(i+packet_size-1, j) == index + packet_size-1);
      return this->m_tensor.template packet<Alignment>(index);
    }

    const IndexPair<Index> indexPair = this->computeIndexPair(i, j, packet_size - 1);
    const Index first = indexPair.first;
    const Index last = indexPair.second;

    // We can always do optimized packet reads from left hand side right now, because
    // the vertical matrix dimension on the left hand side is never contracting.
    // On the right hand side we need to check if the contracting dimensions may have
    // been shuffled first.
    if (Tensor::PacketAccess &&
        (side == Lhs || internal::array_size<contract_t>::value <= 1 || !inner_dim_reordered) &&
        (last - first) == (packet_size - 1)) {

      return this->m_tensor.template packet<Alignment>(first);
    }

    EIGEN_ALIGN_MAX Scalar data[packet_size];

    data[0] = this->m_tensor.coeff(first);
    for (Index k = 1; k < packet_size - 1; k += 2) {
      const IndexPair<Index> internal_pair = this->computeIndexPair(i + k, j, 1);
      data[k] = this->m_tensor.coeff(internal_pair.first);
      data[k + 1] = this->m_tensor.coeff(internal_pair.second);
    }
    data[packet_size - 1] = this->m_tensor.coeff(last);

    return pload<Packet>(data);
  }

  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE HalfPacket loadHalfPacket(Index i, Index j) const {
    // whole method makes column major assumption

    // don't need to add offsets for now (because operator handles that)
    const Index half_packet_size = unpacket_traits<HalfPacket>::size;
    if (half_packet_size == packet_size) {
      return loadPacket(i, j);
    }
    EIGEN_ALIGN_MAX Scalar data[half_packet_size];
    for (Index k = 0; k < half_packet_size; k++) {
      data[k] = operator()(i + k, j);
    }
    return pload<HalfPacket>(data);
  }
};


template<typename Scalar, typename Index, int side,
         typename Tensor,
         typename nocontract_t, typename contract_t,
         bool inner_dim_contiguous,
         bool inner_dim_reordered, int Alignment>
class BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment> : public SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous>
{
 public:
  typedef SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous> ParentMapper;

  EIGEN_DEVICE_FUNC
  BaseTensorContractionMapper(const Tensor& tensor,
                              const nocontract_t& nocontract_strides,
                              const nocontract_t& ij_strides,
                              const contract_t& contract_strides,
                              const contract_t& k_strides) :
      ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { }

  typedef typename packet_traits<Scalar>::type Packet;
  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const {
    EIGEN_ALIGN_MAX Scalar data[1];
    data[0] = this->m_tensor.coeff(this->computeIndex(i, j));
    return pload<typename packet_traits<Scalar>::type>(data);
  }
  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE Packet loadHalfPacket(Index i, Index j) const {
    return loadPacket(i, j);
  }
};

template<typename Scalar, typename Index, int side,
         typename Tensor,
         typename nocontract_t, typename contract_t,
         int packet_size,
         bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
class TensorContractionInputMapper;

template<typename Scalar, typename Index, int side,
         typename Tensor,
         typename nocontract_t, typename contract_t,
         int packet_size,
         bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
class TensorContractionSubMapper {
 public:
  typedef typename packet_traits<Scalar>::type Packet;
  typedef typename packet_traits<Scalar>::half HalfPacket;

  typedef TensorContractionInputMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> ParentMapper;
  typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Self;
  typedef Self LinearMapper;

  EIGEN_DEVICE_FUNC TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset)
      : m_base_mapper(base_mapper), m_vert_offset(vert_offset), m_horiz_offset(horiz_offset) { }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {
    return m_base_mapper(i + m_vert_offset, m_horiz_offset);
  }
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i, Index j) const {
    return m_base_mapper(i + m_vert_offset, j + m_horiz_offset);
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
    return m_base_mapper.loadPacket(i + m_vert_offset, m_horiz_offset);
  }
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
    return m_base_mapper.loadPacket(i + m_vert_offset, j + m_horiz_offset);
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const {
    return m_base_mapper.loadHalfPacket(i + m_vert_offset, m_horiz_offset);
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, Packet p) const {
    m_base_mapper.storePacket(i + m_vert_offset, m_horiz_offset, p);
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
    return LinearMapper(m_base_mapper, i + m_vert_offset, j + m_horiz_offset);
  }

  template <typename PacketT, int AlignmentType>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i) const {
    EIGEN_STATIC_ASSERT((internal::is_same<PacketT, Packet>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
    EIGEN_STATIC_ASSERT((AlignmentType == Aligned || Alignment == Unaligned), YOU_MADE_A_PROGRAMMING_MISTAKE);
    return loadPacket(i);
  }

  template <typename Packet>
  EIGEN_DEVICE_FUNC bool aligned(Index) const {
    return false;
  }

 private:
  const ParentMapper& m_base_mapper;
  const Index m_vert_offset;
  const Index m_horiz_offset;
};


template<typename Scalar, typename Index, int side,
         typename Tensor,
         typename nocontract_t, typename contract_t,
         int packet_size,
         bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
class TensorContractionInputMapper
    : public BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> {

 public:
  typedef BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Base;
  typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> SubMapper;
  typedef SubMapper VectorMapper;

  EIGEN_DEVICE_FUNC TensorContractionInputMapper(const Tensor& tensor,
                                                 const nocontract_t& nocontract_strides,
                                                 const nocontract_t& ij_strides,
                                                 const contract_t& contract_strides,
                                                 const contract_t& k_strides)
      : Base(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { }

  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE SubMapper getSubMapper(Index i, Index j) const {
    return SubMapper(*this, i, j);
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {
    return VectorMapper(*this, i, j);
  }
};


template<typename Dimensions, typename LhsXprType, typename RhsXprType>
struct traits<TensorContractionOp<Dimensions, LhsXprType, RhsXprType> >
{
@ -480,6 +128,7 @@ struct TensorContractionEvaluatorBase
    PacketAccess = (internal::packet_traits<Scalar>::size > 1),
    Layout = TensorEvaluator<LeftArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = true
  };

  // Most of the code is assuming that both input tensors are ColMajor. If the
@ -498,8 +147,6 @@ struct TensorContractionEvaluatorBase
  static const int ContractDims = internal::array_size<Indices>::value;
  static const int NumDims = max_n_1<LDims + RDims - 2 * ContractDims>::size;

  typedef array<Index, LDims> left_dim_mapper_t;
  typedef array<Index, RDims> right_dim_mapper_t;
  typedef array<Index, ContractDims> contract_t;
  typedef array<Index, max_n_1<LDims - ContractDims>::size> left_nocontract_t;
  typedef array<Index, max_n_1<RDims - ContractDims>::size> right_nocontract_t;
@ -546,8 +193,8 @@ struct TensorContractionEvaluatorBase
    // We need to flip all the pairs of contracting indices as well as
    // reversing the dimensions.
    for (int i = 0; i < ContractDims; i++) {
      eval_op_indices[i].first = LDims - 1 - op.indices()[i].second;
      eval_op_indices[i].second = RDims - 1 - op.indices()[i].first;
      eval_op_indices[i].first = LDims - 1 - op.indices()[ContractDims - 1 - i].second;
      eval_op_indices[i].second = RDims - 1 - op.indices()[ContractDims - 1 - i].first;
    }
  }

@ -741,17 +388,19 @@ struct TensorContractionEvaluatorBase
    typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator;
    const Index lhs_packet_size = internal::packet_traits<LhsScalar>::size;
    const Index rhs_packet_size = internal::packet_traits<RhsScalar>::size;
    const int lhs_alignment = LeftEvaluator::IsAligned ? Aligned : Unaligned;
    const int rhs_alignment = RightEvaluator::IsAligned ? Aligned : Unaligned;
    typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs,
                                                   LeftEvaluator, left_nocontract_t,
                                                   contract_t, lhs_packet_size,
                                                   lhs_inner_dim_contiguous,
                                                   false, Unaligned> LhsMapper;
                                                   false, lhs_alignment> LhsMapper;

    typedef internal::TensorContractionInputMapper<RhsScalar, Index, internal::Rhs,
                                                   RightEvaluator, right_nocontract_t,
                                                   contract_t, rhs_packet_size,
                                                   rhs_inner_dim_contiguous,
                                                   rhs_inner_dim_reordered, Unaligned> RhsMapper;
                                                   rhs_inner_dim_reordered, rhs_alignment> RhsMapper;

    LhsMapper lhs(m_leftImpl, m_left_nocontract_strides, m_i_strides,
                  m_left_contracting_strides, m_k_strides);
@ -784,11 +433,11 @@ struct TensorContractionEvaluatorBase
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const {
    return internal::ploadt<Packet, LoadMode>(m_result + index);
  }

  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const { return m_result; }

 protected:
  // Prevent assignment
@ -853,9 +502,6 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
      internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value;
  static const int ContractDims = internal::array_size<Indices>::value;

  typedef array<Index, LDims> left_dim_mapper_t;
  typedef array<Index, RDims> right_dim_mapper_t;

  typedef array<Index, ContractDims> contract_t;
  typedef array<Index, max_n_1<LDims - ContractDims>::size> left_nocontract_t;
  typedef array<Index, max_n_1<RDims - ContractDims>::size> right_nocontract_t;

@ -1261,7 +1261,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
      Base(op, device) {}

  // We need to redefine this method to make nvcc happy
  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
    this->m_leftImpl.evalSubExprsIfNeeded(NULL);
    this->m_rightImpl.evalSubExprsIfNeeded(NULL);
    if (data) {
@ -1317,6 +1317,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
  void evalTyped(Scalar* buffer) const {
    // columns in left side, rows in right side
    const Index k = this->m_k_size;
    EIGEN_UNUSED_VARIABLE(k)

    // rows in left side
    const Index m = this->m_i_size;
@ -1361,7 +1362,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
      const dim3 block_size(16, 16, 1);
      LAUNCH_CUDA_KERNEL((EigenFloatContractionKernel16x16<Index, LhsMapper, RhsMapper, OutputMapper>), num_blocks, block_size, 0, this->m_device, lhs, rhs, output, m, n, k);
    } else {
      const Index m_blocks = (m + 127) / 128;
      const Index n_blocks = (n + 63) / 64;
      const dim3 num_blocks(m_blocks, n_blocks, 1);
      const dim3 block_size(8, 32, 1);

464  unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h  Normal file
@ -0,0 +1,464 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H
#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H

namespace Eigen {

namespace internal {

enum {
  Rhs = 0,
  Lhs = 1,
};

/*
 * Implementation of the Eigen blas_data_mapper class for tensors.
 */

template <typename Tensor, bool HasRawAccess> struct CoeffLoader {
  enum {
    DirectOffsets = false
  };

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffLoader(const Tensor& tensor) : m_tensor(tensor) { }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void offsetBuffer(typename Tensor::Index) {
    eigen_assert(false && "unsupported");
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename Tensor::Scalar coeff(typename Tensor::Index index) const { return m_tensor.coeff(index); }

  template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  typename Tensor::PacketReturnType packet(typename Tensor::Index index) const
  {
    return m_tensor.template packet<LoadMode>(index);
  }


 private:
  const Tensor m_tensor;
};

template <typename Tensor> struct CoeffLoader<Tensor, true> {
  enum {
    DirectOffsets = true
  };

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffLoader(const Tensor& tensor) : m_data(tensor.data()) {}

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void offsetBuffer(typename Tensor::Index offset) {
    m_data += offset;
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename Tensor::Scalar coeff(typename Tensor::Index index) const { return loadConstant(m_data+index); }

  template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  typename Tensor::PacketReturnType packet(typename Tensor::Index index) const
  {
    return internal::ploadt_ro<typename Tensor::PacketReturnType, LoadMode>(m_data + index);
  }
 private:
  typedef typename Tensor::Scalar Scalar;
  const Scalar* m_data;
};

template<typename Scalar, typename Index, int side,
         typename Tensor,
         typename nocontract_t, typename contract_t,
         int packet_size, bool inner_dim_contiguous, int Alignment>
class SimpleTensorContractionMapper {
 public:
  EIGEN_DEVICE_FUNC
  SimpleTensorContractionMapper(const Tensor& tensor,
                                const nocontract_t& nocontract_strides,
                                const nocontract_t& ij_strides,
                                const contract_t& contract_strides,
                                const contract_t& k_strides) :
      m_tensor(tensor),
      m_nocontract_strides(nocontract_strides),
      m_ij_strides(ij_strides),
      m_contract_strides(contract_strides),
      m_k_strides(k_strides) { }

  enum {
    DirectOffsets = CoeffLoader<Tensor, Tensor::RawAccess>::DirectOffsets
  };

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void offsetBuffer(typename Tensor::Index offset) {
    m_tensor.offsetBuffer(offset);
  }

  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE void prefetch(Index /*i*/) { }

  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE Scalar operator()(Index row) const {
    // column major assumption
    return operator()(row, 0);
  }

  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE Scalar operator()(Index row, Index col) const {
    return m_tensor.coeff(computeIndex(row, col));
  }

  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE Index computeIndex(Index row, Index col) const {
    const bool left = (side == Lhs);
    Index nocontract_val = left ? row : col;
    Index linidx = 0;
    for (int i = static_cast<int>(array_size<nocontract_t>::value) - 1; i > 0; i--) {
      const Index idx = nocontract_val / m_ij_strides[i];
      linidx += idx * m_nocontract_strides[i];
      nocontract_val -= idx * m_ij_strides[i];
    }
    if (array_size<typename Tensor::Dimensions>::value > array_size<contract_t>::value) {
      if (side == Lhs && inner_dim_contiguous) {
        eigen_assert(m_nocontract_strides[0] == 1);
        linidx += nocontract_val;
      } else {
        linidx += nocontract_val * m_nocontract_strides[0];
      }
    }

    Index contract_val = left ? col : row;
    for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) {
      const Index idx = contract_val / m_k_strides[i];
      linidx += idx * m_contract_strides[i];
      contract_val -= idx * m_k_strides[i];
    }

    if(array_size<contract_t>::value > 0) {
      if (side == Rhs && inner_dim_contiguous) {
        eigen_assert(m_contract_strides[0] == 1);
        linidx += contract_val;
      } else {
        linidx += contract_val * m_contract_strides[0];
      }
    }

    return linidx;
  }

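  // Worked illustration of the stride arithmetic above (the dimensions are
  // assumed, illustrative values): for a col-major lhs of dimensions (4,5,6)
  // contracting its last dimension, the 4*5 = 20 non-contracted coefficients
  // form the "row" space with m_ij_strides = {1,4}, m_nocontract_strides =
  // {1,4} and m_contract_strides = {20}. computeIndex(13, 2) decomposes row
  // 13 as 13 = 3*4 + 1 (coordinates (1,3)) and adds col 2 times stride 20,
  // giving 1 + 3*4 + 2*20 = 53, the linear index of coefficient (1,3,2).
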
  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE IndexPair<Index> computeIndexPair(Index row, Index col, const Index distance) const {
    const bool left = (side == Lhs);
    Index nocontract_val[2] = {left ? row : col, left ? row + distance : col};
    Index linidx[2] = {0, 0};
    for (int i = static_cast<int>(array_size<nocontract_t>::value) - 1; i > 0; i--) {
      const Index idx0 = nocontract_val[0] / m_ij_strides[i];
      const Index idx1 = nocontract_val[1] / m_ij_strides[i];
      linidx[0] += idx0 * m_nocontract_strides[i];
      linidx[1] += idx1 * m_nocontract_strides[i];
      nocontract_val[0] -= idx0 * m_ij_strides[i];
      nocontract_val[1] -= idx1 * m_ij_strides[i];
    }
    if (array_size<typename Tensor::Dimensions>::value > array_size<contract_t>::value) {
      if (side == Lhs && inner_dim_contiguous) {
        eigen_assert(m_nocontract_strides[0] == 1);
        linidx[0] += nocontract_val[0];
        linidx[1] += nocontract_val[1];
      } else {
        linidx[0] += nocontract_val[0] * m_nocontract_strides[0];
        linidx[1] += nocontract_val[1] * m_nocontract_strides[0];
      }
    }

    Index contract_val[2] = {left ? col : row, left ? col : row + distance};
    for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) {
      const Index idx0 = contract_val[0] / m_k_strides[i];
      const Index idx1 = contract_val[1] / m_k_strides[i];
      linidx[0] += idx0 * m_contract_strides[i];
      linidx[1] += idx1 * m_contract_strides[i];
      contract_val[0] -= idx0 * m_k_strides[i];
      contract_val[1] -= idx1 * m_k_strides[i];
    }

    if (side == Rhs && inner_dim_contiguous) {
      eigen_assert(m_contract_strides[0] == 1);
      linidx[0] += contract_val[0];
      linidx[1] += contract_val[1];
    } else {
      linidx[0] += contract_val[0] * m_contract_strides[0];
      linidx[1] += contract_val[1] * m_contract_strides[0];
    }
    return IndexPair<Index>(linidx[0], linidx[1]);
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index firstAligned(Index size) const {
    // Only claim alignment when we can compute the actual stride (ie when we're
    // dealing with the lhs with inner_dim_contiguous. This is because the
    // matrix-vector product relies on the stride when dealing with aligned inputs.
    return (Alignment == Aligned) && (side == Lhs) && inner_dim_contiguous ? 0 : size;
  }
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index stride() const {
    return ((side == Lhs) && inner_dim_contiguous) ? m_contract_strides[0] : 1;
  }

 protected:
  CoeffLoader<Tensor, Tensor::RawAccess> m_tensor;
  const nocontract_t m_nocontract_strides;
  const nocontract_t m_ij_strides;
  const contract_t m_contract_strides;
  const contract_t m_k_strides;
};


template<typename Scalar, typename Index, int side,
|
||||
         typename Tensor,
         typename nocontract_t, typename contract_t,
         int packet_size, bool inner_dim_contiguous,
         bool inner_dim_reordered, int Alignment>
class BaseTensorContractionMapper : public SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, Alignment>
{
 public:
  typedef SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, Alignment> ParentMapper;

  EIGEN_DEVICE_FUNC
  BaseTensorContractionMapper(const Tensor& tensor,
                              const nocontract_t& nocontract_strides,
                              const nocontract_t& ij_strides,
                              const contract_t& contract_strides,
                              const contract_t& k_strides) :
      ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { }

  typedef typename packet_traits<Scalar>::type Packet;
  typedef typename packet_traits<Scalar>::half HalfPacket;

  template <int AlignmentType = Alignment>
  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const {
    // This whole method assumes a column-major layout.

    // No offsets need to be added here: operator() takes care of that.
    // The code below assumes that the packet size is a multiple of 2.
    EIGEN_STATIC_ASSERT(packet_size % 2 == 0, YOU_MADE_A_PROGRAMMING_MISTAKE);

    if (Tensor::PacketAccess && inner_dim_contiguous && !inner_dim_reordered) {
      const Index index = this->computeIndex(i, j);
      eigen_assert(this->computeIndex(i+packet_size-1, j) == index + packet_size-1);
      return this->m_tensor.template packet<AlignmentType>(index);
    }

    const IndexPair<Index> indexPair = this->computeIndexPair(i, j, packet_size - 1);
    const Index first = indexPair.first;
    const Index last = indexPair.second;

    // We can always do optimized packet reads from the left hand side right now, because
    // the vertical matrix dimension on the left hand side is never contracting.
    // On the right hand side we need to check whether the contracting dimensions
    // may have been shuffled first.
    if (Tensor::PacketAccess &&
        (side == Lhs || internal::array_size<contract_t>::value <= 1 || !inner_dim_reordered) &&
        (last - first) == (packet_size - 1)) {
      return this->m_tensor.template packet<AlignmentType>(first);
    }

    EIGEN_ALIGN_MAX Scalar data[packet_size];

    data[0] = this->m_tensor.coeff(first);
    for (Index k = 1; k < packet_size - 1; k += 2) {
      const IndexPair<Index> internal_pair = this->computeIndexPair(i + k, j, 1);
      data[k] = this->m_tensor.coeff(internal_pair.first);
      data[k + 1] = this->m_tensor.coeff(internal_pair.second);
    }
    data[packet_size - 1] = this->m_tensor.coeff(last);

    return pload<Packet>(data);
  }

  template <int AlignmentType = Alignment>
  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE HalfPacket loadHalfPacket(Index i, Index j) const {
    // This whole method assumes a column-major layout.

    // No offsets need to be added here: operator() takes care of that.
    const Index half_packet_size = unpacket_traits<HalfPacket>::size;
    if (half_packet_size == packet_size) {
      return loadPacket<AlignmentType>(i, j);
    }
    EIGEN_ALIGN_MAX Scalar data[half_packet_size];
    for (Index k = 0; k < half_packet_size; k++) {
      data[k] = operator()(i + k, j);
    }
    return pload<HalfPacket>(data);
  }
};
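
// Illustrative sketch, not part of the patch: the scalar fallback in
// loadPacket() above amounts to gathering one coefficient at a time and then
// issuing a single aligned load. For a hypothetical mapper `m` with
// packet_size == 4 it behaves like:
//
//   EIGEN_ALIGN_MAX Scalar buf[4];
//   for (Index k = 0; k < 4; ++k) buf[k] = m(i + k, j);  // scalar gathers
//   Packet p = pload<Packet>(buf);                       // one aligned packet load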


template<typename Scalar, typename Index, int side,
         typename Tensor,
         typename nocontract_t, typename contract_t,
         bool inner_dim_contiguous,
         bool inner_dim_reordered, int Alignment>
class BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment> : public SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous, Alignment>
{
 public:
  typedef SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous, Alignment> ParentMapper;

  EIGEN_DEVICE_FUNC
  BaseTensorContractionMapper(const Tensor& tensor,
                              const nocontract_t& nocontract_strides,
                              const nocontract_t& ij_strides,
                              const contract_t& contract_strides,
                              const contract_t& k_strides) :
      ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { }

  typedef typename packet_traits<Scalar>::type Packet;
  template <int> EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const {
    EIGEN_ALIGN_MAX Scalar data[1];
    data[0] = this->m_tensor.coeff(this->computeIndex(i, j));
    return pload<typename packet_traits<Scalar>::type>(data);
  }
  template <int> EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE Packet loadHalfPacket(Index i, Index j) const {
    return loadPacket(i, j);
  }
};
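
// Note: with packet_size == 1 a "packet" holds a single coefficient, so the
// specialization above loads it directly, and loadHalfPacket() simply forwards
// to loadPacket() since no smaller packet type exists.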


template<typename Scalar, typename Index, int side,
         typename Tensor,
         typename nocontract_t, typename contract_t,
         int packet_size,
         bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
class TensorContractionSubMapper {
 public:
  typedef typename packet_traits<Scalar>::type Packet;
  typedef typename packet_traits<Scalar>::half HalfPacket;

  typedef BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> ParentMapper;
  typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Self;
  typedef Self LinearMapper;

  enum {
    // We can use direct offsets iff the parent mapper supports them and we can compute the strides.
    // TODO: we should also enable direct offsets for the Rhs case.
    UseDirectOffsets = (side == Lhs) && inner_dim_contiguous && ParentMapper::DirectOffsets
  };

  EIGEN_DEVICE_FUNC TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset)
      : m_base_mapper(base_mapper), m_vert_offset(vert_offset), m_horiz_offset(horiz_offset) {
    // Bake the offsets into the buffer used by the base mapper whenever possible. This avoids the need to recompute
    // this offset every time we attempt to access a coefficient.
    if (UseDirectOffsets) {
      Index stride = m_base_mapper.stride();
      m_base_mapper.offsetBuffer(vert_offset + horiz_offset * stride);
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {
    if (UseDirectOffsets) {
      return m_base_mapper(i, 0);
    }
    return m_base_mapper(i + m_vert_offset, m_horiz_offset);
  }
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i, Index j) const {
    if (UseDirectOffsets) {
      return m_base_mapper(i, j);
    }
    return m_base_mapper(i + m_vert_offset, j + m_horiz_offset);
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
    if (UseDirectOffsets) {
      return m_base_mapper.template loadPacket<Alignment>(i, 0);
    }
    return m_base_mapper.template loadPacket<Alignment>(i + m_vert_offset, m_horiz_offset);
  }
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
    if (UseDirectOffsets) {
      return m_base_mapper.template loadPacket<Alignment>(i, j);
    }
    return m_base_mapper.template loadPacket<Alignment>(i + m_vert_offset, j + m_horiz_offset);
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const {
    if (UseDirectOffsets) {
      return m_base_mapper.template loadHalfPacket<Alignment>(i, 0);
    }
    return m_base_mapper.template loadHalfPacket<Alignment>(i + m_vert_offset, m_horiz_offset);
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, Packet p) const {
    if (UseDirectOffsets) {
      m_base_mapper.storePacket(i, 0, p);
      return;  // don't fall through and write the packet a second time
    }
    m_base_mapper.storePacket(i + m_vert_offset, m_horiz_offset, p);
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
    if (UseDirectOffsets) {
      return LinearMapper(m_base_mapper, i, j);
    }
    return LinearMapper(m_base_mapper, i + m_vert_offset, j + m_horiz_offset);
  }

  template <typename PacketT, int AlignmentType>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i) const {
    EIGEN_STATIC_ASSERT((internal::is_same<PacketT, Packet>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
    const int ActualAlignment = (AlignmentType == Aligned) && (Alignment == Aligned) ? Aligned : Unaligned;
    if (UseDirectOffsets) {
      return m_base_mapper.template loadPacket<ActualAlignment>(i, 0);
    }
    return m_base_mapper.template loadPacket<ActualAlignment>(i + m_vert_offset, m_horiz_offset);
  }

  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool aligned(Index) const {
    return false;
  }

 private:
  ParentMapper m_base_mapper;
  const Index m_vert_offset;
  const Index m_horiz_offset;
};
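
// Illustrative sketch, not part of the patch: when UseDirectOffsets holds, the
// constructor bakes the sub-mapper origin into the base mapper once,
//
//   base.offsetBuffer(vert_offset + horiz_offset * base.stride());
//
// so that a later base(i, 0) reads the coefficient that base(i + vert_offset,
// horiz_offset) would otherwise have addressed, removing the offset arithmetic
// from every subsequent access.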


template<typename Scalar, typename Index, int side,
         typename Tensor,
         typename nocontract_t, typename contract_t,
         int packet_size,
         bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
class TensorContractionInputMapper
  : public BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> {

 public:
  typedef BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Base;
  typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> SubMapper;
  typedef SubMapper VectorMapper;

  EIGEN_DEVICE_FUNC TensorContractionInputMapper(const Tensor& tensor,
                                                 const nocontract_t& nocontract_strides,
                                                 const nocontract_t& ij_strides,
                                                 const contract_t& contract_strides,
                                                 const contract_t& k_strides)
      : Base(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { }

  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE SubMapper getSubMapper(Index i, Index j) const {
    return SubMapper(*this, i, j);
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {
    return VectorMapper(*this, i, j);
  }
};


} // end namespace internal
} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H
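
// Illustrative sketch, not part of the patch: the contraction kernels view a
// tensor operand through these mappers as a 2-D matrix. With a hypothetical
// `lhs_mapper` built from a tensor and its stride arrays:
//
//   TensorContractionInputMapper<...> lhs_mapper(tensor, nocontract_strides,
//                                                ij_strides, contract_strides,
//                                                k_strides);
//   SubMapper block = lhs_mapper.getSubMapper(i, j);  // window shifted by (i, j)
//   Scalar s = block(0, 0);                           // same value as lhs_mapper(i, j)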

@ -162,6 +162,7 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess && internal::type_casting_traits<SrcType, TargetType>::VectorizedCast,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)

@ -306,6 +306,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
    PacketAccess = TensorEvaluator<InputArgType, Device>::PacketAccess & TensorEvaluator<KernelArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<InputArgType, Device>::Layout,
    CoordAccess = false, // to be implemented
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)

@ -752,6 +753,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
    PacketAccess = false,
    Layout = TensorEvaluator<InputArgType, GpuDevice>::Layout,
    CoordAccess = false, // to be implemented
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const GpuDevice& device)

@ -95,6 +95,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
    BlockAccess = false,
    Layout = TensorEvaluator<XprType, Device>::Layout,
    CoordAccess = false, // to be implemented
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const ArgType& op, const Device& device)

@ -250,6 +251,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
    BlockAccess = false,
    Layout = TensorEvaluator<LhsXprType, Device>::Layout,
    CoordAccess = false, // to be implemented
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)

@ -10,7 +10,6 @@
#if defined(EIGEN_USE_GPU) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H)
#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H


namespace Eigen {

// This defines an interface that GPUDevice can take to use

@ -206,20 +205,45 @@ struct GpuDevice {
#endif
  }

  inline int getNumCudaMultiProcessors() const {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int getNumCudaMultiProcessors() const {
#ifndef __CUDA_ARCH__
    return stream_->deviceProperties().multiProcessorCount;
#else
    eigen_assert(false && "The default device should be used instead to generate kernel code");
    return 0;
#endif
  }
  inline int maxCudaThreadsPerBlock() const {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxCudaThreadsPerBlock() const {
#ifndef __CUDA_ARCH__
    return stream_->deviceProperties().maxThreadsPerBlock;
#else
    eigen_assert(false && "The default device should be used instead to generate kernel code");
    return 0;
#endif
  }
  inline int maxCudaThreadsPerMultiProcessor() const {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxCudaThreadsPerMultiProcessor() const {
#ifndef __CUDA_ARCH__
    return stream_->deviceProperties().maxThreadsPerMultiProcessor;
#else
    eigen_assert(false && "The default device should be used instead to generate kernel code");
    return 0;
#endif
  }
  inline int sharedMemPerBlock() const {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int sharedMemPerBlock() const {
#ifndef __CUDA_ARCH__
    return stream_->deviceProperties().sharedMemPerBlock;
#else
    eigen_assert(false && "The default device should be used instead to generate kernel code");
    return 0;
#endif
  }
  inline int majorDeviceVersion() const {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
#ifndef __CUDA_ARCH__
    return stream_->deviceProperties().major;
#else
    eigen_assert(false && "The default device should be used instead to generate kernel code");
    return 0;
#endif
  }

  // This function checks if the CUDA runtime recorded an error for the

@ -239,23 +263,29 @@ struct GpuDevice {
};

#ifndef __CUDA_ARCH__
#define LAUNCH_CUDA_KERNEL(kernel, gridsize, blocksize, sharedmem, device, ...) \
  (kernel) <<< (gridsize), (blocksize), (sharedmem), (device).stream() >>> (__VA_ARGS__); \
#define LAUNCH_CUDA_KERNEL(kernel, gridsize, blocksize, sharedmem, device, ...) \
  (kernel) <<< (gridsize), (blocksize), (sharedmem), (device).stream() >>> (__VA_ARGS__); \
  assert(cudaGetLastError() == cudaSuccess);
#else
#define LAUNCH_CUDA_KERNEL(...) \
  eigen_assert(false && "Cannot launch a kernel from another kernel");
#define LAUNCH_CUDA_KERNEL(kernel, ...) \
  { const auto __attribute__((__unused__)) __makeTheKernelInstantiate = &(kernel); } \
  eigen_assert(false && "Cannot launch a kernel from another kernel");
#endif
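
// Illustrative sketch, not part of the patch: host-side usage of the macro,
// mirroring the TensorExecutor call sites later in this commit. The launch
// configuration is derived from the device properties queried above:
//
//   const int block_size = device.maxCudaThreadsPerBlock();
//   const int num_blocks = device.getNumCudaMultiProcessors() *
//                          device.maxCudaThreadsPerMultiProcessor() / block_size;
//   LAUNCH_CUDA_KERNEL((EigenMetaKernel_Vectorizable<Evaluator, Index>),
//                      num_blocks, block_size, /*sharedmem=*/0, device,
//                      evaluator, size);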


// FIXME: Should be device and kernel specific.
#ifdef __CUDACC__
static inline void setCudaSharedMemConfig(cudaSharedMemConfig config) {
static EIGEN_DEVICE_FUNC inline void setCudaSharedMemConfig(cudaSharedMemConfig config) {
#ifndef __CUDA_ARCH__
  cudaError_t status = cudaDeviceSetSharedMemConfig(config);
  EIGEN_UNUSED_VARIABLE(status)
  assert(status == cudaSuccess);
#else
  EIGEN_UNUSED_VARIABLE(config)
#endif
}
#endif

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H
#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H

@ -110,14 +110,14 @@ struct Sizes : internal::numeric_list<std::ptrdiff_t, Indices...> {
    return internal::arg_prod(Indices...);
  }

  Sizes() { }
  EIGEN_DEVICE_FUNC Sizes() { }
  template <typename DenseIndex>
  explicit Sizes(const array<DenseIndex, Base::count>& /*indices*/) {
  explicit EIGEN_DEVICE_FUNC Sizes(const array<DenseIndex, Base::count>& /*indices*/) {
    // todo: add assertion
  }
#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
  template <typename... DenseIndex> Sizes(DenseIndex...) { }
  explicit Sizes(std::initializer_list<std::ptrdiff_t> /*l*/) {
  template <typename... DenseIndex> EIGEN_DEVICE_FUNC Sizes(DenseIndex...) { }
  explicit EIGEN_DEVICE_FUNC Sizes(std::initializer_list<std::ptrdiff_t> /*l*/) {
    // todo: add assertion
  }
#endif

@ -289,7 +289,7 @@ struct DSizes : array<DenseIndex, NumDims> {
  template<typename... IndexTypes> EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE explicit DSizes(DenseIndex firstDimension, IndexTypes... otherDimensions) {
    EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumDims, YOU_MADE_A_PROGRAMMING_MISTAKE)
    (*this) = array<DenseIndex, NumDims>{{firstDimension, otherDimensions...}};
    (*this) = array<DenseIndex, NumDims>{firstDimension, otherDimensions...};
  }
#else
  EIGEN_DEVICE_FUNC explicit DSizes(const DenseIndex i0) {

@ -405,20 +405,20 @@ template <std::size_t n, std::size_t V1, std::size_t V2, std::size_t V3, std::si

template <typename Dims1, typename Dims2, size_t n, size_t m>
struct sizes_match_below_dim {
  static inline bool run(Dims1&, Dims2&) {
  static EIGEN_DEVICE_FUNC inline bool run(Dims1&, Dims2&) {
    return false;
  }
};
template <typename Dims1, typename Dims2, size_t n>
struct sizes_match_below_dim<Dims1, Dims2, n, n> {
  static inline bool run(Dims1& dims1, Dims2& dims2) {
  static EIGEN_DEVICE_FUNC inline bool run(Dims1& dims1, Dims2& dims2) {
    return (array_get<n-1>(dims1) == array_get<n-1>(dims2)) &
           sizes_match_below_dim<Dims1, Dims2, n-1, n-1>::run(dims1, dims2);
  }
};
template <typename Dims1, typename Dims2>
struct sizes_match_below_dim<Dims1, Dims2, 0, 0> {
  static inline bool run(Dims1&, Dims2&) {
  static EIGEN_DEVICE_FUNC inline bool run(Dims1&, Dims2&) {
    return true;
  }
};

@ -427,7 +427,7 @@ struct sizes_match_below_dim<Dims1, Dims2, 0, 0> {


template <typename Dims1, typename Dims2>
bool dimensions_match(Dims1& dims1, Dims2& dims2) {
EIGEN_DEVICE_FUNC bool dimensions_match(Dims1& dims1, Dims2& dims2) {
  return internal::sizes_match_below_dim<Dims1, Dims2, internal::array_size<Dims1>::value, internal::array_size<Dims2>::value>::run(dims1, dims2);
}

@ -98,6 +98,7 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device>
    PacketAccess = true,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false, // to be implemented
    RawAccess = true
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)

@ -140,7 +141,7 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device>
    return internal::ploadt<Packet, LoadMode>(m_buffer + index);
  }

  EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
  EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_buffer; }

 private:
  TensorEvaluator<ArgType, Device> m_impl;

@ -43,6 +43,7 @@ struct TensorEvaluator
    PacketAccess = Derived::PacketAccess,
    Layout = Derived::Layout,
    CoordAccess = NumCoords > 0,
    RawAccess = true
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)

@ -148,6 +149,7 @@ struct TensorEvaluator<const Derived, Device>
    PacketAccess = Derived::PacketAccess,
    Layout = Derived::Layout,
    CoordAccess = NumCoords > 0,
    RawAccess = true
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)

@ -207,6 +209,7 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
    PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false, // to be implemented
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC

@ -257,6 +260,7 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess & internal::functor_traits<UnaryOp>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false, // to be implemented
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)

@ -312,6 +316,7 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
                   internal::functor_traits<BinaryOp>::PacketAccess,
    Layout = TensorEvaluator<LeftArgType, Device>::Layout,
    CoordAccess = false, // to be implemented
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)

@ -378,6 +383,7 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
                   internal::packet_traits<Scalar>::HasBlend,
    Layout = TensorEvaluator<IfArgType, Device>::Layout,
    CoordAccess = false, // to be implemented
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)

@ -156,14 +156,14 @@ template <typename Expression>
class TensorExecutor<Expression, GpuDevice, false> {
 public:
  typedef typename Expression::Index Index;
  EIGEN_DEVICE_FUNC static void run(const Expression& expr, const GpuDevice& device);
  static EIGEN_DEVICE_FUNC void run(const Expression& expr, const GpuDevice& device);
};

template <typename Expression>
class TensorExecutor<Expression, GpuDevice, true> {
 public:
  typedef typename Expression::Index Index;
  EIGEN_DEVICE_FUNC static void run(const Expression& expr, const GpuDevice& device);
  static EIGEN_DEVICE_FUNC void run(const Expression& expr, const GpuDevice& device);
};

#if defined(__CUDACC__)

@ -215,7 +215,6 @@ EigenMetaKernel_Vectorizable(Evaluator memcopied_eval, Index size) {
template <typename Expression>
EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, false>::run(const Expression& expr, const GpuDevice& device)
{
#ifndef __CUDA_ARCH__
  TensorEvaluator<Expression, GpuDevice> evaluator(expr, device);
  const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
  if (needs_assign)

@ -228,9 +227,6 @@ EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, false>::run(
    LAUNCH_CUDA_KERNEL((EigenMetaKernel_NonVectorizable<TensorEvaluator<Expression, GpuDevice>, Index>), num_blocks, block_size, 0, device, evaluator, size);
  }
  evaluator.cleanup();
#else
  eigen_assert(false && "Cannot launch a kernel from another kernel");
#endif
}


@ -238,7 +234,6 @@ EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, false>::run(
template<typename Expression>
EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, true>::run(const Expression& expr, const GpuDevice& device)
{
#ifndef __CUDA_ARCH__
  TensorEvaluator<Expression, GpuDevice> evaluator(expr, device);
  const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
  if (needs_assign)

@ -251,9 +246,6 @@ EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, true>::run(c
    LAUNCH_CUDA_KERNEL((EigenMetaKernel_Vectorizable<TensorEvaluator<Expression, GpuDevice>, Index>), num_blocks, block_size, 0, device, evaluator, size);
  }
  evaluator.cleanup();
#else
  eigen_assert(false && "Cannot launch a kernel from another kernel");
#endif
}

#endif // __CUDACC__

@ -135,6 +135,7 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
    BlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_fft(op.fft()), m_impl(op.expression(), device), m_data(NULL), m_device(device) {

@ -44,7 +44,8 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
    PacketAccess = (internal::packet_traits<Scalar>::size > 1),
    Layout = Options_ & RowMajor ? RowMajor : ColMajor,
    CoordAccess = true,
  };
    RawAccess = true
  };

  typedef Dimensions_ Dimensions;
  static const std::size_t NumIndices = Dimensions::count;

@ -53,7 +54,7 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
    TensorStorage<Scalar, Dimensions, Options> m_storage;

  public:
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return NumIndices; }
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return NumIndices; }
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index dimension(std::size_t n) const { return m_storage.dimensions()[n]; }
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_storage.dimensions(); }
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_storage.size(); }

@ -72,7 +73,7 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
    {
      // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
      EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
      return coeff(array<Index, NumIndices>{{firstIndex, otherIndices...}});
      return coeff(array<Index, NumIndices>{firstIndex, otherIndices...});
    }
#endif

@ -104,7 +105,7 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
    {
      // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
      EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
      return coeffRef(array<Index, NumIndices>{{firstIndex, otherIndices...}});
      return coeffRef(array<Index, NumIndices>{firstIndex, otherIndices...});
    }
#endif

@ -136,7 +137,7 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
    {
      // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
      EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
      return this->operator()(array<Index, NumIndices>{{firstIndex, otherIndices...}});
      return this->operator()(array<Index, NumIndices>{firstIndex, otherIndices...});
    }
#endif

@ -175,7 +176,7 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
    {
      // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
      EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
      return operator()(array<Index, NumIndices>{{firstIndex, otherIndices...}});
      return operator()(array<Index, NumIndices>{firstIndex, otherIndices...});
    }
#endif

@ -92,6 +92,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
    IsAligned = true,
    PacketAccess = (internal::packet_traits<Scalar>::size > 1),
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess = true
  };

  EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)

@ -95,6 +95,7 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
    BlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false, // to be implemented
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)