Mirror of https://gitlab.com/libeigen/eigen.git
Commit 093f2b3c01: Merge.
@@ -76,32 +76,6 @@ public:
#endif
};

// template<typename Lhs, typename Rhs> struct product_tag
// {
// private:
//
// typedef typename remove_all<Lhs>::type _Lhs;
// typedef typename remove_all<Rhs>::type _Rhs;
// enum {
// Rows = _Lhs::RowsAtCompileTime,
// Cols = _Rhs::ColsAtCompileTime,
// Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime, _Rhs::RowsAtCompileTime)
// };
//
// enum {
// rows_select = Rows==1 ? int(Rows) : int(Large),
// cols_select = Cols==1 ? int(Cols) : int(Large),
// depth_select = Depth==1 ? int(Depth) : int(Large)
// };
// typedef product_type_selector<rows_select, cols_select, depth_select> selector;
//
// public:
// enum {
// ret = selector::ret
// };
//
// };

/* The following allows to select the kind of product at compile time
 * based on the three dimensions of the product.
 * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
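The commented-out product_tag helper and the product_type_selector specializations that follow encode a compile-time mapping from the three product dimensions, each classified as 1, Small, or Large, to a product kind. A minimal standalone sketch of the same idea, using made-up names (product_kind, Kind) rather than Eigen's actual types:

// Toy illustration, not Eigen's header: classify each dimension, then map the
// triple to a product kind via template specialization.
#include <iostream>

enum Size { One = 1, Small = 2, Large = 3 };
enum Kind { CoeffBased = 0, Gemv = 1, Gemm = 2 };

// Primary template: default to a coefficient-based product.
template <int Rows, int Cols, int Depth>
struct product_kind { enum { ret = CoeffBased }; };

// A couple of illustrative specializations mirroring the pattern below.
template <> struct product_kind<Large, One, Large>   { enum { ret = Gemv }; };
template <> struct product_kind<Large, Large, Large> { enum { ret = Gemm }; };

int main() {
  // Large x Large with a Large inner dimension selects the GEMM path.
  std::cout << product_kind<Large, Large, Large>::ret << "\n"; // prints 2
}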
@@ -125,8 +99,8 @@ template<> struct product_type_selector<Small,Small,Large> { enum
template<> struct product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Small,Small> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Small,Large,Small> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Small,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Small,Large,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; };

} // end namespace internal
@@ -134,6 +134,11 @@ pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
return static_cast<TgtPacket>(a);
}

template <typename SrcPacket, typename TgtPacket>
EIGEN_DEVICE_FUNC inline TgtPacket
pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) {
return static_cast<TgtPacket>(a);
}

/** \internal \returns a + b (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
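The overload added in this hunk is the generic fallback for a four-to-one packet conversion: it converts only the first argument and ignores the rest, which is meaningful when the "packets" are plain scalars; vectorized targets supply their own pcast specializations elsewhere. A self-contained restatement of that fallback, for illustration only:

#include <iostream>

// Same shape as the generic overload above: extra source packets are unused.
template <typename SrcPacket, typename TgtPacket>
inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& /*b*/,
                       const SrcPacket& /*c*/, const SrcPacket& /*d*/) {
  return static_cast<TgtPacket>(a);
}

int main() {
  // Scalar fallback: only the first argument contributes to the result.
  float f = pcast<double, float>(1.5, 2.5, 3.5, 4.5);
  std::cout << f << "\n"; // prints 1.5
}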
@@ -134,7 +134,24 @@ struct lgamma_impl<double> {
 * Implementation of digamma (psi) *
 ****************************************************************************/

#ifdef EIGEN_HAS_C99_MATH
template <typename Scalar>
struct digamma_retval {
typedef Scalar type;
};

#ifndef EIGEN_HAS_C99_MATH

template <typename Scalar>
struct digamma_impl {
EIGEN_DEVICE_FUNC
static Scalar run(Scalar x) {
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
THIS_TYPE_IS_NOT_SUPPORTED);
return Scalar(0);
}
};

#else

/*
 *
@@ -202,14 +219,6 @@ struct digamma_impl_maybe_poly<double> {
}
};

#endif // EIGEN_HAS_C99_MATH

template <typename Scalar>
struct digamma_retval {
typedef Scalar type;
};

#ifdef EIGEN_HAS_C99_MATH
template <typename Scalar>
struct digamma_impl {
EIGEN_DEVICE_FUNC
@@ -178,7 +178,7 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n
// We also include a register-level block of the result (mx x nr).
// (In an ideal world only the lhs panel would stay in L1)
// Moreover, kc has to be a multiple of 8 to be compatible with loop peeling, leading to a maximum blocking size of:
const Index max_kc = ((l1-k_sub)/k_div) & (~(k_peeling-1));
const Index max_kc = std::max<Index>(((l1-k_sub)/k_div) & (~(k_peeling-1)),1);
const Index old_k = k;
if(k>max_kc)
{
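The only change in this hunk is clamping max_kc to at least 1, so that a very small L1 budget can no longer yield a zero blocking size. A quick standalone check of the arithmetic, with made-up cache parameters (l1, k_sub, k_div, k_peeling are illustrative values, not Eigen's defaults):

#include <algorithm>
#include <cstdio>

int main() {
  // Chosen so the unclamped expression rounds down to zero.
  long l1 = 1024, k_sub = 1000, k_div = 8, k_peeling = 8;
  long unclamped = ((l1 - k_sub) / k_div) & ~(k_peeling - 1);                    // 3 & ~7 == 0
  long clamped   = std::max<long>(((l1 - k_sub) / k_div) & ~(k_peeling - 1), 1); // == 1
  std::printf("unclamped=%ld clamped=%ld\n", unclamped, clamped);
}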
@@ -20,6 +20,7 @@
#pragma warning( push )
#endif
#pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4717 4800)

#elif defined __INTEL_COMPILER
// 2196 - routine is both "inline" and "noinline" ("noinline" assumed)
// ICC 12 generates this warning even without any inline keyword, when defining class methods 'inline' i.e. inside of class body
@@ -32,6 +33,7 @@
#pragma warning push
#endif
#pragma warning disable 2196 279 1684 2259

#elif defined __clang__
// -Wconstant-logical-operand - warning: use of logical && with constant operand; switch to bitwise & or remove constant
// this is really a stupid warning as it warns on compile-time expressions involving enums
@@ -39,6 +41,17 @@
#pragma clang diagnostic push
#endif
#pragma clang diagnostic ignored "-Wconstant-logical-operand"

#elif defined __NVCC__
// Disable the "statement is unreachable" message
#pragma diag_suppress code_is_unreachable
// Disable the "dynamic initialization in unreachable code" message
#pragma diag_suppress initialization_not_reachable
// Disable the "calling a __host__ function from a __host__ __device__ function is not allowed" messages (yes, there are 4 of them)
#pragma diag_suppress 2651
#pragma diag_suppress 2653
#pragma diag_suppress 2668
#pragma diag_suppress 2670
#endif

#endif // not EIGEN_WARNINGS_DISABLED
@@ -94,10 +94,6 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
template<typename Decomposition, typename Rhstype> class Solve;
template<typename XprType> class Inverse;

namespace internal {
template<typename Lhs, typename Rhs> struct product_tag;
}

template<typename Lhs, typename Rhs, int Option = DefaultProduct> class Product;

template<typename Derived> class DiagonalBase;
@@ -8,6 +8,14 @@
#pragma warning pop
#elif defined __clang__
#pragma clang diagnostic pop
#elif defined __NVCC__
// Don't reenable the diagnostic messages, as it turns out these messages need
// to be disabled at the point of the template instantiation (i.e the user code)
// otherwise they'll be triggered by nvcc.
// #pragma diag_default code_is_unreachable
// #pragma diag_default initialization_not_reachable
// #pragma diag_default 2651
// #pragma diag_default 2653
#endif
#endif

@@ -73,8 +73,15 @@ public:
Index m_outerStart;
const internal::variable_if_dynamic<Index, OuterSize> m_outerSize;

public:
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
protected:
// Disable assignment with clear error message.
// Note that simply removing operator= yields compilation errors with ICC+MSVC
template<typename T>
BlockImpl& operator=(const T&)
{
EIGEN_STATIC_ASSERT(sizeof(T)==0, THIS_SPARSE_BLOCK_SUBEXPRESSION_IS_READ_ONLY);
return *this;
}
};

@@ -424,8 +431,6 @@ public:
friend struct internal::unary_evaluator<Block<XprType,BlockRows,BlockCols,InnerPanel>, internal::IteratorBased, Scalar >;

Index nonZeros() const { return Dynamic; }

EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)

typename internal::ref_selector<XprType>::non_const_type m_matrix;
const internal::variable_if_dynamic<Index, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
@@ -433,6 +438,16 @@ public:
const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_blockRows;
const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_blockCols;

protected:
// Disable assignment with clear error message.
// Note that simply removing operator= yields compilation errors with ICC+MSVC
template<typename T>
BlockImpl& operator=(const T&)
{
EIGEN_STATIC_ASSERT(sizeof(T)==0, THIS_SPARSE_BLOCK_SUBEXPRESSION_IS_READ_ONLY);
return *this;
}

};

namespace internal {
@@ -13,32 +13,24 @@

#include "details.h"

// Define the explicit instantiation (e.g. necessary for the Intel compiler)
#if EIGEN_COMP_GNUC || EIGEN_COMP_ICC
#define EIGEN_EXPLICIT_STL_DEQUE_INSTANTIATION(...) template class std::deque<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> >;
#else
#define EIGEN_EXPLICIT_STL_DEQUE_INSTANTIATION(...)
#endif

/**
 * This section contains a convenience MACRO which allows an easy specialization of
 * std::deque such that for data types with alignment issues the correct allocator
 * is used automatically.
 */
#define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...) \
EIGEN_EXPLICIT_STL_DEQUE_INSTANTIATION(__VA_ARGS__) \
namespace std \
{ \
template<typename _Ay> \
class deque<__VA_ARGS__, _Ay> \
template<> \
class deque<__VA_ARGS__, std::allocator<__VA_ARGS__> > \
: public deque<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > \
{ \
typedef deque<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > deque_base; \
public: \
typedef __VA_ARGS__ value_type; \
typedef typename deque_base::allocator_type allocator_type; \
typedef typename deque_base::size_type size_type; \
typedef typename deque_base::iterator iterator; \
typedef deque_base::allocator_type allocator_type; \
typedef deque_base::size_type size_type; \
typedef deque_base::iterator iterator; \
explicit deque(const allocator_type& a = allocator_type()) : deque_base(a) {} \
template<typename InputIterator> \
deque(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) : deque_base(first, last, a) {} \
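The stddeque_overload test added later in this commit shows the intended use of this macro: specialize std::deque for a fixed-size vectorizable Eigen type before instantiating it. A minimal sketch along those lines (assuming the Eigen headers are available; Vector4f matches one of the specializations used in the test):

#include <deque>
#include <Eigen/StdDeque>   // provides EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION
#include <Eigen/Geometry>

// Route std::deque<Eigen::Vector4f> through Eigen's aligned allocator.
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Eigen::Vector4f)

int main() {
  std::deque<Eigen::Vector4f> d;
  d.push_back(Eigen::Vector4f::Ones());   // elements get properly aligned storage
  return d.size() == 1 ? 0 : 1;
}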
@@ -12,32 +12,24 @@

#include "details.h"

// Define the explicit instantiation (e.g. necessary for the Intel compiler)
#if EIGEN_COMP_GNUC || EIGEN_COMP_ICC
#define EIGEN_EXPLICIT_STL_LIST_INSTANTIATION(...) template class std::list<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> >;
#else
#define EIGEN_EXPLICIT_STL_LIST_INSTANTIATION(...)
#endif

/**
 * This section contains a convenience MACRO which allows an easy specialization of
 * std::list such that for data types with alignment issues the correct allocator
 * is used automatically.
 */
#define EIGEN_DEFINE_STL_LIST_SPECIALIZATION(...) \
EIGEN_EXPLICIT_STL_LIST_INSTANTIATION(__VA_ARGS__) \
namespace std \
{ \
template<typename _Ay> \
class list<__VA_ARGS__, _Ay> \
template<> \
class list<__VA_ARGS__, std::allocator<__VA_ARGS__> > \
: public list<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > \
{ \
typedef list<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > list_base; \
public: \
typedef __VA_ARGS__ value_type; \
typedef typename list_base::allocator_type allocator_type; \
typedef typename list_base::size_type size_type; \
typedef typename list_base::iterator iterator; \
typedef list_base::allocator_type allocator_type; \
typedef list_base::size_type size_type; \
typedef list_base::iterator iterator; \
explicit list(const allocator_type& a = allocator_type()) : list_base(a) {} \
template<typename InputIterator> \
list(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) : list_base(first, last, a) {} \
@@ -21,7 +21,7 @@ i.e either row major or column major. The default is column major. Most arithmet
<td> Resize/Reserve</td>
<td>
\code
sm1.resize(m,n); //Change sm1 to a m x n matrix.
sm1.resize(m,n); // Change sm1 to a m x n matrix.
sm1.reserve(nnz); // Allocate room for nnz nonzeros elements.
\endcode
</td>
@@ -151,10 +151,10 @@ It is easy to perform arithmetic operations on sparse matrices provided that the
<td> Permutation </td>
<td>
\code
perm.indices(); // Reference to the vector of indices
perm.indices(); // Reference to the vector of indices
sm1.twistedBy(perm); // Permute rows and columns
sm2 = sm1 * perm; //Permute the columns
sm2 = perm * sm1; // Permute the columns
sm2 = sm1 * perm; // Permute the columns
sm2 = perm * sm1; // Permute the columns
\endcode
</td>
<td>
@@ -181,9 +181,9 @@ sm2 = perm * sm1; // Permute the columns

\section sparseotherops Other supported operations
<table class="manual">
<tr><th>Operations</th> <th> Code </th> <th> Notes</th> </tr>
<tr><th style="min-width:initial"> Code </th> <th> Notes</th> </tr>
<tr><td colspan="2">Sub-matrices</td></tr>
<tr>
<td>Sub-matrices</td>
<td>
\code
sm1.block(startRow, startCol, rows, cols);
@@ -193,25 +193,31 @@ sm2 = perm * sm1; // Permute the columns
sm1.bottomLeftCorner( rows, cols);
sm1.bottomRightCorner( rows, cols);
\endcode
</td> <td> </td>
</td><td>
Contrary to dense matrices, here <strong>all these methods are read-only</strong>.\n
See \ref TutorialSparse_SubMatrices and below for read-write sub-matrices.
</td>
</tr>
<tr>
<td> Range </td>
<tr class="alt"><td colspan="2"> Range </td></tr>
<tr class="alt">
<td>
\code
sm1.innerVector(outer);
sm1.innerVectors(start, size);
sm1.leftCols(size);
sm2.rightCols(size);
sm1.middleRows(start, numRows);
sm1.middleCols(start, numCols);
sm1.col(j);
sm1.innerVector(outer); // RW
sm1.innerVectors(start, size); // RW
sm1.leftCols(size); // RW
sm2.rightCols(size); // RO because sm2 is row-major
sm1.middleRows(start, numRows); // RO because sm1 is column-major
sm1.middleCols(start, numCols); // RW
sm1.col(j); // RW
\endcode
</td>
<td>An inner vector is either a row (for row-major) or a column (for column-major). As stated earlier, the evaluation can be done in a matrix with different storage order </td>
<td>
An inner vector is either a row (for row-major) or a column (for column-major).\n
As stated earlier, for a read-write sub-matrix (RW), the evaluation can be done in a matrix with different storage order.
</td>
</tr>
<tr><td colspan="2"> Triangular and selfadjoint views</td></tr>
<tr>
<td> Triangular and selfadjoint views</td>
<td>
\code
sm2 = sm1.triangularview<Lower>();
@@ -222,26 +228,30 @@ sm2 = perm * sm1; // Permute the columns
\code
\endcode </td>
</tr>
<tr>
<td>Triangular solve </td>
<tr class="alt"><td colspan="2">Triangular solve </td></tr>
<tr class="alt">
<td>
\code
dv2 = sm1.triangularView<Upper>().solve(dv1);
dv2 = sm1.topLeftCorner(size, size).triangularView<Lower>().solve(dv1);
dv2 = sm1.topLeftCorner(size, size)
.triangularView<Lower>().solve(dv1);
\endcode
</td>
<td> For general sparse solve, Use any suitable module described at \ref TopicSparseSystems </td>
</tr>
<tr><td colspan="2"> Low-level API</td></tr>
<tr>
<td> Low-level API</td>
<td>
\code
sm1.valuePtr(); // Pointer to the values
sm1.innerIndextr(); // Pointer to the indices.
sm1.outerIndexPtr(); //Pointer to the beginning of each inner vector
sm1.valuePtr(); // Pointer to the values
sm1.innerIndextr(); // Pointer to the indices.
sm1.outerIndexPtr(); // Pointer to the beginning of each inner vector
\endcode
</td>
<td> If the matrix is not in compressed form, makeCompressed() should be called before. Note that these functions are mostly provided for interoperability purposes with external libraries. A better access to the values of the matrix is done by using the InnerIterator class as described in \link TutorialSparse the Tutorial Sparse \endlink section</td>
<td>
If the matrix is not in compressed form, makeCompressed() should be called before.\n
Note that these functions are mostly provided for interoperability purposes with external libraries.\n
A better access to the values of the matrix is done by using the InnerIterator class as described in \link TutorialSparse the Tutorial Sparse \endlink section</td>
</tr>
</table>
*/
@@ -241,11 +241,11 @@ In the following \em sm denotes a sparse matrix, \em sv a sparse vector, \em dm
sm1.real() sm1.imag() -sm1 0.5*sm1
sm1+sm2 sm1-sm2 sm1.cwiseProduct(sm2)
\endcode
However, a strong restriction is that the storage orders must match. For instance, in the following example:
However, <strong>a strong restriction is that the storage orders must match</strong>. For instance, in the following example:
\code
sm4 = sm1 + sm2 + sm3;
\endcode
sm1, sm2, and sm3 must all be row-major or all column major.
sm1, sm2, and sm3 must all be row-major or all column-major.
On the other hand, there is no restriction on the target matrix sm4.
For instance, this means that for computing \f$ A^T + A \f$, the matrix \f$ A^T \f$ must be evaluated into a temporary matrix of compatible storage order:
\code
|
||||
\endcode
|
||||
|
||||
|
||||
\subsection TutorialSparse_SubMatrices Block operations
|
||||
|
||||
Regarding read-access, sparse matrices expose the same API than for dense matrices to access to sub-matrices such as blocks, columns, and rows. See \ref TutorialBlockOperations for a detailed introduction.
|
||||
However, for performance reasons, writing to a sub-sparse-matrix is much more limited, and currently only contiguous sets of columns (resp. rows) of a column-major (resp. row-major) SparseMatrix are writable. Moreover, this information has to be known at compile-time, leaving out methods such as <tt>block(...)</tt> and <tt>corner*(...)</tt>. The available API for write-access to a SparseMatrix are summarized below:
|
||||
\code
|
||||
SparseMatrix<double,ColMajor> sm1;
|
||||
sm1.col(j) = ...;
|
||||
sm1.leftCols(ncols) = ...;
|
||||
sm1.middleCols(j,ncols) = ...;
|
||||
sm1.rightCols(ncols) = ...;
|
||||
|
||||
SparseMatrix<double,RowMajor> sm2;
|
||||
sm2.row(i) = ...;
|
||||
sm2.topRows(nrows) = ...;
|
||||
sm2.middleRows(i,nrows) = ...;
|
||||
sm2.bottomRows(nrows) = ...;
|
||||
\endcode
|
||||
|
||||
In addition, sparse matrices expose the SparseMatrixBase::innerVector() and SparseMatrixBase::innerVectors() methods, which are aliases to the col/middleCols methods for a column-major storage, and to the row/middleRows methods for a row-major storage.
|
||||
|
||||
\subsection TutorialSparse_TriangularSelfadjoint Triangular and selfadjoint views
|
||||
|
||||
Just as with dense matrices, the triangularView() function can be used to address a triangular part of the matrix, and perform triangular solves with a dense right hand side:
|
||||
|
@@ -226,7 +226,9 @@ ei_add_test(geo_homogeneous)
ei_add_test(stdvector)
ei_add_test(stdvector_overload)
ei_add_test(stdlist)
ei_add_test(stdlist_overload)
ei_add_test(stddeque)
ei_add_test(stddeque_overload)
ei_add_test(sparse_basic)
ei_add_test(sparse_block)
ei_add_test(sparse_vector)
test/stddeque_overload.cpp (new file, 159 lines)
@@ -0,0 +1,159 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#include "main.h"
|
||||
|
||||
#include <Eigen/StdDeque>
|
||||
#include <Eigen/Geometry>
|
||||
|
||||
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Vector4f)
|
||||
|
||||
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Matrix2f)
|
||||
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Matrix4f)
|
||||
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Matrix4d)
|
||||
|
||||
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Affine3f)
|
||||
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Affine3d)
|
||||
|
||||
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Quaternionf)
|
||||
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Quaterniond)
|
||||
|
||||
template<typename MatrixType>
|
||||
void check_stddeque_matrix(const MatrixType& m)
|
||||
{
|
||||
typename MatrixType::Index rows = m.rows();
|
||||
typename MatrixType::Index cols = m.cols();
|
||||
MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols);
|
||||
std::deque<MatrixType> v(10, MatrixType(rows,cols)), w(20, y);
|
||||
v[5] = x;
|
||||
w[6] = v[5];
|
||||
VERIFY_IS_APPROX(w[6], v[5]);
|
||||
v = w;
|
||||
for(int i = 0; i < 20; i++)
|
||||
{
|
||||
VERIFY_IS_APPROX(w[i], v[i]);
|
||||
}
|
||||
|
||||
v.resize(21);
|
||||
v[20] = x;
|
||||
VERIFY_IS_APPROX(v[20], x);
|
||||
v.resize(22,y);
|
||||
VERIFY_IS_APPROX(v[21], y);
|
||||
v.push_back(x);
|
||||
VERIFY_IS_APPROX(v[22], x);
|
||||
VERIFY((size_t)&(v[22]) == (size_t)&(v[21]) + sizeof(MatrixType));
|
||||
|
||||
// do a lot of push_back such that the deque gets internally resized
|
||||
// (with memory reallocation)
|
||||
MatrixType* ref = &w[0];
|
||||
for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i)
|
||||
v.push_back(w[i%w.size()]);
|
||||
for(unsigned int i=23; i<v.size(); ++i)
|
||||
{
|
||||
VERIFY(v[i]==w[(i-23)%w.size()]);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename TransformType>
|
||||
void check_stddeque_transform(const TransformType&)
|
||||
{
|
||||
typedef typename TransformType::MatrixType MatrixType;
|
||||
TransformType x(MatrixType::Random()), y(MatrixType::Random());
|
||||
std::deque<TransformType> v(10), w(20, y);
|
||||
v[5] = x;
|
||||
w[6] = v[5];
|
||||
VERIFY_IS_APPROX(w[6], v[5]);
|
||||
v = w;
|
||||
for(int i = 0; i < 20; i++)
|
||||
{
|
||||
VERIFY_IS_APPROX(w[i], v[i]);
|
||||
}
|
||||
|
||||
v.resize(21);
|
||||
v[20] = x;
|
||||
VERIFY_IS_APPROX(v[20], x);
|
||||
v.resize(22,y);
|
||||
VERIFY_IS_APPROX(v[21], y);
|
||||
v.push_back(x);
|
||||
VERIFY_IS_APPROX(v[22], x);
|
||||
|
||||
// do a lot of push_back such that the deque gets internally resized
|
||||
// (with memory reallocation)
|
||||
TransformType* ref = &w[0];
|
||||
for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i)
|
||||
v.push_back(w[i%w.size()]);
|
||||
for(unsigned int i=23; i<v.size(); ++i)
|
||||
{
|
||||
VERIFY(v[i].matrix()==w[(i-23)%w.size()].matrix());
|
||||
}
|
||||
}
|
||||
|
||||
template<typename QuaternionType>
|
||||
void check_stddeque_quaternion(const QuaternionType&)
|
||||
{
|
||||
typedef typename QuaternionType::Coefficients Coefficients;
|
||||
QuaternionType x(Coefficients::Random()), y(Coefficients::Random());
|
||||
std::deque<QuaternionType> v(10), w(20, y);
|
||||
v[5] = x;
|
||||
w[6] = v[5];
|
||||
VERIFY_IS_APPROX(w[6], v[5]);
|
||||
v = w;
|
||||
for(int i = 0; i < 20; i++)
|
||||
{
|
||||
VERIFY_IS_APPROX(w[i], v[i]);
|
||||
}
|
||||
|
||||
v.resize(21);
|
||||
v[20] = x;
|
||||
VERIFY_IS_APPROX(v[20], x);
|
||||
v.resize(22,y);
|
||||
VERIFY_IS_APPROX(v[21], y);
|
||||
v.push_back(x);
|
||||
VERIFY_IS_APPROX(v[22], x);
|
||||
|
||||
// do a lot of push_back such that the deque gets internally resized
|
||||
// (with memory reallocation)
|
||||
QuaternionType* ref = &w[0];
|
||||
for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i)
|
||||
v.push_back(w[i%w.size()]);
|
||||
for(unsigned int i=23; i<v.size(); ++i)
|
||||
{
|
||||
VERIFY(v[i].coeffs()==w[(i-23)%w.size()].coeffs());
|
||||
}
|
||||
}
|
||||
|
||||
void test_stddeque_overload()
|
||||
{
|
||||
// some non vectorizable fixed sizes
|
||||
CALL_SUBTEST_1(check_stddeque_matrix(Vector2f()));
|
||||
CALL_SUBTEST_1(check_stddeque_matrix(Matrix3f()));
|
||||
CALL_SUBTEST_2(check_stddeque_matrix(Matrix3d()));
|
||||
|
||||
// some vectorizable fixed sizes
|
||||
CALL_SUBTEST_1(check_stddeque_matrix(Matrix2f()));
|
||||
CALL_SUBTEST_1(check_stddeque_matrix(Vector4f()));
|
||||
CALL_SUBTEST_1(check_stddeque_matrix(Matrix4f()));
|
||||
CALL_SUBTEST_2(check_stddeque_matrix(Matrix4d()));
|
||||
|
||||
// some dynamic sizes
|
||||
CALL_SUBTEST_3(check_stddeque_matrix(MatrixXd(1,1)));
|
||||
CALL_SUBTEST_3(check_stddeque_matrix(VectorXd(20)));
|
||||
CALL_SUBTEST_3(check_stddeque_matrix(RowVectorXf(20)));
|
||||
CALL_SUBTEST_3(check_stddeque_matrix(MatrixXcf(10,10)));
|
||||
|
||||
// some Transform
|
||||
CALL_SUBTEST_4(check_stddeque_transform(Affine2f())); // does not need the specialization (2+1)^2 = 9
|
||||
CALL_SUBTEST_4(check_stddeque_transform(Affine3f()));
|
||||
CALL_SUBTEST_4(check_stddeque_transform(Affine3d()));
|
||||
|
||||
// some Quaternion
|
||||
CALL_SUBTEST_5(check_stddeque_quaternion(Quaternionf()));
|
||||
CALL_SUBTEST_5(check_stddeque_quaternion(Quaterniond()));
|
||||
}
|
test/stdlist_overload.cpp (new file, 192 lines)
@@ -0,0 +1,192 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#include "main.h"
|
||||
|
||||
#include <Eigen/StdList>
|
||||
#include <Eigen/Geometry>
|
||||
|
||||
EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Vector4f)
|
||||
|
||||
EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Matrix2f)
|
||||
EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Matrix4f)
|
||||
EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Matrix4d)
|
||||
|
||||
EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Affine3f)
|
||||
EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Affine3d)
|
||||
|
||||
EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Quaternionf)
|
||||
EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Quaterniond)
|
||||
|
||||
template <class Container, class Position>
|
||||
typename Container::iterator get(Container & c, Position position)
|
||||
{
|
||||
typename Container::iterator it = c.begin();
|
||||
std::advance(it, position);
|
||||
return it;
|
||||
}
|
||||
|
||||
template <class Container, class Position, class Value>
|
||||
void set(Container & c, Position position, const Value & value)
|
||||
{
|
||||
typename Container::iterator it = c.begin();
|
||||
std::advance(it, position);
|
||||
*it = value;
|
||||
}
|
||||
|
||||
template<typename MatrixType>
|
||||
void check_stdlist_matrix(const MatrixType& m)
|
||||
{
|
||||
typename MatrixType::Index rows = m.rows();
|
||||
typename MatrixType::Index cols = m.cols();
|
||||
MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols);
|
||||
std::list<MatrixType> v(10, MatrixType(rows,cols)), w(20, y);
|
||||
typename std::list<MatrixType>::iterator itv = get(v, 5);
|
||||
typename std::list<MatrixType>::iterator itw = get(w, 6);
|
||||
*itv = x;
|
||||
*itw = *itv;
|
||||
VERIFY_IS_APPROX(*itw, *itv);
|
||||
v = w;
|
||||
itv = v.begin();
|
||||
itw = w.begin();
|
||||
for(int i = 0; i < 20; i++)
|
||||
{
|
||||
VERIFY_IS_APPROX(*itw, *itv);
|
||||
++itv;
|
||||
++itw;
|
||||
}
|
||||
|
||||
v.resize(21);
|
||||
set(v, 20, x);
|
||||
VERIFY_IS_APPROX(*get(v, 20), x);
|
||||
v.resize(22,y);
|
||||
VERIFY_IS_APPROX(*get(v, 21), y);
|
||||
v.push_back(x);
|
||||
VERIFY_IS_APPROX(*get(v, 22), x);
|
||||
|
||||
// do a lot of push_back such that the list gets internally resized
|
||||
// (with memory reallocation)
|
||||
MatrixType* ref = &(*get(w, 0));
|
||||
for(int i=0; i<30 || ((ref==&(*get(w, 0))) && i<300); ++i)
|
||||
v.push_back(*get(w, i%w.size()));
|
||||
for(unsigned int i=23; i<v.size(); ++i)
|
||||
{
|
||||
VERIFY((*get(v, i))==(*get(w, (i-23)%w.size())));
|
||||
}
|
||||
}
|
||||
|
||||
template<typename TransformType>
|
||||
void check_stdlist_transform(const TransformType&)
|
||||
{
|
||||
typedef typename TransformType::MatrixType MatrixType;
|
||||
TransformType x(MatrixType::Random()), y(MatrixType::Random());
|
||||
std::list<TransformType> v(10), w(20, y);
|
||||
typename std::list<TransformType>::iterator itv = get(v, 5);
|
||||
typename std::list<TransformType>::iterator itw = get(w, 6);
|
||||
*itv = x;
|
||||
*itw = *itv;
|
||||
VERIFY_IS_APPROX(*itw, *itv);
|
||||
v = w;
|
||||
itv = v.begin();
|
||||
itw = w.begin();
|
||||
for(int i = 0; i < 20; i++)
|
||||
{
|
||||
VERIFY_IS_APPROX(*itw, *itv);
|
||||
++itv;
|
||||
++itw;
|
||||
}
|
||||
|
||||
v.resize(21);
|
||||
set(v, 20, x);
|
||||
VERIFY_IS_APPROX(*get(v, 20), x);
|
||||
v.resize(22,y);
|
||||
VERIFY_IS_APPROX(*get(v, 21), y);
|
||||
v.push_back(x);
|
||||
VERIFY_IS_APPROX(*get(v, 22), x);
|
||||
|
||||
// do a lot of push_back such that the list gets internally resized
|
||||
// (with memory reallocation)
|
||||
TransformType* ref = &(*get(w, 0));
|
||||
for(int i=0; i<30 || ((ref==&(*get(w, 0))) && i<300); ++i)
|
||||
v.push_back(*get(w, i%w.size()));
|
||||
for(unsigned int i=23; i<v.size(); ++i)
|
||||
{
|
||||
VERIFY(get(v, i)->matrix()==get(w, (i-23)%w.size())->matrix());
|
||||
}
|
||||
}
|
||||
|
||||
template<typename QuaternionType>
|
||||
void check_stdlist_quaternion(const QuaternionType&)
|
||||
{
|
||||
typedef typename QuaternionType::Coefficients Coefficients;
|
||||
QuaternionType x(Coefficients::Random()), y(Coefficients::Random());
|
||||
std::list<QuaternionType> v(10), w(20, y);
|
||||
typename std::list<QuaternionType>::iterator itv = get(v, 5);
|
||||
typename std::list<QuaternionType>::iterator itw = get(w, 6);
|
||||
*itv = x;
|
||||
*itw = *itv;
|
||||
VERIFY_IS_APPROX(*itw, *itv);
|
||||
v = w;
|
||||
itv = v.begin();
|
||||
itw = w.begin();
|
||||
for(int i = 0; i < 20; i++)
|
||||
{
|
||||
VERIFY_IS_APPROX(*itw, *itv);
|
||||
++itv;
|
||||
++itw;
|
||||
}
|
||||
|
||||
v.resize(21);
|
||||
set(v, 20, x);
|
||||
VERIFY_IS_APPROX(*get(v, 20), x);
|
||||
v.resize(22,y);
|
||||
VERIFY_IS_APPROX(*get(v, 21), y);
|
||||
v.push_back(x);
|
||||
VERIFY_IS_APPROX(*get(v, 22), x);
|
||||
|
||||
// do a lot of push_back such that the list gets internally resized
|
||||
// (with memory reallocation)
|
||||
QuaternionType* ref = &(*get(w, 0));
|
||||
for(int i=0; i<30 || ((ref==&(*get(w, 0))) && i<300); ++i)
|
||||
v.push_back(*get(w, i%w.size()));
|
||||
for(unsigned int i=23; i<v.size(); ++i)
|
||||
{
|
||||
VERIFY(get(v, i)->coeffs()==get(w, (i-23)%w.size())->coeffs());
|
||||
}
|
||||
}
|
||||
|
||||
void test_stdlist_overload()
|
||||
{
|
||||
// some non vectorizable fixed sizes
|
||||
CALL_SUBTEST_1(check_stdlist_matrix(Vector2f()));
|
||||
CALL_SUBTEST_1(check_stdlist_matrix(Matrix3f()));
|
||||
CALL_SUBTEST_2(check_stdlist_matrix(Matrix3d()));
|
||||
|
||||
// some vectorizable fixed sizes
|
||||
CALL_SUBTEST_1(check_stdlist_matrix(Matrix2f()));
|
||||
CALL_SUBTEST_1(check_stdlist_matrix(Vector4f()));
|
||||
CALL_SUBTEST_1(check_stdlist_matrix(Matrix4f()));
|
||||
CALL_SUBTEST_2(check_stdlist_matrix(Matrix4d()));
|
||||
|
||||
// some dynamic sizes
|
||||
CALL_SUBTEST_3(check_stdlist_matrix(MatrixXd(1,1)));
|
||||
CALL_SUBTEST_3(check_stdlist_matrix(VectorXd(20)));
|
||||
CALL_SUBTEST_3(check_stdlist_matrix(RowVectorXf(20)));
|
||||
CALL_SUBTEST_3(check_stdlist_matrix(MatrixXcf(10,10)));
|
||||
|
||||
// some Transform
|
||||
CALL_SUBTEST_4(check_stdlist_transform(Affine2f())); // does not need the specialization (2+1)^2 = 9
|
||||
CALL_SUBTEST_4(check_stdlist_transform(Affine3f()));
|
||||
CALL_SUBTEST_4(check_stdlist_transform(Affine3d()));
|
||||
|
||||
// some Quaternion
|
||||
CALL_SUBTEST_5(check_stdlist_quaternion(Quaternionf()));
|
||||
CALL_SUBTEST_5(check_stdlist_quaternion(Quaterniond()));
|
||||
}
|
@ -165,10 +165,10 @@ template <typename T> class array<T, 0> {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::size_t size() { return 0; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE array() { }
|
||||
EIGEN_STRONG_INLINE array() : dummy() { }
|
||||
|
||||
#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
|
||||
EIGEN_DEVICE_FUNC array(std::initializer_list<T> l) {
|
||||
EIGEN_DEVICE_FUNC array(std::initializer_list<T> l) : dummy() {
|
||||
eigen_assert(l.size() == 0);
|
||||
}
|
||||
#endif
|
||||
|
@ -344,7 +344,7 @@ class TensorContractionSubMapper {
|
||||
enum {
|
||||
// We can use direct offsets iff the parent mapper supports then and we can compute the strides.
|
||||
// TODO: we should also enable direct offsets for the Rhs case.
|
||||
UseDirectOffsets = (side == Lhs) && inner_dim_contiguous && ParentMapper::DirectOffsets
|
||||
UseDirectOffsets = ParentMapper::DirectOffsets && (side == Lhs) && inner_dim_contiguous && (array_size<contract_t>::value > 0)
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset)
|
||||
|
@ -86,6 +86,26 @@ struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 2, 1> {
|
||||
const TensorEvaluator& m_impl;
|
||||
};
|
||||
|
||||
template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
|
||||
struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 4, 1> {
|
||||
PacketConverter(const TensorEvaluator& impl)
|
||||
: m_impl(impl) {}
|
||||
|
||||
template<int LoadMode, typename Index>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
|
||||
const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
|
||||
|
||||
SrcPacket src1 = m_impl.template packet<LoadMode>(index);
|
||||
SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
|
||||
SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize);
|
||||
SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize);
|
||||
TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4);
|
||||
return result;
|
||||
}
|
||||
|
||||
private:
|
||||
const TensorEvaluator& m_impl;
|
||||
};
|
||||
|
||||
template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
|
||||
struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> {
|
||||
|
@ -109,10 +109,12 @@ class CudaStreamDevice : public StreamInterface {
|
||||
struct GpuDevice {
|
||||
// The StreamInterface is not owned: the caller is
|
||||
// responsible for its initialization and eventual destruction.
|
||||
explicit GpuDevice(const StreamInterface* stream) : stream_(stream) {
|
||||
explicit GpuDevice(const StreamInterface* stream) : stream_(stream), max_blocks_(INT_MAX) {
|
||||
eigen_assert(stream);
|
||||
}
|
||||
explicit GpuDevice(const StreamInterface* stream, int num_blocks) : stream_(stream), max_blocks_(num_blocks) {
|
||||
eigen_assert(stream);
|
||||
}
|
||||
|
||||
// TODO(bsteiner): This is an internal API, we should not expose it.
|
||||
EIGEN_STRONG_INLINE const cudaStream_t& stream() const {
|
||||
return stream_->stream();
|
||||
@ -246,6 +248,10 @@ struct GpuDevice {
|
||||
#endif
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxBlocks() const {
|
||||
return max_blocks_;
|
||||
}
|
||||
|
||||
// This function checks if the CUDA runtime recorded an error for the
|
||||
// underlying stream device.
|
||||
inline bool ok() const {
|
||||
@ -259,7 +265,7 @@ struct GpuDevice {
|
||||
|
||||
private:
|
||||
const StreamInterface* stream_;
|
||||
|
||||
int max_blocks_;
|
||||
};
|
||||
|
||||
#ifndef __CUDA_ARCH__
|
||||
|
@ -220,7 +220,7 @@ EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, false>::run(
|
||||
if (needs_assign)
|
||||
{
|
||||
const int block_size = device.maxCudaThreadsPerBlock();
|
||||
const int max_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size;
|
||||
const int max_blocks = numext::maxi<int>(device.maxBlocks(), device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size);
|
||||
const Index size = array_prod(evaluator.dimensions());
|
||||
// Create a least one block to ensure we won't crash if we're called with tensors of size 0.
|
||||
const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, (size + block_size - 1) / block_size), 1);
|
||||
@ -239,7 +239,7 @@ EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, true>::run(c
|
||||
if (needs_assign)
|
||||
{
|
||||
const int block_size = device.maxCudaThreadsPerBlock();
|
||||
const int max_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size;
|
||||
const int max_blocks = numext::maxi<int>(device.maxBlocks(), device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size);
|
||||
const Index size = array_prod(evaluator.dimensions());
|
||||
// Create a least one block to ensure we won't crash if we're called with tensors of size 0.
|
||||
const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, (size + block_size - 1) / block_size), 1);
|
||||
|
@ -33,18 +33,19 @@ struct TensorUInt128
|
||||
HIGH high;
|
||||
LOW low;
|
||||
|
||||
template<typename OTHER_HIGH, typename OTHER_LOW>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
TensorUInt128(int32_t x) : high(0), low(x) {
|
||||
TensorUInt128(const TensorUInt128<OTHER_HIGH, OTHER_LOW>& other) : high(other.high), low(other.low) {
|
||||
EIGEN_STATIC_ASSERT(sizeof(OTHER_HIGH) <= sizeof(HIGH), "high too wide");
|
||||
EIGEN_STATIC_ASSERT(sizeof(OTHER_LOW) <= sizeof(LOW), "low too wide");
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
explicit TensorUInt128(const T& x) : high(0), low(x) {
|
||||
eigen_assert(x >= 0);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
TensorUInt128(uint32_t x) : high(0), low(x) { }
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
TensorUInt128(int64_t x) : high(0), low(x) {
|
||||
eigen_assert(x >= 0);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
TensorUInt128(uint64_t x) : high(0), low(x) { }
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
TensorUInt128(uint64_t y, uint64_t x) : high(y), low(x) { }
|
||||
|
||||
|
@ -158,7 +158,7 @@ if(CUDA_FOUND)
|
||||
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
|
||||
set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE)
|
||||
endif()
|
||||
set(CUDA_NVCC_FLAGS "-std=c++11 --relaxed-constexpr -arch compute_30")
|
||||
set(CUDA_NVCC_FLAGS "-std=c++11 --relaxed-constexpr -arch compute_30 -Xcudafe \"--display_error_number\"")
|
||||
cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")
|
||||
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
|
||||
|
||||
|
@ -22,16 +22,16 @@ using Eigen::Tensor;
|
||||
typedef Tensor<float, 1>::DimensionPair DimPair;
|
||||
|
||||
template<int DataLayout>
|
||||
static void test_cuda_contraction(int m_size, int k_size, int n_size)
|
||||
void test_cuda_contraction(int m_size, int k_size, int n_size)
|
||||
{
|
||||
std::cout << "Calling with (" << m_size << "," << k_size << "," << n_size << ")" << std::endl;
|
||||
std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl;
|
||||
// with these dimensions, the output has 300 * 140 elements, which is
|
||||
// more than 30 * 1024, which is the number of threads in blocks on
|
||||
// a 15 SM GK110 GPU
|
||||
Tensor<float, 2, DataLayout> t_left(Eigen::array<int, 2>(m_size, k_size));
|
||||
Tensor<float, 2, DataLayout> t_right(Eigen::array<int, 2>(k_size, n_size));
|
||||
Tensor<float, 2, DataLayout> t_result(Eigen::array<int, 2>(m_size, n_size));
|
||||
Tensor<float, 2, DataLayout> t_result_gpu(Eigen::array<int, 2>(m_size, n_size));
|
||||
Tensor<float, 2, DataLayout> t_left(m_size, k_size);
|
||||
Tensor<float, 2, DataLayout> t_right(k_size, n_size);
|
||||
Tensor<float, 2, DataLayout> t_result(m_size, n_size);
|
||||
Tensor<float, 2, DataLayout> t_result_gpu(m_size, n_size);
|
||||
Eigen::array<DimPair, 1> dims(DimPair(1, 0));
|
||||
|
||||
t_left.setRandom();
|
||||
@ -84,41 +84,69 @@ static void test_cuda_contraction(int m_size, int k_size, int n_size)
|
||||
cudaFree((void*)d_t_result);
|
||||
}
|
||||
|
||||
template<int DataLayout>
|
||||
void test_cuda_contraction_m() {
|
||||
for (int k = 32; k < 256; k++) {
|
||||
test_cuda_contraction<ColMajor>(k, 128, 128);
|
||||
test_cuda_contraction<RowMajor>(k, 128, 128);
|
||||
}
|
||||
}
|
||||
|
||||
template<int DataLayout>
|
||||
void test_cuda_contraction_k() {
|
||||
for (int k = 32; k < 256; k++) {
|
||||
test_cuda_contraction<ColMajor>(128, k, 128);
|
||||
test_cuda_contraction<RowMajor>(128, k, 128);
|
||||
}
|
||||
}
|
||||
|
||||
template<int DataLayout>
|
||||
void test_cuda_contraction_n() {
|
||||
for (int k = 32; k < 256; k++) {
|
||||
test_cuda_contraction<ColMajor>(128, 128, k);
|
||||
test_cuda_contraction<RowMajor>(128, 128, k);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<int DataLayout>
|
||||
void test_cuda_contraction_sizes() {
|
||||
int m_sizes[] = { 31, 39, 63, 64, 65,
|
||||
127, 129, 255, 257 , 511,
|
||||
512, 513, 1023, 1024, 1025};
|
||||
|
||||
int n_sizes[] = { 31, 39, 63, 64, 65,
|
||||
127, 129, 255, 257, 511,
|
||||
512, 513, 1023, 1024, 1025};
|
||||
|
||||
int k_sizes[] = { 31, 39, 63, 64, 65,
|
||||
95, 96, 127, 129, 255,
|
||||
257, 511, 512, 513, 1023,
|
||||
1024, 1025};
|
||||
|
||||
for (int i = 0; i < 15; i++) {
|
||||
for (int j = 0; j < 15; j++) {
|
||||
for (int k = 0; k < 17; k++) {
|
||||
test_cuda_contraction<DataLayout>(m_sizes[i], n_sizes[j], k_sizes[k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void test_cxx11_tensor_cuda()
|
||||
{
|
||||
std::cout << "Calling contraction tests" << std::endl;
|
||||
CALL_SUBTEST(test_cuda_contraction<ColMajor>(128, 128, 128));
|
||||
CALL_SUBTEST(test_cuda_contraction<RowMajor>(128, 128, 128));
|
||||
for (int k = 32; k < 256; k++) {
|
||||
CALL_SUBTEST(test_cuda_contraction<ColMajor>(128, k, 128));
|
||||
CALL_SUBTEST(test_cuda_contraction<RowMajor>(128, k, 128));
|
||||
}
|
||||
for (int k = 32; k < 256; k++) {
|
||||
CALL_SUBTEST(test_cuda_contraction<ColMajor>(128, 128, k));
|
||||
CALL_SUBTEST(test_cuda_contraction<RowMajor>(128, 128, k));
|
||||
}
|
||||
for (int k = 32; k < 256; k++) {
|
||||
CALL_SUBTEST(test_cuda_contraction<ColMajor>(k, 128, 128));
|
||||
CALL_SUBTEST(test_cuda_contraction<RowMajor>(k, 128, 128));
|
||||
}
|
||||
CALL_SUBTEST_1(test_cuda_contraction<ColMajor>(128, 128, 128));
|
||||
CALL_SUBTEST_1(test_cuda_contraction<RowMajor>(128, 128, 128));
|
||||
|
||||
int m_sizes[] = {31, 39, 63, 64, 65,
|
||||
127, 129, 255, 257, 511,
|
||||
512, 513, 1023, 1024, 1025 };
|
||||
int n_sizes[] = {31, 39, 63, 64, 65,
|
||||
127, 129, 255, 257, 511,
|
||||
512, 513, 1023, 1024, 1025 };
|
||||
CALL_SUBTEST_2(test_cuda_contraction_m<ColMajor>());
|
||||
CALL_SUBTEST_3(test_cuda_contraction_m<RowMajor>());
|
||||
|
||||
int k_sizes[] = { 31, 39, 63, 64, 65,
|
||||
95, 96, 127, 129, 255,
|
||||
257, 511, 512, 513, 1023,
|
||||
1024, 1025};
|
||||
CALL_SUBTEST_4(test_cuda_contraction_k<ColMajor>());
|
||||
CALL_SUBTEST_5(test_cuda_contraction_k<RowMajor>());
|
||||
|
||||
for (int i = 0; i <15; i++)
|
||||
for (int j = 0; j < 15; j++)
|
||||
for (int k = 0; k < 17; k++) {
|
||||
CALL_SUBTEST(test_cuda_contraction<ColMajor>(m_sizes[i], n_sizes[j], k_sizes[k]));
|
||||
CALL_SUBTEST(test_cuda_contraction<RowMajor>(m_sizes[i], n_sizes[j], k_sizes[k]));
|
||||
}
|
||||
CALL_SUBTEST_6(test_cuda_contraction_n<ColMajor>());
|
||||
CALL_SUBTEST_7(test_cuda_contraction_n<RowMajor>());
|
||||
|
||||
CALL_SUBTEST_8(test_cuda_contraction_sizes<ColMajor>());
|
||||
CALL_SUBTEST_9(test_cuda_contraction_sizes<RowMajor>());
|
||||
}
|
||||
|
@ -109,19 +109,19 @@ struct GPUContext {
|
||||
|
||||
// The actual expression to evaluate
|
||||
template <typename Context>
|
||||
static void test_contextual_eval(Context* context)
|
||||
void test_contextual_eval(Context* context)
|
||||
{
|
||||
context->out().device(context->device()) = context->in1() + context->in2() * 3.14f + context->in1().constant(2.718f);
|
||||
}
|
||||
|
||||
template <typename Context>
|
||||
static void test_forced_contextual_eval(Context* context)
|
||||
void test_forced_contextual_eval(Context* context)
|
||||
{
|
||||
context->out().device(context->device()) = (context->in1() + context->in2()).eval() * 3.14f + context->in1().constant(2.718f);
|
||||
}
|
||||
|
||||
template <typename Context>
|
||||
static void test_compound_assignment(Context* context)
|
||||
void test_compound_assignment(Context* context)
|
||||
{
|
||||
context->out().device(context->device()) = context->in1().constant(2.718f);
|
||||
context->out().device(context->device()) += context->in1() + context->in2() * 3.14f;
|
||||
@ -129,7 +129,7 @@ static void test_compound_assignment(Context* context)
|
||||
|
||||
|
||||
template <typename Context>
|
||||
static void test_contraction(Context* context)
|
||||
void test_contraction(Context* context)
|
||||
{
|
||||
Eigen::array<std::pair<int, int>, 2> dims;
|
||||
dims[0] = std::make_pair(1, 1);
|
||||
@ -145,7 +145,7 @@ static void test_contraction(Context* context)
|
||||
|
||||
|
||||
template <typename Context>
|
||||
static void test_1d_convolution(Context* context)
|
||||
void test_1d_convolution(Context* context)
|
||||
{
|
||||
Eigen::DSizes<int, 3> indices(0,0,0);
|
||||
Eigen::DSizes<int, 3> sizes(40,49,70);
|
||||
@ -155,7 +155,7 @@ static void test_1d_convolution(Context* context)
|
||||
}
|
||||
|
||||
template <typename Context>
|
||||
static void test_2d_convolution(Context* context)
|
||||
void test_2d_convolution(Context* context)
|
||||
{
|
||||
Eigen::DSizes<int, 3> indices(0,0,0);
|
||||
Eigen::DSizes<int, 3> sizes(40,49,69);
|
||||
@ -165,7 +165,7 @@ static void test_2d_convolution(Context* context)
|
||||
}
|
||||
|
||||
template <typename Context>
|
||||
static void test_3d_convolution(Context* context)
|
||||
void test_3d_convolution(Context* context)
|
||||
{
|
||||
Eigen::DSizes<int, 3> indices(0,0,0);
|
||||
Eigen::DSizes<int, 3> sizes(39,49,69);
|
||||
@ -175,7 +175,7 @@ static void test_3d_convolution(Context* context)
|
||||
}
|
||||
|
||||
|
||||
static void test_cpu() {
|
||||
void test_cpu() {
|
||||
Eigen::Tensor<float, 3> in1(40,50,70);
|
||||
Eigen::Tensor<float, 3> in2(40,50,70);
|
||||
Eigen::Tensor<float, 3> out(40,50,70);
|
||||
@ -267,7 +267,7 @@ static void test_cpu() {
|
||||
}
|
||||
}
|
||||
|
||||
static void test_gpu() {
|
||||
void test_gpu() {
|
||||
Eigen::Tensor<float, 3> in1(40,50,70);
|
||||
Eigen::Tensor<float, 3> in2(40,50,70);
|
||||
Eigen::Tensor<float, 3> out(40,50,70);
|
||||
|
@ -127,7 +127,7 @@ void test_misc2() {
|
||||
|
||||
TensorUInt128<uint64_t, uint64_t> result = (TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider) - TensorUInt128<static_val<1>, static_val<0> >(1, 0) + TensorUInt128<static_val<0>, static_val<1> >(1));
|
||||
uint64_t actual = static_cast<uint64_t>(result);
|
||||
VERIFY_EQUAL(actual, expected);
|
||||
VERIFY_IS_EQUAL(actual, expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
|