Rasmus Munk Larsen 2016-02-04 14:32:19 -08:00
commit 093f2b3c01
25 changed files with 607 additions and 165 deletions

View File

@ -76,32 +76,6 @@ public:
#endif
};
// template<typename Lhs, typename Rhs> struct product_tag
// {
// private:
//
// typedef typename remove_all<Lhs>::type _Lhs;
// typedef typename remove_all<Rhs>::type _Rhs;
// enum {
// Rows = _Lhs::RowsAtCompileTime,
// Cols = _Rhs::ColsAtCompileTime,
// Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime, _Rhs::RowsAtCompileTime)
// };
//
// enum {
// rows_select = Rows==1 ? int(Rows) : int(Large),
// cols_select = Cols==1 ? int(Cols) : int(Large),
// depth_select = Depth==1 ? int(Depth) : int(Large)
// };
// typedef product_type_selector<rows_select, cols_select, depth_select> selector;
//
// public:
// enum {
// ret = selector::ret
// };
//
// };
/* The following allows selecting the kind of product at compile time
* based on the three dimensions of the product.
* This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
@ -125,8 +99,8 @@ template<> struct product_type_selector<Small,Small,Large> { enum
template<> struct product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Small,Small> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Small,Large,Small> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Small,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Small,Large,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; };
} // end namespace internal
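For context, a rough sketch (not part of this commit) of what the remapped entries mean in user code; the threshold separating Small from Large is assumed to be Eigen's default cache-friendly product threshold:
// Illustration only: a product whose inner (depth) dimension is Small at
// compile time no longer dispatches to the heavyweight GEMM path.
Eigen::Matrix<float, 64, 2> lhs;                 // Rows: Large, Depth: Small
Eigen::Matrix<float, 2, 2>  rhs;                 // Cols: Small
Eigen::Matrix<float, 64, 2> res = lhs * rhs;     // <Large,Small,Small> -> CoeffBasedProductMode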

View File

@ -134,6 +134,11 @@ pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
return static_cast<TgtPacket>(a);
}
template <typename SrcPacket, typename TgtPacket>
EIGEN_DEVICE_FUNC inline TgtPacket
pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) {
return static_cast<TgtPacket>(a);
}
/** \internal \returns a + b (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet

View File

@ -134,7 +134,24 @@ struct lgamma_impl<double> {
* Implementation of digamma (psi) *
****************************************************************************/
#ifdef EIGEN_HAS_C99_MATH
template <typename Scalar>
struct digamma_retval {
typedef Scalar type;
};
#ifndef EIGEN_HAS_C99_MATH
template <typename Scalar>
struct digamma_impl {
EIGEN_DEVICE_FUNC
static Scalar run(Scalar x) {
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
THIS_TYPE_IS_NOT_SUPPORTED);
return Scalar(0);
}
};
#else
/*
*
@ -202,14 +219,6 @@ struct digamma_impl_maybe_poly<double> {
}
};
#endif // EIGEN_HAS_C99_MATH
template <typename Scalar>
struct digamma_retval {
typedef Scalar type;
};
#ifdef EIGEN_HAS_C99_MATH
template <typename Scalar>
struct digamma_impl {
EIGEN_DEVICE_FUNC
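As a side note, the entry point these implementation structs ultimately serve is the array-wise digamma; a minimal usage sketch, assuming a toolchain where EIGEN_HAS_C99_MATH is defined and the ArrayBase::digamma() method of this development branch:
#include <Eigen/Core>
// Element-wise digamma (psi); when C99 math support is missing, the stub above
// turns such a call into a clear compile-time error instead of a link failure.
Eigen::ArrayXd x = Eigen::ArrayXd::LinSpaced(4, 1.0, 4.0);
Eigen::ArrayXd d = x.digamma();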

View File

@ -178,7 +178,7 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n
// We also include a register-level block of the result (mx x nr).
// (In an ideal world only the lhs panel would stay in L1)
// Moreover, kc has to be a multiple of 8 to be compatible with loop peeling, leading to a maximum blocking size of:
const Index max_kc = ((l1-k_sub)/k_div) & (~(k_peeling-1));
const Index max_kc = std::max<Index>(((l1-k_sub)/k_div) & (~(k_peeling-1)),1);
const Index old_k = k;
if(k>max_kc)
{
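A hedged numeric illustration of why the clamp matters; the values below are invented for the example, not taken from any real cache configuration:
// Without the clamp, a tight L1 budget can drive max_kc to zero and propagate
// an empty blocking size into the panel loops below, e.g.:
//   l1 = 8*1024, k_sub = 8*1024, k_div = 1, k_peeling = 8
//   ((l1 - k_sub) / k_div) & ~(k_peeling - 1)                   == 0
//   std::max<Index>(((l1-k_sub)/k_div) & ~(k_peeling-1), 1)     == 1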

View File

@ -20,6 +20,7 @@
#pragma warning( push )
#endif
#pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4717 4800)
#elif defined __INTEL_COMPILER
// 2196 - routine is both "inline" and "noinline" ("noinline" assumed)
// ICC 12 generates this warning even without any inline keyword, when defining class methods 'inline' i.e. inside of class body
@ -32,6 +33,7 @@
#pragma warning push
#endif
#pragma warning disable 2196 279 1684 2259
#elif defined __clang__
// -Wconstant-logical-operand - warning: use of logical && with constant operand; switch to bitwise & or remove constant
// this is really a stupid warning as it warns on compile-time expressions involving enums
@ -39,6 +41,17 @@
#pragma clang diagnostic push
#endif
#pragma clang diagnostic ignored "-Wconstant-logical-operand"
#elif defined __NVCC__
// Disable the "statement is unreachable" message
#pragma diag_suppress code_is_unreachable
// Disable the "dynamic initialization in unreachable code" message
#pragma diag_suppress initialization_not_reachable
// Disable the "calling a __host__ function from a __host__ __device__ function is not allowed" messages (yes, there are 4 of them)
#pragma diag_suppress 2651
#pragma diag_suppress 2653
#pragma diag_suppress 2668
#pragma diag_suppress 2670
#endif
#endif // not EIGEN_WARNINGS_DISABLED

View File

@ -94,10 +94,6 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
template<typename Decomposition, typename Rhstype> class Solve;
template<typename XprType> class Inverse;
namespace internal {
template<typename Lhs, typename Rhs> struct product_tag;
}
template<typename Lhs, typename Rhs, int Option = DefaultProduct> class Product;
template<typename Derived> class DiagonalBase;

View File

@ -8,6 +8,14 @@
#pragma warning pop
#elif defined __clang__
#pragma clang diagnostic pop
#elif defined __NVCC__
// Don't re-enable the diagnostic messages, as it turns out these messages need
// to be disabled at the point of the template instantiation (i.e. the user code)
// otherwise they'll be triggered by nvcc.
// #pragma diag_default code_is_unreachable
// #pragma diag_default initialization_not_reachable
// #pragma diag_default 2651
// #pragma diag_default 2653
#endif
#endif

View File

@ -73,8 +73,15 @@ public:
Index m_outerStart;
const internal::variable_if_dynamic<Index, OuterSize> m_outerSize;
public:
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
protected:
// Disable assignment with a clear error message.
// Note that simply removing operator= yields compilation errors with ICC+MSVC
template<typename T>
BlockImpl& operator=(const T&)
{
EIGEN_STATIC_ASSERT(sizeof(T)==0, THIS_SPARSE_BLOCK_SUBEXPRESSION_IS_READ_ONLY);
return *this;
}
};
@ -424,8 +431,6 @@ public:
friend struct internal::unary_evaluator<Block<XprType,BlockRows,BlockCols,InnerPanel>, internal::IteratorBased, Scalar >;
Index nonZeros() const { return Dynamic; }
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
typename internal::ref_selector<XprType>::non_const_type m_matrix;
const internal::variable_if_dynamic<Index, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
@ -433,6 +438,16 @@ public:
const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_blockRows;
const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_blockCols;
protected:
// Disable assignment with a clear error message.
// Note that simply removing operator= yields compilation errors with ICC+MSVC
template<typename T>
BlockImpl& operator=(const T&)
{
EIGEN_STATIC_ASSERT(sizeof(T)==0, THIS_SPARSE_BLOCK_SUBEXPRESSION_IS_READ_ONLY);
return *this;
}
};
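A hedged sketch (not part of the commit) of the error this template is meant to surface when user code writes to a read-only sparse block:
Eigen::SparseMatrix<double> A(10, 10), B(4, 4);
// Generic sparse blocks are read-only; with the operator= above, the assignment
// below now fails with THIS_SPARSE_BLOCK_SUBEXPRESSION_IS_READ_ONLY rather than
// a cryptic error deep inside the assignment machinery.
// A.block(0, 0, 4, 4) = B;   // does not compile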
namespace internal {

View File

@ -13,32 +13,24 @@
#include "details.h"
// Define the explicit instantiation (e.g. necessary for the Intel compiler)
#if EIGEN_COMP_GNUC || EIGEN_COMP_ICC
#define EIGEN_EXPLICIT_STL_DEQUE_INSTANTIATION(...) template class std::deque<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> >;
#else
#define EIGEN_EXPLICIT_STL_DEQUE_INSTANTIATION(...)
#endif
/**
* This section contains a convenience MACRO which allows an easy specialization of
* std::deque such that for data types with alignment issues the correct allocator
* is used automatically.
*/
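For reference, a hedged usage sketch of how the macro is meant to be invoked from user code, mirroring the new stddeque_overload test added further down:
#include <deque>
#include <Eigen/StdDeque>
// Must appear at namespace scope, before the first use of std::deque<Vector4f>.
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Eigen::Vector4f)
std::deque<Eigen::Vector4f> pts(3, Eigen::Vector4f::Zero());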
#define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...) \
EIGEN_EXPLICIT_STL_DEQUE_INSTANTIATION(__VA_ARGS__) \
namespace std \
{ \
template<typename _Ay> \
class deque<__VA_ARGS__, _Ay> \
template<> \
class deque<__VA_ARGS__, std::allocator<__VA_ARGS__> > \
: public deque<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > \
{ \
typedef deque<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > deque_base; \
public: \
typedef __VA_ARGS__ value_type; \
typedef typename deque_base::allocator_type allocator_type; \
typedef typename deque_base::size_type size_type; \
typedef typename deque_base::iterator iterator; \
typedef deque_base::allocator_type allocator_type; \
typedef deque_base::size_type size_type; \
typedef deque_base::iterator iterator; \
explicit deque(const allocator_type& a = allocator_type()) : deque_base(a) {} \
template<typename InputIterator> \
deque(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) : deque_base(first, last, a) {} \

View File

@ -12,32 +12,24 @@
#include "details.h"
// Define the explicit instantiation (e.g. necessary for the Intel compiler)
#if EIGEN_COMP_GNUC || EIGEN_COMP_ICC
#define EIGEN_EXPLICIT_STL_LIST_INSTANTIATION(...) template class std::list<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> >;
#else
#define EIGEN_EXPLICIT_STL_LIST_INSTANTIATION(...)
#endif
/**
* This section contains a convenience MACRO which allows an easy specialization of
* std::list such that for data types with alignment issues the correct allocator
* is used automatically.
*/
#define EIGEN_DEFINE_STL_LIST_SPECIALIZATION(...) \
EIGEN_EXPLICIT_STL_LIST_INSTANTIATION(__VA_ARGS__) \
namespace std \
{ \
template<typename _Ay> \
class list<__VA_ARGS__, _Ay> \
template<> \
class list<__VA_ARGS__, std::allocator<__VA_ARGS__> > \
: public list<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > \
{ \
typedef list<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > list_base; \
public: \
typedef __VA_ARGS__ value_type; \
typedef typename list_base::allocator_type allocator_type; \
typedef typename list_base::size_type size_type; \
typedef typename list_base::iterator iterator; \
typedef list_base::allocator_type allocator_type; \
typedef list_base::size_type size_type; \
typedef list_base::iterator iterator; \
explicit list(const allocator_type& a = allocator_type()) : list_base(a) {} \
template<typename InputIterator> \
list(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) : list_base(first, last, a) {} \

View File

@ -21,7 +21,7 @@ i.e either row major or column major. The default is column major. Most arithmet
<td> Resize/Reserve</td>
<td>
\code
sm1.resize(m,n); //Change sm1 to a m x n matrix.
sm1.resize(m,n); // Change sm1 to an m x n matrix.
sm1.reserve(nnz); // Allocate room for nnz nonzeros elements.
\endcode
</td>
@ -151,10 +151,10 @@ It is easy to perform arithmetic operations on sparse matrices provided that the
<td> Permutation </td>
<td>
\code
perm.indices(); // Reference to the vector of indices
perm.indices(); // Reference to the vector of indices
sm1.twistedBy(perm); // Permute rows and columns
sm2 = sm1 * perm; //Permute the columns
sm2 = perm * sm1; // Permute the rows
sm2 = sm1 * perm; // Permute the columns
sm2 = perm * sm1; // Permute the rows
\endcode
</td>
<td>
@ -181,9 +181,9 @@ sm2 = perm * sm1; // Permute the columns
\section sparseotherops Other supported operations
<table class="manual">
<tr><th>Operations</th> <th> Code </th> <th> Notes</th> </tr>
<tr><th style="min-width:initial"> Code </th> <th> Notes</th> </tr>
<tr><td colspan="2">Sub-matrices</td></tr>
<tr>
<td>Sub-matrices</td>
<td>
\code
sm1.block(startRow, startCol, rows, cols);
@ -193,25 +193,31 @@ sm2 = perm * sm1; // Permute the columns
sm1.bottomLeftCorner( rows, cols);
sm1.bottomRightCorner( rows, cols);
\endcode
</td> <td> </td>
</td><td>
Unlike for dense matrices, here <strong>all these methods are read-only</strong>.\n
See \ref TutorialSparse_SubMatrices and below for read-write sub-matrices.
</td>
</tr>
<tr>
<td> Range </td>
<tr class="alt"><td colspan="2"> Range </td></tr>
<tr class="alt">
<td>
\code
sm1.innerVector(outer);
sm1.innerVectors(start, size);
sm1.leftCols(size);
sm2.rightCols(size);
sm1.middleRows(start, numRows);
sm1.middleCols(start, numCols);
sm1.col(j);
sm1.innerVector(outer); // RW
sm1.innerVectors(start, size); // RW
sm1.leftCols(size); // RW
sm2.rightCols(size); // RO because sm2 is row-major
sm1.middleRows(start, numRows); // RO because sm1 is column-major
sm1.middleCols(start, numCols); // RW
sm1.col(j); // RW
\endcode
</td>
<td>An inner vector is either a row (for row-major) or a column (for column-major). As stated earlier, the evaluation can be done in a matrix with different storage order.</td>
<td>
An inner vector is either a row (for row-major) or a column (for column-major).\n
As stated earlier, for a read-write sub-matrix (RW), the evaluation can be done in a matrix with different storage order.
</td>
</tr>
<tr><td colspan="2"> Triangular and selfadjoint views</td></tr>
<tr>
<td> Triangular and selfadjoint views</td>
<td>
\code
sm2 = sm1.triangularview<Lower>();
@ -222,26 +228,30 @@ sm2 = perm * sm1; // Permute the columns
\code
\endcode </td>
</tr>
<tr>
<td>Triangular solve </td>
<tr class="alt"><td colspan="2">Triangular solve </td></tr>
<tr class="alt">
<td>
\code
dv2 = sm1.triangularView<Upper>().solve(dv1);
dv2 = sm1.topLeftCorner(size, size).triangularView<Lower>().solve(dv1);
dv2 = sm1.topLeftCorner(size, size)
.triangularView<Lower>().solve(dv1);
\endcode
</td>
<td> For a general sparse solve, use any suitable module described at \ref TopicSparseSystems </td>
</tr>
<tr><td colspan="2"> Low-level API</td></tr>
<tr>
<td> Low-level API</td>
<td>
\code
sm1.valuePtr(); // Pointer to the values
sm1.innerIndexPtr(); // Pointer to the indices.
sm1.outerIndexPtr(); //Pointer to the beginning of each inner vector
sm1.valuePtr(); // Pointer to the values
sm1.innerIndexPtr(); // Pointer to the indices.
sm1.outerIndexPtr(); // Pointer to the beginning of each inner vector
\endcode
</td>
<td> If the matrix is not in compressed form, makeCompressed() should be called before. Note that these functions are mostly provided for interoperability purposes with external libraries. A better access to the values of the matrix is done by using the InnerIterator class as described in \link TutorialSparse the Tutorial Sparse \endlink section</td>
<td>
If the matrix is not in compressed form, makeCompressed() should be called before.\n
Note that these functions are mostly provided for interoperability purposes with external libraries.\n
Better access to the values of the matrix is provided by the InnerIterator class, as described in the \link TutorialSparse sparse tutorial \endlink section.</td>
</tr>
</table>
*/

View File

@ -241,11 +241,11 @@ In the following \em sm denotes a sparse matrix, \em sv a sparse vector, \em dm
sm1.real() sm1.imag() -sm1 0.5*sm1
sm1+sm2 sm1-sm2 sm1.cwiseProduct(sm2)
\endcode
However, a strong restriction is that the storage orders must match. For instance, in the following example:
However, <strong>a strong restriction is that the storage orders must match</strong>. For instance, in the following example:
\code
sm4 = sm1 + sm2 + sm3;
\endcode
sm1, sm2, and sm3 must all be row-major or all column major.
sm1, sm2, and sm3 must all be row-major or all column-major.
On the other hand, there is no restriction on the target matrix sm4.
For instance, this means that for computing \f$ A^T + A \f$, the matrix \f$ A^T \f$ must be evaluated into a temporary matrix of compatible storage order:
\code
@ -311,6 +311,26 @@ sm2 = sm1.transpose() * P;
\endcode
\subsection TutorialSparse_SubMatrices Block operations
Regarding read access, sparse matrices expose the same API as dense matrices for accessing sub-matrices such as blocks, columns, and rows. See \ref TutorialBlockOperations for a detailed introduction.
However, for performance reasons, writing to a sparse sub-matrix is much more limited, and currently only contiguous sets of columns (resp. rows) of a column-major (resp. row-major) SparseMatrix are writable. Moreover, this information has to be known at compile-time, leaving out methods such as <tt>block(...)</tt> and <tt>corner*(...)</tt>. The available API for write-access to a SparseMatrix is summarized below:
\code
SparseMatrix<double,ColMajor> sm1;
sm1.col(j) = ...;
sm1.leftCols(ncols) = ...;
sm1.middleCols(j,ncols) = ...;
sm1.rightCols(ncols) = ...;
SparseMatrix<double,RowMajor> sm2;
sm2.row(i) = ...;
sm2.topRows(nrows) = ...;
sm2.middleRows(i,nrows) = ...;
sm2.bottomRows(nrows) = ...;
\endcode
In addition, sparse matrices expose the SparseMatrixBase::innerVector() and SparseMatrixBase::innerVectors() methods, which are aliases for the col/middleCols methods for column-major storage, and for the row/middleRows methods for row-major storage.
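For instance, assuming a column-major matrix:
\code
SparseMatrix<double,ColMajor> A(rows,cols);
A.innerVector(j)         = ...;  // equivalent to A.col(j) for this storage order
A.innerVectors(j, ncols) = ...;  // equivalent to A.middleCols(j,ncols)
\endcode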
\subsection TutorialSparse_TriangularSelfadjoint Triangular and selfadjoint views
Just as with dense matrices, the triangularView() function can be used to address a triangular part of the matrix, and perform triangular solves with a dense right hand side:

View File

@ -226,7 +226,9 @@ ei_add_test(geo_homogeneous)
ei_add_test(stdvector)
ei_add_test(stdvector_overload)
ei_add_test(stdlist)
ei_add_test(stdlist_overload)
ei_add_test(stddeque)
ei_add_test(stddeque_overload)
ei_add_test(sparse_basic)
ei_add_test(sparse_block)
ei_add_test(sparse_vector)

test/stddeque_overload.cpp (new file, 159 lines)
View File

@ -0,0 +1,159 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/StdDeque>
#include <Eigen/Geometry>
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Vector4f)
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Matrix2f)
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Matrix4f)
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Matrix4d)
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Affine3f)
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Affine3d)
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Quaternionf)
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Quaterniond)
template<typename MatrixType>
void check_stddeque_matrix(const MatrixType& m)
{
typename MatrixType::Index rows = m.rows();
typename MatrixType::Index cols = m.cols();
MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols);
std::deque<MatrixType> v(10, MatrixType(rows,cols)), w(20, y);
v[5] = x;
w[6] = v[5];
VERIFY_IS_APPROX(w[6], v[5]);
v = w;
for(int i = 0; i < 20; i++)
{
VERIFY_IS_APPROX(w[i], v[i]);
}
v.resize(21);
v[20] = x;
VERIFY_IS_APPROX(v[20], x);
v.resize(22,y);
VERIFY_IS_APPROX(v[21], y);
v.push_back(x);
VERIFY_IS_APPROX(v[22], x);
VERIFY((size_t)&(v[22]) == (size_t)&(v[21]) + sizeof(MatrixType));
// do a lot of push_back such that the deque gets internally resized
// (with memory reallocation)
MatrixType* ref = &w[0];
for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i)
v.push_back(w[i%w.size()]);
for(unsigned int i=23; i<v.size(); ++i)
{
VERIFY(v[i]==w[(i-23)%w.size()]);
}
}
template<typename TransformType>
void check_stddeque_transform(const TransformType&)
{
typedef typename TransformType::MatrixType MatrixType;
TransformType x(MatrixType::Random()), y(MatrixType::Random());
std::deque<TransformType> v(10), w(20, y);
v[5] = x;
w[6] = v[5];
VERIFY_IS_APPROX(w[6], v[5]);
v = w;
for(int i = 0; i < 20; i++)
{
VERIFY_IS_APPROX(w[i], v[i]);
}
v.resize(21);
v[20] = x;
VERIFY_IS_APPROX(v[20], x);
v.resize(22,y);
VERIFY_IS_APPROX(v[21], y);
v.push_back(x);
VERIFY_IS_APPROX(v[22], x);
// do a lot of push_back such that the deque gets internally resized
// (with memory reallocation)
TransformType* ref = &w[0];
for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i)
v.push_back(w[i%w.size()]);
for(unsigned int i=23; i<v.size(); ++i)
{
VERIFY(v[i].matrix()==w[(i-23)%w.size()].matrix());
}
}
template<typename QuaternionType>
void check_stddeque_quaternion(const QuaternionType&)
{
typedef typename QuaternionType::Coefficients Coefficients;
QuaternionType x(Coefficients::Random()), y(Coefficients::Random());
std::deque<QuaternionType> v(10), w(20, y);
v[5] = x;
w[6] = v[5];
VERIFY_IS_APPROX(w[6], v[5]);
v = w;
for(int i = 0; i < 20; i++)
{
VERIFY_IS_APPROX(w[i], v[i]);
}
v.resize(21);
v[20] = x;
VERIFY_IS_APPROX(v[20], x);
v.resize(22,y);
VERIFY_IS_APPROX(v[21], y);
v.push_back(x);
VERIFY_IS_APPROX(v[22], x);
// do a lot of push_back such that the deque gets internally resized
// (with memory reallocation)
QuaternionType* ref = &w[0];
for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i)
v.push_back(w[i%w.size()]);
for(unsigned int i=23; i<v.size(); ++i)
{
VERIFY(v[i].coeffs()==w[(i-23)%w.size()].coeffs());
}
}
void test_stddeque_overload()
{
// some non vectorizable fixed sizes
CALL_SUBTEST_1(check_stddeque_matrix(Vector2f()));
CALL_SUBTEST_1(check_stddeque_matrix(Matrix3f()));
CALL_SUBTEST_2(check_stddeque_matrix(Matrix3d()));
// some vectorizable fixed sizes
CALL_SUBTEST_1(check_stddeque_matrix(Matrix2f()));
CALL_SUBTEST_1(check_stddeque_matrix(Vector4f()));
CALL_SUBTEST_1(check_stddeque_matrix(Matrix4f()));
CALL_SUBTEST_2(check_stddeque_matrix(Matrix4d()));
// some dynamic sizes
CALL_SUBTEST_3(check_stddeque_matrix(MatrixXd(1,1)));
CALL_SUBTEST_3(check_stddeque_matrix(VectorXd(20)));
CALL_SUBTEST_3(check_stddeque_matrix(RowVectorXf(20)));
CALL_SUBTEST_3(check_stddeque_matrix(MatrixXcf(10,10)));
// some Transform
CALL_SUBTEST_4(check_stddeque_transform(Affine2f())); // does not need the specialization (2+1)^2 = 9
CALL_SUBTEST_4(check_stddeque_transform(Affine3f()));
CALL_SUBTEST_4(check_stddeque_transform(Affine3d()));
// some Quaternion
CALL_SUBTEST_5(check_stddeque_quaternion(Quaternionf()));
CALL_SUBTEST_5(check_stddeque_quaternion(Quaterniond()));
}

test/stdlist_overload.cpp (new file, 192 lines)
View File

@ -0,0 +1,192 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/StdList>
#include <Eigen/Geometry>
EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Vector4f)
EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Matrix2f)
EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Matrix4f)
EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Matrix4d)
EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Affine3f)
EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Affine3d)
EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Quaternionf)
EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Quaterniond)
template <class Container, class Position>
typename Container::iterator get(Container & c, Position position)
{
typename Container::iterator it = c.begin();
std::advance(it, position);
return it;
}
template <class Container, class Position, class Value>
void set(Container & c, Position position, const Value & value)
{
typename Container::iterator it = c.begin();
std::advance(it, position);
*it = value;
}
template<typename MatrixType>
void check_stdlist_matrix(const MatrixType& m)
{
typename MatrixType::Index rows = m.rows();
typename MatrixType::Index cols = m.cols();
MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols);
std::list<MatrixType> v(10, MatrixType(rows,cols)), w(20, y);
typename std::list<MatrixType>::iterator itv = get(v, 5);
typename std::list<MatrixType>::iterator itw = get(w, 6);
*itv = x;
*itw = *itv;
VERIFY_IS_APPROX(*itw, *itv);
v = w;
itv = v.begin();
itw = w.begin();
for(int i = 0; i < 20; i++)
{
VERIFY_IS_APPROX(*itw, *itv);
++itv;
++itw;
}
v.resize(21);
set(v, 20, x);
VERIFY_IS_APPROX(*get(v, 20), x);
v.resize(22,y);
VERIFY_IS_APPROX(*get(v, 21), y);
v.push_back(x);
VERIFY_IS_APPROX(*get(v, 22), x);
// do a lot of push_back such that the list gets internally resized
// (with memory reallocation)
MatrixType* ref = &(*get(w, 0));
for(int i=0; i<30 || ((ref==&(*get(w, 0))) && i<300); ++i)
v.push_back(*get(w, i%w.size()));
for(unsigned int i=23; i<v.size(); ++i)
{
VERIFY((*get(v, i))==(*get(w, (i-23)%w.size())));
}
}
template<typename TransformType>
void check_stdlist_transform(const TransformType&)
{
typedef typename TransformType::MatrixType MatrixType;
TransformType x(MatrixType::Random()), y(MatrixType::Random());
std::list<TransformType> v(10), w(20, y);
typename std::list<TransformType>::iterator itv = get(v, 5);
typename std::list<TransformType>::iterator itw = get(w, 6);
*itv = x;
*itw = *itv;
VERIFY_IS_APPROX(*itw, *itv);
v = w;
itv = v.begin();
itw = w.begin();
for(int i = 0; i < 20; i++)
{
VERIFY_IS_APPROX(*itw, *itv);
++itv;
++itw;
}
v.resize(21);
set(v, 20, x);
VERIFY_IS_APPROX(*get(v, 20), x);
v.resize(22,y);
VERIFY_IS_APPROX(*get(v, 21), y);
v.push_back(x);
VERIFY_IS_APPROX(*get(v, 22), x);
// do a lot of push_back such that the list gets internally resized
// (with memory reallocation)
TransformType* ref = &(*get(w, 0));
for(int i=0; i<30 || ((ref==&(*get(w, 0))) && i<300); ++i)
v.push_back(*get(w, i%w.size()));
for(unsigned int i=23; i<v.size(); ++i)
{
VERIFY(get(v, i)->matrix()==get(w, (i-23)%w.size())->matrix());
}
}
template<typename QuaternionType>
void check_stdlist_quaternion(const QuaternionType&)
{
typedef typename QuaternionType::Coefficients Coefficients;
QuaternionType x(Coefficients::Random()), y(Coefficients::Random());
std::list<QuaternionType> v(10), w(20, y);
typename std::list<QuaternionType>::iterator itv = get(v, 5);
typename std::list<QuaternionType>::iterator itw = get(w, 6);
*itv = x;
*itw = *itv;
VERIFY_IS_APPROX(*itw, *itv);
v = w;
itv = v.begin();
itw = w.begin();
for(int i = 0; i < 20; i++)
{
VERIFY_IS_APPROX(*itw, *itv);
++itv;
++itw;
}
v.resize(21);
set(v, 20, x);
VERIFY_IS_APPROX(*get(v, 20), x);
v.resize(22,y);
VERIFY_IS_APPROX(*get(v, 21), y);
v.push_back(x);
VERIFY_IS_APPROX(*get(v, 22), x);
// do a lot of push_back such that the list gets internally resized
// (with memory reallocation)
QuaternionType* ref = &(*get(w, 0));
for(int i=0; i<30 || ((ref==&(*get(w, 0))) && i<300); ++i)
v.push_back(*get(w, i%w.size()));
for(unsigned int i=23; i<v.size(); ++i)
{
VERIFY(get(v, i)->coeffs()==get(w, (i-23)%w.size())->coeffs());
}
}
void test_stdlist_overload()
{
// some non vectorizable fixed sizes
CALL_SUBTEST_1(check_stdlist_matrix(Vector2f()));
CALL_SUBTEST_1(check_stdlist_matrix(Matrix3f()));
CALL_SUBTEST_2(check_stdlist_matrix(Matrix3d()));
// some vectorizable fixed sizes
CALL_SUBTEST_1(check_stdlist_matrix(Matrix2f()));
CALL_SUBTEST_1(check_stdlist_matrix(Vector4f()));
CALL_SUBTEST_1(check_stdlist_matrix(Matrix4f()));
CALL_SUBTEST_2(check_stdlist_matrix(Matrix4d()));
// some dynamic sizes
CALL_SUBTEST_3(check_stdlist_matrix(MatrixXd(1,1)));
CALL_SUBTEST_3(check_stdlist_matrix(VectorXd(20)));
CALL_SUBTEST_3(check_stdlist_matrix(RowVectorXf(20)));
CALL_SUBTEST_3(check_stdlist_matrix(MatrixXcf(10,10)));
// some Transform
CALL_SUBTEST_4(check_stdlist_transform(Affine2f())); // does not need the specialization (2+1)^2 = 9
CALL_SUBTEST_4(check_stdlist_transform(Affine3f()));
CALL_SUBTEST_4(check_stdlist_transform(Affine3d()));
// some Quaternion
CALL_SUBTEST_5(check_stdlist_quaternion(Quaternionf()));
CALL_SUBTEST_5(check_stdlist_quaternion(Quaterniond()));
}

View File

@ -165,10 +165,10 @@ template <typename T> class array<T, 0> {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::size_t size() { return 0; }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE array() { }
EIGEN_STRONG_INLINE array() : dummy() { }
#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
EIGEN_DEVICE_FUNC array(std::initializer_list<T> l) {
EIGEN_DEVICE_FUNC array(std::initializer_list<T> l) : dummy() {
eigen_assert(l.size() == 0);
}
#endif

View File

@ -344,7 +344,7 @@ class TensorContractionSubMapper {
enum {
// We can use direct offsets iff the parent mapper supports them and we can compute the strides.
// TODO: we should also enable direct offsets for the Rhs case.
UseDirectOffsets = (side == Lhs) && inner_dim_contiguous && ParentMapper::DirectOffsets
UseDirectOffsets = ParentMapper::DirectOffsets && (side == Lhs) && inner_dim_contiguous && (array_size<contract_t>::value > 0)
};
EIGEN_DEVICE_FUNC TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset)

View File

@ -86,6 +86,26 @@ struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 2, 1> {
const TensorEvaluator& m_impl;
};
template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 4, 1> {
PacketConverter(const TensorEvaluator& impl)
: m_impl(impl) {}
template<int LoadMode, typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
SrcPacket src1 = m_impl.template packet<LoadMode>(index);
SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize);
SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize);
TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4);
return result;
}
private:
const TensorEvaluator& m_impl;
};
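A hedged usage sketch of the conversion path this converter serves; which Src/Tgt coefficient ratio (and hence which PacketConverter specialization) is used depends on the SIMD packet sizes of the two scalar types:
#include <unsupported/Eigen/CXX11/Tensor>
// Tensor::cast() routes element-wise conversions through PacketConverter; the
// 4:1 specialization above gathers four source packets and forwards them to the
// new quad-argument pcast introduced in GenericPacketMath.h.
Eigen::Tensor<float, 1> a(64);
a.setRandom();
Eigen::Tensor<double, 1> b = a.cast<double>();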
template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> {

View File

@ -109,10 +109,12 @@ class CudaStreamDevice : public StreamInterface {
struct GpuDevice {
// The StreamInterface is not owned: the caller is
// responsible for its initialization and eventual destruction.
explicit GpuDevice(const StreamInterface* stream) : stream_(stream) {
explicit GpuDevice(const StreamInterface* stream) : stream_(stream), max_blocks_(INT_MAX) {
eigen_assert(stream);
}
explicit GpuDevice(const StreamInterface* stream, int num_blocks) : stream_(stream), max_blocks_(num_blocks) {
eigen_assert(stream);
}
// TODO(bsteiner): This is an internal API, we should not expose it.
EIGEN_STRONG_INLINE const cudaStream_t& stream() const {
return stream_->stream();
@ -246,6 +248,10 @@ struct GpuDevice {
#endif
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxBlocks() const {
return max_blocks_;
}
// This function checks if the CUDA runtime recorded an error for the
// underlying stream device.
inline bool ok() const {
@ -259,7 +265,7 @@ struct GpuDevice {
private:
const StreamInterface* stream_;
int max_blocks_;
};
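A hedged usage sketch of the new two-argument constructor, assuming EIGEN_USE_GPU and the Tensor module; the stream object must outlive the device, and the cap only takes effect where maxBlocks() is consulted (see TensorExecutor below):
Eigen::CudaStreamDevice stream_device;                        // uses the default CUDA stream
Eigen::GpuDevice capped_device(&stream_device, /*num_blocks=*/1024);
Eigen::GpuDevice default_device(&stream_device);              // max_blocks_ defaults to INT_MAX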
#ifndef __CUDA_ARCH__

View File

@ -220,7 +220,7 @@ EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, false>::run(
if (needs_assign)
{
const int block_size = device.maxCudaThreadsPerBlock();
const int max_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size;
const int max_blocks = numext::maxi<int>(device.maxBlocks(), device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size);
const Index size = array_prod(evaluator.dimensions());
// Create at least one block to ensure we won't crash if we're called with tensors of size 0.
const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, (size + block_size - 1) / block_size), 1);
@ -239,7 +239,7 @@ EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, true>::run(c
if (needs_assign)
{
const int block_size = device.maxCudaThreadsPerBlock();
const int max_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size;
const int max_blocks = numext::maxi<int>(device.maxBlocks(), device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size);
const Index size = array_prod(evaluator.dimensions());
// Create at least one block to ensure we won't crash if we're called with tensors of size 0.
const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, (size + block_size - 1) / block_size), 1);
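A hedged trace of the launch-size arithmetic for the empty-tensor corner case mentioned in the comment (illustrative values):
// size = 0, block_size = 1024, max_blocks >= 1:
//   numext::mini<int>(max_blocks, (0 + 1024 - 1) / 1024)  == 0
//   numext::maxi<int>(0, 1)                               == 1   // one block is still launched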

View File

@ -33,18 +33,19 @@ struct TensorUInt128
HIGH high;
LOW low;
template<typename OTHER_HIGH, typename OTHER_LOW>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
TensorUInt128(int32_t x) : high(0), low(x) {
TensorUInt128(const TensorUInt128<OTHER_HIGH, OTHER_LOW>& other) : high(other.high), low(other.low) {
EIGEN_STATIC_ASSERT(sizeof(OTHER_HIGH) <= sizeof(HIGH), "high too wide");
EIGEN_STATIC_ASSERT(sizeof(OTHER_LOW) <= sizeof(LOW), "low too wide");
}
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
explicit TensorUInt128(const T& x) : high(0), low(x) {
eigen_assert(x >= 0);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
TensorUInt128(uint32_t x) : high(0), low(x) { }
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
TensorUInt128(int64_t x) : high(0), low(x) {
eigen_assert(x >= 0);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
TensorUInt128(uint64_t x) : high(0), low(x) { }
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
TensorUInt128(uint64_t y, uint64_t x) : high(y), low(x) { }
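A hedged sketch of the widened constructor set; TensorUInt128 lives in Eigen::internal, and the mixed-width copy constructor statically checks that neither half narrows:
using Eigen::internal::TensorUInt128;
TensorUInt128<uint64_t, uint64_t> a(/*high=*/1, /*low=*/0);   // represents 2^64
TensorUInt128<uint64_t, uint64_t> b(a);                       // copy: widths verified at compile time
uint64_t low_bits = static_cast<uint64_t>(b);                 // narrowing read used by the tests below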

View File

@ -158,7 +158,7 @@ if(CUDA_FOUND)
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE)
endif()
set(CUDA_NVCC_FLAGS "-std=c++11 --relaxed-constexpr -arch compute_30")
set(CUDA_NVCC_FLAGS "-std=c++11 --relaxed-constexpr -arch compute_30 -Xcudafe \"--display_error_number\"")
cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")

View File

@ -22,16 +22,16 @@ using Eigen::Tensor;
typedef Tensor<float, 1>::DimensionPair DimPair;
template<int DataLayout>
static void test_cuda_contraction(int m_size, int k_size, int n_size)
void test_cuda_contraction(int m_size, int k_size, int n_size)
{
std::cout << "Calling with (" << m_size << "," << k_size << "," << n_size << ")" << std::endl;
std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl;
// with these dimensions, the output has 300 * 140 elements, which is
// more than 30 * 1024, which is the number of threads in blocks on
// a 15 SM GK110 GPU
Tensor<float, 2, DataLayout> t_left(Eigen::array<int, 2>(m_size, k_size));
Tensor<float, 2, DataLayout> t_right(Eigen::array<int, 2>(k_size, n_size));
Tensor<float, 2, DataLayout> t_result(Eigen::array<int, 2>(m_size, n_size));
Tensor<float, 2, DataLayout> t_result_gpu(Eigen::array<int, 2>(m_size, n_size));
Tensor<float, 2, DataLayout> t_left(m_size, k_size);
Tensor<float, 2, DataLayout> t_right(k_size, n_size);
Tensor<float, 2, DataLayout> t_result(m_size, n_size);
Tensor<float, 2, DataLayout> t_result_gpu(m_size, n_size);
Eigen::array<DimPair, 1> dims(DimPair(1, 0));
t_left.setRandom();
@ -84,41 +84,69 @@ static void test_cuda_contraction(int m_size, int k_size, int n_size)
cudaFree((void*)d_t_result);
}
template<int DataLayout>
void test_cuda_contraction_m() {
for (int k = 32; k < 256; k++) {
test_cuda_contraction<ColMajor>(k, 128, 128);
test_cuda_contraction<RowMajor>(k, 128, 128);
}
}
template<int DataLayout>
void test_cuda_contraction_k() {
for (int k = 32; k < 256; k++) {
test_cuda_contraction<ColMajor>(128, k, 128);
test_cuda_contraction<RowMajor>(128, k, 128);
}
}
template<int DataLayout>
void test_cuda_contraction_n() {
for (int k = 32; k < 256; k++) {
test_cuda_contraction<ColMajor>(128, 128, k);
test_cuda_contraction<RowMajor>(128, 128, k);
}
}
template<int DataLayout>
void test_cuda_contraction_sizes() {
int m_sizes[] = { 31, 39, 63, 64, 65,
127, 129, 255, 257 , 511,
512, 513, 1023, 1024, 1025};
int n_sizes[] = { 31, 39, 63, 64, 65,
127, 129, 255, 257, 511,
512, 513, 1023, 1024, 1025};
int k_sizes[] = { 31, 39, 63, 64, 65,
95, 96, 127, 129, 255,
257, 511, 512, 513, 1023,
1024, 1025};
for (int i = 0; i < 15; i++) {
for (int j = 0; j < 15; j++) {
for (int k = 0; k < 17; k++) {
test_cuda_contraction<DataLayout>(m_sizes[i], n_sizes[j], k_sizes[k]);
}
}
}
}
void test_cxx11_tensor_cuda()
{
std::cout << "Calling contraction tests" << std::endl;
CALL_SUBTEST(test_cuda_contraction<ColMajor>(128, 128, 128));
CALL_SUBTEST(test_cuda_contraction<RowMajor>(128, 128, 128));
for (int k = 32; k < 256; k++) {
CALL_SUBTEST(test_cuda_contraction<ColMajor>(128, k, 128));
CALL_SUBTEST(test_cuda_contraction<RowMajor>(128, k, 128));
}
for (int k = 32; k < 256; k++) {
CALL_SUBTEST(test_cuda_contraction<ColMajor>(128, 128, k));
CALL_SUBTEST(test_cuda_contraction<RowMajor>(128, 128, k));
}
for (int k = 32; k < 256; k++) {
CALL_SUBTEST(test_cuda_contraction<ColMajor>(k, 128, 128));
CALL_SUBTEST(test_cuda_contraction<RowMajor>(k, 128, 128));
}
CALL_SUBTEST_1(test_cuda_contraction<ColMajor>(128, 128, 128));
CALL_SUBTEST_1(test_cuda_contraction<RowMajor>(128, 128, 128));
int m_sizes[] = {31, 39, 63, 64, 65,
127, 129, 255, 257, 511,
512, 513, 1023, 1024, 1025 };
int n_sizes[] = {31, 39, 63, 64, 65,
127, 129, 255, 257, 511,
512, 513, 1023, 1024, 1025 };
CALL_SUBTEST_2(test_cuda_contraction_m<ColMajor>());
CALL_SUBTEST_3(test_cuda_contraction_m<RowMajor>());
int k_sizes[] = { 31, 39, 63, 64, 65,
95, 96, 127, 129, 255,
257, 511, 512, 513, 1023,
1024, 1025};
CALL_SUBTEST_4(test_cuda_contraction_k<ColMajor>());
CALL_SUBTEST_5(test_cuda_contraction_k<RowMajor>());
for (int i = 0; i <15; i++)
for (int j = 0; j < 15; j++)
for (int k = 0; k < 17; k++) {
CALL_SUBTEST(test_cuda_contraction<ColMajor>(m_sizes[i], n_sizes[j], k_sizes[k]));
CALL_SUBTEST(test_cuda_contraction<RowMajor>(m_sizes[i], n_sizes[j], k_sizes[k]));
}
CALL_SUBTEST_6(test_cuda_contraction_n<ColMajor>());
CALL_SUBTEST_7(test_cuda_contraction_n<RowMajor>());
CALL_SUBTEST_8(test_cuda_contraction_sizes<ColMajor>());
CALL_SUBTEST_9(test_cuda_contraction_sizes<RowMajor>());
}

View File

@ -109,19 +109,19 @@ struct GPUContext {
// The actual expression to evaluate
template <typename Context>
static void test_contextual_eval(Context* context)
void test_contextual_eval(Context* context)
{
context->out().device(context->device()) = context->in1() + context->in2() * 3.14f + context->in1().constant(2.718f);
}
template <typename Context>
static void test_forced_contextual_eval(Context* context)
void test_forced_contextual_eval(Context* context)
{
context->out().device(context->device()) = (context->in1() + context->in2()).eval() * 3.14f + context->in1().constant(2.718f);
}
template <typename Context>
static void test_compound_assignment(Context* context)
void test_compound_assignment(Context* context)
{
context->out().device(context->device()) = context->in1().constant(2.718f);
context->out().device(context->device()) += context->in1() + context->in2() * 3.14f;
@ -129,7 +129,7 @@ static void test_compound_assignment(Context* context)
template <typename Context>
static void test_contraction(Context* context)
void test_contraction(Context* context)
{
Eigen::array<std::pair<int, int>, 2> dims;
dims[0] = std::make_pair(1, 1);
@ -145,7 +145,7 @@ static void test_contraction(Context* context)
template <typename Context>
static void test_1d_convolution(Context* context)
void test_1d_convolution(Context* context)
{
Eigen::DSizes<int, 3> indices(0,0,0);
Eigen::DSizes<int, 3> sizes(40,49,70);
@ -155,7 +155,7 @@ static void test_1d_convolution(Context* context)
}
template <typename Context>
static void test_2d_convolution(Context* context)
void test_2d_convolution(Context* context)
{
Eigen::DSizes<int, 3> indices(0,0,0);
Eigen::DSizes<int, 3> sizes(40,49,69);
@ -165,7 +165,7 @@ static void test_2d_convolution(Context* context)
}
template <typename Context>
static void test_3d_convolution(Context* context)
void test_3d_convolution(Context* context)
{
Eigen::DSizes<int, 3> indices(0,0,0);
Eigen::DSizes<int, 3> sizes(39,49,69);
@ -175,7 +175,7 @@ static void test_3d_convolution(Context* context)
}
static void test_cpu() {
void test_cpu() {
Eigen::Tensor<float, 3> in1(40,50,70);
Eigen::Tensor<float, 3> in2(40,50,70);
Eigen::Tensor<float, 3> out(40,50,70);
@ -267,7 +267,7 @@ static void test_cpu() {
}
}
static void test_gpu() {
void test_gpu() {
Eigen::Tensor<float, 3> in1(40,50,70);
Eigen::Tensor<float, 3> in2(40,50,70);
Eigen::Tensor<float, 3> out(40,50,70);

View File

@ -127,7 +127,7 @@ void test_misc2() {
TensorUInt128<uint64_t, uint64_t> result = (TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider) - TensorUInt128<static_val<1>, static_val<0> >(1, 0) + TensorUInt128<static_val<0>, static_val<1> >(1));
uint64_t actual = static_cast<uint64_t>(result);
VERIFY_EQUAL(actual, expected);
VERIFY_IS_EQUAL(actual, expected);
}
}
}