Sync from Head.

This commit is contained in:
Srinivas Vasudevan 2016-12-02 14:14:45 -08:00
commit a0d3ac760f
18 changed files with 781 additions and 686 deletions

View File

@ -70,9 +70,11 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
#if !(defined(EIGEN_NO_STATIC_ASSERT) && defined(EIGEN_NO_DEBUG))
typedef internal::scalar_conj_product_op<Scalar,typename OtherDerived::Scalar> func;
EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar);
#endif
eigen_assert(size() == other.size());
return internal::dot_nocheck<Derived,OtherDerived>::run(*this, other);

View File

@ -224,50 +224,65 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
// on the other hand, it is good for the cache to pack the vector anyways...
EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
MightCannotUseDest = (ActualDest::InnerStrideAtCompileTime!=1) || ComplexByReal
MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal
};
gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
evalToDest ? dest.data() : static_dest.data());
if(!evalToDest)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
Index size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if(!alphaIsCompatible)
{
MappedDest(actualDestPtr, dest.size()).setZero();
compatibleAlpha = RhsScalar(1);
}
else
MappedDest(actualDestPtr, dest.size()) = dest;
}
typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
general_matrix_vector_product
<Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
actualLhs.rows(), actualLhs.cols(),
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
RhsMapper(actualRhs.data(), actualRhs.innerStride()),
actualDestPtr, 1,
compatibleAlpha);
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
if (!evalToDest)
if(!MightCannotUseDest)
{
if(!alphaIsCompatible)
dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size());
else
dest = MappedDest(actualDestPtr, dest.size());
// shortcut if we are sure to be able to use dest directly;
// this helps the compiler generate cleaner and more optimized code for the most common cases
general_matrix_vector_product
<Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
actualLhs.rows(), actualLhs.cols(),
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
RhsMapper(actualRhs.data(), actualRhs.innerStride()),
dest.data(), 1,
compatibleAlpha);
}
else
{
gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
evalToDest ? dest.data() : static_dest.data());
if(!evalToDest)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
Index size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if(!alphaIsCompatible)
{
MappedDest(actualDestPtr, dest.size()).setZero();
compatibleAlpha = RhsScalar(1);
}
else
MappedDest(actualDestPtr, dest.size()) = dest;
}
general_matrix_vector_product
<Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
actualLhs.rows(), actualLhs.cols(),
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
RhsMapper(actualRhs.data(), actualRhs.innerStride()),
actualDestPtr, 1,
compatibleAlpha);
if (!evalToDest)
{
if(!alphaIsCompatible)
dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size());
else
dest = MappedDest(actualDestPtr, dest.size());
}
}
}
};

View File

@ -763,6 +763,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
{
// NOTE MSVC 2008 complains if we directly put bool(NumTraits<T>::IsInteger) as the EIGEN_STATIC_ASSERT argument.
const bool is_integer = NumTraits<T>::IsInteger;
EIGEN_UNUSED_VARIABLE(is_integer);
EIGEN_STATIC_ASSERT(is_integer,
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
resize(size);

View File

@ -158,10 +158,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<
static EIGEN_STRONG_INLINE
void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
// FIXME shall we handle nested_eval here?
generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
}
@ -176,10 +173,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<
static EIGEN_STRONG_INLINE
void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
// FIXME shall we handle nested_eval here?
generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
}
@ -377,7 +371,6 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
{
LhsNested actual_lhs(lhs);
RhsNested actual_rhs(rhs);
internal::gemv_dense_selector<Side,
(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)

View File

@ -523,7 +523,7 @@ template<typename T> struct smart_memmove_helper<T,true> {
template<typename T> struct smart_memmove_helper<T,false> {
static inline void run(const T* start, const T* end, T* target)
{
if (uintptr_t(target) < uintptr_t(start))
if (UIntPtr(target) < UIntPtr(start))
{
std::copy(start, end, target);
}

File diff suppressed because it is too large Load Diff

View File

@ -1,3 +1,3 @@
**Eigen is a C++ template library for linear algebra: matrices, vectors, numerical solvers, and related algorithms.**
For more information go to http://eigen.tuxfamily.org/.
**Eigen is a C++ template library for linear algebra: matrices, vectors, numerical solvers, and related algorithms.**
For more information go to http://eigen.tuxfamily.org/.

View File

@ -6,7 +6,7 @@
#include "action_atv_product.hh"
#include "action_matrix_matrix_product.hh"
// #include "action_ata_product.hh"
#include "action_ata_product.hh"
#include "action_aat_product.hh"
#include "action_trisolve.hh"

View File

@ -46,9 +46,9 @@ public :
BLAS_FUNC(gemm)(&notrans,&notrans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N);
}
// static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
// ssyrk_(&lower,&trans,&N,&N,&fone,A,&N,&fzero,X,&N);
// }
// "A^T * A" benchmark action, delegated to the BLAS symmetric rank-k update
// routine (syrk) called with the `trans` flag. The sibling aat_product makes
// the same call with `notrans` instead.
// NOTE(review): argument roles below follow the reference BLAS ?syrk
// signature; lower/trans/fone/fzero are defined elsewhere -- confirm there.
static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
  BLAS_FUNC(syrk)(&lower, &trans,  // uplo, trans
                  &N, &N,          // n, k
                  &fone, A, &N,    // alpha, A, lda
                  &fzero, X, &N);  // beta, C, ldc
}
static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){
BLAS_FUNC(syrk)(&lower,&notrans,&N,&N,&fone,A,&N,&fzero,X,&N);

View File

@ -48,7 +48,7 @@ int main()
bench<Action_rot<blas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_matrix_matrix_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_ata_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_ata_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_aat_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_trisolve<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);

View File

@ -78,18 +78,18 @@ public :
cible[i][j]=source[i][j];
}
// static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N)
// {
// real somme;
// for (int j=0;j<N;j++){
// for (int i=0;i<N;i++){
// somme=0.0;
// for (int k=0;k<N;k++)
// somme += A[i][k]*A[j][k];
// X[j][i]=somme;
// }
// }
// }
static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N)
{
real somme;
for (int j=0;j<N;j++){
for (int i=0;i<N;i++){
somme=0.0;
for (int k=0;k<N;k++)
somme += A[i][k]*A[j][k];
X[j][i]=somme;
}
}
}
static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N)
{

View File

@ -80,35 +80,35 @@ public :
}
}
static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
static EIGEN_DONT_INLINE void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
X = (A*B);
}
static inline void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
static EIGEN_DONT_INLINE void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
X = (trans(A)*trans(B));
}
static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N){
static EIGEN_DONT_INLINE void ata_product(const gene_matrix & A, gene_matrix & X, int N){
X = (trans(A)*A);
}
static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N){
static EIGEN_DONT_INLINE void aat_product(const gene_matrix & A, gene_matrix & X, int N){
X = (A*trans(A));
}
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
static EIGEN_DONT_INLINE void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
X = (A*B);
}
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
static EIGEN_DONT_INLINE void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
X = (trans(A)*B);
}
static inline void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N){
static EIGEN_DONT_INLINE void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N){
Y += coef * X;
}
static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
static EIGEN_DONT_INLINE void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
Y = a*X + b*Y;
}

View File

@ -30,9 +30,9 @@ int main()
bench<Action_matrix_vector_product<blaze_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_atv_product<blaze_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
// bench<Action_matrix_matrix_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_ata_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_aat_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_matrix_matrix_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_ata_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_aat_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
return 0;
}

View File

@ -92,9 +92,11 @@ public :
X.noalias() = A.transpose()*B.transpose();
}
// static inline void ata_product(const gene_matrix & A, gene_matrix & X, int /*N*/){
// X.noalias() = A.transpose()*A;
// }
// "A^T * A" benchmark action for the Eigen interface.
// The lower triangular view of X is first set to zero, then updated in place
// through selfadjointView<Lower>().rankUpdate() applied to A.transpose().
// The size argument is unused (its name is commented out).
// NOTE(review): only the Lower views of X are touched here; presumably the
// strictly upper part of X is left as-is -- confirm against Eigen's
// SelfAdjointView::rankUpdate documentation.
static inline void ata_product(const gene_matrix & A, gene_matrix & X, int /*N*/){
// Plain product form, kept for reference; the rank update below is used instead.
//X.noalias() = A.transpose()*A;
X.template triangularView<Lower>().setZero();
X.template selfadjointView<Lower>().rankUpdate(A.transpose());
}
static inline void aat_product(const gene_matrix & A, gene_matrix & X, int /*N*/){
X.template triangularView<Lower>().setZero();

View File

@ -25,7 +25,7 @@ BTL_MAIN;
int main()
{
bench<Action_matrix_matrix_product<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_ata_product<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_ata_product<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_aat_product<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_trmm<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);

View File

@ -59,3 +59,6 @@ before-evaluators
9174:d228bc282ac9 # merge
9212:c90098affa7b # Fix performance regression introduced in changeset 8aad8f35c955
9213:9f1c14e4694b # Fix performance regression in dgemm introduced by changeset 81d53c711775
3.3-beta2
3.3-rc1
3.3.0

View File

@ -0,0 +1,68 @@
#include <iostream>
#include <fstream>
#include <vector>
#include <Eigen/Core>
#include "../../BenchTimer.h"
// Eigen names (Matrix, Dynamic, NumTraits, ...) are used unqualified below.
using namespace Eigen;
// The scalar type under test must be supplied as the SCALAR macro
// (expected to come from the build, e.g. -DSCALAR=float); refuse to compile
// without it.
#ifndef SCALAR
#error SCALAR must be defined
#endif
// Scalar type shared by all benchmark operands.
typedef SCALAR Scalar;
// Matrix of Scalar with both dimensions set at run time.
typedef Matrix<Scalar,Dynamic,Dynamic> Mat;
// Single-column matrix (vector) of Scalar with run-time length.
typedef Matrix<Scalar,Dynamic,1> Vec;
// Benchmarked kernel: adds the matrix-vector product A * B to C in place.
//   A : matrix operand (read-only)
//   B : vector operand (read-only)
//   C : accumulator, updated by the += assignment
// NOTE(review): EIGEN_DONT_INLINE presumably keeps this call out of line so
// the timed loop measures the kernel itself -- confirm against the macro.
EIGEN_DONT_INLINE
void gemv(const Mat &A, const Vec &B, Vec &C)
{
// noalias() is applied to the destination of the accumulation.
C.noalias() += A * B;
}
// Times gemv() on an m-by-n problem and returns the measured throughput.
//   m, n : dimensions of the matrix operand; the input vector has n entries
//          and the accumulator has m entries.
// Returns 1e-9 * rep * flops / (best time reported by the timer), where
// flops = 2*m*n is the operation count charged to one gemv() call.
EIGEN_DONT_INLINE
double bench(long m, long n)
{
  // Operands: random inputs, zero-initialised accumulator.
  Mat lhs(m,n);
  Vec rhs(n);
  Vec acc(m);
  lhs.setRandom();
  rhs.setRandom();
  acc.setZero();

  BenchTimer timer;

  const bool isComplex = NumTraits<Scalar>::IsComplex;

  // Work budget from which the repetition counts are derived; complex
  // scalars get a quarter of it.
  double budget = 1e9*4/sizeof(Scalar);
  if(isComplex)
    budget /= 4;

  // Bounds on the number of timing attempts (tighter for complex scalars).
  const double minTries = isComplex ? 2 : 4;
  const double maxTries = isComplex ? 4 : 10;

  const double flops = 2. * m * n;
  const double ratio = budget/flops;

  // Inner repetitions are clamped to [1, 100]; attempts to [minTries, maxTries].
  const long rep   = static_cast<long>(std::max(1., std::min(100., ratio)));
  const long tries = static_cast<long>(std::max(minTries, std::min(maxTries, ratio)));

  BENCH(timer, tries, rep, gemv(lhs,rhs,acc));

  return 1e-9 * rep * flops / timer.best();
}
// Entry point: reads whitespace-separated "m n" problem sizes from
// gemv_settings.txt, runs bench() on each pair, and prints all results as a
// single row on stdout.
//
// Returns 0 on success, or 1 (with a message on stderr) when the settings
// file cannot be opened. Command-line arguments are not used.
int main(int /*argc*/, char ** /*argv*/)
{
  std::vector<double> results;

  std::ifstream settings("gemv_settings.txt");
  if(!settings)
  {
    // A missing settings file used to produce empty output with exit code 0,
    // which is indistinguishable from a successful run over zero sizes.
    std::cerr << "error: cannot open gemv_settings.txt" << std::endl;
    return 1;
  }

  long m = 0, n = 0;
  // Reading stops at end of file or at the first malformed entry.
  while(settings >> m >> n)
  {
    //std::cerr << "  Testing " << m << " " << n << std::endl;
    results.push_back( bench(m, n) );
  }

  // Stdout is left untouched apart from this single row of results.
  std::cout << RowVectorXd::Map(results.data(), results.size());

  return 0;
}

View File

@ -0,0 +1,11 @@
8 8
9 9
24 24
239 239
240 240
2400 24
24 2400
24 240
2400 2400
4800 23
23 4800