mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-01-30 17:40:05 +08:00
Sync from Head.
This commit is contained in:
commit
a0d3ac760f
@ -70,9 +70,11 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
|
||||
#if !(defined(EIGEN_NO_STATIC_ASSERT) && defined(EIGEN_NO_DEBUG))
|
||||
typedef internal::scalar_conj_product_op<Scalar,typename OtherDerived::Scalar> func;
|
||||
EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar);
|
||||
|
||||
#endif
|
||||
|
||||
eigen_assert(size() == other.size());
|
||||
|
||||
return internal::dot_nocheck<Derived,OtherDerived>::run(*this, other);
|
||||
|
@ -224,50 +224,65 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
|
||||
// on the other hand, it is good for the cache to pack the vector anyway...
|
||||
EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
|
||||
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
|
||||
MightCannotUseDest = (ActualDest::InnerStrideAtCompileTime!=1) || ComplexByReal
|
||||
MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal
|
||||
};
|
||||
|
||||
gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
|
||||
|
||||
const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
|
||||
const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
|
||||
|
||||
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
|
||||
evalToDest ? dest.data() : static_dest.data());
|
||||
|
||||
if(!evalToDest)
|
||||
{
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
Index size = dest.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
if(!alphaIsCompatible)
|
||||
{
|
||||
MappedDest(actualDestPtr, dest.size()).setZero();
|
||||
compatibleAlpha = RhsScalar(1);
|
||||
}
|
||||
else
|
||||
MappedDest(actualDestPtr, dest.size()) = dest;
|
||||
}
|
||||
|
||||
typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
|
||||
typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
|
||||
general_matrix_vector_product
|
||||
<Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
|
||||
actualLhs.rows(), actualLhs.cols(),
|
||||
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
|
||||
RhsMapper(actualRhs.data(), actualRhs.innerStride()),
|
||||
actualDestPtr, 1,
|
||||
compatibleAlpha);
|
||||
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
|
||||
|
||||
if (!evalToDest)
|
||||
if(!MightCannotUseDest)
|
||||
{
|
||||
if(!alphaIsCompatible)
|
||||
dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size());
|
||||
else
|
||||
dest = MappedDest(actualDestPtr, dest.size());
|
||||
// shortcut if we are sure to be able to use dest directly,
|
||||
// this helps the compiler generate cleaner and more optimized code for the most common cases
|
||||
general_matrix_vector_product
|
||||
<Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
|
||||
actualLhs.rows(), actualLhs.cols(),
|
||||
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
|
||||
RhsMapper(actualRhs.data(), actualRhs.innerStride()),
|
||||
dest.data(), 1,
|
||||
compatibleAlpha);
|
||||
}
|
||||
else
|
||||
{
|
||||
gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
|
||||
|
||||
const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
|
||||
const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
|
||||
evalToDest ? dest.data() : static_dest.data());
|
||||
|
||||
if(!evalToDest)
|
||||
{
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
Index size = dest.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
if(!alphaIsCompatible)
|
||||
{
|
||||
MappedDest(actualDestPtr, dest.size()).setZero();
|
||||
compatibleAlpha = RhsScalar(1);
|
||||
}
|
||||
else
|
||||
MappedDest(actualDestPtr, dest.size()) = dest;
|
||||
}
|
||||
|
||||
general_matrix_vector_product
|
||||
<Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
|
||||
actualLhs.rows(), actualLhs.cols(),
|
||||
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
|
||||
RhsMapper(actualRhs.data(), actualRhs.innerStride()),
|
||||
actualDestPtr, 1,
|
||||
compatibleAlpha);
|
||||
|
||||
if (!evalToDest)
|
||||
{
|
||||
if(!alphaIsCompatible)
|
||||
dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size());
|
||||
else
|
||||
dest = MappedDest(actualDestPtr, dest.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -763,6 +763,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
{
|
||||
// NOTE MSVC 2008 complains if we directly put bool(NumTraits<T>::IsInteger) as the EIGEN_STATIC_ASSERT argument.
|
||||
const bool is_integer = NumTraits<T>::IsInteger;
|
||||
EIGEN_UNUSED_VARIABLE(is_integer);
|
||||
EIGEN_STATIC_ASSERT(is_integer,
|
||||
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
|
||||
resize(size);
|
||||
|
@ -158,10 +158,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<
|
||||
static EIGEN_STRONG_INLINE
|
||||
void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
// FIXME shall we handle nested_eval here?
|
||||
generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
|
||||
}
|
||||
@ -176,10 +173,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<
|
||||
static EIGEN_STRONG_INLINE
|
||||
void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
// FIXME shall we handle nested_eval here?
|
||||
generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
|
||||
}
|
||||
@ -377,7 +371,6 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
|
||||
{
|
||||
LhsNested actual_lhs(lhs);
|
||||
RhsNested actual_rhs(rhs);
|
||||
|
||||
internal::gemv_dense_selector<Side,
|
||||
(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
|
||||
bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)
|
||||
|
@ -523,7 +523,7 @@ template<typename T> struct smart_memmove_helper<T,true> {
|
||||
template<typename T> struct smart_memmove_helper<T,false> {
|
||||
static inline void run(const T* start, const T* end, T* target)
|
||||
{
|
||||
if (uintptr_t(target) < uintptr_t(start))
|
||||
if (UIntPtr(target) < UIntPtr(start))
|
||||
{
|
||||
std::copy(start, end, target);
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,3 @@
|
||||
**Eigen is a C++ template library for linear algebra: matrices, vectors, numerical solvers, and related algorithms.**
|
||||
|
||||
For more information go to http://eigen.tuxfamily.org/.
|
||||
**Eigen is a C++ template library for linear algebra: matrices, vectors, numerical solvers, and related algorithms.**
|
||||
|
||||
For more information go to http://eigen.tuxfamily.org/.
|
||||
|
@ -6,7 +6,7 @@
|
||||
#include "action_atv_product.hh"
|
||||
|
||||
#include "action_matrix_matrix_product.hh"
|
||||
// #include "action_ata_product.hh"
|
||||
#include "action_ata_product.hh"
|
||||
#include "action_aat_product.hh"
|
||||
|
||||
#include "action_trisolve.hh"
|
||||
|
@ -46,9 +46,9 @@ public :
|
||||
BLAS_FUNC(gemm)(&notrans,&notrans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N);
|
||||
}
|
||||
|
||||
// static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
|
||||
// ssyrk_(&lower,&trans,&N,&N,&fone,A,&N,&fzero,X,&N);
|
||||
// }
|
||||
static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
|
||||
BLAS_FUNC(syrk)(&lower,&trans,&N,&N,&fone,A,&N,&fzero,X,&N);
|
||||
}
|
||||
|
||||
static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){
|
||||
BLAS_FUNC(syrk)(&lower,&notrans,&N,&N,&fone,A,&N,&fzero,X,&N);
|
||||
|
@ -48,7 +48,7 @@ int main()
|
||||
bench<Action_rot<blas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
||||
|
||||
bench<Action_matrix_matrix_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
// bench<Action_ata_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
bench<Action_ata_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
bench<Action_aat_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
|
||||
bench<Action_trisolve<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
|
@ -78,18 +78,18 @@ public :
|
||||
cible[i][j]=source[i][j];
|
||||
}
|
||||
|
||||
// static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N)
|
||||
// {
|
||||
// real somme;
|
||||
// for (int j=0;j<N;j++){
|
||||
// for (int i=0;i<N;i++){
|
||||
// somme=0.0;
|
||||
// for (int k=0;k<N;k++)
|
||||
// somme += A[i][k]*A[j][k];
|
||||
// X[j][i]=somme;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N)
|
||||
{
|
||||
real somme;
|
||||
for (int j=0;j<N;j++){
|
||||
for (int i=0;i<N;i++){
|
||||
somme=0.0;
|
||||
for (int k=0;k<N;k++)
|
||||
somme += A[i][k]*A[j][k];
|
||||
X[j][i]=somme;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N)
|
||||
{
|
||||
|
@ -80,35 +80,35 @@ public :
|
||||
}
|
||||
}
|
||||
|
||||
static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
|
||||
static EIGEN_DONT_INLINE void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
|
||||
X = (A*B);
|
||||
}
|
||||
|
||||
static inline void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
|
||||
static EIGEN_DONT_INLINE void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
|
||||
X = (trans(A)*trans(B));
|
||||
}
|
||||
|
||||
static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N){
|
||||
static EIGEN_DONT_INLINE void ata_product(const gene_matrix & A, gene_matrix & X, int N){
|
||||
X = (trans(A)*A);
|
||||
}
|
||||
|
||||
static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N){
|
||||
static EIGEN_DONT_INLINE void aat_product(const gene_matrix & A, gene_matrix & X, int N){
|
||||
X = (A*trans(A));
|
||||
}
|
||||
|
||||
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
|
||||
static EIGEN_DONT_INLINE void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
|
||||
X = (A*B);
|
||||
}
|
||||
|
||||
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
|
||||
static EIGEN_DONT_INLINE void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
|
||||
X = (trans(A)*B);
|
||||
}
|
||||
|
||||
static inline void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N){
|
||||
static EIGEN_DONT_INLINE void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N){
|
||||
Y += coef * X;
|
||||
}
|
||||
|
||||
static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
|
||||
static EIGEN_DONT_INLINE void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
|
||||
Y = a*X + b*Y;
|
||||
}
|
||||
|
||||
|
@ -30,9 +30,9 @@ int main()
|
||||
|
||||
bench<Action_matrix_vector_product<blaze_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||
bench<Action_atv_product<blaze_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||
// bench<Action_matrix_matrix_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
// bench<Action_ata_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
// bench<Action_aat_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
bench<Action_matrix_matrix_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
bench<Action_ata_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
bench<Action_aat_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -92,9 +92,11 @@ public :
|
||||
X.noalias() = A.transpose()*B.transpose();
|
||||
}
|
||||
|
||||
// static inline void ata_product(const gene_matrix & A, gene_matrix & X, int /*N*/){
|
||||
// X.noalias() = A.transpose()*A;
|
||||
// }
|
||||
static inline void ata_product(const gene_matrix & A, gene_matrix & X, int /*N*/){
|
||||
//X.noalias() = A.transpose()*A;
|
||||
X.template triangularView<Lower>().setZero();
|
||||
X.template selfadjointView<Lower>().rankUpdate(A.transpose());
|
||||
}
|
||||
|
||||
static inline void aat_product(const gene_matrix & A, gene_matrix & X, int /*N*/){
|
||||
X.template triangularView<Lower>().setZero();
|
||||
|
@ -25,7 +25,7 @@ BTL_MAIN;
|
||||
int main()
|
||||
{
|
||||
bench<Action_matrix_matrix_product<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
// bench<Action_ata_product<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
bench<Action_ata_product<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
bench<Action_aat_product<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
bench<Action_trmm<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
|
||||
|
@ -59,3 +59,6 @@ before-evaluators
|
||||
9174:d228bc282ac9 # merge
|
||||
9212:c90098affa7b # Fix performance regression introduced in changeset 8aad8f35c955
|
||||
9213:9f1c14e4694b # Fix performance regression in dgemm introduced by changeset 81d53c711775
|
||||
3.3-beta2
|
||||
3.3-rc1
|
||||
3.3.0
|
||||
|
68
bench/perf_monitoring/gemm/gemv.cpp
Normal file
68
bench/perf_monitoring/gemm/gemv.cpp
Normal file
@ -0,0 +1,68 @@
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <Eigen/Core>
|
||||
#include "../../BenchTimer.h"
|
||||
using namespace Eigen;
|
||||
|
||||
#ifndef SCALAR
|
||||
#error SCALAR must be defined
|
||||
#endif
|
||||
|
||||
typedef SCALAR Scalar;
|
||||
|
||||
typedef Matrix<Scalar,Dynamic,Dynamic> Mat;
|
||||
typedef Matrix<Scalar,Dynamic,1> Vec;
|
||||
|
||||
// Benchmarked kernel: adds the matrix-vector product A * B into C in place
// (C += A * B), writing through C.noalias().
// NOTE(review): noalias() presumably lets the product accumulate straight into
// C without an intermediate temporary - confirm against the Eigen docs.
// NOTE(review): EIGEN_DONT_INLINE presumably keeps this function out of line so
// the timing loop measures a real call - confirm against the macro definition.
EIGEN_DONT_INLINE
|
||||
void gemv(const Mat &A, const Vec &B, Vec &C)
|
||||
{
|
||||
C.noalias() += A * B;
|
||||
}
|
||||
|
||||
// Times gemv() on a randomly filled m x n matrix and returns a throughput figure.
//   m, n : rows and columns of A; B has n entries and C has m entries.
// Returns 1e-9 * rep * flops / t.best(), where flops = 2*m*n.
// NOTE(review): this reads as GFLOP/s if t.best() is the best time in seconds
// for `rep` calls - confirm in BenchTimer.h.
EIGEN_DONT_INLINE
|
||||
double bench(long m, long n)
|
||||
{
|
||||
Mat A(m,n);
|
||||
Vec B(n);
|
||||
Vec C(m);
|
||||
A.setRandom();
|
||||
B.setRandom();
|
||||
C.setZero();
|
||||
|
||||
BenchTimer t;
|
||||
|
||||
// `up` is a work budget, inversely proportional to sizeof(Scalar); rep and
// tries are derived from it below. tm0/tm1 bound the number of tries.
double up = 1e9*4/sizeof(Scalar);
|
||||
double tm0 = 4, tm1 = 10;
|
||||
// Complex scalars get a 4x smaller budget and a lower range of tries.
if(NumTraits<Scalar>::IsComplex)
|
||||
{
|
||||
up /= 4;
|
||||
tm0 = 2;
|
||||
tm1 = 4;
|
||||
}
|
||||
|
||||
// Operation count of one product, counted as 2*m*n.
double flops = 2. * m * n;
|
||||
// rep   = up/flops clamped to [1, 100], then truncated to long.
long rep = std::max(1., std::min(100., up/flops) );
|
||||
// tries = up/flops clamped to [tm0, tm1], then truncated to long.
long tries = std::max(tm0, std::min(tm1, up/flops) );
|
||||
|
||||
// NOTE(review): BENCH is defined outside this file (presumably in
// BenchTimer.h); it appears to run gemv `rep` times per try, for `tries`
// tries, recording timings in `t` - confirm.
BENCH(t, tries, rep, gemv(A,B,C));
|
||||
|
||||
return 1e-9 * rep * flops / t.best();
|
||||
}
|
||||
|
||||
// Reads whitespace-separated "m n" size pairs from gemv_settings.txt, runs
// bench(m, n) for each pair, and prints all collected results to stdout as a
// single row vector. argc and argv are not used. Always returns 0.
int main(int argc, char **argv)
|
||||
{
|
||||
std::vector<double> results;
|
||||
|
||||
// The stream is not checked for open failure: if the file cannot be read, the
// extraction below fails immediately and no result is collected.
std::ifstream settings("gemv_settings.txt");
|
||||
long m, n;
|
||||
// Loop ends at end of file or on the first token that does not parse as a long.
while(settings >> m >> n)
|
||||
{
|
||||
// Disabled debug trace; it mentions `k`, which is not declared in this function.
//std::cerr << " Testing " << m << " " << n << " " << k << std::endl;
|
||||
results.push_back( bench(m, n) );
|
||||
}
|
||||
|
||||
// View the std::vector's storage as an Eigen row vector for printing.
std::cout << RowVectorXd::Map(results.data(), results.size());
|
||||
|
||||
return 0;
|
||||
}
|
11
bench/perf_monitoring/gemm/gemv_settings.txt
Normal file
11
bench/perf_monitoring/gemm/gemv_settings.txt
Normal file
@ -0,0 +1,11 @@
|
||||
8 8
|
||||
9 9
|
||||
24 24
|
||||
239 239
|
||||
240 240
|
||||
2400 24
|
||||
24 2400
|
||||
24 240
|
||||
2400 2400
|
||||
4800 23
|
||||
23 4800
|
Loading…
Reference in New Issue
Block a user