From 61e58cf602849d4d71361fb7eaa25cb8b5a1196d Mon Sep 17 00:00:00 2001 From: Benoit Jacob <jacob.benoit.1@gmail.com> Date: Sat, 5 Apr 2008 14:15:02 +0000 Subject: [PATCH] fixes as discussed with Gael on IRC. Mainly, in Fuzzy.h, and Dot.h, use ei_xpr_copy to evaluate args when needed. Had to introduce an ugly trick with ei_unref as when the XprCopy type is a reference one can't directly access member typedefs such as Scalar. --- Eigen/src/Core/Dot.h | 27 ++++++++++++++++----------- Eigen/src/Core/ForwardDeclarations.h | 5 ++++- Eigen/src/Core/Fuzzy.h | 15 ++++++++++----- Eigen/src/Core/MatrixBase.h | 2 +- Eigen/src/Core/OperatorEquals.h | 8 ++------ Eigen/src/Core/Product.h | 4 +--- Eigen/src/Core/Util.h | 4 +--- 7 files changed, 35 insertions(+), 30 deletions(-) diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index 248fbca79..ba45d5192 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -72,22 +72,24 @@ template<typename OtherDerived> typename ei_traits<Derived>::Scalar MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const { + typename Derived::XprCopy xprCopy(derived()); + typename OtherDerived::XprCopy otherXprCopy(other.derived()); + ei_assert(IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime - && size() == other.size()); + && xprCopy.size() == otherXprCopy.size()); Scalar res; - if(EIGEN_UNROLLED_LOOPS - && SizeAtCompileTime != Dynamic - && SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT) + if(SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT) ei_dot_unroller<SizeAtCompileTime-1, SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT ? SizeAtCompileTime : Dynamic, - Derived, MatrixBase<OtherDerived> > - ::run(derived(), other, res); + typename ei_unref<typename Derived::XprCopy>::type, + typename ei_unref<typename OtherDerived::XprCopy>::type> + ::run(xprCopy, otherXprCopy, res); else { - res = coeff(0) * ei_conj(other.coeff(0)); + res = xprCopy.coeff(0) * ei_conj(otherXprCopy.coeff(0)); for(int i = 1; i < size(); i++) - res += coeff(i)* ei_conj(other.coeff(i)); + res += xprCopy.coeff(i)* ei_conj(otherXprCopy.coeff(i)); } return res; } @@ -140,7 +142,9 @@ template<typename OtherDerived> bool MatrixBase<Derived>::isOrtho (const MatrixBase<OtherDerived>& other, RealScalar prec) const { - return ei_abs2(dot(other)) <= prec * prec * norm2() * other.norm2(); + typename Derived::XprCopy xprCopy(derived()); + typename OtherDerived::XprCopy otherXprCopy(other.derived()); + return ei_abs2(xprCopy.dot(otherXprCopy)) <= prec * prec * xprCopy.norm2() * otherXprCopy.norm2(); } /** \returns true if *this is approximately an unitary matrix, @@ -157,12 +161,13 @@ bool MatrixBase<Derived>::isOrtho template<typename Derived> bool MatrixBase<Derived>::isOrtho(RealScalar prec) const { + typename Derived::XprCopy xprCopy(derived()); for(int i = 0; i < cols(); i++) { - if(!ei_isApprox(col(i).norm2(), static_cast<Scalar>(1), prec)) + if(!ei_isApprox(xprCopy.col(i).norm2(), static_cast<Scalar>(1), prec)) return false; for(int j = 0; j < i; j++) - if(!ei_isMuchSmallerThan(col(i).dot(col(j)), static_cast<Scalar>(1), prec)) + if(!ei_isMuchSmallerThan(xprCopy.col(i).dot(xprCopy.col(j)), static_cast<Scalar>(1), prec)) return false; } return true; diff --git a/Eigen/src/Core/ForwardDeclarations.h b/Eigen/src/Core/ForwardDeclarations.h index bf82a0c0a..36519c7da 100644 --- a/Eigen/src/Core/ForwardDeclarations.h +++ b/Eigen/src/Core/ForwardDeclarations.h @@ -80,6 +80,9 @@ template<typename T> struct ei_eval ei_traits<T>::MaxColsAtCompileTime> type; }; +template<typename T> struct ei_unref { typedef T type; }; +template<typename T> struct ei_unref<T&> { typedef T type; }; + template<typename T> struct ei_xpr_copy { typedef typename ei_meta_if< ei_traits<T>::Flags & EvalBeforeNestingBit, @@ -95,7 +98,7 @@ template<typename T, int n=1> struct ei_eval_if_needed_before_nesting { // FIXME should we consider the additional store as well as the creation cost of the temporary ? enum { eval = T::Flags & EvalBeforeNestingBit - || n * NumTraits<typename ei_traits<T>::Scalar>::ReadCost < (n-1) * T::CoeffReadCost }; + || (n+1) * NumTraits<typename ei_traits<T>::Scalar>::ReadCost < (n-1) * T::CoeffReadCost }; typedef typename ei_meta_if<eval, typename ei_eval<T>::type, T>::ret XprType; typedef typename ei_meta_if<eval, typename ei_eval<T>::type, typename T::XprCopy>::ret CopyType; }; diff --git a/Eigen/src/Core/Fuzzy.h b/Eigen/src/Core/Fuzzy.h index 4ebe9d2a7..5dd0265ba 100644 --- a/Eigen/src/Core/Fuzzy.h +++ b/Eigen/src/Core/Fuzzy.h @@ -44,7 +44,7 @@ template<typename Derived> template<typename OtherDerived> bool MatrixBase<Derived>::isApprox( - const OtherDerived& other, + const MatrixBase<OtherDerived>& other, typename NumTraits<Scalar>::Real prec ) const { @@ -55,9 +55,11 @@ bool MatrixBase<Derived>::isApprox( } else { + typename Derived::XprCopy xprCopy(derived()); + typename OtherDerived::XprCopy otherXprCopy(other.derived()); for(int i = 0; i < cols(); i++) - if((col(i) - other.col(i)).norm2() - > std::min(col(i).norm2(), other.col(i).norm2()) * prec * prec) + if((xprCopy.col(i) - otherXprCopy.col(i)).norm2() + > std::min(xprCopy.col(i).norm2(), otherXprCopy.col(i).norm2()) * prec * prec) return false; return true; } @@ -85,8 +87,9 @@ bool MatrixBase<Derived>::isMuchSmallerThan( } else { + typename Derived::XprCopy xprCopy(*this); for(int i = 0; i < cols(); i++) - if(col(i).norm2() > ei_abs2(other * prec)) + if(xprCopy.col(i).norm2() > ei_abs2(other * prec)) return false; return true; } @@ -116,8 +119,10 @@ bool MatrixBase<Derived>::isMuchSmallerThan( } else { + typename Derived::XprCopy xprCopy(*this); + typename OtherDerived::XprCopy otherXprCopy(other); for(int i = 0; i < cols(); i++) - if(col(i).norm2() > other.col(i).norm2() * prec * prec) + if(xprCopy.col(i).norm2() > otherXprCopy.col(i).norm2() * prec * prec) return false; return true; } diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 7720433d0..2bc54701d 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -340,7 +340,7 @@ template<typename Derived> class MatrixBase /// \name Comparison and diagnostic //@{ template<typename OtherDerived> - bool isApprox(const OtherDerived& other, + bool isApprox(const MatrixBase<OtherDerived>& other, RealScalar prec = precision<Scalar>()) const; bool isMuchSmallerThan(const RealScalar& other, RealScalar prec = precision<Scalar>()) const; diff --git a/Eigen/src/Core/OperatorEquals.h b/Eigen/src/Core/OperatorEquals.h index 0bed89b7e..c93a9329f 100644 --- a/Eigen/src/Core/OperatorEquals.h +++ b/Eigen/src/Core/OperatorEquals.h @@ -106,9 +106,7 @@ Derived& MatrixBase<Derived> // copying a vector expression into a vector { ei_assert(size() == other.size()); - if(EIGEN_UNROLLED_LOOPS - && SizeAtCompileTime != Dynamic - && SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT) + if(SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT) ei_vector_operator_equals_unroller <Derived, OtherDerived, SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT ? SizeAtCompileTime : Dynamic @@ -120,9 +118,7 @@ Derived& MatrixBase<Derived> else // copying a matrix expression into a matrix { ei_assert(rows() == other.rows() && cols() == other.cols()); - if(EIGEN_UNROLLED_LOOPS - && SizeAtCompileTime != Dynamic - && SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT) + if(SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT) { ei_matrix_operator_equals_unroller <Derived, OtherDerived, diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index 21a4dd6f9..608de0b9f 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -133,9 +133,7 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm const Scalar _coeff(int row, int col) const { Scalar res; - if(EIGEN_UNROLLED_LOOPS - && Lhs::ColsAtCompileTime != Dynamic - && Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT) + if(Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT) ei_product_unroller<Lhs::ColsAtCompileTime-1, Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT ? Lhs::ColsAtCompileTime : Dynamic, diff --git a/Eigen/src/Core/Util.h b/Eigen/src/Core/Util.h index 6ac93b05d..10fdacb8b 100644 --- a/Eigen/src/Core/Util.h +++ b/Eigen/src/Core/Util.h @@ -26,9 +26,7 @@ #define EIGEN_UTIL_H #ifdef EIGEN_DONT_USE_UNROLLED_LOOPS -#define EIGEN_UNROLLED_LOOPS (false) -#else -#define EIGEN_UNROLLED_LOOPS (true) +#define EIGEN_UNROLLING_LIMIT 0 #endif /** Defines the maximal loop size to enable meta unrolling of loops */