* fix compilation of mixed scalar product

* optimize mixed scalar products
2025-01-24 14:45:14 +08:00 · 2010-07-19 16:49:09 +02:00 · 2010-07-19 16:49:09 +02:00 · c2ee454df4
commit c2ee454df4
parent 6e157dd7c6
5 changed files with 22 additions and 20 deletions
--- a/Eigen/src/Core/Functors.h
+++ b/Eigen/src/Core/Functors.h
@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@ -55,21 +55,25 @@ struct ei_functor_traits<ei_scalar_sum_op<Scalar> > {
  *
  * \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux()
  */
-template<typename Scalar> struct ei_scalar_product_op {
+template<typename LhsScalar,typename RhsScalar> struct ei_scalar_product_op {
+  enum {
+    Vectorizable = ei_is_same_type<LhsScalar,RhsScalar>::ret && ei_packet_traits<LhsScalar>::HasMul && ei_packet_traits<RhsScalar>::HasMul
+  };
+  typedef typename ei_scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
  EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_product_op)
-  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a * b; }
+  EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
  template<typename Packet>
  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
  { return ei_pmul(a,b); }
  template<typename Packet>
-  EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
+  EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
  { return ei_predux_mul(a); }
 };
-template<typename Scalar>
-struct ei_functor_traits<ei_scalar_product_op<Scalar> > {
+template<typename LhsScalar,typename RhsScalar>
+struct ei_functor_traits<ei_scalar_product_op<LhsScalar,RhsScalar> > {
  enum {
-    Cost = NumTraits<Scalar>::MulCost,
-    PacketAccess = ei_packet_traits<Scalar>::HasMul
+    Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost)/2, // rough estimate!
+    PacketAccess = ei_scalar_product_op<LhsScalar,RhsScalar>::Vectorizable
  };
 };

@ -581,13 +585,15 @@ template <typename Scalar, bool RandomAccess> struct ei_linspaced_op
 // all functors allow linear access, except ei_scalar_identity_op. So we fix here a quick meta
 // to indicate whether a functor allows linear access, just always answering 'yes' except for
 // ei_scalar_identity_op.
+// FIXME move this to ei_functor_traits by adding an ei_functor_default
 template<typename Functor> struct ei_functor_has_linear_access { enum { ret = 1 }; };
 template<typename Scalar> struct ei_functor_has_linear_access<ei_scalar_identity_op<Scalar> > { enum { ret = 0 }; };

 // in CwiseBinaryOp, we require the Lhs and Rhs to have the same scalar type, except for multiplication
 // where we only require them to have the same _real_ scalar type so one may multiply, say, float by complex<float>.
+// FIXME move this to ei_functor_traits by adding an ei_functor_default
 template<typename Functor> struct ei_functor_allows_mixing_real_and_complex { enum { ret = 0 }; };
-template<typename Scalar> struct ei_functor_allows_mixing_real_and_complex<ei_scalar_product_op<Scalar> > { enum { ret = 1 }; };
+template<typename LhsScalar,typename RhsScalar> struct ei_functor_allows_mixing_real_and_complex<ei_scalar_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };


 /** \internal
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@ -45,8 +45,6 @@ template<> struct ei_is_arithmetic<__m128d> { enum { ret = true }; };

 #define ei_vec2d_swizzle1(v,p,q) \
  (_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), ((q*2+1)<<6|(q*2)<<4|(p*2+1)<<2|(p*2)))))
-// #define ei_vec2d_swizzle1(v,p,q) \
-  (_mm_shuffle_pd(v,v, (q)<<1|(p) ))
  
 #define ei_vec4f_swizzle2(a,b,p,q,r,s) \
  (_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p))))
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@ -108,11 +108,10 @@ struct ProductReturnType;

 // Provides scalar/packet-wise product and product with accumulation
 // with optional conjugation of the arguments.
-template<typename LhsScalar, typename RhsScalar, bool ConjLhs, bool ConjRhs> struct ei_conj_helper;
+template<typename LhsScalar, typename RhsScalar, bool ConjLhs=false, bool ConjRhs=false> struct ei_conj_helper;

 template<typename Scalar> struct ei_scalar_sum_op;
 template<typename Scalar> struct ei_scalar_difference_op;
-template<typename Scalar> struct ei_scalar_product_op;
 template<typename Scalar> struct ei_scalar_conj_product_op;
 template<typename Scalar> struct ei_scalar_quotient_op;
 template<typename Scalar> struct ei_scalar_opposite_op;
@ -140,7 +139,8 @@ template<typename Scalar> struct ei_scalar_add_op;
 template<typename Scalar> struct ei_scalar_constant_op;
 template<typename Scalar> struct ei_scalar_identity_op;

-template<typename Scalar1,typename Scalar2> struct ei_scalar_multiple2_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct ei_scalar_product_op;
+template<typename LhsScalar,typename RhsScalar> struct ei_scalar_multiple2_op;

 struct IOFormat;

--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@ -359,10 +359,8 @@
 #define EIGEN_CWISE_PRODUCT_RETURN_TYPE(LHS,RHS) \
    CwiseBinaryOp< \
      ei_scalar_product_op< \
-        typename ei_scalar_product_traits< \
          typename ei_traits<LHS>::Scalar, \
          typename ei_traits<RHS>::Scalar \
-        >::ReturnType \
      >, \
      LHS, \
      RHS \
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@ -205,10 +205,10 @@ template<typename T> struct ei_scalar_product_traits<std::complex<T>, T>
 };

 // FIXME quick workaround around current limitation of ei_result_of
-template<typename Scalar, typename ArgType0, typename ArgType1>
-struct ei_result_of<ei_scalar_product_op<Scalar>(ArgType0,ArgType1)> {
-typedef typename ei_scalar_product_traits<typename ei_cleantype<ArgType0>::type, typename ei_cleantype<ArgType1>::type>::ReturnType type;
-};
+// template<typename Scalar, typename ArgType0, typename ArgType1>
+// struct ei_result_of<ei_scalar_product_op<Scalar>(ArgType0,ArgType1)> {
+// typedef typename ei_scalar_product_traits<typename ei_cleantype<ArgType0>::type, typename ei_cleantype<ArgType1>::type>::ReturnType type;
+// };

 template<typename T> struct ei_is_diagonal
 { enum { ret = false }; };