From c2ee454df447f3fc3be505c4c89ecf94140345c3 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Mon, 19 Jul 2010 16:49:09 +0200
Subject: [PATCH] * fix compilation of mixed scalar product * optimize mixed
 scalar products

---
 Eigen/src/Core/Functors.h                 | 24 ++++++++++++++---------
 Eigen/src/Core/arch/SSE/PacketMath.h      |  2 --
 Eigen/src/Core/util/ForwardDeclarations.h |  6 +++---
 Eigen/src/Core/util/Macros.h              |  2 --
 Eigen/src/Core/util/Meta.h                |  8 ++++----
 5 files changed, 22 insertions(+), 20 deletions(-)
diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h
index 3f059d233..cde91ff97 100644
--- a/Eigen/src/Core/Functors.h
+++ b/Eigen/src/Core/Functors.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -55,21 +55,25 @@ struct ei_functor_traits<ei_scalar_sum_op<Scalar> > {
   *
   * \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux()
   */
-template<typename Scalar> struct ei_scalar_product_op {
+template<typename LhsScalar,typename RhsScalar> struct ei_scalar_product_op {
+  enum {
+    Vectorizable = ei_is_same_type<LhsScalar,RhsScalar>::ret && ei_packet_traits<LhsScalar>::HasMul && ei_packet_traits<RhsScalar>::HasMul
+  };
+  typedef typename ei_scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
   EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_product_op)
-  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a * b; }
+  EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
   template<typename Packet>
   EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
   { return ei_pmul(a,b); }
   template<typename Packet>
-  EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
+  EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
   { return ei_predux_mul(a); }
 };
-template<typename Scalar>
-struct ei_functor_traits<ei_scalar_product_op<Scalar> > {
+template<typename LhsScalar,typename RhsScalar>
+struct ei_functor_traits<ei_scalar_product_op<LhsScalar,RhsScalar> > {
   enum {
-    Cost = NumTraits<Scalar>::MulCost,
-    PacketAccess = ei_packet_traits<Scalar>::HasMul
+    Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost)/2, // rough estimate!
+    PacketAccess = ei_scalar_product_op<LhsScalar,RhsScalar>::Vectorizable
   };
 };
 
@@ -581,13 +585,15 @@ template <typename Scalar, bool RandomAccess> struct ei_linspaced_op
 // all functors allow linear access, except ei_scalar_identity_op. So we fix here a quick meta
 // to indicate whether a functor allows linear access, just always answering 'yes' except for
 // ei_scalar_identity_op.
+// FIXME move this to ei_functor_traits adding a ei_functor_default
 template<typename Functor> struct ei_functor_has_linear_access { enum { ret = 1 }; };
 template<typename Scalar> struct ei_functor_has_linear_access<ei_scalar_identity_op<Scalar> > { enum { ret = 0 }; };
 
 // in CwiseBinaryOp, we require the Lhs and Rhs to have the same scalar type, except for multiplication
 // where we only require them to have the same _real_ scalar type so one may multiply, say, float by complex<float>.
+// FIXME move this to ei_functor_traits adding a ei_functor_default
 template<typename Functor> struct ei_functor_allows_mixing_real_and_complex { enum { ret = 0 }; };
-template<typename Scalar> struct ei_functor_allows_mixing_real_and_complex<ei_scalar_product_op<Scalar> > { enum { ret = 1 }; };
+template<typename LhsScalar,typename RhsScalar> struct ei_functor_allows_mixing_real_and_complex<ei_scalar_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
 
 
 /** \internal
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index c2459e99f..8c441810c 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -45,8 +45,6 @@ template<> struct ei_is_arithmetic<__m128d> { enum { ret = true }; };
 
 #define ei_vec2d_swizzle1(v,p,q) \
   (_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), ((q*2+1)<<6|(q*2)<<4|(p*2+1)<<2|(p*2)))))
-// #define ei_vec2d_swizzle1(v,p,q) \
-  (_mm_shuffle_pd(v,v, (q)<<1|(p) ))
   
 #define ei_vec4f_swizzle2(a,b,p,q,r,s) \
   (_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p))))
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index 310ffa4b3..72223c9e0 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -108,11 +108,10 @@ struct ProductReturnType;
 
 // Provides scalar/packet-wise product and product with accumulation
 // with optional conjugation of the arguments.
-template<typename LhsScalar, typename RhsScalar, bool ConjLhs, bool ConjRhs> struct ei_conj_helper;
+template<typename LhsScalar, typename RhsScalar, bool ConjLhs=false, bool ConjRhs=false> struct ei_conj_helper;
 
 template<typename Scalar> struct ei_scalar_sum_op;
 template<typename Scalar> struct ei_scalar_difference_op;
-template<typename Scalar> struct ei_scalar_product_op;
 template<typename Scalar> struct ei_scalar_conj_product_op;
 template<typename Scalar> struct ei_scalar_quotient_op;
 template<typename Scalar> struct ei_scalar_opposite_op;
@@ -140,7 +139,8 @@ template<typename Scalar> struct ei_scalar_add_op;
 template<typename Scalar> struct ei_scalar_constant_op;
 template<typename Scalar> struct ei_scalar_identity_op;
 
-template<typename Scalar1,typename Scalar2> struct ei_scalar_multiple2_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct ei_scalar_product_op;
+template<typename LhsScalar,typename RhsScalar> struct ei_scalar_multiple2_op;
 
 struct IOFormat;
 
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 987020e52..7600cc3e7 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -359,10 +359,8 @@
 #define EIGEN_CWISE_PRODUCT_RETURN_TYPE(LHS,RHS) \
     CwiseBinaryOp< \
       ei_scalar_product_op< \
-        typename ei_scalar_product_traits< \
           typename ei_traits<LHS>::Scalar, \
           typename ei_traits<RHS>::Scalar \
-        >::ReturnType \
       >, \
       LHS, \
       RHS \
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index b01ceafb2..3d28680b6 100644
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -205,10 +205,10 @@ template<typename T> struct ei_scalar_product_traits<std::complex<T>, T>
 };
 
 // FIXME quick workaround around current limitation of ei_result_of
-template<typename Scalar, typename ArgType0, typename ArgType1>
-struct ei_result_of<ei_scalar_product_op<Scalar>(ArgType0,ArgType1)> {
-typedef typename ei_scalar_product_traits<typename ei_cleantype<ArgType0>::type, typename ei_cleantype<ArgType1>::type>::ReturnType type;
-};
+// template<typename Scalar, typename ArgType0, typename ArgType1>
+// struct ei_result_of<ei_scalar_product_op<Scalar>(ArgType0,ArgType1)> {
+// typedef typename ei_scalar_product_traits<typename ei_cleantype<ArgType0>::type, typename ei_cleantype<ArgType1>::type>::ReturnType type;
+// };
 
 template<typename T> struct ei_is_diagonal
 { enum { ret = false }; };