diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 93fb80f12..767b67f0b 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -24,7 +24,7 @@ namespace internal { // copy_using_evaluator_traits is based on assign_traits -template +template struct copy_using_evaluator_traits { typedef typename DstEvaluator::XprType Dst; @@ -51,13 +51,15 @@ private: InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime) : int(Dst::MaxRowsAtCompileTime), + RestrictedInnerSize = InnerSize == -1 ? MaxPacketSize : InnerSize, + RestrictedLinearSize = Dst::SizeAtCompileTime == -1 ? MaxPacketSize : Dst::SizeAtCompileTime, OuterStride = int(outer_stride_at_compile_time::ret), MaxSizeAtCompileTime = Dst::SizeAtCompileTime }; // TODO distinguish between linear traversal and inner-traversals - typedef typename find_best_packet::type LinearPacketType; - typedef typename find_best_packet::type InnerPacketType; + typedef typename find_best_packet::type LinearPacketType; + typedef typename find_best_packet::type InnerPacketType; enum { LinearPacketSize = unpacket_traits::size, @@ -711,7 +713,8 @@ protected: public: typedef typename Base::Scalar Scalar; typedef typename Base::DstXprType DstXprType; - typedef typename find_best_packet::type PacketType; + typedef copy_using_evaluator_traits AssignmentTraits; + typedef typename AssignmentTraits::PacketType PacketType; EIGEN_DEVICE_FUNC restricted_packet_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr) : Base(dst, src, func, dstExpr) diff --git a/test/evaluators.cpp b/test/evaluators.cpp index f4fdaf053..ec000f1eb 100644 --- a/test/evaluators.cpp +++ b/test/evaluators.cpp @@ -90,6 +90,12 @@ namespace Eigen { { call_assignment_no_alias(dst.expression(), src, func); } + + template class StorageBase, typename Src, typename Func> + EIGEN_DEVICE_FUNC void call_restricted_packet_assignment(const NoAlias& dst, const Src& src, const Func& func) + { + call_restricted_packet_assignment_no_alias(dst.expression(), src, func); + } } } @@ -496,4 +502,23 @@ EIGEN_DECLARE_TEST(evaluators) VERIFY_IS_EQUAL( get_cost(a*(a+b)), 1); VERIFY_IS_EQUAL( get_cost(a.lazyProduct(a+b)), 15); } + + { + // test restricted_packet_assignment with an unaligned destination + const size_t M = 2; + const size_t K = 2; + const size_t N = 5; + float *destMem = new float[(M*N) + 1]; + float *dest = (internal::UIntPtr(destMem)%EIGEN_MAX_ALIGN_BYTES) == 0 ? destMem+1 : destMem; + + const Matrix a = Matrix::Random(M, K); + const Matrix b = Matrix::Random(K, N); + + Map > z(dest, M, N);; + Product, Matrix, LazyProduct> tmp(a,b); + internal::call_restricted_packet_assignment(z.noalias(), tmp.derived(), internal::assign_op()); + + VERIFY_IS_APPROX(z, a*b); + delete[] destMem; + } }