mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-01-30 17:40:05 +08:00
Pulled latest updates from trunk
This commit is contained in:
commit
cb26784d07
@ -81,10 +81,10 @@ private:
|
||||
MayInnerVectorize = MightVectorize
|
||||
&& int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
|
||||
&& int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
|
||||
&& int(JointAlignment)>=int(InnerRequiredAlignment),
|
||||
&& (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
|
||||
MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
|
||||
MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess
|
||||
&& ((int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
|
||||
&& (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
|
||||
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
|
||||
so it's only good for large enough sizes. */
|
||||
MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess)
|
||||
@ -130,8 +130,9 @@ public:
|
||||
: int(NoUnrolling)
|
||||
)
|
||||
: int(Traversal) == int(LinearVectorizedTraversal)
|
||||
? ( bool(MayUnrollCompletely) && (int(DstAlignment)>=int(LinearRequiredAlignment)) ? int(CompleteUnrolling)
|
||||
: int(NoUnrolling) )
|
||||
? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
|
||||
? int(CompleteUnrolling)
|
||||
: int(NoUnrolling) )
|
||||
: int(Traversal) == int(LinearTraversal)
|
||||
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
|
||||
: int(NoUnrolling) )
|
||||
@ -156,6 +157,7 @@ public:
|
||||
EIGEN_DEBUG_VAR(InnerMaxSize)
|
||||
EIGEN_DEBUG_VAR(LinearPacketSize)
|
||||
EIGEN_DEBUG_VAR(InnerPacketSize)
|
||||
EIGEN_DEBUG_VAR(ActualPacketSize)
|
||||
EIGEN_DEBUG_VAR(StorageOrdersAgree)
|
||||
EIGEN_DEBUG_VAR(MightVectorize)
|
||||
EIGEN_DEBUG_VAR(MayLinearize)
|
||||
@ -256,13 +258,13 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
|
||||
enum {
|
||||
outer = Index / DstXprType::InnerSizeAtCompileTime,
|
||||
inner = Index % DstXprType::InnerSizeAtCompileTime,
|
||||
JointAlignment = Kernel::AssignmentTraits::JointAlignment,
|
||||
DefaultAlignment = unpacket_traits<PacketType>::alignment
|
||||
SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
|
||||
DstAlignment = Kernel::AssignmentTraits::DstAlignment
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
kernel.template assignPacketByOuterInner<DefaultAlignment, JointAlignment, PacketType>(outer, inner);
|
||||
kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
|
||||
enum { NextIndex = Index + unpacket_traits<PacketType>::size };
|
||||
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
|
||||
}
|
||||
@ -279,11 +281,12 @@ struct copy_using_evaluator_innervec_InnerUnrolling
|
||||
{
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
enum {
|
||||
DefaultAlignment = unpacket_traits<PacketType>::alignment
|
||||
SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
|
||||
DstAlignment = Kernel::AssignmentTraits::DstAlignment
|
||||
};
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
|
||||
{
|
||||
kernel.template assignPacketByOuterInner<DefaultAlignment, DefaultAlignment, PacketType>(outer, Index_);
|
||||
kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
|
||||
enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
|
||||
copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer);
|
||||
}
|
||||
@ -438,7 +441,8 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
enum {
|
||||
DefaultAlignment = unpacket_traits<PacketType>::alignment
|
||||
SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
|
||||
DstAlignment = Kernel::AssignmentTraits::DstAlignment
|
||||
};
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
@ -447,7 +451,7 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
|
||||
const Index packetSize = unpacket_traits<PacketType>::size;
|
||||
for(Index outer = 0; outer < outerSize; ++outer)
|
||||
for(Index inner = 0; inner < innerSize; inner+=packetSize)
|
||||
kernel.template assignPacketByOuterInner<DefaultAlignment, DefaultAlignment, PacketType>(outer, inner);
|
||||
kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -755,9 +755,7 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
|
||||
OuterStrideAtCompileTime = HasSameStorageOrderAsArgType
|
||||
? int(outer_stride_at_compile_time<ArgType>::ret)
|
||||
: int(inner_stride_at_compile_time<ArgType>::ret),
|
||||
MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)
|
||||
&& (InnerStrideAtCompileTime == 1)
|
||||
? PacketAccessBit : 0,
|
||||
MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0,
|
||||
|
||||
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
|
||||
FlagsRowMajorBit = XprType::Flags&RowMajorBit,
|
||||
|
@ -27,7 +27,7 @@ private:
|
||||
default_alignment = compute_default_alignment<_Scalar,max_size>::value,
|
||||
actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
|
||||
required_alignment = unpacket_traits<PacketScalar>::alignment,
|
||||
packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0
|
||||
packet_access_bit = (packet_traits<_Scalar>::Vectorizable && (EIGEN_UNALIGNED_VECTORIZE || (actual_alignment>=required_alignment))) ? PacketAccessBit : 0
|
||||
};
|
||||
|
||||
public:
|
||||
|
@ -756,6 +756,11 @@ namespace Eigen {
|
||||
#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
|
||||
#endif
|
||||
|
||||
|
||||
// Controls whether expressions may be vectorized through unaligned stores.
// Defaults to 1 (enabled) unless the user has already defined the macro;
// define it to 0 before this point to restrict vectorization to aligned
// destinations.
#ifndef EIGEN_UNALIGNED_VECTORIZE
|
||||
#define EIGEN_UNALIGNED_VECTORIZE 1
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
|
||||
|
@ -261,7 +261,7 @@ use it unless you are sure of what you are doing, i.e., you have rigourosly meas
|
||||
|
||||
The EIGEN_ALIGN_128 macro has been renamed to EIGEN_ALIGN16. Don't be surprised, it's just that we switched to counting in bytes ;-)
|
||||
|
||||
The EIGEN_DONT_ALIGN option still exists in Eigen 3, but it has a new cousin: EIGEN_DONT_ALIGN_STATICALLY. It allows to get rid of all static alignment issues while keeping alignment of dynamic-size heap-allocated arrays, thus keeping vectorization for dynamic-size objects.
|
||||
The \link TopicPreprocessorDirectivesPerformance EIGEN_DONT_ALIGN \endlink option still exists in Eigen 3, but it has a new cousin: \link TopicPreprocessorDirectivesPerformance EIGEN_DONT_ALIGN_STATICALLY \endlink. It gets rid of all static alignment issues while keeping alignment of dynamic-size heap-allocated arrays. Vectorization of statically allocated arrays is still preserved (unless you define \link TopicPreprocessorDirectivesPerformance EIGEN_UNALIGNED_VECTORIZE \endlink =0), at the cost of unaligned memory stores.
|
||||
|
||||
\section AlignedMap Aligned Map objects
|
||||
|
||||
|
@ -108,6 +108,9 @@ run time. However, these assertions do cost time and can thus be turned off.
|
||||
See \ref TopicMultiThreading for details.
|
||||
- \b EIGEN_DONT_VECTORIZE - disables explicit vectorization when defined. Not defined by default, unless
|
||||
alignment is disabled by %Eigen's platform test or the user defining \c EIGEN_DONT_ALIGN.
|
||||
- \b EIGEN_UNALIGNED_VECTORIZE - disables/enables vectorization with unaligned stores. Default is 1 (enabled).
|
||||
If set to 0 (disabled), then expressions for which the destination cannot be aligned are not vectorized (e.g., unaligned
|
||||
small fixed size vectors or matrices)
|
||||
- \b EIGEN_FAST_MATH - enables some optimizations which might affect the accuracy of the result. This currently
|
||||
enables the SSE vectorization of sin() and cos(), and speeds up sqrt() for single precision. Defined to 1 by default.
|
||||
Define it to 0 to disable.
|
||||
|
@ -92,27 +92,28 @@ Note that here, Eigen::Quaternionf is only used as an example, more generally th
|
||||
|
||||
\section explanation General explanation of this assertion
|
||||
|
||||
\ref TopicFixedSizeVectorizable "fixed-size vectorizable Eigen objects" must absolutely be created at 16-byte-aligned locations, otherwise SIMD instructions adressing them will crash.
|
||||
\ref TopicFixedSizeVectorizable "fixed-size vectorizable Eigen objects" must absolutely be created at 16-byte-aligned locations, otherwise SIMD instructions addressing them will crash.
|
||||
|
||||
Eigen normally takes care of these alignment issues for you, by setting an alignment attribute on them and by overloading their "operator new".
|
||||
|
||||
However there are a few corner cases where these alignment settings get overridden: they are the possible causes for this assertion.
|
||||
|
||||
\section getrid I don't care about vectorization, how do I get rid of that stuff?
|
||||
\section getrid I don't care about optimal vectorization, how do I get rid of that stuff?
|
||||
|
||||
Two possibilities:
|
||||
Three possibilities:
|
||||
<ul>
|
||||
<li>Define EIGEN_DONT_ALIGN_STATICALLY. That disables all 128-bit static alignment code, while keeping 128-bit heap alignment. This has the effect of
|
||||
disabling vectorization for fixed-size objects (like Matrix4d) while keeping vectorization of dynamic-size objects
|
||||
(like MatrixXd). But do note that this breaks ABI compatibility with the default behavior of 128-bit static alignment.</li>
|
||||
<li>Or define both EIGEN_DONT_VECTORIZE and EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT. This keeps the
|
||||
128-bit alignment code and thus preserves ABI compatibility, but completely disables vectorization.</li>
|
||||
<li>Use the \c DontAlign option on the Matrix, Array, Quaternion, etc. objects that give you trouble. This way Eigen won't try to align them, and thus won't assume any special alignment. On the downside, you will pay the cost of unaligned loads/stores for them, but on modern CPUs, the overhead is either null or marginal. See \link StructHavingEigenMembers_othersolutions here \endlink for an example.</li>
|
||||
<li>Define \link TopicPreprocessorDirectivesPerformance EIGEN_DONT_ALIGN_STATICALLY \endlink. That disables all 16-byte (and above) static alignment code, while keeping 16-byte (or above) heap alignment. This has the effect of
|
||||
vectorizing fixed-size objects (like Matrix4d) through unaligned stores (as controlled by \link TopicPreprocessorDirectivesPerformance EIGEN_UNALIGNED_VECTORIZE \endlink), while keeping unchanged the vectorization of dynamic-size objects
|
||||
(like MatrixXd). But do note that this breaks ABI compatibility with the default behavior of static alignment.</li>
|
||||
<li>Or define both \link TopicPreprocessorDirectivesPerformance EIGEN_DONT_VECTORIZE \endlink and EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT. This keeps the
|
||||
16-byte alignment code and thus preserves ABI compatibility, but completely disables vectorization.</li>
|
||||
</ul>
|
||||
|
||||
If you want to know why defining EIGEN_DONT_VECTORIZE does not by itself disable 128-bit alignment and the assertion, here's the explanation:
|
||||
If you want to know why defining EIGEN_DONT_VECTORIZE does not by itself disable 16-byte alignment and the assertion, here's the explanation:
|
||||
|
||||
It doesn't disable the assertion, because otherwise code that runs fine without vectorization would suddenly crash when enabling vectorization.
|
||||
It doesn't disable 128bit alignment, because that would mean that vectorized and non-vectorized code are not mutually ABI-compatible. This ABI compatibility is very important, even for people who develop only an in-house application, as for instance one may want to have in the same application a vectorized path and a non-vectorized path.
|
||||
It doesn't disable 16-byte alignment, because that would mean that vectorized and non-vectorized code are not mutually ABI-compatible. This ABI compatibility is very important, even for people who develop only an in-house application, as for instance one may want to have in the same application a vectorized path and a non-vectorized path.
|
||||
|
||||
*/
|
||||
|
||||
|
@ -18,6 +18,10 @@ Matrix<T,2,1> angleToVec(T a)
|
||||
return Matrix<T,2,1>(std::cos(a), std::sin(a));
|
||||
}
|
||||
|
||||
// Works around a bug in clang/llvm code generation: the value behind x is
// passed through a non-inlined call that rewrites it with its own contents,
// so the caller's optimizer must treat the object as possibly modified.
//
// The function is a strict identity on the pointed-to memory. The bytes are
// accessed through a volatile unsigned char pointer so that
//  - the read/write pairs cannot be elided, and
//  - no strict-aliasing rule is violated, whatever the dynamic type of *x is.
// (The previous int-based masking used 0xFFFF000 instead of 0xFFFF0000 and
// therefore cleared the four most significant bits of the first int.)
//
// x must point to at least sizeof(int) readable and writable bytes.
EIGEN_DONT_INLINE
void dont_over_optimize(void* x)
{
  volatile unsigned char* bytes = static_cast<volatile unsigned char*>(x);
  for (unsigned int i = 0; i < sizeof(int); ++i)
  {
    const unsigned char value = bytes[i];
    bytes[i] = value;
  }
}
|
||||
|
||||
template<typename Scalar, int Mode, int Options> void non_projective_only()
|
||||
{
|
||||
/* this test covers the following files:
|
||||
@ -224,12 +228,13 @@ template<typename Scalar, int Mode, int Options> void transformations()
|
||||
|
||||
do {
|
||||
v3 = Vector3::Random();
|
||||
dont_over_optimize(&v3);
|
||||
} while (v3.cwiseAbs().minCoeff()<NumTraits<Scalar>::epsilon());
|
||||
Translation3 tv3(v3);
|
||||
Transform3 t5(tv3);
|
||||
t4 = tv3;
|
||||
VERIFY_IS_APPROX(t5.matrix(), t4.matrix());
|
||||
t4.translate(-v3);
|
||||
t4.translate((-v3).eval());
|
||||
VERIFY_IS_APPROX(t4.matrix(), MatrixType::Identity());
|
||||
t4 *= tv3;
|
||||
VERIFY_IS_APPROX(t5.matrix(), t4.matrix());
|
||||
|
@ -7,6 +7,14 @@
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
// Select the unaligned-vectorization mode per test part so that the
// expectations below (written as EIGEN_UNALIGNED_VECTORIZE ? ... : ...) are
// exercised with the feature both enabled (part 1) and disabled (part 2).
// NOTE(review): presumably this must precede the first Eigen include so the
// library default does not take effect first; confirm against the full file.
#ifdef EIGEN_TEST_PART_1
|
||||
#define EIGEN_UNALIGNED_VECTORIZE 1
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_TEST_PART_2
|
||||
#define EIGEN_UNALIGNED_VECTORIZE 0
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
|
||||
#undef EIGEN_DEFAULT_TO_ROW_MAJOR
|
||||
#endif
|
||||
@ -144,10 +152,16 @@ struct vectorization_logic
|
||||
InnerVectorizedTraversal,InnerUnrolling));
|
||||
|
||||
VERIFY(test_assign(Matrix44u(),Matrix44()+Matrix44(),
|
||||
LinearTraversal,NoUnrolling));
|
||||
EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearTraversal,
|
||||
EIGEN_UNALIGNED_VECTORIZE ? InnerUnrolling : NoUnrolling));
|
||||
|
||||
VERIFY(test_assign(Matrix1(),Matrix1()+Matrix1(),
|
||||
(Matrix1::InnerSizeAtCompileTime % PacketSize)==0 ? InnerVectorizedTraversal : LinearVectorizedTraversal,
|
||||
CompleteUnrolling));
|
||||
|
||||
VERIFY(test_assign(Matrix1u(),Matrix1()+Matrix1(),
|
||||
LinearTraversal,CompleteUnrolling));
|
||||
EIGEN_UNALIGNED_VECTORIZE ? ((Matrix1::InnerSizeAtCompileTime % PacketSize)==0 ? InnerVectorizedTraversal : LinearVectorizedTraversal)
|
||||
: LinearTraversal, CompleteUnrolling));
|
||||
|
||||
VERIFY(test_assign(Matrix44c().col(1),Matrix44c().col(2)+Matrix44c().col(3),
|
||||
InnerVectorizedTraversal,CompleteUnrolling));
|
||||
@ -158,19 +172,30 @@ struct vectorization_logic
|
||||
if(PacketSize>1)
|
||||
{
|
||||
typedef Matrix<Scalar,3,3,ColMajor> Matrix33c;
|
||||
typedef Matrix<Scalar,3,1,ColMajor> Vector3;
|
||||
VERIFY(test_assign(Matrix33c().row(2),Matrix33c().row(1)+Matrix33c().row(1),
|
||||
LinearTraversal,CompleteUnrolling));
|
||||
VERIFY(test_assign(Vector3(),Vector3()+Vector3(),
|
||||
EIGEN_UNALIGNED_VECTORIZE ? (HalfPacketSize==1 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : (HalfPacketSize==1 ? InnerVectorizedTraversal : LinearTraversal), CompleteUnrolling));
|
||||
VERIFY(test_assign(Matrix33c().col(0),Matrix33c().col(1)+Matrix33c().col(1),
|
||||
LinearTraversal,CompleteUnrolling));
|
||||
EIGEN_UNALIGNED_VECTORIZE ? (HalfPacketSize==1 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : (HalfPacketSize==1 ? SliceVectorizedTraversal : LinearTraversal),
|
||||
((!EIGEN_UNALIGNED_VECTORIZE) && HalfPacketSize==1) ? NoUnrolling : CompleteUnrolling));
|
||||
|
||||
VERIFY(test_assign(Matrix3(),Matrix3().cwiseProduct(Matrix3()),
|
||||
LinearVectorizedTraversal,CompleteUnrolling));
|
||||
|
||||
VERIFY(test_assign(Matrix<Scalar,17,17>(),Matrix<Scalar,17,17>()+Matrix<Scalar,17,17>(),
|
||||
HalfPacketSize==1 ? InnerVectorizedTraversal : LinearTraversal,NoUnrolling));
|
||||
HalfPacketSize==1 ? InnerVectorizedTraversal :
|
||||
EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal :
|
||||
LinearTraversal,
|
||||
NoUnrolling));
|
||||
|
||||
VERIFY(test_assign(Matrix11(), Matrix11()+Matrix11(),InnerVectorizedTraversal,CompleteUnrolling));
|
||||
|
||||
|
||||
VERIFY(test_assign(Matrix11(),Matrix<Scalar,17,17>().template block<PacketSize,PacketSize>(2,3)+Matrix<Scalar,17,17>().template block<PacketSize,PacketSize>(8,4),
|
||||
DefaultTraversal,PacketSize>4?InnerUnrolling:CompleteUnrolling));
|
||||
(EIGEN_UNALIGNED_VECTORIZE) ? InnerVectorizedTraversal : DefaultTraversal,
|
||||
(EIGEN_UNALIGNED_VECTORIZE || PacketSize<=4) ? CompleteUnrolling : InnerUnrolling ));
|
||||
|
||||
VERIFY(test_assign(Vector1(),Matrix11()*Vector1(),
|
||||
InnerVectorizedTraversal,CompleteUnrolling));
|
||||
@ -270,6 +295,12 @@ struct vectorization_logic_half
|
||||
InnerVectorizedTraversal,CompleteUnrolling));
|
||||
VERIFY(test_assign(Vector1(),Vector1()+Vector1(),
|
||||
InnerVectorizedTraversal,CompleteUnrolling));
|
||||
VERIFY(test_assign(Vector1(),Vector1().template segment<PacketSize>(0).derived(),
|
||||
EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearVectorizedTraversal,CompleteUnrolling));
|
||||
VERIFY(test_assign(Vector1(),Scalar(2.1)*Vector1()-Vector1(),
|
||||
InnerVectorizedTraversal,CompleteUnrolling));
|
||||
VERIFY(test_assign(Vector1(),(Scalar(2.1)*Vector1().template segment<PacketSize>(0)-Vector1().template segment<PacketSize>(0)).derived(),
|
||||
EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearVectorizedTraversal,CompleteUnrolling));
|
||||
VERIFY(test_assign(Vector1(),Vector1().cwiseProduct(Vector1()),
|
||||
InnerVectorizedTraversal,CompleteUnrolling));
|
||||
VERIFY(test_assign(Vector1(),Vector1().template cast<Scalar>(),
|
||||
@ -287,10 +318,11 @@ struct vectorization_logic_half
|
||||
InnerVectorizedTraversal,InnerUnrolling));
|
||||
|
||||
VERIFY(test_assign(Matrix57u(),Matrix57()+Matrix57(),
|
||||
LinearTraversal,NoUnrolling));
|
||||
EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearTraversal,
|
||||
EIGEN_UNALIGNED_VECTORIZE ? InnerUnrolling : NoUnrolling));
|
||||
|
||||
VERIFY(test_assign(Matrix1u(),Matrix1()+Matrix1(),
|
||||
LinearTraversal,CompleteUnrolling));
|
||||
EIGEN_UNALIGNED_VECTORIZE ? ((Matrix1::InnerSizeAtCompileTime % PacketSize)==0 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : LinearTraversal,CompleteUnrolling));
|
||||
|
||||
if(PacketSize>1)
|
||||
{
|
||||
@ -298,16 +330,17 @@ struct vectorization_logic_half
|
||||
VERIFY(test_assign(Matrix33c().row(2),Matrix33c().row(1)+Matrix33c().row(1),
|
||||
LinearTraversal,CompleteUnrolling));
|
||||
VERIFY(test_assign(Matrix33c().col(0),Matrix33c().col(1)+Matrix33c().col(1),
|
||||
LinearTraversal,CompleteUnrolling));
|
||||
EIGEN_UNALIGNED_VECTORIZE ? (PacketSize==1 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : LinearTraversal,CompleteUnrolling));
|
||||
|
||||
VERIFY(test_assign(Matrix3(),Matrix3().cwiseQuotient(Matrix3()),
|
||||
PacketTraits::HasDiv ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
|
||||
|
||||
VERIFY(test_assign(Matrix<Scalar,17,17>(),Matrix<Scalar,17,17>()+Matrix<Scalar,17,17>(),
|
||||
LinearTraversal,NoUnrolling));
|
||||
EIGEN_UNALIGNED_VECTORIZE ? (PacketSize==1 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : LinearTraversal,
|
||||
NoUnrolling));
|
||||
|
||||
VERIFY(test_assign(Matrix11(),Matrix<Scalar,17,17>().template block<PacketSize,PacketSize>(2,3)+Matrix<Scalar,17,17>().template block<PacketSize,PacketSize>(8,4),
|
||||
DefaultTraversal,PacketSize>4?InnerUnrolling:CompleteUnrolling));
|
||||
EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : DefaultTraversal,PacketSize>4?InnerUnrolling:CompleteUnrolling));
|
||||
|
||||
VERIFY(test_assign(Vector1(),Matrix11()*Vector1(),
|
||||
InnerVectorizedTraversal,CompleteUnrolling));
|
||||
@ -367,19 +400,19 @@ void test_vectorization_logic()
|
||||
if(internal::packet_traits<float>::Vectorizable)
|
||||
{
|
||||
VERIFY(test_assign(Matrix<float,3,3>(),Matrix<float,3,3>()+Matrix<float,3,3>(),
|
||||
LinearTraversal,CompleteUnrolling));
|
||||
EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
|
||||
|
||||
VERIFY(test_redux(Matrix<float,5,2>(),
|
||||
DefaultTraversal,CompleteUnrolling));
|
||||
EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : DefaultTraversal,CompleteUnrolling));
|
||||
}
|
||||
|
||||
if(internal::packet_traits<double>::Vectorizable)
|
||||
{
|
||||
VERIFY(test_assign(Matrix<double,3,3>(),Matrix<double,3,3>()+Matrix<double,3,3>(),
|
||||
LinearTraversal,CompleteUnrolling));
|
||||
EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
|
||||
|
||||
VERIFY(test_redux(Matrix<double,7,3>(),
|
||||
DefaultTraversal,CompleteUnrolling));
|
||||
EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : DefaultTraversal,CompleteUnrolling));
|
||||
}
|
||||
#endif // EIGEN_VECTORIZE
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user