First part of a big refactoring of alignment control to enable the handling of arbitrarily aligned buffers. It includes:

- AlignedBit flag is deprecated. Alignment is now specified by the evaluator through the 'Alignment' enum, e.g., evaluator<Xpr>::Alignment. Its value is in Bytes.
 - Add several enums to specify alignment: Aligned8, Aligned16, Aligned32, Aligned64, Aligned128. AlignedMax corresponds to EIGEN_MAX_ALIGN_BYTES. Such enums are used to define the above Alignment value, and as the 'Options' template parameter of Map<> and Ref<>.
 - The Aligned enum is now deprecated. It is now an alias for Aligned16.
 - Currently, traits<Matrix<>>, traits<Array<>>, traits<Ref<>>, traits<Map<>>, and traits<Block<>> also expose the Alignment enum.
This commit is contained in:
Gael Guennebaud 2015-08-06 15:31:07 +02:00
parent 65186ef18d
commit 1f5024332e
24 changed files with 231 additions and 200 deletions

View File

@ -28,18 +28,19 @@ template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
struct copy_using_evaluator_traits
{
typedef typename DstEvaluator::XprType Dst;
typedef typename Dst::Scalar DstScalar;
enum {
DstFlags = DstEvaluator::Flags,
SrcFlags = SrcEvaluator::Flags
SrcFlags = SrcEvaluator::Flags,
RequiredAlignment = packet_traits<DstScalar>::size*sizeof(DstScalar) // FIXME ask packet_traits for the true alignment requirement
};
public:
enum {
DstIsAligned = DstFlags & AlignedBit,
DstAlignment = DstEvaluator::Alignment,
SrcAlignment = SrcEvaluator::Alignment,
DstHasDirectAccess = DstFlags & DirectAccessBit,
SrcIsAligned = SrcFlags & AlignedBit,
JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned
JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
};
private:
@ -51,7 +52,7 @@ private:
: int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
: int(Dst::MaxRowsAtCompileTime),
MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
PacketSize = packet_traits<typename Dst::Scalar>::size
PacketSize = packet_traits<DstScalar>::size
};
enum {
@ -62,10 +63,10 @@ private:
&& (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
&& (functor_traits<AssignFunc>::PacketAccess),
MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
&& int(DstIsAligned) && int(SrcIsAligned),
&& int(JointAlignment)>=int(RequiredAlignment),
MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
&& (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
&& ((int(DstAlignment)>=int(RequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
so it's only good for large enough sizes. */
MaySliceVectorize = MightVectorize && DstHasDirectAccess
@ -107,8 +108,8 @@ public:
: int(NoUnrolling)
)
: int(Traversal) == int(LinearVectorizedTraversal)
? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling)
: int(NoUnrolling) )
? ( bool(MayUnrollCompletely) && (int(DstAlignment)>=int(RequiredAlignment)) ? int(CompleteUnrolling)
: int(NoUnrolling) )
: int(Traversal) == int(LinearTraversal)
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
: int(NoUnrolling) )
@ -124,8 +125,9 @@ public:
EIGEN_DEBUG_VAR(DstFlags)
EIGEN_DEBUG_VAR(SrcFlags)
std::cerr.unsetf(std::ios::hex);
EIGEN_DEBUG_VAR(DstIsAligned)
EIGEN_DEBUG_VAR(SrcIsAligned)
EIGEN_DEBUG_VAR(DstAlignment)
EIGEN_DEBUG_VAR(SrcAlignment)
EIGEN_DEBUG_VAR(RequiredAlignment)
EIGEN_DEBUG_VAR(JointAlignment)
EIGEN_DEBUG_VAR(InnerSize)
EIGEN_DEBUG_VAR(InnerMaxSize)
@ -360,11 +362,13 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
const Index size = kernel.size();
typedef packet_traits<typename Kernel::Scalar> PacketTraits;
typedef typename Kernel::Scalar Scalar;
typedef packet_traits<Scalar> PacketTraits;
enum {
packetSize = PacketTraits::size,
dstIsAligned = int(Kernel::AssignmentTraits::DstIsAligned),
dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : dstIsAligned,
dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(Kernel::AssignmentTraits::RequiredAlignment),
dstAlignment = PacketTraits::AlignedOnScalar ? int(Kernel::AssignmentTraits::RequiredAlignment)
: int(Kernel::AssignmentTraits::DstAlignment),
srcAlignment = Kernel::AssignmentTraits::JointAlignment
};
const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned(&kernel.dstEvaluator().coeffRef(0), size);
@ -475,9 +479,10 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
typedef packet_traits<Scalar> PacketTraits;
enum {
packetSize = PacketTraits::size,
alignable = PacketTraits::AlignedOnScalar,
dstIsAligned = Kernel::AssignmentTraits::DstIsAligned,
dstAlignment = alignable ? Aligned : int(dstIsAligned)
alignable = PacketTraits::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(Kernel::AssignmentTraits::RequiredAlignment),
dstAlignment = alignable ? int(Kernel::AssignmentTraits::RequiredAlignment)
: int(Kernel::AssignmentTraits::DstAlignment)
};
const Scalar *dst_ptr = &kernel.dstEvaluator().coeffRef(0,0);
if((!bool(dstIsAligned)) && (size_t(dst_ptr) % sizeof(Scalar))>0)

View File

@ -81,14 +81,16 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
OuterStrideAtCompileTime = HasSameStorageOrderAsXprType
? int(outer_stride_at_compile_time<XprType>::ret)
: int(inner_stride_at_compile_time<XprType>::ret),
// IsAligned is needed by MapBase's assertions
// We can sefely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator
IsAligned = 0,
// FIXME, this traits is rather specialized for dense object and it needs to be cleaned further
FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
Flags = (traits<XprType>::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit
Flags = (traits<XprType>::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit,
// FIXME DirectAccessBit should not be handled by expressions
//
// Alignment is needed by MapBase's assertions
// We can sefely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator
Alignment = 0
};
};

View File

@ -111,6 +111,10 @@ struct evaluator_base
typedef typename traits<ExpressionType>::StorageIndex StorageIndex;
// TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices.
typedef traits<ExpressionType> ExpressionTraits;
enum {
Alignment = 0
};
};
// -------------------- Matrix and Array --------------------
@ -137,8 +141,8 @@ struct evaluator<PlainObjectBase<Derived> >
ColsAtCompileTime = PlainObjectType::ColsAtCompileTime,
CoeffReadCost = NumTraits<Scalar>::ReadCost,
Flags = compute_matrix_evaluator_flags< Scalar,Derived::RowsAtCompileTime,Derived::ColsAtCompileTime,
Derived::Options,Derived::MaxRowsAtCompileTime,Derived::MaxColsAtCompileTime>::ret
Flags = traits<Derived>::EvaluatorFlags,
Alignment = traits<Derived>::Alignment
};
EIGEN_DEVICE_FUNC evaluator()
@ -255,7 +259,8 @@ struct unary_evaluator<Transpose<ArgType>, IndexBased>
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
Flags = evaluator<ArgType>::Flags ^ RowMajorBit
Flags = evaluator<ArgType>::Flags ^ RowMajorBit,
Alignment = evaluator<ArgType>::Alignment
};
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {}
@ -331,7 +336,8 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
& ( HereditaryBits
| (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0)
| (functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0)))
| (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit) // FIXME EvalBeforeNestingBit should be needed anymore
| (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit), // FIXME EvalBeforeNestingBit should be needed anymore
Alignment = 0 // FIXME alignment should not matter here, perhaps we could set it to AlignMax??
};
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n)
@ -378,9 +384,9 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
Flags = evaluator<ArgType>::Flags & (
HereditaryBits | LinearAccessBit | AlignedBit
| (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0))
Flags = evaluator<ArgType>::Flags
& (HereditaryBits | LinearAccessBit | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
Alignment = evaluator<ArgType>::Alignment
};
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
@ -447,13 +453,13 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
Flags0 = (int(LhsFlags) | int(RhsFlags)) & (
HereditaryBits
| (int(LhsFlags) & int(RhsFlags) &
( AlignedBit
| (StorageOrdersAgree ? LinearAccessBit : 0)
( (StorageOrdersAgree ? LinearAccessBit : 0)
| (functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
)
)
),
Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit)
Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit),
Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<Lhs>::Alignment,evaluator<Rhs>::Alignment)
};
EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr)
@ -506,7 +512,9 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased>
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit))
Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)),
Alignment = 0 // FIXME it is not very clear why alignment is necessarily lost...
};
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
@ -641,7 +649,6 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
HasNoInnerStride = InnerStrideAtCompileTime == 1,
HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
HasNoStride = HasNoInnerStride && HasNoOuterStride,
IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) && ((int(MapOptions)&Aligned)==Aligned),
IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
// TODO: should check for smaller packet types once we can handle multi-sized packet types
@ -653,10 +660,13 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
|| ( OuterStrideAtCompileTime!=Dynamic
&& ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime) % AlignBytes)==0 ) ),
Flags0 = evaluator<PlainObjectType>::Flags,
Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
? int(Flags1) : int(Flags1 & ~LinearAccessBit),
Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit)
//Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
Flags1 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
? int(Flags0) : int(Flags0 & ~LinearAccessBit),
Flags = KeepsPacketAccess ? int(Flags1) : (int(Flags1) & ~PacketAccessBit),
//IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) && ((int(MapOptions)&int(AlignedMask))>0),
Alignment = int(MapOptions)&int(AlignedMask)
};
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map)
@ -673,7 +683,8 @@ struct evaluator<Ref<PlainObjectType, RefOptions, StrideType> >
typedef Ref<PlainObjectType, RefOptions, StrideType> XprType;
enum {
Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags
Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags,
Alignment = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Alignment
};
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& ref)
@ -717,17 +728,17 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
&& (InnerStrideAtCompileTime == 1)
? PacketAccessBit : 0,
// TODO: should check for smaller packet types once we can handle multi-sized packet types
AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % AlignBytes) == 0)) ? AlignedBit : 0,
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
FlagsRowMajorBit = XprType::Flags&RowMajorBit,
Flags0 = evaluator<ArgType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
DirectAccessBit |
MaskPacketAccessBit |
MaskAlignedBit),
Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit
MaskPacketAccessBit),
Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit,
// TODO: should check for smaller packet types once we can handle multi-sized packet types
AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),
Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % AlignBytes) == 0)) ? AlignBytes : 0,
Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0)
};
typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block) {}
@ -833,11 +844,8 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAc
EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block)
: mapbase_evaluator<XprType, typename XprType::PlainObject>(block)
{
// TODO: should check for smaller packet types once we can handle multi-sized packet types
const int AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar);
EIGEN_ONLY_USED_FOR_DEBUG(AlignBytes)
// FIXME this should be an internal assertion
eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % AlignBytes) == 0) && "data is not aligned");
eigen_assert(((size_t(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator<XprType>::Alignment)) == 0) && "data is not aligned");
}
};
@ -856,7 +864,9 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
+ EIGEN_SIZE_MAX(evaluator<ThenMatrixType>::CoeffReadCost,
evaluator<ElseMatrixType>::CoeffReadCost),
Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits
Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits,
Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ThenMatrixType>::Alignment, evaluator<ElseMatrixType>::Alignment)
};
inline EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select)
@ -908,7 +918,9 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
enum {
CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost,
Flags = (evaluator<ArgTypeNestedCleaned>::Flags & HereditaryBits & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit)
Flags = (evaluator<ArgTypeNestedCleaned>::Flags & HereditaryBits & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit),
Alignment = evaluator<ArgTypeNestedCleaned>::Alignment
};
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& replicate)
@ -992,7 +1004,9 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
CoeffReadCost = TraversalSize==Dynamic ? Dynamic
: TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value),
Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&HereditaryBits)
Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&HereditaryBits),
Alignment = 0 // FIXME this could be improved
};
EIGEN_DEVICE_FUNC explicit evaluator(const XprType expr)
@ -1028,7 +1042,8 @@ struct evaluator_wrapper_base
typedef typename remove_all<typename XprType::NestedExpressionType>::type ArgType;
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
Flags = evaluator<ArgType>::Flags
Flags = evaluator<ArgType>::Flags,
Alignment = evaluator<ArgType>::Alignment
};
EIGEN_DEVICE_FUNC explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {}
@ -1144,7 +1159,9 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) )
? LinearAccessBit : 0,
Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess)
Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess),
Alignment = 0 // FIXME in some rare cases, Alignment could be preserved, like a Vector4f.
};
typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet;
@ -1226,7 +1243,9 @@ struct evaluator<Diagonal<ArgType, DiagIndex> >
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
Flags = (unsigned int)evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit
Flags = (unsigned int)evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit,
Alignment = 0
};
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& diagonal)

View File

@ -602,11 +602,11 @@ struct first_aligned_impl<Derived, false>
* documentation.
*/
template<typename Derived>
static inline Index first_aligned(const Derived& m)
static inline Index first_aligned(const DenseBase<Derived>& m)
{
return first_aligned_impl
<Derived, (Derived::Flags & AlignedBit) || !(Derived::Flags & DirectAccessBit)>
::run(m);
<Derived, (evaluator<Derived>::Alignment > 0 ) || !(Derived::Flags & DirectAccessBit)> // FIXME Alignment!
::run(m.derived());
}
template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>

View File

@ -450,22 +450,22 @@ pmadd(const Packet& a,
{ return padd(pmul(a, b),c); }
/** \internal \returns a packet version of \a *from.
* If LoadMode equals #Aligned, \a from must be 16 bytes aligned */
template<typename Packet, int LoadMode>
* The pointer \a from must be aligned on a \a Alignment bytes boundary. */
template<typename Packet, int Alignment>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
{
if(LoadMode == Aligned)
if(Alignment >= unpacket_traits<Packet>::size*sizeof(typename unpacket_traits<Packet>::type))
return pload<Packet>(from);
else
return ploadu<Packet>(from);
}
/** \internal copy the packet \a from to \a *to.
* If StoreMode equals #Aligned, \a to must be 16 bytes aligned */
template<typename Scalar, typename Packet, int LoadMode>
* The pointer \a from must be aligned on a \a Alignment bytes boundary. */
template<typename Scalar, typename Packet, int Alignment>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
{
if(LoadMode == Aligned)
if(Alignment >= unpacket_traits<Packet>::size*sizeof(typename unpacket_traits<Packet>::type))
pstore(to, from);
else
pstoreu(to, from);

View File

@ -19,7 +19,7 @@ namespace Eigen {
* \brief A matrix or vector expression mapping an existing array of data.
*
* \tparam PlainObjectType the equivalent matrix type of the mapped data
* \tparam MapOptions specifies whether the pointer is \c #Aligned, or \c #Unaligned.
* \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
* The default is \c #Unaligned.
* \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout
* of an ordinary, contiguous array. This can be overridden by specifying strides.
@ -77,7 +77,7 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
? int(PlainObjectType::OuterStrideAtCompileTime)
: int(StrideType::OuterStrideAtCompileTime),
IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) && ((int(MapOptions)&Aligned)==Aligned),
Alignment = int(MapOptions)&int(AlignedMask),
Flags0 = TraitsBase::Flags & (~NestByRefBit),
Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
};

View File

@ -160,9 +160,8 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
EIGEN_DEVICE_FUNC
void checkSanity() const
{
// TODO "IsAligned" should be replaced to handle arbitrary alignment
#if EIGEN_MAX_ALIGN_BYTES>0
eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::IsAligned, (size_t(m_data) % EIGEN_MAX_ALIGN_BYTES) == 0) && "data is not aligned");
eigen_assert(((size_t(m_data) % EIGEN_PLAIN_ENUM_MAX(1,internal::traits<Derived>::Alignment)) == 0) && "data is not aligned");
#endif
}

View File

@ -139,6 +139,18 @@ namespace internal {
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
{
private:
enum {
row_major_bit = _Options&RowMajor ? RowMajorBit : 0,
is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,
max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,
default_alignment = compute_default_alignment<_Scalar,max_size>::value,
actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
required_alignment = packet_traits<_Scalar>::size * sizeof(_Scalar), // FIXME ask packet_traits for the true required alignment
packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0
};
public:
typedef _Scalar Scalar;
typedef Dense StorageKind;
typedef Eigen::Index StorageIndex;
@ -149,11 +161,13 @@ struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
MaxRowsAtCompileTime = _MaxRows,
MaxColsAtCompileTime = _MaxCols,
Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
// FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase
EvaluatorFlags = compute_matrix_evaluator_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
Options = _Options,
InnerStrideAtCompileTime = 1,
OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime
OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
// FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase
EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
Alignment = actual_alignment
};
};
}

View File

@ -116,20 +116,20 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
typedef Eigen::Map<Derived, Unaligned> MapType;
friend class Eigen::Map<const Derived, Unaligned>;
typedef const Eigen::Map<const Derived, Unaligned> ConstMapType;
friend class Eigen::Map<Derived, Aligned>;
typedef Eigen::Map<Derived, Aligned> AlignedMapType;
friend class Eigen::Map<const Derived, Aligned>;
typedef const Eigen::Map<const Derived, Aligned> ConstAlignedMapType;
friend class Eigen::Map<Derived, AlignedMax>;
typedef Eigen::Map<Derived, AlignedMax> AlignedMapType;
friend class Eigen::Map<const Derived, AlignedMax>;
typedef const Eigen::Map<const Derived, AlignedMax> ConstAlignedMapType;
template<typename StrideType> struct StridedMapType { typedef Eigen::Map<Derived, Unaligned, StrideType> type; };
template<typename StrideType> struct StridedConstMapType { typedef Eigen::Map<const Derived, Unaligned, StrideType> type; };
template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, Aligned, StrideType> type; };
template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, Aligned, StrideType> type; };
template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, AlignedMax, StrideType> type; };
template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, AlignedMax, StrideType> type; };
protected:
DenseStorage<Scalar, Base::MaxSizeAtCompileTime, Base::RowsAtCompileTime, Base::ColsAtCompileTime, Options> m_storage;
public:
enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits<Derived>::EvaluatorFlags & AlignedBit) != 0 };
enum { NeedsToAlign = (SizeAtCompileTime != Dynamic) && (internal::traits<Derived>::Alignment>0) };
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
EIGEN_DEVICE_FUNC

View File

@ -430,24 +430,22 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
LhsFlags = LhsEtorType::Flags,
RhsFlags = RhsEtorType::Flags,
LhsAlignment = LhsEtorType::Alignment,
RhsAlignment = RhsEtorType::Alignment,
LhsIsAligned = int(LhsAlignment) >= int(sizeof(Scalar)*PacketSize), // FIXME compare to required alignment
RhsIsAligned = int(RhsAlignment) >= int(sizeof(Scalar)*PacketSize),
LhsRowMajor = LhsFlags & RowMajorBit,
RhsRowMajor = RhsFlags & RowMajorBit,
SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
&& (ColsAtCompileTime == Dynamic
|| ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
&& (RhsFlags&AlignedBit)
)
),
&& (ColsAtCompileTime == Dynamic || ( (ColsAtCompileTime % PacketSize) == 0 && RhsIsAligned ) ),
CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
&& (RowsAtCompileTime == Dynamic
|| ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
&& (LhsFlags&AlignedBit)
)
),
&& (RowsAtCompileTime == Dynamic || ( (RowsAtCompileTime % PacketSize) == 0 && LhsIsAligned ) ),
EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
@ -455,11 +453,13 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
| (EvalToRowMajor ? RowMajorBit : 0)
| (CanVectorizeLhs ? (LhsFlags & AlignedBit) : 0)
| (CanVectorizeRhs ? (RhsFlags & AlignedBit) : 0)
// TODO enable vectorization for mixed types
| (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
Alignment = CanVectorizeLhs ? LhsAlignment
: CanVectorizeRhs ? RhsAlignment
: 0,
/* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
* of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
* loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
@ -469,7 +469,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
&& LhsRowMajor
&& (!RhsRowMajor)
&& (LhsFlags & RhsFlags & ActualPacketAccessBit)
&& (LhsFlags & RhsFlags & AlignedBit)
&& (LhsIsAligned && RhsIsAligned)
&& (InnerSize % packet_traits<Scalar>::size == 0)
};
@ -706,7 +706,8 @@ public:
//_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
_LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
Flags = ((HereditaryBits|_LinearAccessMask|AlignedBit) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0)
Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
Alignment = evaluator<MatrixType>::Alignment
};
diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
@ -732,7 +733,7 @@ protected:
{
enum {
InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
DiagonalPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagFlags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned)
DiagonalPacketLoadMode = EIGEN_PLAIN_ENUM_MIN(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!!
};
return internal::pmul(m_matImpl.template packet<LoadMode>(row, col),
m_diagImpl.template packet<DiagonalPacketLoadMode>(id));

View File

@ -165,7 +165,7 @@ struct redux_vec_unroller<Func, Derived, Start, 1>
index = Start * packet_traits<typename Derived::Scalar>::size,
outer = index / int(Derived::InnerSizeAtCompileTime),
inner = index % int(Derived::InnerSizeAtCompileTime),
alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
alignment = Derived::Alignment
};
typedef typename Derived::Scalar Scalar;
@ -222,10 +222,10 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
const Index size = mat.size();
const Index packetSize = packet_traits<Scalar>::size;
const Index alignedStart = internal::first_aligned(mat);
const Index alignedStart = internal::first_aligned(mat.nestedExpression());
enum {
alignment = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) || bool(Derived::Flags & AlignedBit)
? Aligned : Unaligned
alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(sizeof(Scalar)*packetSize) : int(Unaligned), // FIXME take into account alignment requirement
alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment)
};
const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
@ -352,7 +352,8 @@ public:
IsRowMajor = XprType::IsRowMajor,
SizeAtCompileTime = XprType::SizeAtCompileTime,
InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
CoeffReadCost = evaluator<XprType>::CoeffReadCost
CoeffReadCost = evaluator<XprType>::CoeffReadCost,
Alignment = evaluator<XprType>::Alignment
};
EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
@ -385,6 +386,8 @@ public:
PacketReturnType packetByOuterInner(Index outer, Index inner) const
{ return m_evaluator.template packet<LoadMode>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
const XprType & nestedExpression() const { return m_xpr; }
protected:
typename internal::evaluator<XprType>::nestedType m_evaluator;
const XprType &m_xpr;

View File

@ -18,7 +18,7 @@ namespace Eigen {
* \brief A matrix or vector expression mapping an existing expression
*
* \tparam PlainObjectType the equivalent matrix type of the mapped data
* \tparam Options specifies whether the pointer is \c #Aligned, or \c #Unaligned.
* \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
* The default is \c #Unaligned.
* \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1),
* but accepts a variable outer stride (leading dimension).
@ -92,7 +92,8 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
typedef _StrideType StrideType;
enum {
Options = _Options,
Flags = traits<Map<_PlainObjectType, _Options, _StrideType> >::Flags | NestByRefBit
Flags = traits<Map<_PlainObjectType, _Options, _StrideType> >::Flags | NestByRefBit,
Alignment = traits<Map<_PlainObjectType, _Options, _StrideType> >::Alignment
};
template<typename Derived> struct match {
@ -104,7 +105,7 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
|| (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1),
OuterStrideMatch = Derived::IsVectorAtCompileTime
|| int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime),
AlignmentMatch = (_Options!=Aligned) || ((PlainObjectType::Flags&AlignedBit)==0) || ((traits<Derived>::Flags&AlignedBit)==AlignedBit),
AlignmentMatch = (int(traits<PlainObjectType>::Alignment)==int(Unaligned)) || (int(evaluator<Derived>::Alignment) >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment
ScalarTypeMatch = internal::is_same<typename PlainObjectType::Scalar, typename Derived::Scalar>::value,
MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch
};

View File

@ -162,21 +162,27 @@ MatrixBase<Derived>::stableNorm() const
RealScalar scale(0);
RealScalar invScale(1);
RealScalar ssq(0); // sum of square
typedef typename internal::nested_eval<Derived,2>::type DerivedCopy;
typedef typename internal::remove_all<DerivedCopy>::type DerivedCopyClean;
DerivedCopy copy(derived());
enum {
Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? 1 : 0
CanAlign = (int(Flags)&DirectAccessBit) || (int(internal::evaluator<DerivedCopyClean>::Alignment)>0) // FIXME
};
typedef typename internal::conditional<Alignment, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, Aligned>,
typename Base::ConstSegmentReturnType>::type SegmentWrapper;
typedef typename internal::conditional<CanAlign, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, internal::evaluator<DerivedCopyClean>::Alignment>,
typename DerivedCopyClean
::ConstSegmentReturnType>::type SegmentWrapper;
Index n = size();
if(n==1)
return abs(this->coeff(0));
Index bi = internal::first_aligned(derived());
Index bi = internal::first_aligned(copy);
if (bi>0)
internal::stable_norm_kernel(this->head(bi), ssq, scale, invScale);
internal::stable_norm_kernel(copy.head(bi), ssq, scale, invScale);
for (; bi<n; bi+=blockSize)
internal::stable_norm_kernel(SegmentWrapper(this->segment(bi,numext::mini(blockSize, n - bi))), ssq, scale, invScale);
internal::stable_norm_kernel(SegmentWrapper(copy.segment(bi,numext::mini(blockSize, n - bi))), ssq, scale, invScale);
return scale * sqrt(ssq);
}

View File

@ -233,7 +233,7 @@ struct inplace_transpose_selector<MatrixType,true,true> { // PacketSize x Packet
typedef typename MatrixType::Scalar Scalar;
typedef typename internal::packet_traits<typename MatrixType::Scalar>::type Packet;
const Index PacketSize = internal::packet_traits<Scalar>::size;
const Index Alignment = internal::evaluator<MatrixType>::Flags&AlignedBit ? Aligned : Unaligned;
const Index Alignment = internal::evaluator<MatrixType>::Alignment;
PacketBlock<Packet> A;
for (Index i=0; i<PacketSize; ++i)
A.packet[i] = m.template packetByOuterInner<Alignment>(i,0);

View File

@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
@ -140,7 +140,7 @@ const unsigned int LvalueBit = 0x20;
*/
const unsigned int DirectAccessBit = 0x40;
/** \ingroup flags
/* \ingroup flags
*
* means the first coefficient packet is guaranteed to be aligned.
* An expression cannot has the AlignedBit without the PacketAccessBit flag.
@ -215,12 +215,31 @@ enum {
};
/** \ingroup enums
* Enum for indicating whether an object is aligned or not. */
* Enum for indicating whether a buffer is aligned or not. */
enum {
/** Object is not correctly aligned for vectorization. */
Unaligned=0,
/** Object is aligned for vectorization. */
Aligned=1
Unaligned=0, /**< Data pointer has no specific alignment. */
Aligned8=8, /**< Data pointer is aligned on a 8 bytes boundary. */
Aligned16=16, /**< Data pointer is aligned on a 16 bytes boundary. */
Aligned32=32, /**< Data pointer is aligned on a 32 bytes boundary. */
Aligned64=64, /**< Data pointer is aligned on a 64 bytes boundary. */
Aligned128=128, /**< Data pointer is aligned on a 128 bytes boundary. */
AlignedMask=255,
Aligned=16, /**< \deprecated Synonym for Aligned16. */
#if EIGEN_MAX_ALIGN_BYTES==128
AlignedMax = Aligned128
#elif EIGEN_MAX_ALIGN_BYTES==64
AlignedMax = Aligned64
#elif EIGEN_MAX_ALIGN_BYTES==32
AlignedMax = Aligned32
#elif EIGEN_MAX_ALIGN_BYTES==16
AlignedMax = Aligned16
#elif EIGEN_MAX_ALIGN_BYTES==8
AlignedMax = Aligned8
#elif EIGEN_MAX_ALIGN_BYTES==0
AlignedMax = Unaligned
#else
#error Invalid value for EIGEN_MAX_ALIGN_BYTES
#endif
};
/** \ingroup enums

View File

@ -192,43 +192,6 @@ class compute_matrix_flags
enum { ret = DirectAccessBit | LvalueBit | NestByRefBit | row_major_bit };
};
template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
class compute_matrix_evaluator_flags
{
enum {
row_major_bit = Options&RowMajor ? RowMajorBit : 0,
is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic,
// TODO: should check for smaller packet types once we can handle multi-sized packet types
align_bytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),
aligned_bit =
(
((Options&DontAlign)==0)
&& (
#if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % align_bytes) == 0))
#else
0
#endif
||
#if EIGEN_MAX_ALIGN_BYTES!=0
is_dynamic_size_storage
#else
0
#endif
)
) ? AlignedBit : 0,
packet_access_bit = packet_traits<Scalar>::Vectorizable && aligned_bit ? PacketAccessBit : 0
};
public:
enum { ret = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit | aligned_bit };
};
template<int _Rows, int _Cols> struct size_at_compile_time
{
enum { ret = (_Rows==Dynamic || _Cols==Dynamic) ? Dynamic : _Rows * _Cols };

View File

@ -217,8 +217,8 @@ struct traits<Quaternion<_Scalar,_Options> >
typedef _Scalar Scalar;
typedef Matrix<_Scalar,4,1,_Options> Coefficients;
enum{
IsAligned = (internal::traits<Coefficients>::EvaluatorFlags & AlignedBit) != 0,
Flags = IsAligned ? (AlignedBit | LvalueBit) : LvalueBit
Alignment = internal::traits<Coefficients>::Alignment,
Flags = LvalueBit
};
};
}
@ -228,7 +228,7 @@ class Quaternion : public QuaternionBase<Quaternion<_Scalar,_Options> >
{
public:
typedef QuaternionBase<Quaternion<_Scalar,_Options> > Base;
enum { IsAligned = internal::traits<Quaternion>::IsAligned };
enum { NeedsAlignment = internal::traits<Quaternion>::Alignment>0 };
typedef _Scalar Scalar;
@ -277,7 +277,7 @@ public:
inline Coefficients& coeffs() { return m_coeffs;}
inline const Coefficients& coeffs() const { return m_coeffs;}
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(IsAligned)
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsAlignment)
protected:
Coefficients m_coeffs;
@ -441,7 +441,7 @@ QuaternionBase<Derived>::operator* (const QuaternionBase<OtherDerived>& other) c
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
return internal::quat_product<Architecture::Target, Derived, OtherDerived,
typename internal::traits<Derived>::Scalar,
(internal::traits<Derived>::IsAligned && internal::traits<OtherDerived>::IsAligned)?Aligned:Unaligned>::run(*this, other);
EIGEN_PLAIN_ENUM_MIN(internal::traits<Derived>::Alignment, internal::traits<OtherDerived>::Alignment)>::run(*this, other);
}
/** \sa operator*(Quaternion) */
@ -668,7 +668,7 @@ QuaternionBase<Derived>::conjugate() const
{
return internal::quat_conj<Architecture::Target, Derived,
typename internal::traits<Derived>::Scalar,
internal::traits<Derived>::IsAligned?Aligned:Unaligned>::run(*this);
internal::traits<Derived>::Alignment>::run(*this);
}

View File

@ -16,14 +16,14 @@ namespace Eigen {
namespace internal {
template<class Derived, class OtherDerived>
struct quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned>
struct quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned16>
{
static inline Quaternion<float> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
{
Quaternion<float> res;
const __m128 mask = _mm_setr_ps(0.f,0.f,0.f,-0.f);
__m128 a = _a.coeffs().template packet<Aligned>(0);
__m128 b = _b.coeffs().template packet<Aligned>(0);
__m128 a = _a.coeffs().template packet<Aligned16>(0);
__m128 b = _b.coeffs().template packet<Aligned16>(0);
__m128 s1 = _mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2));
__m128 s2 = _mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1));
pstore(&res.x(),
@ -55,8 +55,8 @@ struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
static inline typename plain_matrix_type<VectorLhs>::type
run(const VectorLhs& lhs, const VectorRhs& rhs)
{
__m128 a = lhs.template packet<VectorLhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
__m128 b = rhs.template packet<VectorRhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
__m128 a = lhs.template packet<traits<VectorLhs>::Alignment>(0);
__m128 b = rhs.template packet<traits<VectorRhs>::Alignment>(0);
__m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3));
__m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3));
typename plain_matrix_type<VectorLhs>::type res;

View File

@ -263,7 +263,7 @@ namespace internal {
* \sa MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight()
*/
template<typename VectorX, typename VectorY, typename OtherScalar>
void apply_rotation_in_the_plane(VectorX& _x, VectorY& _y, const JacobiRotation<OtherScalar>& j);
void apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j);
}
/** \jacobi_module
@ -298,18 +298,18 @@ inline void MatrixBase<Derived>::applyOnTheRight(Index p, Index q, const JacobiR
namespace internal {
template<typename VectorX, typename VectorY, typename OtherScalar>
void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(VectorX& _x, VectorY& _y, const JacobiRotation<OtherScalar>& j)
void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j)
{
typedef typename VectorX::Scalar Scalar;
enum { PacketSize = packet_traits<Scalar>::size };
typedef typename packet_traits<Scalar>::type Packet;
eigen_assert(_x.size() == _y.size());
Index size = _x.size();
Index incrx = _x.innerStride();
Index incry = _y.innerStride();
eigen_assert(xpr_x.size() == xpr_y.size());
Index size = xpr_x.size();
Index incrx = xpr_x.derived().innerStride();
Index incry = xpr_y.derived().innerStride();
Scalar* EIGEN_RESTRICT x = &_x.coeffRef(0);
Scalar* EIGEN_RESTRICT y = &_y.coeffRef(0);
Scalar* EIGEN_RESTRICT x = &xpr_x.derived().coeffRef(0);
Scalar* EIGEN_RESTRICT y = &xpr_y.derived().coeffRef(0);
OtherScalar c = j.c();
OtherScalar s = j.s();
@ -392,7 +392,7 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(VectorX& _x, VectorY& _y,
/*** fixed-size vectorized path ***/
else if(VectorX::SizeAtCompileTime != Dynamic &&
(VectorX::Flags & VectorY::Flags & PacketAccessBit) &&
(VectorX::Flags & VectorY::Flags & AlignedBit))
(EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment)>0)) // FIXME should be compared to the required alignment
{
const Packet pc = pset1<Packet>(c);
const Packet ps = pset1<Packet>(s);

View File

@ -35,8 +35,8 @@ template<typename MatrixType, typename ResultType>
struct compute_inverse_size4<Architecture::SSE, float, MatrixType, ResultType>
{
enum {
MatrixAlignment = bool(MatrixType::Flags&AlignedBit),
ResultAlignment = bool(ResultType::Flags&AlignedBit),
MatrixAlignment = traits<MatrixType>::Alignment,
ResultAlignment = traits<ResultType>::Alignment,
StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
};
typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType;
@ -165,8 +165,8 @@ template<typename MatrixType, typename ResultType>
struct compute_inverse_size4<Architecture::SSE, double, MatrixType, ResultType>
{
enum {
MatrixAlignment = bool(MatrixType::Flags&AlignedBit),
ResultAlignment = bool(ResultType::Flags&AlignedBit),
MatrixAlignment = traits<MatrixType>::Alignment,
ResultAlignment = traits<ResultType>::Alignment,
StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
};
typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType;

View File

@ -41,7 +41,7 @@ struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, Diagonal
typedef Product<Lhs, Rhs, DefaultProduct> XprType;
typedef evaluator<XprType> type;
typedef evaluator<XprType> nestedType;
enum { CoeffReadCost = Dynamic, Flags = Rhs::Flags&RowMajorBit }; // FIXME CoeffReadCost & Flags
enum { CoeffReadCost = Dynamic, Flags = Rhs::Flags&RowMajorBit, Alignment = 0 }; // FIXME CoeffReadCost & Flags
typedef sparse_diagonal_product_evaluator<Rhs, typename Lhs::DiagonalVectorType, Rhs::Flags&RowMajorBit?SDP_AsScalarProduct:SDP_AsCwiseProduct> Base;
explicit product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) {}
@ -54,14 +54,14 @@ struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, SparseSh
typedef Product<Lhs, Rhs, DefaultProduct> XprType;
typedef evaluator<XprType> type;
typedef evaluator<XprType> nestedType;
enum { CoeffReadCost = Dynamic, Flags = Lhs::Flags&RowMajorBit }; // FIXME CoeffReadCost & Flags
enum { CoeffReadCost = Dynamic, Flags = Lhs::Flags&RowMajorBit, Alignment = 0 }; // FIXME CoeffReadCost & Flags
typedef sparse_diagonal_product_evaluator<Lhs, Transpose<const typename Rhs::DiagonalVectorType>, Lhs::Flags&RowMajorBit?SDP_AsCwiseProduct:SDP_AsScalarProduct> Base;
explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal().transpose()) {}
};
template<typename SparseXprType, typename DiagonalCoeffType>
struct sparse_diagonal_product_evaluator<SparseXprType, DiagonalCoeffType, SDP_AsScalarProduct>
struct sparse_diagonal_product_evaluator<SparseXprType, DiagonalCoeffType, SDP_AsScalarProduct>
{
protected:
typedef typename evaluator<SparseXprType>::InnerIterator SparseXprInnerIterator;
@ -92,7 +92,7 @@ protected:
template<typename SparseXprType, typename DiagCoeffType>
struct sparse_diagonal_product_evaluator<SparseXprType, DiagCoeffType, SDP_AsCwiseProduct>
struct sparse_diagonal_product_evaluator<SparseXprType, DiagCoeffType, SDP_AsCwiseProduct>
{
typedef typename SparseXprType::Scalar Scalar;

View File

@ -25,15 +25,15 @@ template<typename VectorType> void map_class_vector(const VectorType& m)
Scalar* array1 = internal::aligned_new<Scalar>(size);
Scalar* array2 = internal::aligned_new<Scalar>(size);
Scalar* array3 = new Scalar[size+1];
Scalar* array3unaligned = size_t(array3)%EIGEN_MAX_ALIGN_BYTES == 0 ? array3+1 : array3;
Scalar* array3unaligned = (std::size_t(array3)%EIGEN_MAX_ALIGN_BYTES) == 0 ? array3+1 : array3;
Scalar array4[EIGEN_TESTMAP_MAX_SIZE];
Map<VectorType, Aligned>(array1, size) = VectorType::Random(size);
Map<VectorType, Aligned>(array2, size) = Map<VectorType,Aligned>(array1, size);
Map<VectorType, AlignedMax>(array1, size) = VectorType::Random(size);
Map<VectorType, AlignedMax>(array2, size) = Map<VectorType,AlignedMax>(array1, size);
Map<VectorType>(array3unaligned, size) = Map<VectorType>(array1, size);
Map<VectorType>(array4, size) = Map<VectorType,Aligned>(array1, size);
VectorType ma1 = Map<VectorType, Aligned>(array1, size);
VectorType ma2 = Map<VectorType, Aligned>(array2, size);
Map<VectorType>(array4, size) = Map<VectorType,AlignedMax>(array1, size);
VectorType ma1 = Map<VectorType, AlignedMax>(array1, size);
VectorType ma2 = Map<VectorType, AlignedMax>(array2, size);
VectorType ma3 = Map<VectorType>(array3unaligned, size);
VectorType ma4 = Map<VectorType>(array4, size);
VERIFY_IS_EQUAL(ma1, ma2);
@ -41,7 +41,7 @@ template<typename VectorType> void map_class_vector(const VectorType& m)
VERIFY_IS_EQUAL(ma1, ma4);
#ifdef EIGEN_VECTORIZE
if(internal::packet_traits<Scalar>::Vectorizable)
VERIFY_RAISES_ASSERT((Map<VectorType,Aligned>(array3unaligned, size)))
VERIFY_RAISES_ASSERT((Map<VectorType,AlignedMax>(array3unaligned, size)))
#endif
internal::aligned_delete(array1, size);
@ -71,7 +71,7 @@ template<typename MatrixType> void map_class_matrix(const MatrixType& m)
for(int i = 0; i < size; i++) array4[i] = Scalar(1);
Map<MatrixType> map1(array1, rows, cols);
Map<MatrixType, Aligned> map2(array2, rows, cols);
Map<MatrixType, AlignedMax> map2(array2, rows, cols);
Map<MatrixType> map3(array3unaligned, rows, cols);
Map<MatrixType> map4(array4, rows, cols);
@ -154,9 +154,9 @@ template<typename PlainObjectType> void check_const_correctness(const PlainObjec
// verify that map-to-const don't have LvalueBit
typedef typename internal::add_const<PlainObjectType>::type ConstPlainObjectType;
VERIFY( !(internal::traits<Map<ConstPlainObjectType> >::Flags & LvalueBit) );
VERIFY( !(internal::traits<Map<ConstPlainObjectType, Aligned> >::Flags & LvalueBit) );
VERIFY( !(internal::traits<Map<ConstPlainObjectType, AlignedMax> >::Flags & LvalueBit) );
VERIFY( !(Map<ConstPlainObjectType>::Flags & LvalueBit) );
VERIFY( !(Map<ConstPlainObjectType, Aligned>::Flags & LvalueBit) );
VERIFY( !(Map<ConstPlainObjectType, AlignedMax>::Flags & LvalueBit) );
}
template<typename Scalar>

View File

@ -162,12 +162,12 @@ void unalignedassert()
}
for(int b=8; b<EIGEN_MAX_ALIGN_BYTES; b+=8)
{
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector8f>(b));
VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix4f>(b));
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4d>(b));
VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix2d>(b));
VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix4d>(b));
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2cd>(b));
if(b<32) VERIFY_RAISES_ASSERT(construct_at_boundary<Vector8f>(b));
if(b<64) VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix4f>(b));
if(b<32) VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4d>(b));
if(b<32) VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix2d>(b));
if(b<128) VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix4d>(b));
if(b<32) VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2cd>(b));
}
#endif
}

View File

@ -35,7 +35,6 @@ std::string demangle_flags(int f)
if(f&LinearAccessBit) res += " | Linear";
if(f&LvalueBit) res += " | Lvalue";
if(f&DirectAccessBit) res += " | Direct";
if(f&AlignedBit) res += " | Aligned";
if(f&NestByRefBit) res += " | NestByRef";
if(f&NoPreferredStorageOrderBit) res += " | NoPreferredStorageOrderBit";
@ -204,12 +203,12 @@ template<typename Scalar, bool Enable = internal::packet_traits<Scalar>::Vectori
LinearVectorizedTraversal,CompleteUnrolling));
VERIFY((test_assign<
Map<Matrix22, Aligned, OuterStride<3*PacketSize> >,
Map<Matrix22, AlignedMax, OuterStride<3*PacketSize> >,
Matrix22
>(InnerVectorizedTraversal,CompleteUnrolling)));
VERIFY((test_assign<
Map<Matrix<Scalar,EIGEN_PLAIN_ENUM_MAX(2,PacketSize),EIGEN_PLAIN_ENUM_MAX(2,PacketSize)>, Aligned, InnerStride<3*PacketSize> >,
Map<Matrix<Scalar,EIGEN_PLAIN_ENUM_MAX(2,PacketSize),EIGEN_PLAIN_ENUM_MAX(2,PacketSize)>, AlignedMax, InnerStride<3*PacketSize> >,
Matrix<Scalar,EIGEN_PLAIN_ENUM_MAX(2,PacketSize),EIGEN_PLAIN_ENUM_MAX(2,PacketSize)>
>(DefaultTraversal,CompleteUnrolling)));