Mirror of https://gitlab.com/libeigen/eigen.git (synced 2024-12-15 07:10:37 +08:00)

Commit a4f956b1da (merge)
@@ -105,7 +105,8 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
       OuterStrideMatch = Derived::IsVectorAtCompileTime
                       || int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime),
       AlignmentMatch = (_Options!=Aligned) || ((PlainObjectType::Flags&AlignedBit)==0) || ((traits<Derived>::Flags&AlignedBit)==AlignedBit),
-      MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch
+      ScalarTypeMatch = internal::is_same<typename PlainObjectType::Scalar, typename Derived::Scalar>::value,
+      MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch
     };
     typedef typename internal::conditional<MatchAtCompileTime,internal::true_type,internal::false_type>::type type;
   };
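The net effect of folding the new ScalarTypeMatch term into MatchAtCompileTime is that a Ref no longer considers an expression with a different scalar type a compile-time match, so overload resolution can discriminate on the scalar type (this is what the test added in test/ref.cpp below exercises). A minimal sketch of the same enable_if pattern outside Eigen; Mat, MyRef and fun are hypothetical stand-ins, not Eigen API:

#include <type_traits>
#include <iostream>

template<class T> struct Mat { typedef T Scalar; };

template<class PlainObjectType>
struct MyRef {
  // The constructor drops out of overload resolution unless the scalar
  // types agree -- the role played by the new ScalarTypeMatch term.
  template<class Derived>
  MyRef(const Derived&,
        typename std::enable_if<
          std::is_same<typename PlainObjectType::Scalar,
                       typename Derived::Scalar>::value, Derived>::type* = 0) {}
};

int fun(MyRef<Mat<double> >) { return 1; }  // viable only for double expressions
int fun(MyRef<Mat<float> >)  { return 2; }  // viable only for float expressions

int main() {
  std::cout << fun(Mat<double>()) << fun(Mat<float>()) << "\n"; // prints "12"
}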
@@ -184,9 +185,11 @@ protected:
 template<typename PlainObjectType, int Options, typename StrideType> class Ref
   : public RefBase<Ref<PlainObjectType, Options, StrideType> >
 {
   private:
     typedef internal::traits<Ref> Traits;
     template<typename Derived>
-    EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase<Derived>& expr);
+    EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase<Derived>& expr,
+                                 typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0);
   public:

     typedef RefBase<Ref> Base;
@@ -195,13 +198,15 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref

     #ifndef EIGEN_PARSED_BY_DOXYGEN
     template<typename Derived>
-    EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase<Derived>& expr)
+    EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase<Derived>& expr,
+                                 typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
     {
       EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
       Base::construct(expr.derived());
     }
     template<typename Derived>
-    EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr)
+    EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
+                                 typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
     #else
     template<typename Derived>
     inline Ref(DenseBase<Derived>& expr)
@@ -228,7 +233,8 @@ template<typename TPlainObjectType, int Options, typename StrideType> class Ref<
     EIGEN_DENSE_PUBLIC_INTERFACE(Ref)

     template<typename Derived>
-    EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr)
+    EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
+                                 typename internal::enable_if<bool(Traits::template match<Derived>::ScalarTypeMatch),Derived>::type* = 0)
     {
       // std::cout << match_helper<Derived>::HasDirectAccess << "," << match_helper<Derived>::OuterStrideMatch << "," << match_helper<Derived>::InnerStrideMatch << "\n";
       // std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n";
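For the Ref<const ...> specialization, the compile-time requirement is relaxed to ScalarTypeMatch alone: a same-scalar expression whose storage layout does not match is still accepted and evaluated into a temporary, while a different scalar type now falls through to another overload instead of being captured here. A hedged usage sketch; sum_d and sum_f are illustrative names, not part of the patch:

#include <Eigen/Core>
#include <iostream>
using namespace Eigen;

// Binds double expressions; copies into a temporary when the layout
// does not match directly.
double sum_d(Ref<const MatrixXd> m) { return m.sum(); }
// With ScalarTypeMatch in the constraint, float expressions can be
// routed to this overload instead of being ambiguous.
float sum_f(Ref<const MatrixXf> m) { return m.sum(); }

int main() {
  MatrixXd Ad = MatrixXd::Random(3,3), Bd = MatrixXd::Random(3,3);
  MatrixXf Af = MatrixXf::Random(3,3);
  std::cout << sum_d(Ad + Bd) << "\n"; // expression -> temporary -> Ref
  std::cout << sum_f(Af) << "\n";      // direct reference, no copy
}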
@@ -213,8 +213,22 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
     // Here, nc is chosen such that a block of kc x nc of the rhs fit within half of L2.
     // The second half is implicitly reserved to access the result and lhs coefficients.
     // When k<max_kc, then nc can arbitrarily growth. In practice, it seems to be fruitful
-    // to limit this growth: we bound nc to growth by a factor x1.5, leading to:
-    const Index max_nc = (3*actual_l2)/(2*2*max_kc*sizeof(RhsScalar));
+    // to limit this growth: we bound nc to growth by a factor x1.5.
+    // However, if the entire lhs block fit within L1, then we are not going to block on the rows at all,
+    // and it becomes fruitful to keep the packed rhs blocks in L1 if there is enough remaining space.
+    Index max_nc;
+    const Index lhs_bytes = m * k * sizeof(LhsScalar);
+    const Index remaining_l1 = l1- k_sub - lhs_bytes;
+    if(remaining_l1 >= Index(Traits::nr*sizeof(RhsScalar))*k)
+    {
+      // L1 blocking
+      max_nc = remaining_l1 / (k*sizeof(RhsScalar));
+    }
+    else
+    {
+      // L2 blocking
+      max_nc = (3*actual_l2)/(2*2*max_kc*sizeof(RhsScalar));
+    }
     // WARNING Below, we assume that Traits::nr is a power of two.
     Index nc = std::min<Index>(actual_l2/(2*k*sizeof(RhsScalar)), max_nc) & (~(Traits::nr-1));
     if(n>nc)
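To make the new branch concrete, here is a toy re-evaluation of the heuristic under assumed values; l1, k_sub, Traits::nr and the scalar sizes are not defined in this hunk, so every constant below is illustrative only:

#include <algorithm>
#include <cstdio>

int main() {
  typedef long Index;
  // Assumed machine/kernel parameters (illustrative, not Eigen's values).
  const Index l1 = 32*1024, actual_l2 = 512*1024;
  const Index nr = 4, rhs_size = 8, lhs_size = 8;   // double precision
  const Index m = 48, k = 256, max_kc = 256, k_sub = 4*1024;

  const Index lhs_bytes = m * k * lhs_size;
  const Index remaining_l1 = l1 - k_sub - lhs_bytes;
  Index max_nc;
  if (remaining_l1 >= Index(nr*rhs_size)*k)
    max_nc = remaining_l1 / (k*rhs_size);             // L1 blocking
  else
    max_nc = (3*actual_l2) / (2*2*max_kc*rhs_size);   // L2 blocking
  // nr is assumed to be a power of two: the mask rounds nc down to a
  // multiple of nr.
  Index nc = std::min(actual_l2/(2*k*rhs_size), max_nc) & ~(nr-1);
  std::printf("max_nc=%ld nc=%ld\n", max_nc, nc);     // max_nc=96 nc=96 here
}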
@@ -230,6 +244,7 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
   {
     // So far, no blocking at all, i.e., kc==k, and nc==n.
     // In this case, let's perform a blocking over the rows such that the packed lhs data is kept in cache L1/L2
+    // TODO: part of this blocking strategy is now implemented within the kernel itself, so the L1-based heuristic here should be obsolete.
     Index problem_size = k*n*sizeof(LhsScalar);
     Index actual_lm = actual_l2;
     Index max_mc = m;
@@ -951,33 +966,32 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
     // This corresponds to 3*LhsProgress x nr register blocks.
     // Usually, make sense only with FMA
     if(mr>=3*Traits::LhsProgress)
     {
-#ifdef EIGEN_TEST_SPECIFIC_LOOP_SWAP_CRITERION
-      const bool swap_loops = EIGEN_TEST_SPECIFIC_LOOP_SWAP_CRITERION;
-#else
-      const bool swap_loops = depth<48;
-#endif
-
-      Index bound1 = swap_loops ? packet_cols4 : peeled_mc3;
-      Index bound2 = !swap_loops ? packet_cols4 : peeled_mc3;
-      Index incr1 = swap_loops ? nr : 3*Traits::LhsProgress;
-      Index incr2 = !swap_loops ? nr : 3*Traits::LhsProgress;
-
      {
        PossiblyRotatingKernelHelper<gebp_kernel> possiblyRotatingKernelHelper(traits);

-        // loops on each largest micro horizontal panel of lhs (3*Traits::LhsProgress x depth)
-        // and on each largest micro vertical panel of rhs (depth * nr)
-        for(Index it1=0; it1<bound1; it1+=incr1)
-
+        // Here, the general idea is to loop on each largest micro horizontal panel of the lhs (3*Traits::LhsProgress x depth)
+        // and on each largest micro vertical panel of the rhs (depth * nr).
+        // Blocking sizes, i.e., 'depth' has been computed so that the micro horizontal panel of the lhs fit in L1.
+        // However, if depth is too small, we can extend the number of rows of these horizontal panels.
+        // This actual number of rows is computed as follow:
+        const Index l1 = 32*1024; // in Bytes, TODO, l1 should be passed to this function.
+#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
+        const Index actual_panel_rows = (3*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) ));
+#else
+        const Index actual_panel_rows = (3*LhsProgress) * ( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) );
+#endif
+        for(Index i1=0; i1<peeled_mc3; i1+=actual_panel_rows)
        {
-          for(Index it2=0; it2<bound2; it2+=incr2)
+          const Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc3);
+          for(Index j2=0; j2<packet_cols4; j2+=nr)
          {
-            Index i = swap_loops ? it2 : it1;
-            Index j2 = !swap_loops ? it2 : it1;
+            for(Index i=i1; i<actual_panel_end; i+=3*LhsProgress)
+            {

-            // We select a 3*Traits::LhsProgress x nr micro block of res which is entirely
+            // We selected a 3*Traits::LhsProgress x nr micro block of res which is entirely
             // stored into 3 x nr registers.

-            const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*Traits::LhsProgress)];
+            const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*LhsProgress)];
             prefetch(&blA[0]);

             // gets res block as register
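Plugging assumed numbers into the panel-height formula shows the intent: the micro panel grows until a 3*LhsProgress-row slice of the packed lhs, the packed rhs panel and the result block together fill L1. mr, nr, LhsProgress, depth and the scalar sizes below are assumptions for illustration, not Eigen's actual values:

#include <algorithm>
#include <cstdio>

int main() {
  typedef long Index;
  const Index l1 = 32*1024;                        // bytes, as in the TODO above
  const Index mr = 12, nr = 4, LhsProgress = 4;    // assumed kernel geometry
  const Index depth = 64;                          // assumed k-blocking
  const Index sL = 8, sR = 8, sRes = 8;            // sizeof double

  const Index actual_panel_rows = (3*LhsProgress) *
      std::max<Index>(1, (l1 - sRes*mr*nr - depth*nr*sR)
                           / (depth * sL * 3*LhsProgress));
  // (32768 - 384 - 2048) / 6144 = 4, so the panel is 12*4 = 48 rows tall.
  std::printf("actual_panel_rows = %ld\n", actual_panel_rows);
}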
@@ -1109,16 +1123,15 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
           traits.acc(C11, alphav, R2);
           r3.storePacket(0 * Traits::ResPacketSize, R0);
           r3.storePacket(1 * Traits::ResPacketSize, R1);
           r3.storePacket(2 * Traits::ResPacketSize, R2);
-          r3.storePacket(2 * Traits::ResPacketSize, R2);
          }
        }
-      }

-      // Deal with remaining columns of the rhs
-      if(packet_cols4<cols)
-      for(Index i=0; i<peeled_mc3; i+=3*Traits::LhsProgress)
-      {
+          // Deal with remaining columns of the rhs
+          for(Index j2=packet_cols4; j2<cols; j2++)
+          {
+            for(Index i=i1; i<actual_panel_end; i+=3*LhsProgress)
+            {
            // One column at a time
            const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*Traits::LhsProgress)];
            prefetch(&blA[0]);
@@ -1189,7 +1202,8 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
           traits.acc(C8, alphav, R2);
           r0.storePacket(0 * Traits::ResPacketSize, R0);
           r0.storePacket(1 * Traits::ResPacketSize, R1);
-          r0.storePacket(2 * Traits::ResPacketSize, R2);
+          r0.storePacket(2 * Traits::ResPacketSize, R2);
          }
        }
      }
+    }
@@ -1197,26 +1211,21 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
   //---------- Process 2 * LhsProgress rows at once ----------
   if(mr>=2*Traits::LhsProgress)
   {
-#ifdef EIGEN_TEST_SPECIFIC_LOOP_SWAP_CRITERION
-    const bool swap_loops = (mr<3*Traits::LhsProgress) && (EIGEN_TEST_SPECIFIC_LOOP_SWAP_CRITERION);
+    const Index l1 = 32*1024; // in Bytes, TODO, l1 should be passed to this function.
+#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
+    Index actual_panel_rows = (2*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 2*LhsProgress) ));
 #else
-    const bool swap_loops = (mr<3*Traits::LhsProgress) && (depth<48);
-#endif
-    Index start1 = swap_loops ? 0 : peeled_mc3;
-    Index start2 = !swap_loops ? 0 : peeled_mc3;
-    Index bound1 = swap_loops ? packet_cols4 : peeled_mc2;
-    Index bound2 = !swap_loops ? packet_cols4 : peeled_mc2;
-    Index incr1 = swap_loops ? nr : 2*Traits::LhsProgress;
-    Index incr2 = !swap_loops ? nr : 2*Traits::LhsProgress;
-
-    for(Index it1=start1; it1<bound1; it1+=incr1)
+    Index actual_panel_rows = (2*LhsProgress) * ( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 2*LhsProgress) );
+#endif
+    for(Index i1=peeled_mc3; i1<peeled_mc2; i1+=actual_panel_rows)
     {
-      for(Index it2=start2; it2<bound2; it2+=incr2)
+      Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc2);
+      for(Index j2=0; j2<packet_cols4; j2+=nr)
      {
-        Index i = swap_loops ? it2 : it1;
-        Index j2 = !swap_loops ? it2 : it1;
+        for(Index i=i1; i<actual_panel_end; i+=2*LhsProgress)
+        {

-        // We select a 2*Traits::LhsProgress x nr micro block of res which is entirely
+        // We selected a 2*Traits::LhsProgress x nr micro block of res which is entirely
         // stored into 2 x nr registers.

         const LhsScalar* blA = &blockA[i*strideA+offsetA*(2*Traits::LhsProgress)];
@@ -1320,15 +1329,14 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
           r2.storePacket(1 * Traits::ResPacketSize, R1);
           r3.storePacket(0 * Traits::ResPacketSize, R2);
           r3.storePacket(1 * Traits::ResPacketSize, R3);
          }
        }
-      }

-      // Deal with remaining columns of the rhs
-      if(packet_cols4<cols)
-      for(Index i=peeled_mc3; i<peeled_mc2; i+=2*Traits::LhsProgress)
-      {
+        // Deal with remaining columns of the rhs
+        for(Index j2=packet_cols4; j2<cols; j2++)
        {
+          for(Index i=i1; i<actual_panel_end; i+=2*LhsProgress)
          {
           // One column at a time
           const LhsScalar* blA = &blockA[i*strideA+offsetA*(2*Traits::LhsProgress)];
           prefetch(&blA[0]);
@@ -1395,6 +1403,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
           traits.acc(C4, alphav, R1);
           r0.storePacket(0 * Traits::ResPacketSize, R0);
           r0.storePacket(1 * Traits::ResPacketSize, R1);
          }
        }
      }
+    }

@@ -457,6 +457,8 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
   static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha)
   {
     eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
+    if(a_lhs.cols()==0 || a_lhs.rows()==0 || a_rhs.cols()==0)
+      return;

     typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
     typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
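The new early return covers degenerate products where an inner or outer dimension is zero: the result must still be a correctly sized (zero) matrix, but the GEMM kernel must not run on empty blocks. Expected behavior at the API level, mirroring the test added below in test/product_extra.cpp:

#include <Eigen/Dense>
#include <cassert>
using namespace Eigen;

int main() {
  // Inner dimension 0: mathematically a 3x4 zero matrix.
  MatrixXd a(3,0), b(0,4), res;
  res = a * b;                 // hits the early return, no kernel call
  assert(res.rows()==3 && res.cols()==4 && res.isZero());

  // Outer dimension 0: an empty result, again without invoking the kernel.
  MatrixXd c(0,5), d(5,2);
  res = c * d;
  assert(res.rows()==0 && res.cols()==2);
}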
@@ -203,7 +203,7 @@ void benchmark_t::run()

   double starttime = timer.getCpuTime();
   for (int i = 0; i < iters_at_a_time; i++) {
-    dst[matrix_index] = lhs[matrix_index] * rhs[matrix_index];
+    dst[matrix_index].noalias() = lhs[matrix_index] * rhs[matrix_index];
     matrix_index++;
     if (matrix_index == matrix_pool_size) {
       matrix_index = 0;
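This benchmark fix matters because a plain `dst = lhs * rhs` materializes the product in a temporary and then copies it, so the measured time would include an extra copy; `.noalias()` asserts that the destination does not alias the operands and lets the product be written in place:

#include <Eigen/Dense>
using namespace Eigen;

int main() {
  MatrixXd a = MatrixXd::Random(64,64), b = MatrixXd::Random(64,64);
  MatrixXd c(64,64);
  c = a * b;            // safe default: evaluate into a temporary, then copy
  c.noalias() = a * b;  // no-aliasing promise: write the product directly
  return 0;
}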
@@ -1,39 +1,43 @@
-3.0.1
-3.1.1
-3.2.0
+#3.0.1
+#3.1.1
+#3.2.0
 3.2.4
-5745:37f59e65eb6c
-5891:d8652709345d
-5893:24b4dc92c6d3
-5895:997c2ef9fc8b
-5904:e1eafd14eaa1
-5908:f8ee3c721251
-5921:ca808bb456b0
-5927:8b1001f9e3ac
-5937:5a4ca1ad8c53
-5949:f3488f4e45b2
-5969:e09031dccfd9
-5992:4a429f5e0483
+#5745:37f59e65eb6c
+5891:d8652709345d # introduce AVX
+#5893:24b4dc92c6d3 # merge
+5895:997c2ef9fc8b # introduce FMA
+#5904:e1eafd14eaa1 # complex and AVX
+5908:f8ee3c721251 # improve packing with ptranspose
+#5921:ca808bb456b0 # merge
+#5927:8b1001f9e3ac
+5937:5a4ca1ad8c53 # New gebp kernel handling up to 3 packets x 4 register-level blocks
+#5949:f3488f4e45b2 # merge
+#5969:e09031dccfd9 # Disable 3pX4 kernel on Altivec
+#5992:4a429f5e0483 # merge
 before-evaluators
-6334:f6a45e5b8b7c
-6639:c9121c60b5c7
-6655:06f163b5221f
-6677:700e023044e7 # FMA has been wrongly disabled
-6681:11d31dafb0e3
-6699:5e6e8e10aad1 # merge default to tensors
-6726:ff2d2388e7b9 # merge default to tensors
-6742:0cbd6195e829 # merge default to tensors
-6747:853d2bafeb8f # Generalized the gebp apis
+#6334:f6a45e5b8b7c # Implement evaluator for sparse outer products
+#6639:c9121c60b5c7
+#6655:06f163b5221f # Properly detect FMA support on ARM
+#6677:700e023044e7 # FMA has been wrongly disabled
+#6681:11d31dafb0e3
+#6699:5e6e8e10aad1 # merge default to tensors
+#6726:ff2d2388e7b9 # merge default to tensors
+#6742:0cbd6195e829 # merge default to tensors
+#6747:853d2bafeb8f # Generalized the gebp apis
 6765:71584fd55762 # Made the blocking computation aware of the l3 cache; Also optimized the blocking parameters to take into account the number of threads used for a computation
-6781:9cc5a931b2c6 # generalized gemv
-6792:f6e1daab600a # ensured that contractions that can be reduced to a matrix vector product
-6844:039efd86b75c # merge tensor
+#6781:9cc5a931b2c6 # generalized gemv
+#6792:f6e1daab600a # ensured that contractions that can be reduced to a matrix vector product
+#6844:039efd86b75c # merge tensor
 6845:7333ed40c6ef # change prefetching in gebp
-6856:b5be5e10eb7f # merge index conversion
-6893:c3a64aba7c70 # clean blocking size computation
-6898:6fb31ebe6492 # rotating kernel for ARM
+#6856:b5be5e10eb7f # merge index conversion
+#6893:c3a64aba7c70 # clean blocking size computation
+#6898:6fb31ebe6492 # rotating kernel for ARM
 6899:877facace746 # rotating kernel for ARM only
-6904:c250623ae9fa # result_of
+#6904:c250623ae9fa # result_of
 6921:915f1b1fc158 # fix prefetching change for ARM
 6923:9ff25f6dacc6 # prefetching
-6933:52572e60b5d3 # blocking size strategy
+6933:52572e60b5d3 # blocking size strategy
+6937:c8c042f286b2 # avoid redundant pack_rhs
+6981:7e5d6f78da59 # dynamic loop swapping
+6984:45f26866c091 # rm dynamic loop swapping, adjust lhs's micro panel height to fully exploit L1 cache
+6986:a675d05b6f8f # blocking heuristic: block on the rhs in L1 if the lhs fit in L1.

@@ -6,6 +6,7 @@

 # Options:
 # -up : enforce the recomputation of existing data, and keep best results as a merging strategy
+# -s  : recompute selected changesets only and keep bests


 if echo "$*" | grep '\-up' > /dev/null; then
@@ -14,14 +15,30 @@ else
 update=false
 fi

-if [ $update == true ]; then
+if echo "$*" | grep '\-s' > /dev/null; then
+  selected=true
+else
+  selected=false
+fi
+
+global_args="$*"
+
+if [ $selected == true ]; then
+  echo "Recompute selected changesets only and keep bests"
+elif [ $update == true ]; then
   echo "(Re-)Compute all changesets and keep bests"
 else
   echo "Skip previously computed changesets"
 fi



 if [ ! -d "eigen_src" ]; then
   hg clone https://bitbucket.org/eigen/eigen eigen_src
 else
   cd eigen_src
   hg pull -u
   cd ..
 fi

 if [ ! -z '$CXX' ]; then
@@ -61,17 +78,31 @@ function test_current
   scalar=$2
   name=$3

-  prev=`grep $rev "$name.backup" | cut -c 14-`
+  prev=""
+  if [ -e "$name.backup" ]; then
+    prev=`grep $rev "$name.backup" | cut -c 14-`
+  fi
   res=$prev
   count_rev=`echo $prev | wc -w`
   count_ref=`cat "settings.txt" | wc -l`
-  if [ $update == true ] || [ $count_rev != $count_ref ]; then
+  if echo "$global_args" | grep "$rev" > /dev/null; then
+    rev_found=true
+  else
+    rev_found=false
+  fi
+#  echo $update et $selected et $rev_found because $rev et "$global_args"
+#  echo $count_rev et $count_ref
+  if [ $update == true ] || [ $count_rev != $count_ref ] || ([ $selected == true ] && [ $rev_found == true ]); then
     if $CXX -O2 -DNDEBUG -march=native $CXX_FLAGS -I eigen_src gemm.cpp -DSCALAR=$scalar -o $name; then
       curr=`./$name`
-      echo merge $prev
-      echo with $curr
+      if [ $count_rev == $count_ref ]; then
+        echo "merge previous $prev"
+        echo "with new $curr"
+      else
+        echo "got $curr"
+      fi
       res=`merge "$curr" "$prev"`
-      echo $res
+#      echo $res
       echo "$rev $res" >> $name.out
     else
       echo "Compilation failed, skip rev $rev"
@@ -86,12 +117,12 @@ make_backup $PREFIX"sgemm"
 make_backup $PREFIX"dgemm"
 make_backup $PREFIX"cgemm"

-cut -f1 -d"#" < changesets.txt | while read rev
+cut -f1 -d"#" < changesets.txt | grep -E '[[:alnum:]]' | while read rev
 do
   if [ ! -z '$rev' ]; then
     echo "Testing rev $rev"
     cd eigen_src
-    hg up -C $rev
+    hg up -C $rev > /dev/null
     actual_rev=`hg identify | cut -f1 -d' '`
     cd ..

@@ -1,5 +1,6 @@
 8 8 8
+9 9 9
 24 24 24
 239 239 239
 240 240 240
 2400 24 24
@@ -8,4 +9,7 @@
 24 2400 2400
 2400 24 2400
 2400 2400 24
+2400 2400 64
+4800 23 160
+23 4800 160
 2400 2400 2400

@@ -109,8 +109,33 @@ void mat_mat_scalar_scalar_product()
   double det = 6.0, wt = 0.5;
   VERIFY_IS_APPROX(dNdxy.transpose()*dNdxy*det*wt, det*wt*dNdxy.transpose()*dNdxy);
 }

+template <typename MatrixType>
+void zero_sized_objects(const MatrixType& m)
+{
+  Index rows = m.rows();
+  Index cols = m.cols();
+
-void zero_sized_objects()
-{
+  {
+    MatrixType res, a(rows,0), b(0,cols);
+    VERIFY_IS_APPROX( (res=a*b), MatrixType::Zero(rows,cols) );
+    VERIFY_IS_APPROX( (res=a*a.transpose()), MatrixType::Zero(rows,rows) );
+    VERIFY_IS_APPROX( (res=b.transpose()*b), MatrixType::Zero(cols,cols) );
+    VERIFY_IS_APPROX( (res=b.transpose()*a.transpose()), MatrixType::Zero(cols,rows) );
+  }
+
+  {
+    MatrixType res, a(rows,cols), b(cols,0);
+    res = a*b;
+    VERIFY(res.rows()==rows && res.cols()==0);
+    b.resize(0,rows);
+    res = b*a;
+    VERIFY(res.rows()==0 && res.cols()==cols);
+  }
+}
+
+
+void bug_127()
+{
   // Bug 127
   //
@@ -171,7 +196,8 @@ void test_product_extra()
     CALL_SUBTEST_2( mat_mat_scalar_scalar_product() );
     CALL_SUBTEST_3( product_extra(MatrixXcf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2), internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2))) );
     CALL_SUBTEST_4( product_extra(MatrixXcd(internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2), internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2))) );
+    CALL_SUBTEST_1( zero_sized_objects(MatrixXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
   }
-  CALL_SUBTEST_5( zero_sized_objects() );
+  CALL_SUBTEST_5( bug_127() );
   CALL_SUBTEST_6( unaligned_objects() );
 }

test/ref.cpp (24 lines changed)
@@ -228,6 +228,28 @@ void call_ref()
   VERIFY_EVALUATION_COUNT( call_ref_7(c,c), 0);
 }

+typedef Matrix<double,Dynamic,Dynamic,RowMajor> RowMatrixXd;
+int test_ref_overload_fun1(Ref<MatrixXd> ) { return 1; }
+int test_ref_overload_fun1(Ref<RowMatrixXd> ) { return 2; }
+int test_ref_overload_fun1(Ref<MatrixXf> ) { return 3; }
+
+int test_ref_overload_fun2(Ref<const MatrixXd> ) { return 4; }
+int test_ref_overload_fun2(Ref<const MatrixXf> ) { return 5; }
+
+// See also bug 969
+void test_ref_overloads()
+{
+  MatrixXd Ad, Bd;
+  RowMatrixXd rAd, rBd;
+  VERIFY( test_ref_overload_fun1(Ad)==1 );
+  VERIFY( test_ref_overload_fun1(rAd)==2 );
+
+  MatrixXf Af, Bf;
+  VERIFY( test_ref_overload_fun2(Ad)==4 );
+  VERIFY( test_ref_overload_fun2(Ad+Bd)==4 );
+  VERIFY( test_ref_overload_fun2(Af+Bf)==5 );
+}
+
 void test_ref()
 {
   for(int i = 0; i < g_repeat; i++) {
@@ -248,4 +270,6 @@ void test_ref()
     CALL_SUBTEST_5( ref_matrix(MatrixXi(internal::random<int>(1,10),internal::random<int>(1,10))) );
     CALL_SUBTEST_6( call_ref() );
   }
+
+  CALL_SUBTEST_7( test_ref_overloads() );
 }