Optimization: enable cache-efficient application of HouseholderSequence.

This commit is contained in:
Gael Guennebaud 2014-09-04 09:15:59 +02:00
parent 80993b95d3
commit 8846aa6d1b
4 changed files with 89 additions and 28 deletions

View File

@ -16,48 +16,85 @@
namespace Eigen {
namespace internal {
/** \internal */
// template<typename TriangularFactorType,typename VectorsType,typename CoeffsType>
// void make_block_householder_triangular_factor(TriangularFactorType& triFactor, const VectorsType& vectors, const CoeffsType& hCoeffs)
// {
// typedef typename TriangularFactorType::Index Index;
// typedef typename VectorsType::Scalar Scalar;
// const Index nbVecs = vectors.cols();
// eigen_assert(triFactor.rows() == nbVecs && triFactor.cols() == nbVecs && vectors.rows()>=nbVecs);
//
// for(Index i = 0; i < nbVecs; i++)
// {
// Index rs = vectors.rows() - i;
// // Warning, note that hCoeffs may alias with vectors.
// // It is then necessary to copy it before modifying vectors(i,i).
// typename CoeffsType::Scalar h = hCoeffs(i);
// // This hack permits to pass trough nested Block<> and Transpose<> expressions.
// Scalar *Vii_ptr = const_cast<Scalar*>(vectors.data() + vectors.outerStride()*i + vectors.innerStride()*i);
// Scalar Vii = *Vii_ptr;
// *Vii_ptr = Scalar(1);
// triFactor.col(i).head(i).noalias() = -h * vectors.block(i, 0, rs, i).adjoint()
// * vectors.col(i).tail(rs);
// *Vii_ptr = Vii;
// // FIXME add .noalias() once the triangular product can work inplace
// triFactor.col(i).head(i) = triFactor.block(0,0,i,i).template triangularView<Upper>()
// * triFactor.col(i).head(i);
// triFactor(i,i) = hCoeffs(i);
// }
// }
/** \internal */
// This variant avoid modifications in vectors
template<typename TriangularFactorType,typename VectorsType,typename CoeffsType>
void make_block_householder_triangular_factor(TriangularFactorType& triFactor, const VectorsType& vectors, const CoeffsType& hCoeffs)
{
typedef typename TriangularFactorType::Index Index;
typedef typename VectorsType::Scalar Scalar;
const Index nbVecs = vectors.cols();
eigen_assert(triFactor.rows() == nbVecs && triFactor.cols() == nbVecs && vectors.rows()>=nbVecs);
for(Index i = 0; i < nbVecs; i++)
for(Index i = nbVecs-1; i >=0 ; --i)
{
Index rs = vectors.rows() - i;
Scalar Vii = vectors(i,i);
vectors.const_cast_derived().coeffRef(i,i) = Scalar(1);
triFactor.col(i).head(i).noalias() = -hCoeffs(i) * vectors.block(i, 0, rs, i).adjoint()
* vectors.col(i).tail(rs);
vectors.const_cast_derived().coeffRef(i, i) = Vii;
// FIXME add .noalias() once the triangular product can work inplace
triFactor.col(i).head(i) = triFactor.block(0,0,i,i).template triangularView<Upper>()
* triFactor.col(i).head(i);
Index rs = vectors.rows() - i - 1;
Index rt = nbVecs-i-1;
if(rt>0)
{
triFactor.row(i).tail(rt).noalias() = -hCoeffs(i) * vectors.col(i).tail(rs).adjoint()
* vectors.bottomRightCorner(rs, rt).template triangularView<UnitLower>();
// FIXME add .noalias() once the triangular product can work inplace
triFactor.row(i).tail(rt) = triFactor.row(i).tail(rt) * triFactor.bottomRightCorner(rt,rt).template triangularView<Upper>();
}
triFactor(i,i) = hCoeffs(i);
}
}
/** \internal */
/** \internal
* if forward then perform mat = H0 * H1 * H2 * mat
* otherwise perform mat = H2 * H1 * H0 * mat
*/
template<typename MatrixType,typename VectorsType,typename CoeffsType>
void apply_block_householder_on_the_left(MatrixType& mat, const VectorsType& vectors, const CoeffsType& hCoeffs)
void apply_block_householder_on_the_left(MatrixType& mat, const VectorsType& vectors, const CoeffsType& hCoeffs, bool forward)
{
typedef typename MatrixType::Index Index;
enum { TFactorSize = MatrixType::ColsAtCompileTime };
Index nbVecs = vectors.cols();
Matrix<typename MatrixType::Scalar, TFactorSize, TFactorSize, ColMajor> T(nbVecs,nbVecs);
make_block_householder_triangular_factor(T, vectors, hCoeffs);
const TriangularView<const VectorsType, UnitLower>& V(vectors);
Matrix<typename MatrixType::Scalar, TFactorSize, TFactorSize, RowMajor> T(nbVecs,nbVecs);
if(forward) make_block_householder_triangular_factor(T, vectors, hCoeffs);
else make_block_householder_triangular_factor(T, vectors, hCoeffs.conjugate());
const TriangularView<const VectorsType, UnitLower> V(vectors);
// A -= V T V^* A
Matrix<typename MatrixType::Scalar,VectorsType::ColsAtCompileTime,MatrixType::ColsAtCompileTime,0,
VectorsType::MaxColsAtCompileTime,MatrixType::MaxColsAtCompileTime> tmp = V.adjoint() * mat;
// FIXME add .noalias() once the triangular product can work inplace
tmp = T.template triangularView<Upper>().adjoint() * tmp;
if(forward) tmp = T.template triangularView<Upper>() * tmp;
else tmp = T.template triangularView<Upper>().adjoint() * tmp;
mat.noalias() -= V * tmp;
}

View File

@ -319,12 +319,36 @@ template<typename VectorsType, typename CoeffsType, int Side> class HouseholderS
template<typename Dest, typename Workspace>
inline void applyThisOnTheLeft(Dest& dst, Workspace& workspace) const
{
workspace.resize(dst.cols());
for(Index k = 0; k < m_length; ++k)
const Index BlockSize = 1;
// if the entries are large enough, then apply the reflectors by block
if(m_length>BlockSize && dst.cols()>1)
{
Index actual_k = m_trans ? k : m_length-k-1;
dst.bottomRows(rows()-m_shift-actual_k)
.applyHouseholderOnTheLeft(essentialVector(actual_k), m_coeffs.coeff(actual_k), workspace.data());
for(Index i = 0; i < m_length; i+=BlockSize)
{
Index end = m_trans ? (std::min)(m_length,i+BlockSize) : m_length-i;
Index k = m_trans ? i : (std::max)(Index(0),end-BlockSize);
Index bs = end-k;
Index start = k + m_shift;
typedef Block<typename internal::remove_all<VectorsType>::type,Dynamic,Dynamic> SubVectorsType;
SubVectorsType sub_vecs1(m_vectors.const_cast_derived(), Side==OnTheRight ? k : start,
Side==OnTheRight ? start : k,
Side==OnTheRight ? bs : m_vectors.rows()-start,
Side==OnTheRight ? m_vectors.cols()-start : bs);
typename internal::conditional<Side==OnTheRight, Transpose<SubVectorsType>, SubVectorsType&>::type sub_vecs(sub_vecs1);
Block<Dest,Dynamic,Dynamic> sub_dst(dst,dst.rows()-rows()+m_shift+k,0, rows()-m_shift-k,dst.cols());
apply_block_householder_on_the_left(sub_dst, sub_vecs, m_coeffs.segment(k, bs), !m_trans);
}
}
else
{
workspace.resize(dst.cols());
for(Index k = 0; k < m_length; ++k)
{
Index actual_k = m_trans ? k : m_length-k-1;
dst.bottomRows(rows()-m_shift-actual_k)
.applyHouseholderOnTheLeft(essentialVector(actual_k), m_coeffs.coeff(actual_k), workspace.data());
}
}
}

View File

@ -299,8 +299,8 @@ struct householder_qr_inplace_blocked
for (k = 0; k < size; k += blockSize)
{
Index bs = (std::min)(size-k,blockSize); // actual size of the block
Index tcols = cols - k - bs; // trailing columns
Index brows = rows-k; // rows of the block
Index tcols = cols - k - bs; // trailing columns
Index brows = rows-k; // rows of the block
// partition the matrix:
// A00 | A01 | A02
@ -318,7 +318,7 @@ struct householder_qr_inplace_blocked
if(tcols)
{
BlockType A21_22 = mat.block(k,k+bs,brows,tcols);
apply_block_householder_on_the_left(A21_22,A11_21,hCoeffsSegment.adjoint());
apply_block_householder_on_the_left(A21_22,A11_21,hCoeffsSegment, false); // false == backward
}
}
}

View File

@ -215,10 +215,10 @@ void upperbidiagonalization_blocked_helper(MatrixType& A,
if(k) u_k -= U_k1.adjoint() * X.row(k).head(k).adjoint();
}
// 5 - construct right Householder transform in-placecols
// 5 - construct right Householder transform in-place
u_k.makeHouseholderInPlace(tau_u, upper_diagonal[k]);
// this eases the application of Householder transforAions
// this eases the application of Householder transformations
// A(k,k+1) will store tau_u later
A(k,k+1) = Scalar(1);