mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-21 07:19:46 +08:00
Optimization: enable cache-efficient application of HouseholderSequence.
This commit is contained in:
parent
80993b95d3
commit
8846aa6d1b
@ -16,48 +16,85 @@
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
/** \internal */
|
||||
// template<typename TriangularFactorType,typename VectorsType,typename CoeffsType>
|
||||
// void make_block_householder_triangular_factor(TriangularFactorType& triFactor, const VectorsType& vectors, const CoeffsType& hCoeffs)
|
||||
// {
|
||||
// typedef typename TriangularFactorType::Index Index;
|
||||
// typedef typename VectorsType::Scalar Scalar;
|
||||
// const Index nbVecs = vectors.cols();
|
||||
// eigen_assert(triFactor.rows() == nbVecs && triFactor.cols() == nbVecs && vectors.rows()>=nbVecs);
|
||||
//
|
||||
// for(Index i = 0; i < nbVecs; i++)
|
||||
// {
|
||||
// Index rs = vectors.rows() - i;
|
||||
// // Warning, note that hCoeffs may alias with vectors.
|
||||
// // It is then necessary to copy it before modifying vectors(i,i).
|
||||
// typename CoeffsType::Scalar h = hCoeffs(i);
|
||||
// // This hack permits to pass trough nested Block<> and Transpose<> expressions.
|
||||
// Scalar *Vii_ptr = const_cast<Scalar*>(vectors.data() + vectors.outerStride()*i + vectors.innerStride()*i);
|
||||
// Scalar Vii = *Vii_ptr;
|
||||
// *Vii_ptr = Scalar(1);
|
||||
// triFactor.col(i).head(i).noalias() = -h * vectors.block(i, 0, rs, i).adjoint()
|
||||
// * vectors.col(i).tail(rs);
|
||||
// *Vii_ptr = Vii;
|
||||
// // FIXME add .noalias() once the triangular product can work inplace
|
||||
// triFactor.col(i).head(i) = triFactor.block(0,0,i,i).template triangularView<Upper>()
|
||||
// * triFactor.col(i).head(i);
|
||||
// triFactor(i,i) = hCoeffs(i);
|
||||
// }
|
||||
// }
|
||||
|
||||
/** \internal */
|
||||
// This variant avoid modifications in vectors
|
||||
template<typename TriangularFactorType,typename VectorsType,typename CoeffsType>
|
||||
void make_block_householder_triangular_factor(TriangularFactorType& triFactor, const VectorsType& vectors, const CoeffsType& hCoeffs)
|
||||
{
|
||||
typedef typename TriangularFactorType::Index Index;
|
||||
typedef typename VectorsType::Scalar Scalar;
|
||||
const Index nbVecs = vectors.cols();
|
||||
eigen_assert(triFactor.rows() == nbVecs && triFactor.cols() == nbVecs && vectors.rows()>=nbVecs);
|
||||
|
||||
for(Index i = 0; i < nbVecs; i++)
|
||||
for(Index i = nbVecs-1; i >=0 ; --i)
|
||||
{
|
||||
Index rs = vectors.rows() - i;
|
||||
Scalar Vii = vectors(i,i);
|
||||
vectors.const_cast_derived().coeffRef(i,i) = Scalar(1);
|
||||
triFactor.col(i).head(i).noalias() = -hCoeffs(i) * vectors.block(i, 0, rs, i).adjoint()
|
||||
* vectors.col(i).tail(rs);
|
||||
vectors.const_cast_derived().coeffRef(i, i) = Vii;
|
||||
// FIXME add .noalias() once the triangular product can work inplace
|
||||
triFactor.col(i).head(i) = triFactor.block(0,0,i,i).template triangularView<Upper>()
|
||||
* triFactor.col(i).head(i);
|
||||
Index rs = vectors.rows() - i - 1;
|
||||
Index rt = nbVecs-i-1;
|
||||
|
||||
if(rt>0)
|
||||
{
|
||||
triFactor.row(i).tail(rt).noalias() = -hCoeffs(i) * vectors.col(i).tail(rs).adjoint()
|
||||
* vectors.bottomRightCorner(rs, rt).template triangularView<UnitLower>();
|
||||
|
||||
// FIXME add .noalias() once the triangular product can work inplace
|
||||
triFactor.row(i).tail(rt) = triFactor.row(i).tail(rt) * triFactor.bottomRightCorner(rt,rt).template triangularView<Upper>();
|
||||
|
||||
}
|
||||
triFactor(i,i) = hCoeffs(i);
|
||||
}
|
||||
}
|
||||
|
||||
/** \internal */
|
||||
/** \internal
|
||||
* if forward then perform mat = H0 * H1 * H2 * mat
|
||||
* otherwise perform mat = H2 * H1 * H0 * mat
|
||||
*/
|
||||
template<typename MatrixType,typename VectorsType,typename CoeffsType>
|
||||
void apply_block_householder_on_the_left(MatrixType& mat, const VectorsType& vectors, const CoeffsType& hCoeffs)
|
||||
void apply_block_householder_on_the_left(MatrixType& mat, const VectorsType& vectors, const CoeffsType& hCoeffs, bool forward)
|
||||
{
|
||||
typedef typename MatrixType::Index Index;
|
||||
enum { TFactorSize = MatrixType::ColsAtCompileTime };
|
||||
Index nbVecs = vectors.cols();
|
||||
Matrix<typename MatrixType::Scalar, TFactorSize, TFactorSize, ColMajor> T(nbVecs,nbVecs);
|
||||
make_block_householder_triangular_factor(T, vectors, hCoeffs);
|
||||
|
||||
const TriangularView<const VectorsType, UnitLower>& V(vectors);
|
||||
Matrix<typename MatrixType::Scalar, TFactorSize, TFactorSize, RowMajor> T(nbVecs,nbVecs);
|
||||
|
||||
if(forward) make_block_householder_triangular_factor(T, vectors, hCoeffs);
|
||||
else make_block_householder_triangular_factor(T, vectors, hCoeffs.conjugate());
|
||||
const TriangularView<const VectorsType, UnitLower> V(vectors);
|
||||
|
||||
// A -= V T V^* A
|
||||
Matrix<typename MatrixType::Scalar,VectorsType::ColsAtCompileTime,MatrixType::ColsAtCompileTime,0,
|
||||
VectorsType::MaxColsAtCompileTime,MatrixType::MaxColsAtCompileTime> tmp = V.adjoint() * mat;
|
||||
// FIXME add .noalias() once the triangular product can work inplace
|
||||
tmp = T.template triangularView<Upper>().adjoint() * tmp;
|
||||
if(forward) tmp = T.template triangularView<Upper>() * tmp;
|
||||
else tmp = T.template triangularView<Upper>().adjoint() * tmp;
|
||||
mat.noalias() -= V * tmp;
|
||||
}
|
||||
|
||||
|
@ -319,12 +319,36 @@ template<typename VectorsType, typename CoeffsType, int Side> class HouseholderS
|
||||
template<typename Dest, typename Workspace>
|
||||
inline void applyThisOnTheLeft(Dest& dst, Workspace& workspace) const
|
||||
{
|
||||
workspace.resize(dst.cols());
|
||||
for(Index k = 0; k < m_length; ++k)
|
||||
const Index BlockSize = 1;
|
||||
// if the entries are large enough, then apply the reflectors by block
|
||||
if(m_length>BlockSize && dst.cols()>1)
|
||||
{
|
||||
Index actual_k = m_trans ? k : m_length-k-1;
|
||||
dst.bottomRows(rows()-m_shift-actual_k)
|
||||
.applyHouseholderOnTheLeft(essentialVector(actual_k), m_coeffs.coeff(actual_k), workspace.data());
|
||||
for(Index i = 0; i < m_length; i+=BlockSize)
|
||||
{
|
||||
Index end = m_trans ? (std::min)(m_length,i+BlockSize) : m_length-i;
|
||||
Index k = m_trans ? i : (std::max)(Index(0),end-BlockSize);
|
||||
Index bs = end-k;
|
||||
Index start = k + m_shift;
|
||||
|
||||
typedef Block<typename internal::remove_all<VectorsType>::type,Dynamic,Dynamic> SubVectorsType;
|
||||
SubVectorsType sub_vecs1(m_vectors.const_cast_derived(), Side==OnTheRight ? k : start,
|
||||
Side==OnTheRight ? start : k,
|
||||
Side==OnTheRight ? bs : m_vectors.rows()-start,
|
||||
Side==OnTheRight ? m_vectors.cols()-start : bs);
|
||||
typename internal::conditional<Side==OnTheRight, Transpose<SubVectorsType>, SubVectorsType&>::type sub_vecs(sub_vecs1);
|
||||
Block<Dest,Dynamic,Dynamic> sub_dst(dst,dst.rows()-rows()+m_shift+k,0, rows()-m_shift-k,dst.cols());
|
||||
apply_block_householder_on_the_left(sub_dst, sub_vecs, m_coeffs.segment(k, bs), !m_trans);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
workspace.resize(dst.cols());
|
||||
for(Index k = 0; k < m_length; ++k)
|
||||
{
|
||||
Index actual_k = m_trans ? k : m_length-k-1;
|
||||
dst.bottomRows(rows()-m_shift-actual_k)
|
||||
.applyHouseholderOnTheLeft(essentialVector(actual_k), m_coeffs.coeff(actual_k), workspace.data());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -299,8 +299,8 @@ struct householder_qr_inplace_blocked
|
||||
for (k = 0; k < size; k += blockSize)
|
||||
{
|
||||
Index bs = (std::min)(size-k,blockSize); // actual size of the block
|
||||
Index tcols = cols - k - bs; // trailing columns
|
||||
Index brows = rows-k; // rows of the block
|
||||
Index tcols = cols - k - bs; // trailing columns
|
||||
Index brows = rows-k; // rows of the block
|
||||
|
||||
// partition the matrix:
|
||||
// A00 | A01 | A02
|
||||
@ -318,7 +318,7 @@ struct householder_qr_inplace_blocked
|
||||
if(tcols)
|
||||
{
|
||||
BlockType A21_22 = mat.block(k,k+bs,brows,tcols);
|
||||
apply_block_householder_on_the_left(A21_22,A11_21,hCoeffsSegment.adjoint());
|
||||
apply_block_householder_on_the_left(A21_22,A11_21,hCoeffsSegment, false); // false == backward
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -215,10 +215,10 @@ void upperbidiagonalization_blocked_helper(MatrixType& A,
|
||||
if(k) u_k -= U_k1.adjoint() * X.row(k).head(k).adjoint();
|
||||
}
|
||||
|
||||
// 5 - construct right Householder transform in-placecols
|
||||
// 5 - construct right Householder transform in-place
|
||||
u_k.makeHouseholderInPlace(tau_u, upper_diagonal[k]);
|
||||
|
||||
// this eases the application of Householder transforAions
|
||||
// this eases the application of Householder transformations
|
||||
// A(k,k+1) will store tau_u later
|
||||
A(k,k+1) = Scalar(1);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user