From 669db3d7768b3b94d31d6552a1012ee29f54b8d8 Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Sun, 9 Sep 2012 02:55:10 +0800 Subject: [PATCH] Extend rank-1 updates for different storage orders. --- blas/GeneralRank1Update.h | 21 +++++++++++++++---- ...edProduct.h => PackedSelfadjointProduct.h} | 20 ++++++++++++++---- blas/common.h | 2 +- blas/level2_cplx_impl.h | 4 ++-- blas/level2_real_impl.h | 6 +++--- 5 files changed, 39 insertions(+), 14 deletions(-) rename blas/{SelfadjointPackedProduct.h => PackedSelfadjointProduct.h} (57%) diff --git a/blas/GeneralRank1Update.h b/blas/GeneralRank1Update.h index a3301ed92..6d33fbcc1 100644 --- a/blas/GeneralRank1Update.h +++ b/blas/GeneralRank1Update.h @@ -13,15 +13,28 @@ namespace internal { /* Optimized matrix += alpha * uv' */ -template -struct general_rank1_update +template +struct general_rank1_update; + +template +struct general_rank1_update { static void run(Index rows, Index cols, Scalar* mat, Index stride, const Scalar* u, const Scalar* v, Scalar alpha) { - typedef Matrix PlainVector; internal::conj_if cj; + typedef Map > OtherMap; + typedef typename internal::conditional::type ConjRhsType; for (Index i=0; i(mat+stride*i,rows) += alpha * cj(v[i]) * Map(u,rows); + Map >(mat+stride*i,rows) += alpha * cj(v[i]) * ConjRhsType(OtherMap(u,rows)); + } +}; + +template +struct general_rank1_update +{ + static void run(Index rows, Index cols, Scalar* mat, Index stride, const Scalar* u, const Scalar* v, Scalar alpha) + { + general_rank1_update::run(rows,cols,mat,stride,u,v,alpha); } }; diff --git a/blas/SelfadjointPackedProduct.h b/blas/PackedSelfadjointProduct.h similarity index 57% rename from blas/SelfadjointPackedProduct.h rename to blas/PackedSelfadjointProduct.h index 4ea36b569..adc86ece1 100644 --- a/blas/SelfadjointPackedProduct.h +++ b/blas/PackedSelfadjointProduct.h @@ -21,18 +21,23 @@ namespace internal { ******* xHPR FAILED ON CALL NUMBER: 2: xHPR ('U', 1, 0.0, X, 1, AP) */ -template -struct selfadjoint_packed_rank1_update +template +struct selfadjoint_packed_rank1_update; + +template +struct selfadjoint_packed_rank1_update { static void run(Index size, Scalar* mat, const Scalar* vec, Scalar alpha) { + internal::conj_if cj; typedef Map > OtherMap; + typedef typename internal::conditional::type ConjRhsType; Index offset = 0; for (Index i=0; i >(mat+offset, UpLo==Lower ? size-i : (i+1)) - += alpha * conj(vec[i]) * OtherMap(vec+(UpLo==Lower ? i : 0), UpLo==Lower ? size-i : (i+1)); + += alpha * cj(vec[i]) * ConjRhsType(OtherMap(vec+(UpLo==Lower ? i : 0), UpLo==Lower ? size-i : (i+1))); //FIXME This should be handled outside. mat[offset+(UpLo==Lower ? 0 : i)] = real(mat[offset+(UpLo==Lower ? 0 : i)]); offset += UpLo==Lower ? size-i : (i+1); @@ -40,7 +45,14 @@ struct selfadjoint_packed_rank1_update } }; -//TODO struct selfadjoint_packed_product_selector +template +struct selfadjoint_packed_rank1_update +{ + static void run(Index size, Scalar* mat, const Scalar* vec, Scalar alpha) + { + selfadjoint_packed_rank1_update::run(size,mat,vec,alpha); + } +}; } // end namespace internal diff --git a/blas/common.h b/blas/common.h index a14d32289..3160d3b41 100644 --- a/blas/common.h +++ b/blas/common.h @@ -75,8 +75,8 @@ inline bool check_uplo(const char* uplo) namespace Eigen { #include "BandTriangularSolver.h" #include "GeneralRank1Update.h" +#include "PackedSelfadjointProduct.h" #include "Rank2Update.h" -#include "SelfadjointPackedProduct.h" } using namespace Eigen; diff --git a/blas/level2_cplx_impl.h b/blas/level2_cplx_impl.h index 46bddc134..11ee13b4c 100644 --- a/blas/level2_cplx_impl.h +++ b/blas/level2_cplx_impl.h @@ -309,7 +309,7 @@ int EIGEN_BLAS_FUNC(geru)(int *m, int *n, RealScalar *palpha, RealScalar *px, in Scalar* x_cpy = get_compact_vector(x,*m,*incx); Scalar* y_cpy = get_compact_vector(y,*n,*incy); - internal::general_rank1_update::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha); + internal::general_rank1_update::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha); if(x_cpy!=x) delete[] x_cpy; if(y_cpy!=y) delete[] y_cpy; @@ -346,7 +346,7 @@ int EIGEN_BLAS_FUNC(gerc)(int *m, int *n, RealScalar *palpha, RealScalar *px, in Scalar* x_cpy = get_compact_vector(x,*m,*incx); Scalar* y_cpy = get_compact_vector(y,*n,*incy); - internal::general_rank1_update::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha); + internal::general_rank1_update::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha); if(x_cpy!=x) delete[] x_cpy; if(y_cpy!=y) delete[] y_cpy; diff --git a/blas/level2_real_impl.h b/blas/level2_real_impl.h index 735545e2b..38b0dadb6 100644 --- a/blas/level2_real_impl.h +++ b/blas/level2_real_impl.h @@ -242,8 +242,8 @@ int EIGEN_BLAS_FUNC(spr)(char *uplo, int *n, Scalar *palpha, Scalar *px, int *in for(int k=0; k<2; ++k) func[k] = 0; - func[UP] = (internal::selfadjoint_packed_rank1_update::run); - func[LO] = (internal::selfadjoint_packed_rank1_update::run); + func[UP] = (internal::selfadjoint_packed_rank1_update::run); + func[LO] = (internal::selfadjoint_packed_rank1_update::run); init = true; } @@ -359,7 +359,7 @@ int EIGEN_BLAS_FUNC(ger)(int *m, int *n, Scalar *palpha, Scalar *px, int *incx, Scalar* x_cpy = get_compact_vector(x,*m,*incx); Scalar* y_cpy = get_compact_vector(y,*n,*incy); - internal::general_rank1_update::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha); + internal::general_rank1_update::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha); if(x_cpy!=x) delete[] x_cpy; if(y_cpy!=y) delete[] y_cpy;