Improved lapacke binding code for HouseholderQR and PartialPivLU

This commit is contained in:
Erik Schultheis 2021-12-02 00:10:58 +00:00 committed by Rasmus Munk Larsen
parent 7ef5f0641f
commit d60f7fa518
7 changed files with 246 additions and 116 deletions

View File

@ -32,11 +32,7 @@
#include "src/Cholesky/LLT.h" #include "src/Cholesky/LLT.h"
#include "src/Cholesky/LDLT.h" #include "src/Cholesky/LDLT.h"
#ifdef EIGEN_USE_LAPACKE #ifdef EIGEN_USE_LAPACKE
#ifdef EIGEN_USE_MKL #include "src/misc/lapacke_helpers.h"
#include "mkl_lapacke.h"
#else
#include "src/misc/lapacke.h"
#endif
#include "src/Cholesky/LLT_LAPACKE.h" #include "src/Cholesky/LLT_LAPACKE.h"
#endif #endif

View File

@ -28,11 +28,7 @@
#include "src/LU/FullPivLU.h" #include "src/LU/FullPivLU.h"
#include "src/LU/PartialPivLU.h" #include "src/LU/PartialPivLU.h"
#ifdef EIGEN_USE_LAPACKE #ifdef EIGEN_USE_LAPACKE
#ifdef EIGEN_USE_MKL #include "src/misc/lapacke_helpers.h"
#include "mkl_lapacke.h"
#else
#include "src/misc/lapacke.h"
#endif
#include "src/LU/PartialPivLU_LAPACKE.h" #include "src/LU/PartialPivLU_LAPACKE.h"
#endif #endif
#include "src/LU/Determinant.h" #include "src/LU/Determinant.h"

View File

@ -36,11 +36,7 @@
#include "src/QR/ColPivHouseholderQR.h" #include "src/QR/ColPivHouseholderQR.h"
#include "src/QR/CompleteOrthogonalDecomposition.h" #include "src/QR/CompleteOrthogonalDecomposition.h"
#ifdef EIGEN_USE_LAPACKE #ifdef EIGEN_USE_LAPACKE
#ifdef EIGEN_USE_MKL #include "src/misc/lapacke_helpers.h"
#include "mkl_lapacke.h"
#else
#include "src/misc/lapacke.h"
#endif
#include "src/QR/HouseholderQR_LAPACKE.h" #include "src/QR/HouseholderQR_LAPACKE.h"
#include "src/QR/ColPivHouseholderQR_LAPACKE.h" #include "src/QR/ColPivHouseholderQR_LAPACKE.h"
#endif #endif

View File

@ -39,44 +39,12 @@ namespace Eigen {
namespace internal { namespace internal {
namespace lapacke_llt_helpers { namespace lapacke_helpers {
// -------------------------------------------------------------------------------------------------------------------
// Translation from Eigen to Lapacke types
// -------------------------------------------------------------------------------------------------------------------
// For complex numbers, the types in Eigen and Lapacke are different, but layout compatible.
template<typename Scalar> struct translate_type;
template<> struct translate_type<float> { using type = float; };
template<> struct translate_type<double> { using type = double; };
template<> struct translate_type<dcomplex> { using type = lapack_complex_double; };
template<> struct translate_type<scomplex> { using type = lapack_complex_float; };
// -------------------------------------------------------------------------------------------------------------------
// Dispatch for potrf handling double, float, complex double, complex float types
// -------------------------------------------------------------------------------------------------------------------
inline lapack_int potrf(lapack_int matrix_order, char uplo, lapack_int size, double* a, lapack_int lda) {
return LAPACKE_dpotrf( matrix_order, uplo, size, a, lda );
}
inline lapack_int potrf(lapack_int matrix_order, char uplo, lapack_int size, float* a, lapack_int lda) {
return LAPACKE_spotrf( matrix_order, uplo, size, a, lda );
}
inline lapack_int potrf(lapack_int matrix_order, char uplo, lapack_int size, lapack_complex_double* a, lapack_int lda) {
return LAPACKE_zpotrf( matrix_order, uplo, size, a, lda );
}
inline lapack_int potrf(lapack_int matrix_order, char uplo, lapack_int size, lapack_complex_float* a, lapack_int lda) {
return LAPACKE_cpotrf( matrix_order, uplo, size, a, lda );
}
// ------------------------------------------------------------------------------------------------------------------- // -------------------------------------------------------------------------------------------------------------------
// Dispatch for rank update handling upper and lower parts // Dispatch for rank update handling upper and lower parts
// ------------------------------------------------------------------------------------------------------------------- // -------------------------------------------------------------------------------------------------------------------
template<unsigned Mode> template<UpLoType Mode>
struct rank_update {}; struct rank_update {};
template<> template<>
@ -100,9 +68,8 @@ namespace lapacke_llt_helpers {
// Generic lapacke llt implementation that hands of to the dispatches // Generic lapacke llt implementation that hands of to the dispatches
// ------------------------------------------------------------------------------------------------------------------- // -------------------------------------------------------------------------------------------------------------------
template<typename Scalar, unsigned Mode> template<typename Scalar, UpLoType Mode>
struct lapacke_llt { struct lapacke_llt {
using BlasType = typename translate_type<Scalar>::type;
template<typename MatrixType> template<typename MatrixType>
static Index blocked(MatrixType& m) static Index blocked(MatrixType& m)
{ {
@ -110,15 +77,13 @@ namespace lapacke_llt_helpers {
if(m.rows() == 0) { if(m.rows() == 0) {
return -1; return -1;
} }
/* Set up parameters for ?potrf */ /* Set up parameters for ?potrf */
lapack_int size = convert_index<lapack_int>(m.rows()); lapack_int size = to_lapack(m.rows());
lapack_int StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; lapack_int matrix_order = lapack_storage_of(m);
lapack_int matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR;
Scalar* a = &(m.coeffRef(0,0)); Scalar* a = &(m.coeffRef(0,0));
lapack_int lda = convert_index<lapack_int>(m.outerStride()); lapack_int lda = to_lapack(m.outerStride());
lapack_int info = potrf( matrix_order, Mode == Lower ? 'L' : 'U', size, (BlasType*)a, lda ); lapack_int info = potrf(matrix_order, translate_mode<Mode>, size, to_lapack(a), lda );
info = (info==0) ? -1 : info>0 ? info-1 : size; info = (info==0) ? -1 : info>0 ? info-1 : size;
return info; return info;
} }
@ -130,7 +95,7 @@ namespace lapacke_llt_helpers {
} }
}; };
} }
// end namespace lapacke_llt_helpers // end namespace lapacke_helpers
/* /*
* Here, we just put the generic implementation from lapacke_llt into a full specialization of the llt_inplace * Here, we just put the generic implementation from lapacke_llt into a full specialization of the llt_inplace
@ -139,13 +104,13 @@ namespace lapacke_llt_helpers {
*/ */
#define EIGEN_LAPACKE_LLT(EIGTYPE) \ #define EIGEN_LAPACKE_LLT(EIGTYPE) \
template<> struct llt_inplace<EIGTYPE, Lower> : public lapacke_llt_helpers::lapacke_llt<EIGTYPE, Lower> {}; \ template<> struct llt_inplace<EIGTYPE, Lower> : public lapacke_helpers::lapacke_llt<EIGTYPE, Lower> {}; \
template<> struct llt_inplace<EIGTYPE, Upper> : public lapacke_llt_helpers::lapacke_llt<EIGTYPE, Upper> {}; template<> struct llt_inplace<EIGTYPE, Upper> : public lapacke_helpers::lapacke_llt<EIGTYPE, Upper> {};
EIGEN_LAPACKE_LLT(double) EIGEN_LAPACKE_LLT(double)
EIGEN_LAPACKE_LLT(float) EIGEN_LAPACKE_LLT(float)
EIGEN_LAPACKE_LLT(dcomplex) EIGEN_LAPACKE_LLT(std::complex<double>)
EIGEN_LAPACKE_LLT(scomplex) EIGEN_LAPACKE_LLT(std::complex<float>)
#undef EIGEN_LAPACKE_LLT #undef EIGEN_LAPACKE_LLT

View File

@ -39,44 +39,55 @@ namespace Eigen {
namespace internal { namespace internal {
/** \internal Specialization for the data types supported by LAPACKe */ namespace lapacke_helpers {
// -------------------------------------------------------------------------------------------------------------------
// Generic lapacke partial lu implementation that converts arguments and dispatches to the function above
// -------------------------------------------------------------------------------------------------------------------
#define EIGEN_LAPACKE_LU_PARTPIV(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX) \ template<typename Scalar, int StorageOrder>
template<int StorageOrder> \ struct lapacke_partial_lu {
struct partial_lu_impl<EIGTYPE, StorageOrder, lapack_int> \ /** \internal performs the LU decomposition in-place of the matrix represented */
{ \ static lapack_int blocked_lu(Index rows, Index cols, Scalar* lu_data, Index luStride, lapack_int* row_transpositions,
/* \internal performs the LU decomposition in-place of the matrix represented */ \ lapack_int& nb_transpositions, lapack_int maxBlockSize=256)
static lapack_int blocked_lu(Index rows, Index cols, EIGTYPE* lu_data, Index luStride, lapack_int* row_transpositions, lapack_int& nb_transpositions, lapack_int maxBlockSize=256) \ {
{ \ EIGEN_UNUSED_VARIABLE(maxBlockSize);
EIGEN_UNUSED_VARIABLE(maxBlockSize);\ // Set up parameters for getrf
lapack_int matrix_order, first_zero_pivot; \ lapack_int matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR;
lapack_int m, n, lda, *ipiv, info; \ lapack_int lda = to_lapack(luStride);
EIGTYPE* a; \ Scalar* a = lu_data;
/* Set up parameters for ?getrf */ \ lapack_int* ipiv = row_transpositions;
matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ lapack_int m = to_lapack(rows);
lda = convert_index<lapack_int>(luStride); \ lapack_int n = to_lapack(cols);
a = lu_data; \ nb_transpositions = 0;
ipiv = row_transpositions; \
m = convert_index<lapack_int>(rows); \ lapack_int info = getrf(matrix_order, m, n, to_lapack(a), lda, ipiv );
n = convert_index<lapack_int>(cols); \ eigen_assert(info >= 0);
nb_transpositions = 0; \
\ for(int i=0; i<m; i++) {
info = LAPACKE_##LAPACKE_PREFIX##getrf( matrix_order, m, n, (LAPACKE_TYPE*)a, lda, ipiv ); \ ipiv[i]--;
\ if (ipiv[i] != i) nb_transpositions++;
for(int i=0;i<m;i++) { ipiv[i]--; if (ipiv[i]!=i) nb_transpositions++; } \ }
\ lapack_int first_zero_pivot = info;
eigen_assert(info >= 0); \ return first_zero_pivot;
/* something should be done with nb_transpositions */ \ }
\
first_zero_pivot = info; \
return first_zero_pivot; \
} \
}; };
} // end namespace lapacke_helpers
EIGEN_LAPACKE_LU_PARTPIV(double, double, d) /*
EIGEN_LAPACKE_LU_PARTPIV(float, float, s) * Here, we just put the generic implementation from lapacke_partial_lu into a partial specialization of the partial_lu_impl
EIGEN_LAPACKE_LU_PARTPIV(dcomplex, lapack_complex_double, z) * type. This specialization is more specialized than the generic implementations that Eigen implements, so if the
EIGEN_LAPACKE_LU_PARTPIV(scomplex, lapack_complex_float, c) * Scalar type matches they will be chosen.
*/
#define EIGEN_LAPACKE_PARTIAL_LU(EIGTYPE) \
template<int StorageOrder> \
struct partial_lu_impl<EIGTYPE, StorageOrder, lapack_int, Dynamic> : public lapacke_helpers::lapacke_partial_lu<EIGTYPE, StorageOrder> {};
EIGEN_LAPACKE_PARTIAL_LU(double)
EIGEN_LAPACKE_PARTIAL_LU(float)
EIGEN_LAPACKE_PARTIAL_LU(std::complex<double>)
EIGEN_LAPACKE_PARTIAL_LU(std::complex<float>)
#undef EIGEN_LAPACKE_PARTIAL_LU
} // end namespace internal } // end namespace internal

View File

@ -40,28 +40,35 @@ namespace Eigen {
namespace internal { namespace internal {
/** \internal Specialization for the data types supported by LAPACKe */ namespace lapacke_helpers {
#define EIGEN_LAPACKE_QR_NOPIV(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX) \ template<typename MatrixQR, typename HCoeffs>
template<typename MatrixQR, typename HCoeffs> \ struct lapacke_hqr
struct householder_qr_inplace_blocked<MatrixQR, HCoeffs, EIGTYPE, true> \ {
{ \ static void run(MatrixQR& mat, HCoeffs& hCoeffs, Index = 32, typename MatrixQR::Scalar* = 0)
static void run(MatrixQR& mat, HCoeffs& hCoeffs, Index = 32, \ {
typename MatrixQR::Scalar* = 0) \ lapack_int m = to_lapack(mat.rows());
{ \ lapack_int n = to_lapack(mat.cols());
lapack_int m = (lapack_int) mat.rows(); \ lapack_int lda = to_lapack(mat.outerStride());
lapack_int n = (lapack_int) mat.cols(); \ lapack_int matrix_order = lapack_storage_of(mat);
lapack_int lda = (lapack_int) mat.outerStride(); \ geqrf(matrix_order, m, n, to_lapack(mat.data()), lda, to_lapack(hCoeffs.data()));
lapack_int matrix_order = (MatrixQR::IsRowMajor) ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ hCoeffs.adjointInPlace();
LAPACKE_##LAPACKE_PREFIX##geqrf( matrix_order, m, n, (LAPACKE_TYPE*)mat.data(), lda, (LAPACKE_TYPE*)hCoeffs.data()); \ }
hCoeffs.adjointInPlace(); \
} \
}; };
EIGEN_LAPACKE_QR_NOPIV(double, double, d) }
EIGEN_LAPACKE_QR_NOPIV(float, float, s)
EIGEN_LAPACKE_QR_NOPIV(dcomplex, lapack_complex_double, z) /** \internal Specialization for the data types supported by LAPACKe */
EIGEN_LAPACKE_QR_NOPIV(scomplex, lapack_complex_float, c) #define EIGEN_LAPACKE_HH_QR(EIGTYPE) \
template<typename MatrixQR, typename HCoeffs> \
struct householder_qr_inplace_blocked<MatrixQR, HCoeffs, EIGTYPE, true> : public lapacke_helpers::lapacke_hqr<MatrixQR, HCoeffs> {};
EIGEN_LAPACKE_HH_QR(double)
EIGEN_LAPACKE_HH_QR(float)
EIGEN_LAPACKE_HH_QR(std::complex<double>)
EIGEN_LAPACKE_HH_QR(std::complex<float>)
#undef EIGEN_LAPACKE_HH_QR
} // end namespace internal } // end namespace internal

View File

@ -0,0 +1,159 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2021 Erik Schultheis <erik.schultheis@aalto.fi>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_LAPACKE_HELPERS_H
#define EIGEN_LAPACKE_HELPERS_H
#include "./InternalHeaderCheck.h"
#ifdef EIGEN_USE_MKL
#include "mkl_lapacke.h"
#else
#include "lapacke.h"
#endif
namespace Eigen {
namespace internal {
/**
* \internal
* \brief Implementation details and helper functions for the lapacke glue code.
*/
namespace lapacke_helpers {
// ---------------------------------------------------------------------------------------------------------------------
// Translation from Eigen to Lapacke for types and constants
// ---------------------------------------------------------------------------------------------------------------------
// For complex numbers, the types in Eigen and Lapacke are different, but layout compatible.
template<typename Scalar>
struct translate_type_imp;
template<>
struct translate_type_imp<float> {
using type = float;
};
template<>
struct translate_type_imp<double> {
using type = double;
};
template<>
struct translate_type_imp<std::complex<double>> {
using type = lapack_complex_double;
};
template<>
struct translate_type_imp<std::complex<float>> {
using type = lapack_complex_float;
};
/// Given an Eigen types, this is defined to be the corresponding, layout-compatible lapack type
template<typename Scalar>
using translated_type = typename translate_type_imp<Scalar>::type;
/// These functions convert their arguments from Eigen to Lapack types
/// This function performs conversion for any of the translations defined above.
template<typename Source, typename Target=translated_type<Source>>
auto to_lapack(Source value) { return static_cast<Target>(value); }
/// This function performs conversions for pointer types corresponding to the translations abovce.
/// This is valid because the translations are between layout-compatible types.
template<typename Source, typename Target=translated_type<Source>>
auto to_lapack(Source *value) { return reinterpret_cast<Target*>(value); }
/// This function converts the Eigen Index to a lapack index, with possible range checks
/// \sa internal::convert_index
lapack_int to_lapack(Index index) {
return convert_index<lapack_int>(index);
}
/// translates storage order of the given Eigen object to the corresponding lapack constant
template<typename Derived>
EIGEN_CONSTEXPR lapack_int lapack_storage_of(const EigenBase<Derived> &) {
return Derived::IsRowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR;
}
/// translate UpLo type to the corresponding letter code
template<UpLoType mode> char translate_mode;
template<> EIGEN_CONSTEXPR const char translate_mode<Lower> = 'L';
template<> EIGEN_CONSTEXPR const char translate_mode<Upper> = 'U';
// ---------------------------------------------------------------------------------------------------------------------
// Automatic generation of low-level wrappers
// ---------------------------------------------------------------------------------------------------------------------
/*!
* \internal
* \brief Helper type to facilitate the wrapping of raw LAPACKE functions for different types into a single, overloaded C++ function.
* This is achieved in combination with \r EIGEN_MAKE_LAPACKE_WRAPPER
* \details This implementation works by providing an overloaded call function that just forwards its arguments to the
* underlying lapack function. Each of these overloads is enabled only if the call is actually well formed.
* Because these lapack functions take pointers to the underlying scalar type as arguments, even though the actual Scalars
* would be implicitly convertible, the pointers are not and therefore only a single overload can be valid at the same time.
* Thus, despite all functions taking fully generic `Args&&... args` as arguments, there is never any ambiguity.
*/
template<typename DoubleFn, typename SingleFn, typename DoubleCpxFn, typename SingleCpxFn>
struct WrappingHelper {
// The naming of double, single, double complex and single complex is purely for readability
// and doesn't actually affect the workings of this class. In principle, the arguments can
// be supplied in any permuted order.
DoubleFn double_; SingleFn single_; DoubleCpxFn double_cpx_; SingleCpxFn single_cpx_;
template<typename... Args>
auto call(Args&&... args) -> decltype(double_(std::forward<Args>(args)...)) {
return double_(std::forward<Args>(args)...);
}
template<typename... Args>
auto call(Args&&... args) -> decltype(single_(std::forward<Args>(args)...)){
return single_(std::forward<Args>(args)...);
}
template<typename... Args>
auto call(Args&&... args) -> decltype(double_cpx_(std::forward<Args>(args)...)){
return double_cpx_(std::forward<Args>(args)...);
}
template<typename... Args>
auto call(Args&&... args) -> decltype(single_cpx_(std::forward<Args>(args)...)){
return single_cpx_(std::forward<Args>(args)...);
}
};
/** \internal Helper function that generates a `WrappingHelper` object with the given function pointers and
* invokes its `call` method, thus selecting one of the overloads.
* \sa EIGEN_MAKE_LAPACKE_WRAPPER
*/
template<typename DoubleFn, typename SingleFn, typename DoubleCpxFn, typename SingleCpxFn, typename... Args>
auto call_wrapper(DoubleFn df, SingleFn sf, DoubleCpxFn dcf, SingleCpxFn scf, Args&&... args) {
WrappingHelper<DoubleFn, SingleFn, DoubleCpxFn, SingleCpxFn> helper{df, sf, dcf, scf};
return helper.call(std::forward<Args>(args)...);
}
/**
* \internal
* Generates a new function `Function` that dispatches to the corresponding LAPACKE_? prefixed functions.
* \sa WrappingHelper
*/
#define EIGEN_MAKE_LAPACKE_WRAPPER(FUNCTION) \
template<typename... Args> \
auto FUNCTION(Args&&... args) { return call_wrapper(LAPACKE_d##FUNCTION, LAPACKE_s##FUNCTION, LAPACKE_z##FUNCTION, LAPACKE_c##FUNCTION, std::forward<Args>(args)...); }
// Now with this macro and the helper wrappers, we can generate the dispatch for all the lapacke functions that are
// used in Eigen.
// We define these here instead of in the files where they are used because this allows us to #undef the macro again
// right here
EIGEN_MAKE_LAPACKE_WRAPPER(potrf)
EIGEN_MAKE_LAPACKE_WRAPPER(getrf)
EIGEN_MAKE_LAPACKE_WRAPPER(geqrf)
#undef EIGEN_MAKE_LAPACKE_WRAPPER
}
}
}
#endif // EIGEN_LAPACKE_HELPERS_H