Improved lapacke binding code for HouseholderQR and PartialPivLU

2025-02-23 18:20:47 +08:00 · 2021-12-02 00:10:58 +00:00 · 2021-12-02 00:10:58 +00:00 · d60f7fa518
commit d60f7fa518
parent 7ef5f0641f
7 changed files with 246 additions and 116 deletions
--- a/Eigen/Cholesky
+++ b/Eigen/Cholesky
@ -32,11 +32,7 @@
 #include "src/Cholesky/LLT.h"
 #include "src/Cholesky/LDLT.h"
 #ifdef EIGEN_USE_LAPACKE
-#ifdef EIGEN_USE_MKL
+#include "src/misc/lapacke_helpers.h"
 #include "mkl_lapacke.h"
 #else
 #include "src/misc/lapacke.h"
 #endif
 #include "src/Cholesky/LLT_LAPACKE.h"
 #endif
--- a/Eigen/LU
+++ b/Eigen/LU
@ -28,11 +28,7 @@
 #include "src/LU/FullPivLU.h"
 #include "src/LU/PartialPivLU.h"
 #ifdef EIGEN_USE_LAPACKE
-#ifdef EIGEN_USE_MKL
+#include "src/misc/lapacke_helpers.h"
 #include "mkl_lapacke.h"
 #else
 #include "src/misc/lapacke.h"
 #endif
 #include "src/LU/PartialPivLU_LAPACKE.h"
 #endif
 #include "src/LU/Determinant.h"
--- a/Eigen/QR
+++ b/Eigen/QR
@ -36,11 +36,7 @@
 #include "src/QR/ColPivHouseholderQR.h"
 #include "src/QR/CompleteOrthogonalDecomposition.h"
 #ifdef EIGEN_USE_LAPACKE
-#ifdef EIGEN_USE_MKL
+#include "src/misc/lapacke_helpers.h"
 #include "mkl_lapacke.h"
 #else
 #include "src/misc/lapacke.h"
 #endif
 #include "src/QR/HouseholderQR_LAPACKE.h"
 #include "src/QR/ColPivHouseholderQR_LAPACKE.h"
 #endif
--- a/Eigen/src/Cholesky/LLT_LAPACKE.h
+++ b/Eigen/src/Cholesky/LLT_LAPACKE.h
@ -39,44 +39,12 @@ namespace Eigen {
 namespace internal {
-namespace lapacke_llt_helpers {
+namespace lapacke_helpers {
  // -------------------------------------------------------------------------------------------------------------------
  //        Translation from Eigen to Lapacke types
  // -------------------------------------------------------------------------------------------------------------------
  // For complex numbers, the types in Eigen and Lapacke are different, but layout compatible.
  template<typename Scalar> struct translate_type;
  template<> struct translate_type<float> { using type = float; };
  template<> struct translate_type<double> { using type = double; };
  template<> struct translate_type<dcomplex> { using type = lapack_complex_double; };
  template<> struct translate_type<scomplex> { using type = lapack_complex_float; };
  // -------------------------------------------------------------------------------------------------------------------
  //        Dispatch for potrf handling double, float, complex double, complex float types
  // -------------------------------------------------------------------------------------------------------------------
  inline lapack_int potrf(lapack_int matrix_order, char uplo, lapack_int size, double* a, lapack_int lda) {
    return LAPACKE_dpotrf( matrix_order, uplo, size, a, lda );
  }
  inline lapack_int potrf(lapack_int matrix_order, char uplo, lapack_int size, float* a, lapack_int lda) {
    return LAPACKE_spotrf( matrix_order, uplo, size, a, lda );
  }
  inline lapack_int potrf(lapack_int matrix_order, char uplo, lapack_int size, lapack_complex_double* a, lapack_int lda) {
    return LAPACKE_zpotrf( matrix_order, uplo, size, a, lda );
  }
  inline lapack_int potrf(lapack_int matrix_order, char uplo, lapack_int size, lapack_complex_float* a, lapack_int lda) {
    return LAPACKE_cpotrf( matrix_order, uplo, size, a, lda );
  }
  // -------------------------------------------------------------------------------------------------------------------
  //        Dispatch for rank update handling upper and lower parts
  // -------------------------------------------------------------------------------------------------------------------
-  template<unsigned Mode>
+  template<UpLoType Mode>
  struct rank_update {};
  template<>
@ -100,9 +68,8 @@ namespace lapacke_llt_helpers {
  //        Generic lapacke llt implementation that hands of to the dispatches
  // -------------------------------------------------------------------------------------------------------------------
-  template<typename Scalar, unsigned Mode>
+  template<typename Scalar, UpLoType Mode>
  struct lapacke_llt {
    using BlasType = typename translate_type<Scalar>::type;
    template<typename MatrixType>
    static Index blocked(MatrixType& m)
    {
@ -110,15 +77,13 @@ namespace lapacke_llt_helpers {
      if(m.rows() == 0) {
        return -1;
      }
      /* Set up parameters for ?potrf */
-      lapack_int size = convert_index<lapack_int>(m.rows());
+      lapack_int size = to_lapack(m.rows());
-      lapack_int StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor;
+      lapack_int matrix_order = lapack_storage_of(m);
      lapack_int matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR;
      Scalar* a = &(m.coeffRef(0,0));
-      lapack_int lda = convert_index<lapack_int>(m.outerStride());
+      lapack_int lda = to_lapack(m.outerStride());
-      lapack_int info = potrf( matrix_order, Mode == Lower ? 'L' : 'U', size, (BlasType*)a, lda );
+      lapack_int info = potrf(matrix_order, translate_mode<Mode>, size, to_lapack(a), lda );
      info = (info==0) ? -1 : info>0 ? info-1 : size;
      return info;
    }
@ -130,7 +95,7 @@ namespace lapacke_llt_helpers {
    }
  };
 }
-// end namespace lapacke_llt_helpers
+// end namespace lapacke_helpers
 /*
 * Here, we just put the generic implementation from lapacke_llt into a full specialization of the llt_inplace
@ -139,13 +104,13 @@ namespace lapacke_llt_helpers {
 */
 #define EIGEN_LAPACKE_LLT(EIGTYPE) \
-template<> struct llt_inplace<EIGTYPE, Lower> : public lapacke_llt_helpers::lapacke_llt<EIGTYPE, Lower> {}; \
+template<> struct llt_inplace<EIGTYPE, Lower> : public lapacke_helpers::lapacke_llt<EIGTYPE, Lower> {}; \
-template<> struct llt_inplace<EIGTYPE, Upper> : public lapacke_llt_helpers::lapacke_llt<EIGTYPE, Upper> {};
+template<> struct llt_inplace<EIGTYPE, Upper> : public lapacke_helpers::lapacke_llt<EIGTYPE, Upper> {};
 EIGEN_LAPACKE_LLT(double)
 EIGEN_LAPACKE_LLT(float)
-EIGEN_LAPACKE_LLT(dcomplex)
+EIGEN_LAPACKE_LLT(std::complex<double>)
-EIGEN_LAPACKE_LLT(scomplex)
+EIGEN_LAPACKE_LLT(std::complex<float>)
 #undef EIGEN_LAPACKE_LLT
--- a/Eigen/src/LU/PartialPivLU_LAPACKE.h
+++ b/Eigen/src/LU/PartialPivLU_LAPACKE.h
@ -39,44 +39,55 @@ namespace Eigen {
 namespace internal {
-/** \internal Specialization for the data types supported by LAPACKe */
+namespace lapacke_helpers {
 // -------------------------------------------------------------------------------------------------------------------
 //        Generic lapacke partial lu implementation that converts arguments and dispatches to the function above
 // -------------------------------------------------------------------------------------------------------------------
-#define EIGEN_LAPACKE_LU_PARTPIV(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX) \
+template<typename Scalar, int StorageOrder>
-template<int StorageOrder> \
+struct lapacke_partial_lu {
-struct partial_lu_impl<EIGTYPE, StorageOrder, lapack_int> \
+  /** \internal performs the LU decomposition in-place of the matrix represented */
-{ \
+  static lapack_int blocked_lu(Index rows, Index cols, Scalar* lu_data, Index luStride, lapack_int* row_transpositions,
-  /* \internal performs the LU decomposition in-place of the matrix represented */ \
+  lapack_int& nb_transpositions, lapack_int maxBlockSize=256)
-  static lapack_int blocked_lu(Index rows, Index cols, EIGTYPE* lu_data, Index luStride, lapack_int* row_transpositions, lapack_int& nb_transpositions, lapack_int maxBlockSize=256) \
+  {
-  { \
+    EIGEN_UNUSED_VARIABLE(maxBlockSize);
-    EIGEN_UNUSED_VARIABLE(maxBlockSize);\
+    // Set up parameters for getrf
-    lapack_int matrix_order, first_zero_pivot; \
+    lapack_int matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR;
-    lapack_int m, n, lda, *ipiv, info; \
+    lapack_int lda = to_lapack(luStride);
-    EIGTYPE* a; \
+    Scalar* a = lu_data;
-/* Set up parameters for ?getrf */ \
+    lapack_int* ipiv = row_transpositions;
-    matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
+    lapack_int m = to_lapack(rows);
-    lda = convert_index<lapack_int>(luStride); \
+    lapack_int n = to_lapack(cols);
-    a = lu_data; \
+    nb_transpositions = 0;
-    ipiv = row_transpositions; \
+
-    m = convert_index<lapack_int>(rows); \
+    lapack_int info = getrf(matrix_order, m, n, to_lapack(a), lda, ipiv );
-    n = convert_index<lapack_int>(cols); \
+    eigen_assert(info >= 0);
-    nb_transpositions = 0; \
+
-\
+    for(int i=0; i<m; i++) {
-    info = LAPACKE_##LAPACKE_PREFIX##getrf( matrix_order, m, n, (LAPACKE_TYPE*)a, lda, ipiv ); \
+      ipiv[i]--;
-\
+      if (ipiv[i] != i) nb_transpositions++;
-    for(int i=0;i<m;i++) { ipiv[i]--; if (ipiv[i]!=i) nb_transpositions++; } \
+    }
-\
+    lapack_int first_zero_pivot = info;
-    eigen_assert(info >= 0); \
+    return first_zero_pivot;
-/* something should be done with nb_transpositions */ \
+  }
 \
    first_zero_pivot = info; \
    return first_zero_pivot; \
  } \
 };
 } // end namespace lapacke_helpers
-EIGEN_LAPACKE_LU_PARTPIV(double, double, d)
+/*
-EIGEN_LAPACKE_LU_PARTPIV(float, float, s)
+ * Here, we just put the generic implementation from lapacke_partial_lu into a partial specialization of the partial_lu_impl
-EIGEN_LAPACKE_LU_PARTPIV(dcomplex, lapack_complex_double, z)
+ * type. This specialization is more specialized than the generic implementations that Eigen implements, so if the
-EIGEN_LAPACKE_LU_PARTPIV(scomplex, lapack_complex_float,  c)
+ * Scalar type matches they will be chosen.
 */
 #define EIGEN_LAPACKE_PARTIAL_LU(EIGTYPE) \
 template<int StorageOrder>                \
 struct partial_lu_impl<EIGTYPE, StorageOrder, lapack_int, Dynamic> : public lapacke_helpers::lapacke_partial_lu<EIGTYPE, StorageOrder> {};
 EIGEN_LAPACKE_PARTIAL_LU(double)
 EIGEN_LAPACKE_PARTIAL_LU(float)
 EIGEN_LAPACKE_PARTIAL_LU(std::complex<double>)
 EIGEN_LAPACKE_PARTIAL_LU(std::complex<float>)
 #undef EIGEN_LAPACKE_PARTIAL_LU
 } // end namespace internal
--- a/Eigen/src/QR/HouseholderQR_LAPACKE.h
+++ b/Eigen/src/QR/HouseholderQR_LAPACKE.h
@ -40,28 +40,35 @@ namespace Eigen {
 namespace internal {
-/** \internal Specialization for the data types supported by LAPACKe */
+namespace lapacke_helpers {
-#define EIGEN_LAPACKE_QR_NOPIV(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX) \
+template<typename MatrixQR, typename HCoeffs>
-template<typename MatrixQR, typename HCoeffs> \
+struct lapacke_hqr
-struct householder_qr_inplace_blocked<MatrixQR, HCoeffs, EIGTYPE, true> \
+{
-{ \
+  static void run(MatrixQR& mat, HCoeffs& hCoeffs, Index = 32, typename MatrixQR::Scalar* = 0)
-  static void run(MatrixQR& mat, HCoeffs& hCoeffs, Index = 32, \
+  {
-      typename MatrixQR::Scalar* = 0) \
+    lapack_int m = to_lapack(mat.rows());
-  { \
+    lapack_int n = to_lapack(mat.cols());
-    lapack_int m = (lapack_int) mat.rows(); \
+    lapack_int lda = to_lapack(mat.outerStride());
-    lapack_int n = (lapack_int) mat.cols(); \
+    lapack_int matrix_order = lapack_storage_of(mat);
-    lapack_int lda = (lapack_int) mat.outerStride(); \
+    geqrf(matrix_order, m, n, to_lapack(mat.data()), lda, to_lapack(hCoeffs.data()));
-    lapack_int matrix_order = (MatrixQR::IsRowMajor) ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
+    hCoeffs.adjointInPlace();
-    LAPACKE_##LAPACKE_PREFIX##geqrf( matrix_order, m, n, (LAPACKE_TYPE*)mat.data(), lda, (LAPACKE_TYPE*)hCoeffs.data()); \
+  }
    hCoeffs.adjointInPlace(); \
  } \
 };
-EIGEN_LAPACKE_QR_NOPIV(double, double, d)
+}
-EIGEN_LAPACKE_QR_NOPIV(float, float, s)
+
-EIGEN_LAPACKE_QR_NOPIV(dcomplex, lapack_complex_double, z)
+/** \internal Specialization for the data types supported by LAPACKe */
-EIGEN_LAPACKE_QR_NOPIV(scomplex, lapack_complex_float, c)
+#define EIGEN_LAPACKE_HH_QR(EIGTYPE) \
 template<typename MatrixQR, typename HCoeffs> \
 struct householder_qr_inplace_blocked<MatrixQR, HCoeffs, EIGTYPE, true> : public lapacke_helpers::lapacke_hqr<MatrixQR, HCoeffs> {};
 EIGEN_LAPACKE_HH_QR(double)
 EIGEN_LAPACKE_HH_QR(float)
 EIGEN_LAPACKE_HH_QR(std::complex<double>)
 EIGEN_LAPACKE_HH_QR(std::complex<float>)
 #undef EIGEN_LAPACKE_HH_QR
 } // end namespace internal
--- a/Eigen/src/misc/lapacke_helpers.h
+++ b/Eigen/src/misc/lapacke_helpers.h
@ -0,0 +1,159 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2021 Erik Schultheis <erik.schultheis@aalto.fi>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_LAPACKE_HELPERS_H
 #define EIGEN_LAPACKE_HELPERS_H
 #include "./InternalHeaderCheck.h"
 #ifdef EIGEN_USE_MKL
 #include "mkl_lapacke.h"
 #else
 #include "lapacke.h"
 #endif
 namespace Eigen {
 namespace internal {
 /**
 * \internal
 * \brief Implementation details and helper functions for the lapacke glue code.
 */
 namespace lapacke_helpers {
 // ---------------------------------------------------------------------------------------------------------------------
 //                  Translation from Eigen to Lapacke for types and constants
 // ---------------------------------------------------------------------------------------------------------------------
 // For complex numbers, the types in Eigen and Lapacke are different, but layout compatible.
 template<typename Scalar>
 struct translate_type_imp;
 template<>
 struct translate_type_imp<float> {
    using type = float;
 };
 template<>
 struct translate_type_imp<double> {
    using type = double;
 };
 template<>
 struct translate_type_imp<std::complex<double>> {
    using type = lapack_complex_double;
 };
 template<>
 struct translate_type_imp<std::complex<float>> {
    using type = lapack_complex_float;
 };
 /// Given an Eigen types, this is defined to be the corresponding, layout-compatible lapack type
 template<typename Scalar>
 using translated_type = typename translate_type_imp<Scalar>::type;
 /// These functions convert their arguments from Eigen to Lapack types
 /// This function performs conversion for any of the translations defined above.
 template<typename Source, typename Target=translated_type<Source>>
 auto to_lapack(Source value) { return static_cast<Target>(value); }
 /// This function performs conversions for pointer types corresponding to the translations abovce.
 /// This is valid because the translations are between layout-compatible types.
 template<typename Source, typename Target=translated_type<Source>>
 auto to_lapack(Source *value) { return reinterpret_cast<Target*>(value); }
 /// This function converts the Eigen Index to a lapack index, with possible range checks
 /// \sa internal::convert_index
 lapack_int to_lapack(Index index) {
  return convert_index<lapack_int>(index);
 }
 /// translates storage order of the given Eigen object to the corresponding lapack constant
 template<typename Derived>
 EIGEN_CONSTEXPR lapack_int lapack_storage_of(const EigenBase<Derived> &) {
  return Derived::IsRowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR;
 }
 /// translate UpLo type to the corresponding letter code
 template<UpLoType mode> char translate_mode;
 template<> EIGEN_CONSTEXPR const char translate_mode<Lower> = 'L';
 template<> EIGEN_CONSTEXPR const char translate_mode<Upper> = 'U';
 // ---------------------------------------------------------------------------------------------------------------------
 //              Automatic generation of low-level wrappers
 // ---------------------------------------------------------------------------------------------------------------------
 /*!
 * \internal
 * \brief Helper type to facilitate the wrapping of raw LAPACKE functions for different types into a single, overloaded C++ function.
 * This is achieved in combination with \r EIGEN_MAKE_LAPACKE_WRAPPER
 * \details This implementation works by providing an overloaded call function that just forwards its arguments to the
 * underlying lapack function. Each of these overloads is enabled only if the call is actually well formed.
 * Because these lapack functions take pointers to the underlying scalar type as arguments, even though the actual Scalars
 * would be implicitly convertible, the pointers are not and therefore only a single overload can be valid at the same time.
 * Thus, despite all functions taking fully generic `Args&&... args` as arguments, there is never any ambiguity.
 */
 template<typename DoubleFn, typename SingleFn, typename DoubleCpxFn, typename SingleCpxFn>
 struct WrappingHelper {
  // The naming of double, single, double complex and single complex is purely for readability
  // and doesn't actually affect the workings of this class. In principle, the arguments can
  // be supplied in any permuted order.
  DoubleFn double_; SingleFn single_; DoubleCpxFn double_cpx_; SingleCpxFn single_cpx_;
  template<typename... Args>
  auto call(Args&&... args) -> decltype(double_(std::forward<Args>(args)...)) {
    return double_(std::forward<Args>(args)...);
  }
  template<typename... Args>
  auto call(Args&&... args) -> decltype(single_(std::forward<Args>(args)...)){
    return single_(std::forward<Args>(args)...);
  }
  template<typename... Args>
  auto call(Args&&... args) -> decltype(double_cpx_(std::forward<Args>(args)...)){
    return double_cpx_(std::forward<Args>(args)...);
  }
  template<typename... Args>
  auto call(Args&&... args) -> decltype(single_cpx_(std::forward<Args>(args)...)){
    return single_cpx_(std::forward<Args>(args)...);
  }
 };
 /** \internal Helper function that generates a `WrappingHelper` object with the given function pointers and
 * invokes its `call` method, thus selecting one of the overloads.
 * \sa EIGEN_MAKE_LAPACKE_WRAPPER
 */
 template<typename DoubleFn, typename SingleFn, typename DoubleCpxFn, typename SingleCpxFn, typename... Args>
 auto call_wrapper(DoubleFn df, SingleFn sf, DoubleCpxFn dcf, SingleCpxFn scf, Args&&... args) {
  WrappingHelper<DoubleFn, SingleFn, DoubleCpxFn, SingleCpxFn> helper{df, sf, dcf, scf};
  return helper.call(std::forward<Args>(args)...);
 }
 /**
 * \internal
 * Generates a new function `Function` that dispatches to the corresponding LAPACKE_? prefixed functions.
 * \sa WrappingHelper
 */
 #define EIGEN_MAKE_LAPACKE_WRAPPER(FUNCTION) \
 template<typename... Args> \
 auto FUNCTION(Args&&... args) { return call_wrapper(LAPACKE_d##FUNCTION, LAPACKE_s##FUNCTION, LAPACKE_z##FUNCTION, LAPACKE_c##FUNCTION, std::forward<Args>(args)...); }
 // Now with this macro and the helper wrappers, we can generate the dispatch for all the lapacke functions that are
 // used in Eigen.
 // We define these here instead of in the files where they are used because this allows us to #undef the macro again
 // right here
 EIGEN_MAKE_LAPACKE_WRAPPER(potrf)
 EIGEN_MAKE_LAPACKE_WRAPPER(getrf)
 EIGEN_MAKE_LAPACKE_WRAPPER(geqrf)
 #undef EIGEN_MAKE_LAPACKE_WRAPPER
 }
 }
 }
 #endif // EIGEN_LAPACKE_HELPERS_H