From f5f288b741b173a271b9c939ac5231639135dd93 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 22 Nov 2010 18:49:12 +0100 Subject: [PATCH] split level 1 and 2 implementation files into smaller ones and fix a couple of numerical and tricky issues discovered by the lapack test suite --- blas/common.h | 4 +- blas/complex_double.cpp | 2 + blas/complex_single.cpp | 2 + blas/double.cpp | 2 + blas/level1_cplx_impl.h | 141 +++++++++++ blas/level1_impl.h | 242 ++----------------- blas/level1_real_impl.h | 115 +++++++++ blas/level2_cplx_impl.h | 285 +++++++++++++++++++++++ blas/level2_impl.h | 505 +++------------------------------------- blas/level2_real_impl.h | 225 ++++++++++++++++++ blas/level3_impl.h | 64 +++-- blas/single.cpp | 2 + 12 files changed, 874 insertions(+), 715 deletions(-) create mode 100644 blas/level1_cplx_impl.h create mode 100644 blas/level1_real_impl.h create mode 100644 blas/level2_cplx_impl.h create mode 100644 blas/level2_real_impl.h diff --git a/blas/common.h b/blas/common.h index 290ae112a..d073361ed 100644 --- a/blas/common.h +++ b/blas/common.h @@ -133,7 +133,7 @@ T* get_compact_vector(T* x, int n, int incx) { if(incx==1) return x; - + T* ret = new Scalar[n]; if(incx<0) vector(ret,n) = vector(x,n,-incx).reverse(); else vector(ret,n) = vector(x,n, incx); @@ -145,7 +145,7 @@ T* copy_back(T* x_cpy, T* x, int n, int incx) { if(x_cpy==x) return 0; - + if(incx<0) vector(x,n,-incx).reverse() = vector(x_cpy,n); else vector(x,n, incx) = vector(x_cpy,n); return x_cpy; diff --git a/blas/complex_double.cpp b/blas/complex_double.cpp index bd7674cda..dad87a60d 100644 --- a/blas/complex_double.cpp +++ b/blas/complex_double.cpp @@ -29,5 +29,7 @@ #define ISCOMPLEX 1 #include "level1_impl.h" +#include "level1_cplx_impl.h" #include "level2_impl.h" +#include "level2_cplx_impl.h" #include "level3_impl.h" diff --git a/blas/complex_single.cpp b/blas/complex_single.cpp index 4cf19378f..11bbf629f 100644 --- a/blas/complex_single.cpp +++ b/blas/complex_single.cpp @@ -29,5 +29,7 @@ #define ISCOMPLEX 1 #include "level1_impl.h" +#include "level1_cplx_impl.h" #include "level2_impl.h" +#include "level2_cplx_impl.h" #include "level3_impl.h" diff --git a/blas/double.cpp b/blas/double.cpp index 10373d585..c6927ee22 100644 --- a/blas/double.cpp +++ b/blas/double.cpp @@ -28,5 +28,7 @@ #define ISCOMPLEX 0 #include "level1_impl.h" +#include "level1_real_impl.h" #include "level2_impl.h" +#include "level2_real_impl.h" #include "level3_impl.h" diff --git a/blas/level1_cplx_impl.h b/blas/level1_cplx_impl.h new file mode 100644 index 000000000..e268bb812 --- /dev/null +++ b/blas/level1_cplx_impl.h @@ -0,0 +1,141 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include "common.h" + +struct scalar_norm1_op { + typedef RealScalar result_type; + EIGEN_EMPTY_STRUCT_CTOR(scalar_norm1_op) + inline RealScalar operator() (const Scalar& a) const { return internal::norm1(a); } +}; +namespace Eigen { + namespace internal { + template<> struct functor_traits + { + enum { Cost = 3 * NumTraits::AddCost, PacketAccess = 0 }; + }; + } +} + +// computes the sum of magnitudes of all vector elements or, for a complex vector x, the sum +// res = |Rex1| + |Imx1| + |Rex2| + |Imx2| + ... + |Rexn| + |Imxn|, where x is a vector of order n +RealScalar EIGEN_CAT(EIGEN_CAT(REAL_SCALAR_SUFFIX,SCALAR_SUFFIX),asum_)(int *n, RealScalar *px, int *incx) +{ +// std::cerr << "__asum " << *n << " " << *incx << "\n"; + Complex* x = reinterpret_cast(px); + + if(*n<=0) return 0; + + if(*incx==1) return vector(x,*n).unaryExpr().sum(); + else return vector(x,*n,std::abs(*incx)).unaryExpr().sum(); +} + +// computes a dot product of a conjugated vector with another vector. +Scalar EIGEN_BLAS_FUNC(dotc)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) +{ +// std::cerr << "_dotc " << *n << " " << *incx << " " << *incy << "\n"; + + if(*n<=0) return 0; + + Scalar* x = reinterpret_cast(px); + Scalar* y = reinterpret_cast(py); + + Scalar res; + if(*incx==1 && *incy==1) res = (vector(x,*n).dot(vector(y,*n))); + else if(*incx>0 && *incy>0) res = (vector(x,*n,*incx).dot(vector(y,*n,*incy))); + else if(*incx<0 && *incy>0) res = (vector(x,*n,-*incx).reverse().dot(vector(y,*n,*incy))); + else if(*incx>0 && *incy<0) res = (vector(x,*n,*incx).dot(vector(y,*n,-*incy).reverse())); + else if(*incx<0 && *incy<0) res = (vector(x,*n,-*incx).reverse().dot(vector(y,*n,-*incy).reverse())); + return res; +} + +// computes a vector-vector dot product without complex conjugation. +Scalar EIGEN_BLAS_FUNC(dotu)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) +{ +// std::cerr << "_dotu " << *n << " " << *incx << " " << *incy << "\n"; + + if(*n<=0) return 0; + + Scalar* x = reinterpret_cast(px); + Scalar* y = reinterpret_cast(py); + Scalar res; + if(*incx==1 && *incy==1) res = (vector(x,*n).cwiseProduct(vector(y,*n))).sum(); + else if(*incx>0 && *incy>0) res = (vector(x,*n,*incx).cwiseProduct(vector(y,*n,*incy))).sum(); + else if(*incx<0 && *incy>0) res = (vector(x,*n,-*incx).reverse().cwiseProduct(vector(y,*n,*incy))).sum(); + else if(*incx>0 && *incy<0) res = (vector(x,*n,*incx).cwiseProduct(vector(y,*n,-*incy).reverse())).sum(); + else if(*incx<0 && *incy<0) res = (vector(x,*n,-*incx).reverse().cwiseProduct(vector(y,*n,-*incy).reverse())).sum(); + return res; +} + +RealScalar EIGEN_CAT(EIGEN_CAT(REAL_SCALAR_SUFFIX,SCALAR_SUFFIX),nrm2_)(int *n, RealScalar *px, int *incx) +{ +// std::cerr << "__nrm2 " << *n << " " << *incx << "\n"; + if(*n<=0) return 0; + + Scalar* x = reinterpret_cast(px); + + if(*incx==1) + return vector(x,*n).stableNorm(); + + return vector(x,*n,*incx).stableNorm(); +} + +int EIGEN_CAT(EIGEN_CAT(SCALAR_SUFFIX,REAL_SCALAR_SUFFIX),rot_)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, RealScalar *ps) +{ + if(*n<=0) return 0; + + Scalar* x = reinterpret_cast(px); + Scalar* y = reinterpret_cast(py); + RealScalar c = *pc; + RealScalar s = *ps; + + StridedVectorType vx(vector(x,*n,std::abs(*incx))); + StridedVectorType vy(vector(y,*n,std::abs(*incy))); + + Reverse rvx(vx); + Reverse rvy(vy); + + // TODO implement mixed real-scalar rotations + if(*incx<0 && *incy>0) internal::apply_rotation_in_the_plane(rvx, vy, JacobiRotation(c,s)); + else if(*incx>0 && *incy<0) internal::apply_rotation_in_the_plane(vx, rvy, JacobiRotation(c,s)); + else internal::apply_rotation_in_the_plane(vx, vy, JacobiRotation(c,s)); + + return 0; +} + +int EIGEN_CAT(EIGEN_CAT(SCALAR_SUFFIX,REAL_SCALAR_SUFFIX),scal_)(int *n, RealScalar *palpha, RealScalar *px, int *incx) +{ + if(*n<=0) return 0; + + Scalar* x = reinterpret_cast(px); + RealScalar alpha = *palpha; + +// std::cerr << "__scal " << *n << " " << alpha << " " << *incx << "\n"; + + if(*incx==1) vector(x,*n) *= alpha; + else vector(x,*n,std::abs(*incx)) *= alpha; + + return 0; +} + diff --git a/blas/level1_impl.h b/blas/level1_impl.h index bc99ddb57..fb68abce0 100644 --- a/blas/level1_impl.h +++ b/blas/level1_impl.h @@ -41,209 +41,51 @@ int EIGEN_BLAS_FUNC(axpy)(int *n, RealScalar *palpha, RealScalar *px, int *incx, return 0; } -#if !ISCOMPLEX -// computes the sum of magnitudes of all vector elements or, for a complex vector x, the sum -// res = |Rex1| + |Imx1| + |Rex2| + |Imx2| + ... + |Rexn| + |Imxn|, where x is a vector of order n -RealScalar EIGEN_BLAS_FUNC(asum)(int *n, RealScalar *px, int *incx) -{ -// std::cerr << "_asum " << *n << " " << *incx << "\n"; - - Scalar* x = reinterpret_cast(px); - - if(*n<=0) return 0; - - if(*incx==1) return vector(x,*n).cwiseAbs().sum(); - else return vector(x,*n,std::abs(*incx)).cwiseAbs().sum(); -} -#else - -struct scalar_norm1_op { - typedef RealScalar result_type; - EIGEN_EMPTY_STRUCT_CTOR(scalar_norm1_op) - inline RealScalar operator() (const Scalar& a) const { return internal::norm1(a); } -}; -namespace Eigen { - namespace internal { - template<> struct functor_traits - { - enum { Cost = 3 * NumTraits::AddCost, PacketAccess = 0 }; - }; - } -} - -RealScalar EIGEN_CAT(EIGEN_CAT(REAL_SCALAR_SUFFIX,SCALAR_SUFFIX),asum_)(int *n, RealScalar *px, int *incx) -{ -// std::cerr << "__asum " << *n << " " << *incx << "\n"; - - Complex* x = reinterpret_cast(px); - - if(*n<=0) return 0; - - if(*incx==1) return vector(x,*n).unaryExpr().sum(); - else return vector(x,*n,std::abs(*incx)).unaryExpr().sum(); -} -#endif - int EIGEN_BLAS_FUNC(copy)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) { -// std::cerr << "_copy " << *n << " " << *incx << " " << *incy << "\n"; - if(*n<=0) return 0; Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); - if(*incx==1 && *incy==1) vector(y,*n) = vector(x,*n); - else if(*incx>0 && *incy>0) vector(y,*n,*incy) = vector(x,*n,*incx); - else if(*incx>0 && *incy<0) vector(y,*n,-*incy).reverse() = vector(x,*n,*incx); - else if(*incx<0 && *incy>0) vector(y,*n,*incy) = vector(x,*n,-*incx).reverse(); - else if(*incx<0 && *incy<0) vector(y,*n,-*incy).reverse() = vector(x,*n,-*incx).reverse(); + // be carefull, *incx==0 is allowed !! + if(*incx==1 && *incy==1) + vector(y,*n) = vector(x,*n); + else + { + if(*incx<0) x = x - (*n-1)*(*incx); + if(*incy<0) y = y - (*n-1)*(*incy); + for(int i=0;i<*n;++i) + { + *y = *x; + x += *incx; + y += *incy; + } + } return 0; } -// computes a vector-vector dot product. -Scalar EIGEN_BLAS_FUNC(dot)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) -{ -// std::cerr << "_dot " << *n << " " << *incx << " " << *incy << "\n"; - - if(*n<=0) return 0; - - Scalar* x = reinterpret_cast(px); - Scalar* y = reinterpret_cast(py); - - if(*incx==1 && *incy==1) return (vector(x,*n).cwiseProduct(vector(y,*n))).sum(); - else if(*incx>0 && *incy>0) return (vector(x,*n,*incx).cwiseProduct(vector(y,*n,*incy))).sum(); - else if(*incx<0 && *incy>0) return (vector(x,*n,-*incx).reverse().cwiseProduct(vector(y,*n,*incy))).sum(); - else if(*incx>0 && *incy<0) return (vector(x,*n,*incx).cwiseProduct(vector(y,*n,-*incy).reverse())).sum(); - else if(*incx<0 && *incy<0) return (vector(x,*n,-*incx).reverse().cwiseProduct(vector(y,*n,-*incy).reverse())).sum(); - else return 0; -} - int EIGEN_CAT(EIGEN_CAT(i,SCALAR_SUFFIX),amax_)(int *n, RealScalar *px, int *incx) { -// std::cerr << "i_amax " << *n << " " << *incx << "\n"; - if(*n<=0) return 0; - Scalar* x = reinterpret_cast(px); DenseIndex ret; - if(*incx==1) vector(x,*n).cwiseAbs().maxCoeff(&ret); else vector(x,*n,std::abs(*incx)).cwiseAbs().maxCoeff(&ret); - return ret+1; } - -/* - -// computes a vector-vector dot product with extended precision. -Scalar EIGEN_BLAS_FUNC(sdot)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) +int EIGEN_CAT(EIGEN_CAT(i,SCALAR_SUFFIX),amin_)(int *n, RealScalar *px, int *incx) { - // TODO - Scalar* x = reinterpret_cast(px); - Scalar* y = reinterpret_cast(py); - - if(*incx==1 && *incy==1) - return vector(x,*n).dot(vector(y,*n)); - - return vector(x,*n,*incx).dot(vector(y,*n,*incy)); -} - -*/ - -#if ISCOMPLEX - -// computes a dot product of a conjugated vector with another vector. -Scalar EIGEN_BLAS_FUNC(dotc)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) -{ -// std::cerr << "_dotc " << *n << " " << *incx << " " << *incy << "\n"; - if(*n<=0) return 0; - Scalar* x = reinterpret_cast(px); - Scalar* y = reinterpret_cast(py); - - Scalar res; - if(*incx==1 && *incy==1) res = (vector(x,*n).dot(vector(y,*n))); - else if(*incx>0 && *incy>0) res = (vector(x,*n,*incx).dot(vector(y,*n,*incy))); - else if(*incx<0 && *incy>0) res = (vector(x,*n,-*incx).reverse().dot(vector(y,*n,*incy))); - else if(*incx>0 && *incy<0) res = (vector(x,*n,*incx).dot(vector(y,*n,-*incy).reverse())); - else if(*incx<0 && *incy<0) res = (vector(x,*n,-*incx).reverse().dot(vector(y,*n,-*incy).reverse())); - return res; -} - -// computes a vector-vector dot product without complex conjugation. -Scalar EIGEN_BLAS_FUNC(dotu)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) -{ -// std::cerr << "_dotu " << *n << " " << *incx << " " << *incy << "\n"; - - if(*n<=0) return 0; - - Scalar* x = reinterpret_cast(px); - Scalar* y = reinterpret_cast(py); - Scalar res; - if(*incx==1 && *incy==1) res = (vector(x,*n).cwiseProduct(vector(y,*n))).sum(); - else if(*incx>0 && *incy>0) res = (vector(x,*n,*incx).cwiseProduct(vector(y,*n,*incy))).sum(); - else if(*incx<0 && *incy>0) res = (vector(x,*n,-*incx).reverse().cwiseProduct(vector(y,*n,*incy))).sum(); - else if(*incx>0 && *incy<0) res = (vector(x,*n,*incx).cwiseProduct(vector(y,*n,-*incy).reverse())).sum(); - else if(*incx<0 && *incy<0) res = (vector(x,*n,-*incx).reverse().cwiseProduct(vector(y,*n,-*incy).reverse())).sum(); - return res; -} - -#endif // ISCOMPLEX - -#if !ISCOMPLEX -// computes the Euclidean norm of a vector. -Scalar EIGEN_BLAS_FUNC(nrm2)(int *n, RealScalar *px, int *incx) -{ -// std::cerr << "_nrm2 " << *n << " " << *incx << "\n"; - if(*n<=0) return 0; - - Scalar* x = reinterpret_cast(px); - - if(*incx==1) return vector(x,*n).norm(); - else return vector(x,*n,std::abs(*incx)).norm(); -} -#else -RealScalar EIGEN_CAT(EIGEN_CAT(REAL_SCALAR_SUFFIX,SCALAR_SUFFIX),nrm2_)(int *n, RealScalar *px, int *incx) -{ -// std::cerr << "__nrm2 " << *n << " " << *incx << "\n"; - if(*n<=0) return 0; - - Scalar* x = reinterpret_cast(px); - - if(*incx==1) - return vector(x,*n).norm(); - - return vector(x,*n,*incx).norm(); -} -#endif - -int EIGEN_BLAS_FUNC(rot)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, RealScalar *ps) -{ -// std::cerr << "_rot " << *n << " " << *incx << " " << *incy << "\n"; - if(*n<=0) return 0; - - Scalar* x = reinterpret_cast(px); - Scalar* y = reinterpret_cast(py); - Scalar c = *reinterpret_cast(pc); - Scalar s = *reinterpret_cast(ps); - - StridedVectorType vx(vector(x,*n,std::abs(*incx))); - StridedVectorType vy(vector(y,*n,std::abs(*incy))); - - Reverse rvx(vx); - Reverse rvy(vy); - - if(*incx<0 && *incy>0) internal::apply_rotation_in_the_plane(rvx, vy, JacobiRotation(c,s)); - else if(*incx>0 && *incy<0) internal::apply_rotation_in_the_plane(vx, rvy, JacobiRotation(c,s)); - else internal::apply_rotation_in_the_plane(vx, vy, JacobiRotation(c,s)); - - - return 0; + + DenseIndex ret; + if(*incx==1) vector(x,*n).cwiseAbs().minCoeff(&ret); + else vector(x,*n,std::abs(*incx)).cwiseAbs().minCoeff(&ret); + return ret+1; } int EIGEN_BLAS_FUNC(rotg)(RealScalar *pa, RealScalar *pb, RealScalar *pc, RealScalar *ps) @@ -306,29 +148,6 @@ int EIGEN_BLAS_FUNC(rotg)(RealScalar *pa, RealScalar *pb, RealScalar *pc, RealSc return 0; } -#if !ISCOMPLEX -/* -// performs rotation of points in the modified plane. -int EIGEN_BLAS_FUNC(rotm)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *param) -{ - Scalar* x = reinterpret_cast(px); - Scalar* y = reinterpret_cast(py); - - // TODO - - return 0; -} - -// computes the modified parameters for a Givens rotation. -int EIGEN_BLAS_FUNC(rotmg)(RealScalar *d1, RealScalar *d2, RealScalar *x1, RealScalar *x2, RealScalar *param) -{ - // TODO - - return 0; -} -*/ -#endif // !ISCOMPLEX - int EIGEN_BLAS_FUNC(scal)(int *n, RealScalar *palpha, RealScalar *px, int *incx) { if(*n<=0) return 0; @@ -336,35 +155,14 @@ int EIGEN_BLAS_FUNC(scal)(int *n, RealScalar *palpha, RealScalar *px, int *incx) Scalar* x = reinterpret_cast(px); Scalar alpha = *reinterpret_cast(palpha); -// std::cerr << "_scal " << *n << " " << alpha << " " << *incx << "\n"; - if(*incx==1) vector(x,*n) *= alpha; else vector(x,*n,std::abs(*incx)) *= alpha; return 0; } -#if ISCOMPLEX -int EIGEN_CAT(EIGEN_CAT(SCALAR_SUFFIX,REAL_SCALAR_SUFFIX),scal_)(int *n, RealScalar *palpha, RealScalar *px, int *incx) -{ - if(*n<=0) return 0; - - Scalar* x = reinterpret_cast(px); - RealScalar alpha = *palpha; - -// std::cerr << "__scal " << *n << " " << alpha << " " << *incx << "\n"; - - if(*incx==1) vector(x,*n) *= alpha; - else vector(x,*n,std::abs(*incx)) *= alpha; - - return 0; -} -#endif // ISCOMPLEX - int EIGEN_BLAS_FUNC(swap)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) { -// std::cerr << "_swap " << *n << " " << *incx << " " << *incy << "\n"; - if(*n<=0) return 0; Scalar* x = reinterpret_cast(px); diff --git a/blas/level1_real_impl.h b/blas/level1_real_impl.h new file mode 100644 index 000000000..e18c40f0a --- /dev/null +++ b/blas/level1_real_impl.h @@ -0,0 +1,115 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include "common.h" + +// computes the sum of magnitudes of all vector elements or, for a complex vector x, the sum +// res = |Rex1| + |Imx1| + |Rex2| + |Imx2| + ... + |Rexn| + |Imxn|, where x is a vector of order n +RealScalar EIGEN_BLAS_FUNC(asum)(int *n, RealScalar *px, int *incx) +{ +// std::cerr << "_asum " << *n << " " << *incx << "\n"; + + Scalar* x = reinterpret_cast(px); + + if(*n<=0) return 0; + + if(*incx==1) return vector(x,*n).cwiseAbs().sum(); + else return vector(x,*n,std::abs(*incx)).cwiseAbs().sum(); +} + +// computes a vector-vector dot product. +Scalar EIGEN_BLAS_FUNC(dot)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) +{ +// std::cerr << "_dot " << *n << " " << *incx << " " << *incy << "\n"; + + if(*n<=0) return 0; + + Scalar* x = reinterpret_cast(px); + Scalar* y = reinterpret_cast(py); + + if(*incx==1 && *incy==1) return (vector(x,*n).cwiseProduct(vector(y,*n))).sum(); + else if(*incx>0 && *incy>0) return (vector(x,*n,*incx).cwiseProduct(vector(y,*n,*incy))).sum(); + else if(*incx<0 && *incy>0) return (vector(x,*n,-*incx).reverse().cwiseProduct(vector(y,*n,*incy))).sum(); + else if(*incx>0 && *incy<0) return (vector(x,*n,*incx).cwiseProduct(vector(y,*n,-*incy).reverse())).sum(); + else if(*incx<0 && *incy<0) return (vector(x,*n,-*incx).reverse().cwiseProduct(vector(y,*n,-*incy).reverse())).sum(); + else return 0; +} + +// computes the Euclidean norm of a vector. +// FIXME +Scalar EIGEN_BLAS_FUNC(nrm2)(int *n, RealScalar *px, int *incx) +{ +// std::cerr << "_nrm2 " << *n << " " << *incx << "\n"; + if(*n<=0) return 0; + + Scalar* x = reinterpret_cast(px); + + if(*incx==1) return vector(x,*n).stableNorm(); + else return vector(x,*n,std::abs(*incx)).stableNorm(); +} + +int EIGEN_BLAS_FUNC(rot)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, RealScalar *ps) +{ +// std::cerr << "_rot " << *n << " " << *incx << " " << *incy << "\n"; + if(*n<=0) return 0; + + Scalar* x = reinterpret_cast(px); + Scalar* y = reinterpret_cast(py); + Scalar c = *reinterpret_cast(pc); + Scalar s = *reinterpret_cast(ps); + + StridedVectorType vx(vector(x,*n,std::abs(*incx))); + StridedVectorType vy(vector(y,*n,std::abs(*incy))); + + Reverse rvx(vx); + Reverse rvy(vy); + + if(*incx<0 && *incy>0) internal::apply_rotation_in_the_plane(rvx, vy, JacobiRotation(c,s)); + else if(*incx>0 && *incy<0) internal::apply_rotation_in_the_plane(vx, rvy, JacobiRotation(c,s)); + else internal::apply_rotation_in_the_plane(vx, vy, JacobiRotation(c,s)); + + + return 0; +} + +/* +// performs rotation of points in the modified plane. +int EIGEN_BLAS_FUNC(rotm)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *param) +{ + Scalar* x = reinterpret_cast(px); + Scalar* y = reinterpret_cast(py); + + // TODO + + return 0; +} + +// computes the modified parameters for a Givens rotation. +int EIGEN_BLAS_FUNC(rotmg)(RealScalar *d1, RealScalar *d2, RealScalar *x1, RealScalar *x2, RealScalar *param) +{ + // TODO + + return 0; +} +*/ diff --git a/blas/level2_cplx_impl.h b/blas/level2_cplx_impl.h new file mode 100644 index 000000000..cbbf4f3e9 --- /dev/null +++ b/blas/level2_cplx_impl.h @@ -0,0 +1,285 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include "common.h" + +/** ZHEMV performs the matrix-vector operation + * + * y := alpha*A*x + beta*y, + * + * where alpha and beta are scalars, x and y are n element vectors and + * A is an n by n hermitian matrix. + */ +int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy) +{ + Scalar* a = reinterpret_cast(pa); + Scalar* x = reinterpret_cast(px); + Scalar* y = reinterpret_cast(py); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); + + // check arguments + int info = 0; + if(UPLO(*uplo)==INVALID) info = 1; + else if(*n<0) info = 2; + else if(*lda() * (alpha * vector(actual_x,*n)); + else if(UPLO(*uplo)==LO) vector(actual_y,*n).noalias() += matrix(a,*n,*n,*lda).selfadjointView() * (alpha * vector(actual_x,*n)); + } + + if(actual_x!=x) delete[] actual_x; + if(actual_y!=y) delete[] copy_back(actual_y,y,*n,*incy); + + return 1; +} + +/** ZHBMV performs the matrix-vector operation + * + * y := alpha*A*x + beta*y, + * + * where alpha and beta are scalars, x and y are n element vectors and + * A is an n by n hermitian band matrix, with k super-diagonals. + */ +// int EIGEN_BLAS_FUNC(hbmv)(char *uplo, int *n, int *k, RealScalar *alpha, RealScalar *a, int *lda, +// RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy) +// { +// return 1; +// } + +/** ZHPMV performs the matrix-vector operation + * + * y := alpha*A*x + beta*y, + * + * where alpha and beta are scalars, x and y are n element vectors and + * A is an n by n hermitian matrix, supplied in packed form. + */ +// int EIGEN_BLAS_FUNC(hpmv)(char *uplo, int *n, RealScalar *alpha, RealScalar *ap, RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy) +// { +// return 1; +// } + +/** ZHPR performs the hermitian rank 1 operation + * + * A := alpha*x*conjg( x' ) + A, + * + * where alpha is a real scalar, x is an n element vector and A is an + * n by n hermitian matrix, supplied in packed form. + */ +// int EIGEN_BLAS_FUNC(hpr)(char *uplo, int *n, RealScalar *alpha, RealScalar *x, int *incx, RealScalar *ap) +// { +// return 1; +// } + +/** ZHPR2 performs the hermitian rank 2 operation + * + * A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, + * + * where alpha is a scalar, x and y are n element vectors and A is an + * n by n hermitian matrix, supplied in packed form. + */ +// int EIGEN_BLAS_FUNC(hpr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *x, int *incx, RealScalar *y, int *incy, RealScalar *ap) +// { +// return 1; +// } + +/** ZHER performs the hermitian rank 1 operation + * + * A := alpha*x*conjg( x' ) + A, + * + * where alpha is a real scalar, x is an n element vector and A is an + * n by n hermitian matrix. + */ +int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pa, int *lda) +{ + Scalar* x = reinterpret_cast(px); + Scalar* a = reinterpret_cast(pa); + RealScalar alpha = *reinterpret_cast(palpha); + + int info = 0; + if(UPLO(*uplo)==INVALID) info = 1; + else if(*n<0) info = 2; + else if(*incx==0) info = 5; + else if(*lda().rankUpdate(vector(x_cpy,*n), alpha); +// else if(UPLO(*uplo)==UP) matrix(a,*n,*n,*lda).selfadjointView().rankUpdate(vector(x_cpy,*n), alpha); + + if(UPLO(*uplo)==LO) + for(int j=0;j<*n;++j) + matrix(a,*n,*n,*lda).col(j).tail(*n-j) += alpha * internal::conj(x_cpy[j]) * vector(x_cpy+j,*n-j); + else + for(int j=0;j<*n;++j) + matrix(a,*n,*n,*lda).col(j).head(j+1) += alpha * internal::conj(x_cpy[j]) * vector(x_cpy,j+1); + + matrix(a,*n,*n,*lda).diagonal().imag().setZero(); + + if(x_cpy!=x) delete[] x_cpy; + + return 1; +} + +/** ZHER2 performs the hermitian rank 2 operation + * + * A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, + * + * where alpha is a scalar, x and y are n element vectors and A is an n + * by n hermitian matrix. + */ +int EIGEN_BLAS_FUNC(her2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pa, int *lda) +{ + Scalar* x = reinterpret_cast(px); + Scalar* y = reinterpret_cast(py); + Scalar* a = reinterpret_cast(pa); + Scalar alpha = *reinterpret_cast(palpha); + + int info = 0; + if(UPLO(*uplo)==INVALID) info = 1; + else if(*n<0) info = 2; + else if(*incx==0) info = 5; + else if(*incy==0) info = 7; + else if(*lda().rankUpdate(vector(x_cpy,*n),vector(y_cpy,*n),alpha); + else if(UPLO(*uplo)==UP) matrix(a,*n,*n,*lda).selfadjointView().rankUpdate(vector(x_cpy,*n),vector(y_cpy,*n),alpha); + + matrix(a,*n,*n,*lda).diagonal().imag().setZero(); + + if(x_cpy!=x) delete[] x_cpy; + if(y_cpy!=y) delete[] y_cpy; + + return 1; +} + +/** ZGERU performs the rank 1 operation + * + * A := alpha*x*y' + A, + * + * where alpha is a scalar, x is an m element vector, y is an n element + * vector and A is an m by n matrix. + */ +int EIGEN_BLAS_FUNC(geru)(int *m, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pa, int *lda) +{ + Scalar* x = reinterpret_cast(px); + Scalar* y = reinterpret_cast(py); + Scalar* a = reinterpret_cast(pa); + Scalar alpha = *reinterpret_cast(palpha); + + int info = 0; + if(*m<0) info = 1; + else if(*n<0) info = 2; + else if(*incx==0) info = 5; + else if(*incy==0) info = 7; + else if(*lda(px); + Scalar* y = reinterpret_cast(py); + Scalar* a = reinterpret_cast(pa); + Scalar alpha = *reinterpret_cast(palpha); + + int info = 0; + if(*m<0) info = 1; + else if(*n<0) info = 2; + else if(*incx==0) info = 5; + else if(*incy==0) info = 7; + else if(*lda(pa); Scalar* b = reinterpret_cast(pb); Scalar* c = reinterpret_cast(pc); @@ -59,7 +59,7 @@ int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealSca if(info) return xerbla_(SCALAR_SUFFIX_UP"GEMV ",&info,6); - if(*m==0 || *n==0) + if(*m==0 || *n==0 || (alpha==Scalar(0) && beta==Scalar(1))) return 0; int actual_m = *m; @@ -69,10 +69,13 @@ int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealSca Scalar* actual_b = get_compact_vector(b,actual_n,*incb); Scalar* actual_c = get_compact_vector(c,actual_m,*incc); - + if(beta!=Scalar(1)) - vector(actual_c, actual_m) *= beta; - + { + if(beta==Scalar(0)) vector(actual_c, actual_m).setZero(); + else vector(actual_c, actual_m) *= beta; + } + int code = OP(*opa); func[code](actual_m, actual_n, a, *lda, actual_b, 1, actual_c, 1, alpha); @@ -131,7 +134,7 @@ int EIGEN_BLAS_FUNC(trsv)(char *uplo, char *opa, char *diag, int *n, RealScalar func[code](*n, a, *lda, actual_b); if(actual_b!=b) delete[] copy_back(actual_b,b,*n,*incb); - + return 0; } @@ -195,149 +198,10 @@ int EIGEN_BLAS_FUNC(trmv)(char *uplo, char *opa, char *diag, int *n, RealScalar copy_back(res.data(),b,*n,*incb); if(actual_b!=b) delete[] actual_b; - + return 0; } -// y = alpha*A*x + beta*y -int EIGEN_BLAS_FUNC(symv) (char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy) -{ - Scalar* a = reinterpret_cast(pa); - Scalar* x = reinterpret_cast(px); - Scalar* y = reinterpret_cast(py); - Scalar alpha = *reinterpret_cast(palpha); - Scalar beta = *reinterpret_cast(pbeta); - - // check arguments - int info = 0; - if(UPLO(*uplo)==INVALID) info = 1; - else if(*n<0) info = 2; - else if(*lda() * (alpha * vector(actual_x,*n)); - else if(UPLO(*uplo)==LO) vector(actual_y,*n).noalias() += matrix(a,*n,*n,*lda).selfadjointView() * (alpha * vector(actual_x,*n)); - - if(actual_x!=x) delete[] actual_x; - if(actual_y!=y) delete[] copy_back(actual_y,y,*n,*incy); - - return 1; -} - -// C := alpha*x*x' + C -int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pc, int *ldc) -{ - -// typedef void (*functype)(int, const Scalar *, int, Scalar *, int, Scalar); -// static functype func[2]; - -// static bool init = false; -// if(!init) -// { -// for(int k=0; k<2; ++k) -// func[k] = 0; -// -// func[UP] = (internal::selfadjoint_product::run); -// func[LO] = (internal::selfadjoint_product::run); - -// init = true; -// } - - Scalar* x = reinterpret_cast(px); - Scalar* c = reinterpret_cast(pc); - Scalar alpha = *reinterpret_cast(palpha); - - int info = 0; - if(UPLO(*uplo)==INVALID) info = 1; - else if(*n<0) info = 2; - else if(*incx==0) info = 5; - else if(*ldc().rankUpdate(vector(x_cpy,*n), alpha); - else if(UPLO(*uplo)==UP) matrix(c,*n,*n,*ldc).selfadjointView().rankUpdate(vector(x_cpy,*n), alpha); - - if(x_cpy!=x) delete[] x_cpy; - -// func[code](*n, a, *inca, c, *ldc, alpha); - return 1; -} - - -// C := alpha*x*y' + alpha*y*x' + C -int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, int *ldc) -{ -// typedef void (*functype)(int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar); -// static functype func[2]; -// -// static bool init = false; -// if(!init) -// { -// for(int k=0; k<2; ++k) -// func[k] = 0; -// -// func[UP] = (internal::selfadjoint_product::run); -// func[LO] = (internal::selfadjoint_product::run); -// -// init = true; -// } - - Scalar* x = reinterpret_cast(px); - Scalar* y = reinterpret_cast(py); - Scalar* c = reinterpret_cast(pc); - Scalar alpha = *reinterpret_cast(palpha); - - int info = 0; - if(UPLO(*uplo)==INVALID) info = 1; - else if(*n<0) info = 2; - else if(*incx==0) info = 5; - else if(*incy==0) info = 7; - else if(*ldc().rankUpdate(vector(x_cpy,*n), vector(y_cpy,*n), alpha); - else if(UPLO(*uplo)==UP) matrix(c,*n,*n,*ldc).selfadjointView().rankUpdate(vector(x_cpy,*n), vector(y_cpy,*n), alpha); - - if(x_cpy!=x) delete[] x_cpy; - if(y_cpy!=y) delete[] y_cpy; - -// int code = UPLO(*uplo); -// if(code>=2 || func[code]==0) -// return 0; - -// func[code](*n, a, *inca, b, *incb, c, *ldc, alpha); - return 1; -} - /** DGBMV performs one of the matrix-vector operations * * y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, @@ -345,23 +209,12 @@ int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px * where alpha and beta are scalars, x and y are vectors and A is an * m by n band matrix, with kl sub-diagonals and ku super-diagonals. */ -int EIGEN_BLAS_FUNC(gbmv)(char *trans, int *m, int *n, int *kl, int *ku, RealScalar *alpha, RealScalar *a, int *lda, - RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy) -{ - return 1; -} -/** DSBMV performs the matrix-vector operation - * - * y := alpha*A*x + beta*y, - * - * where alpha and beta are scalars, x and y are n element vectors and - * A is an n by n symmetric band matrix, with k super-diagonals. - */ -int EIGEN_BLAS_FUNC(sbmv)( char *uplo, int *n, int *k, RealScalar *alpha, RealScalar *a, int *lda, - RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy) -{ - return 1; -} +// int EIGEN_BLAS_FUNC(gbmv)(char *trans, int *m, int *n, int *kl, int *ku, RealScalar *alpha, RealScalar *a, int *lda, +// RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy) +// { +// return 1; +// } + /** DTBMV performs one of the matrix-vector operations * @@ -370,10 +223,10 @@ int EIGEN_BLAS_FUNC(sbmv)( char *uplo, int *n, int *k, RealScalar *alpha, RealSc * where x is an n element vector and A is an n by n unit, or non-unit, * upper or lower triangular band matrix, with ( k + 1 ) diagonals. */ -int EIGEN_BLAS_FUNC(tbmv)(char *uplo, char *trans, char *diag, int *n, int *k, RealScalar *a, int *lda, RealScalar *x, int *incx) -{ - return 1; -} +// int EIGEN_BLAS_FUNC(tbmv)(char *uplo, char *trans, char *diag, int *n, int *k, RealScalar *a, int *lda, RealScalar *x, int *incx) +// { +// return 1; +// } /** DTBSV solves one of the systems of equations * @@ -386,23 +239,10 @@ int EIGEN_BLAS_FUNC(tbmv)(char *uplo, char *trans, char *diag, int *n, int *k, R * No test for singularity or near-singularity is included in this * routine. Such tests must be performed before calling this routine. */ -int EIGEN_BLAS_FUNC(tbsv)(char *uplo, char *trans, char *diag, int *n, int *k, RealScalar *a, int *lda, RealScalar *x, int *incx) -{ - return 1; -} - -/** DSPMV performs the matrix-vector operation - * - * y := alpha*A*x + beta*y, - * - * where alpha and beta are scalars, x and y are n element vectors and - * A is an n by n symmetric matrix, supplied in packed form. - * - */ -int EIGEN_BLAS_FUNC(spmv)(char *uplo, int *n, RealScalar *alpha, RealScalar *ap, RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy) -{ - return 1; -} +// int EIGEN_BLAS_FUNC(tbsv)(char *uplo, char *trans, char *diag, int *n, int *k, RealScalar *a, int *lda, RealScalar *x, int *incx) +// { +// return 1; +// } /** DTPMV performs one of the matrix-vector operations * @@ -411,10 +251,10 @@ int EIGEN_BLAS_FUNC(spmv)(char *uplo, int *n, RealScalar *alpha, RealScalar *ap, * where x is an n element vector and A is an n by n unit, or non-unit, * upper or lower triangular matrix, supplied in packed form. */ -int EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *trans, char *diag, int *n, RealScalar *ap, RealScalar *x, int *incx) -{ - return 1; -} +// int EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *trans, char *diag, int *n, RealScalar *ap, RealScalar *x, int *incx) +// { +// return 1; +// } /** DTPSV solves one of the systems of equations * @@ -426,10 +266,10 @@ int EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *trans, char *diag, int *n, RealScala * No test for singularity or near-singularity is included in this * routine. Such tests must be performed before calling this routine. */ -int EIGEN_BLAS_FUNC(tpsv)(char *uplo, char *trans, char *diag, int *n, RealScalar *ap, RealScalar *x, int *incx) -{ - return 1; -} +// int EIGEN_BLAS_FUNC(tpsv)(char *uplo, char *trans, char *diag, int *n, RealScalar *ap, RealScalar *x, int *incx) +// { +// return 1; +// } /** DGER performs the rank 1 operation * @@ -444,7 +284,7 @@ int EIGEN_BLAS_FUNC(ger)(int *m, int *n, Scalar *palpha, Scalar *px, int *incx, Scalar* y = reinterpret_cast(py); Scalar* a = reinterpret_cast(pa); Scalar alpha = *reinterpret_cast(palpha); - + int info = 0; if(*m<0) info = 1; else if(*n<0) info = 2; @@ -453,293 +293,20 @@ int EIGEN_BLAS_FUNC(ger)(int *m, int *n, Scalar *palpha, Scalar *px, int *incx, else if(*lda(pa); - Scalar* x = reinterpret_cast(px); - Scalar* y = reinterpret_cast(py); - Scalar alpha = *reinterpret_cast(palpha); - Scalar beta = *reinterpret_cast(pbeta); - - // check arguments - int info = 0; - if(UPLO(*uplo)==INVALID) info = 1; - else if(*n<0) info = 2; - else if(*lda() * (alpha * vector(actual_x,*n)); - else if(UPLO(*uplo)==LO) vector(actual_y,*n).noalias() += matrix(a,*n,*n,*lda).selfadjointView() * (alpha * vector(actual_x,*n)); - } - - if(actual_x!=x) delete[] actual_x; - if(actual_y!=y) delete[] copy_back(actual_y,y,*n,*incy); - - return 1; -} - -/** ZHBMV performs the matrix-vector operation - * - * y := alpha*A*x + beta*y, - * - * where alpha and beta are scalars, x and y are n element vectors and - * A is an n by n hermitian band matrix, with k super-diagonals. - */ -int EIGEN_BLAS_FUNC(hbmv)(char *uplo, int *n, int *k, RealScalar *alpha, RealScalar *a, int *lda, - RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy) -{ - return 1; -} - -/** ZHPMV performs the matrix-vector operation - * - * y := alpha*A*x + beta*y, - * - * where alpha and beta are scalars, x and y are n element vectors and - * A is an n by n hermitian matrix, supplied in packed form. - */ -int EIGEN_BLAS_FUNC(hpmv)(char *uplo, int *n, RealScalar *alpha, RealScalar *ap, RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy) -{ - return 1; -} - -/** ZHPR performs the hermitian rank 1 operation - * - * A := alpha*x*conjg( x' ) + A, - * - * where alpha is a real scalar, x is an n element vector and A is an - * n by n hermitian matrix, supplied in packed form. - */ -int EIGEN_BLAS_FUNC(hpr)(char *uplo, int *n, RealScalar *alpha, RealScalar *x, int *incx, RealScalar *ap) -{ - return 1; -} - -/** ZHPR2 performs the hermitian rank 2 operation - * - * A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, - * - * where alpha is a scalar, x and y are n element vectors and A is an - * n by n hermitian matrix, supplied in packed form. - */ -int EIGEN_BLAS_FUNC(hpr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *x, int *incx, RealScalar *y, int *incy, RealScalar *ap) -{ - return 1; -} - -/** ZHER performs the hermitian rank 1 operation - * - * A := alpha*x*conjg( x' ) + A, - * - * where alpha is a real scalar, x is an n element vector and A is an - * n by n hermitian matrix. - */ -int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pa, int *lda) -{ - Scalar* x = reinterpret_cast(px); - Scalar* a = reinterpret_cast(pa); - RealScalar alpha = *reinterpret_cast(palpha); - - int info = 0; - if(UPLO(*uplo)==INVALID) info = 1; - else if(*n<0) info = 2; - else if(*incx==0) info = 5; - else if(*lda().rankUpdate(vector(x_cpy,*n), alpha); - else if(UPLO(*uplo)==UP) matrix(a,*n,*n,*lda).selfadjointView().rankUpdate(vector(x_cpy,*n), alpha); - - matrix(a,*n,*n,*lda).diagonal().imag().setZero(); - - if(x_cpy!=x) delete[] x_cpy; - - return 1; -} - -/** ZHER2 performs the hermitian rank 2 operation - * - * A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, - * - * where alpha is a scalar, x and y are n element vectors and A is an n - * by n hermitian matrix. - */ -int EIGEN_BLAS_FUNC(her2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pa, int *lda) -{ - Scalar* x = reinterpret_cast(px); - Scalar* y = reinterpret_cast(py); - Scalar* a = reinterpret_cast(pa); - Scalar alpha = *reinterpret_cast(palpha); - - int info = 0; - if(UPLO(*uplo)==INVALID) info = 1; - else if(*n<0) info = 2; - else if(*incx==0) info = 5; - else if(*incy==0) info = 7; - else if(*lda().rankUpdate(vector(x_cpy,*n),vector(y_cpy,*n),alpha); - else if(UPLO(*uplo)==UP) matrix(a,*n,*n,*lda).selfadjointView().rankUpdate(vector(x_cpy,*n),vector(y_cpy,*n),alpha); - - matrix(a,*n,*n,*lda).diagonal().imag().setZero(); - if(x_cpy!=x) delete[] x_cpy; if(y_cpy!=y) delete[] y_cpy; return 1; } -/** ZGERU performs the rank 1 operation - * - * A := alpha*x*y' + A, - * - * where alpha is a scalar, x is an m element vector, y is an n element - * vector and A is an m by n matrix. - */ -int EIGEN_BLAS_FUNC(geru)(int *m, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pa, int *lda) -{ - Scalar* x = reinterpret_cast(px); - Scalar* y = reinterpret_cast(py); - Scalar* a = reinterpret_cast(pa); - Scalar alpha = *reinterpret_cast(palpha); - - int info = 0; - if(*m<0) info = 1; - else if(*n<0) info = 2; - else if(*incx==0) info = 5; - else if(*incy==0) info = 7; - else if(*lda(px); - Scalar* y = reinterpret_cast(py); - Scalar* a = reinterpret_cast(pa); - Scalar alpha = *reinterpret_cast(palpha); - - int info = 0; - if(*m<0) info = 1; - else if(*n<0) info = 2; - else if(*incx==0) info = 5; - else if(*incy==0) info = 7; - else if(*lda +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include "common.h" + +// y = alpha*A*x + beta*y +int EIGEN_BLAS_FUNC(symv) (char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy) +{ + Scalar* a = reinterpret_cast(pa); + Scalar* x = reinterpret_cast(px); + Scalar* y = reinterpret_cast(py); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); + + // check arguments + int info = 0; + if(UPLO(*uplo)==INVALID) info = 1; + else if(*n<0) info = 2; + else if(*lda() * (alpha * vector(actual_x,*n)); + else if(UPLO(*uplo)==LO) vector(actual_y,*n).noalias() += matrix(a,*n,*n,*lda).selfadjointView() * (alpha * vector(actual_x,*n)); + + if(actual_x!=x) delete[] actual_x; + if(actual_y!=y) delete[] copy_back(actual_y,y,*n,*incy); + + return 1; +} + +// C := alpha*x*x' + C +int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pc, int *ldc) +{ + +// typedef void (*functype)(int, const Scalar *, int, Scalar *, int, Scalar); +// static functype func[2]; + +// static bool init = false; +// if(!init) +// { +// for(int k=0; k<2; ++k) +// func[k] = 0; +// +// func[UP] = (internal::selfadjoint_product::run); +// func[LO] = (internal::selfadjoint_product::run); + +// init = true; +// } + + Scalar* x = reinterpret_cast(px); + Scalar* c = reinterpret_cast(pc); + Scalar alpha = *reinterpret_cast(palpha); + + int info = 0; + if(UPLO(*uplo)==INVALID) info = 1; + else if(*n<0) info = 2; + else if(*incx==0) info = 5; + else if(*ldc m2(matrix(c,*n,*n,*ldc)); + + // TODO check why this is not accurate enough for lapack tests +// if(UPLO(*uplo)==LO) matrix(c,*n,*n,*ldc).selfadjointView().rankUpdate(vector(x_cpy,*n), alpha); +// else if(UPLO(*uplo)==UP) matrix(c,*n,*n,*ldc).selfadjointView().rankUpdate(vector(x_cpy,*n), alpha); + + if(UPLO(*uplo)==LO) + for(int j=0;j<*n;++j) + matrix(c,*n,*n,*ldc).col(j).tail(*n-j) += alpha * x_cpy[j] * vector(x_cpy+j,*n-j); + else + for(int j=0;j<*n;++j) + matrix(c,*n,*n,*ldc).col(j).head(j+1) += alpha * x_cpy[j] * vector(x_cpy,j+1); + + if(x_cpy!=x) delete[] x_cpy; + + return 1; +} + +// C := alpha*x*y' + alpha*y*x' + C +int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, int *ldc) +{ +// typedef void (*functype)(int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar); +// static functype func[2]; +// +// static bool init = false; +// if(!init) +// { +// for(int k=0; k<2; ++k) +// func[k] = 0; +// +// func[UP] = (internal::selfadjoint_product::run); +// func[LO] = (internal::selfadjoint_product::run); +// +// init = true; +// } + + Scalar* x = reinterpret_cast(px); + Scalar* y = reinterpret_cast(py); + Scalar* c = reinterpret_cast(pc); + Scalar alpha = *reinterpret_cast(palpha); + + int info = 0; + if(UPLO(*uplo)==INVALID) info = 1; + else if(*n<0) info = 2; + else if(*incx==0) info = 5; + else if(*incy==0) info = 7; + else if(*ldc().rankUpdate(vector(x_cpy,*n), vector(y_cpy,*n), alpha); + else if(UPLO(*uplo)==UP) matrix(c,*n,*n,*ldc).selfadjointView().rankUpdate(vector(x_cpy,*n), vector(y_cpy,*n), alpha); + + if(x_cpy!=x) delete[] x_cpy; + if(y_cpy!=y) delete[] y_cpy; + +// int code = UPLO(*uplo); +// if(code>=2 || func[code]==0) +// return 0; + +// func[code](*n, a, *inca, b, *incb, c, *ldc, alpha); + return 1; +} + +/** DSBMV performs the matrix-vector operation + * + * y := alpha*A*x + beta*y, + * + * where alpha and beta are scalars, x and y are n element vectors and + * A is an n by n symmetric band matrix, with k super-diagonals. + */ +// int EIGEN_BLAS_FUNC(sbmv)( char *uplo, int *n, int *k, RealScalar *alpha, RealScalar *a, int *lda, +// RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy) +// { +// return 1; +// } + + +/** DSPMV performs the matrix-vector operation + * + * y := alpha*A*x + beta*y, + * + * where alpha and beta are scalars, x and y are n element vectors and + * A is an n by n symmetric matrix, supplied in packed form. + * + */ +// int EIGEN_BLAS_FUNC(spmv)(char *uplo, int *n, RealScalar *alpha, RealScalar *ap, RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy) +// { +// return 1; +// } + +/** DSPR performs the symmetric rank 1 operation + * + * A := alpha*x*x' + A, + * + * where alpha is a real scalar, x is an n element vector and A is an + * n by n symmetric matrix, supplied in packed form. + */ +// int EIGEN_BLAS_FUNC(spr)(char *uplo, int *n, Scalar *alpha, Scalar *x, int *incx, Scalar *ap) +// { +// return 1; +// } + +/** DSPR2 performs the symmetric rank 2 operation + * + * A := alpha*x*y' + alpha*y*x' + A, + * + * where alpha is a scalar, x and y are n element vectors and A is an + * n by n symmetric matrix, supplied in packed form. + */ +// int EIGEN_BLAS_FUNC(spr2)(char *uplo, int *n, RealScalar *alpha, RealScalar *x, int *incx, RealScalar *y, int *incy, RealScalar *ap) +// { +// return 1; +// } + diff --git a/blas/level3_impl.h b/blas/level3_impl.h index 5b28a1d52..6c53dc679 100644 --- a/blas/level3_impl.h +++ b/blas/level3_impl.h @@ -52,7 +52,7 @@ int EIGEN_BLAS_FUNC(gemm)(char *opa, char *opb, int *m, int *n, int *k, RealScal Scalar* c = reinterpret_cast(pc); Scalar alpha = *reinterpret_cast(palpha); Scalar beta = *reinterpret_cast(pbeta); - + int info = 0; if(OP(*opa)==INVALID) info = 1; else if(OP(*opb)==INVALID) info = 2; @@ -342,13 +342,17 @@ int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palp else if(*ldc() *= beta; - else matrix(c, *n, *n, *ldc).triangularView() *= beta; + if(UPLO(*uplo)==UP) + if(beta==Scalar(0)) matrix(c, *n, *n, *ldc).triangularView().setZero(); + else matrix(c, *n, *n, *ldc).triangularView() *= beta; + else + if(beta==Scalar(0)) matrix(c, *n, *n, *ldc).triangularView().setZero(); + else matrix(c, *n, *n, *ldc).triangularView() *= beta; } #if ISCOMPLEX @@ -383,7 +387,7 @@ int EIGEN_BLAS_FUNC(syr2k)(char *uplo, char *op, int *n, int *k, RealScalar *pal Scalar* c = reinterpret_cast(pc); Scalar alpha = *reinterpret_cast(palpha); Scalar beta = *reinterpret_cast(pbeta); - + int info = 0; if(UPLO(*uplo)==INVALID) info = 1; else if(OP(*op)==INVALID) info = 2; @@ -394,11 +398,15 @@ int EIGEN_BLAS_FUNC(syr2k)(char *uplo, char *op, int *n, int *k, RealScalar *pal else if(*ldc() *= beta; - else matrix(c, *n, *n, *ldc).triangularView() *= beta; + if(UPLO(*uplo)==UP) + if(beta==Scalar(0)) matrix(c, *n, *n, *ldc).triangularView().setZero(); + else matrix(c, *n, *n, *ldc).triangularView() *= beta; + else + if(beta==Scalar(0)) matrix(c, *n, *n, *ldc).triangularView().setZero(); + else matrix(c, *n, *n, *ldc).triangularView() *= beta; } if(*k==0) @@ -458,11 +466,9 @@ int EIGEN_BLAS_FUNC(hemm)(char *side, char *uplo, int *m, int *n, RealScalar *pa if(info) return xerbla_(SCALAR_SUFFIX_UP"HEMM ",&info,6); - if(beta==Scalar(0)) - matrix(c, *m, *n, *ldc).setZero(); - else if(beta!=Scalar(1)) - matrix(c, *m, *n, *ldc) *= beta; - + if(beta==Scalar(0)) matrix(c, *m, *n, *ldc).setZero(); + else if(beta!=Scalar(1)) matrix(c, *m, *n, *ldc) *= beta; + if(*m==0 || *n==0) { return 1; @@ -535,11 +541,18 @@ int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palp if(beta!=RealScalar(1)) { - if(UPLO(*uplo)==UP) matrix(c, *n, *n, *ldc).triangularView() *= beta; - else matrix(c, *n, *n, *ldc).triangularView() *= beta; - - matrix(c, *n, *n, *ldc).diagonal().real() *= beta; - matrix(c, *n, *n, *ldc).diagonal().imag().setZero(); + if(UPLO(*uplo)==UP) + if(beta==Scalar(0)) matrix(c, *n, *n, *ldc).triangularView().setZero(); + else matrix(c, *n, *n, *ldc).triangularView() *= beta; + else + if(beta==Scalar(0)) matrix(c, *n, *n, *ldc).triangularView().setZero(); + else matrix(c, *n, *n, *ldc).triangularView() *= beta; + + if(beta!=Scalar(0)) + { + matrix(c, *n, *n, *ldc).diagonal().real() *= beta; + matrix(c, *n, *n, *ldc).diagonal().imag().setZero(); + } } if(*k>0 && alpha!=RealScalar(0)) @@ -573,11 +586,18 @@ int EIGEN_BLAS_FUNC(her2k)(char *uplo, char *op, int *n, int *k, RealScalar *pal if(beta!=RealScalar(1)) { - if(UPLO(*uplo)==UP) matrix(c, *n, *n, *ldc).triangularView() *= beta; - else matrix(c, *n, *n, *ldc).triangularView() *= beta; + if(UPLO(*uplo)==UP) + if(beta==Scalar(0)) matrix(c, *n, *n, *ldc).triangularView().setZero(); + else matrix(c, *n, *n, *ldc).triangularView() *= beta; + else + if(beta==Scalar(0)) matrix(c, *n, *n, *ldc).triangularView().setZero(); + else matrix(c, *n, *n, *ldc).triangularView() *= beta; - matrix(c, *n, *n, *ldc).diagonal().real() *= beta; - matrix(c, *n, *n, *ldc).diagonal().imag().setZero(); + if(beta!=Scalar(0)) + { + matrix(c, *n, *n, *ldc).diagonal().real() *= beta; + matrix(c, *n, *n, *ldc).diagonal().imag().setZero(); + } } else if(*k>0 && alpha!=Scalar(0)) matrix(c, *n, *n, *ldc).diagonal().imag().setZero(); diff --git a/blas/single.cpp b/blas/single.cpp index 9ee2b78dd..5816d329b 100644 --- a/blas/single.cpp +++ b/blas/single.cpp @@ -28,5 +28,7 @@ #define ISCOMPLEX 0 #include "level1_impl.h" +#include "level1_real_impl.h" #include "level2_impl.h" +#include "level2_real_impl.h" #include "level3_impl.h"