add a wip blas library built on top of Eigen. TODO:

- write extensive unit tests (maybe these already exist in other projects) - the level2 functions still have to be implemented
2025-04-12 19:20:36 +08:00 · 2009-09-25 13:08:39 +02:00 · 2009-09-25 13:08:39 +02:00 · 04dc63776a
commit 04dc63776a
parent bdf603caec
11 changed files with 1065 additions and 1 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -95,7 +95,7 @@ if(MSVC)
  option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF)
  if(EIGEN_TEST_SSE2)
    if(NOT CMAKE_CL_64)
-      # arch is not supported on 64 bit systems, SSE is enabled automatically.	
+      # arch is not supported on 64 bit systems, SSE is enabled automatically.
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:SSE2")
    endif(NOT CMAKE_CL_64)
    message("Enabling SSE2 in tests/examples")
@ -142,6 +142,10 @@ if(EIGEN_BUILD_DEMOS)
  add_subdirectory(demos)
 endif(EIGEN_BUILD_DEMOS)

+# Add the work-in-progress BLAS library (blas/ subdirectory) to the build
+# only when the EIGEN_BUILD_BLAS variable is enabled.
+if(EIGEN_BUILD_BLAS)
+  add_subdirectory(blas)
+endif(EIGEN_BUILD_BLAS)
+
 if(EIGEN_BUILD_BTL)
  add_subdirectory(bench/btl)
 endif(EIGEN_BUILD_BTL)
--- a/blas/CMakeLists.txt
+++ b/blas/CMakeLists.txt
@ -0,0 +1,10 @@
+
+# Translation units of the BLAS library, one per scalar type.
+set(EigenBlas_SRCS
+  single.cpp
+  double.cpp
+  complex_single.cpp
+  complex_double.cpp
+)
+
+# The BLAS entry points are always built as a shared library.
+add_library(eigen_blas SHARED ${EigenBlas_SRCS})
+
+# Install locations for each kind of artifact the target can produce.
+install(
+  TARGETS eigen_blas
+  RUNTIME DESTINATION bin
+  LIBRARY DESTINATION lib
+  ARCHIVE DESTINATION lib
+)
+
--- a/blas/README.txt
+++ b/blas/README.txt
@ -0,0 +1,7 @@
+
+This directory contains a BLAS library built on top of Eigen.
+
+This is currently a work in progress which is far from being ready for use,
+but feel free to contribute to it if you wish.
+
+If you want to compile it, set the cmake variable EIGEN_BUILD_BLAS to "on".
--- a/blas/common.h
+++ b/blas/common.h
@ -0,0 +1,115 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_BLAS_COMMON_H
+#define EIGEN_BLAS_COMMON_H
+
+#ifndef SCALAR
+#error the token SCALAR must be defined to compile this file
+#endif
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include <blas.h>
+
+#ifdef __cplusplus
+}
+#endif
+
+
+// Integer codes produced by OP(): no transpose, transpose, adjoint.
+#define NOTR    0
+#define TR      1
+#define ADJ     2
+
+// Integer codes produced by SIDE().
+#define LEFT    0
+#define RIGHT   1
+
+// Integer codes produced by UPLO(): upper / lower.
+#define UP      0
+#define LO      1
+
+// Integer codes produced by DIAG(): non-unit / unit diagonal.
+#define NUNIT   0
+#define UNIT    1
+
+// Decodes an operation character (case-insensitive): 'N' -> NOTR, 'T' -> TR,
+// 'C' -> ADJ; any other character yields 0xff.
+#define OP(X)   (   ((X)=='N' || (X)=='n') ? NOTR   \
+                  : ((X)=='T' || (X)=='t') ? TR     \
+                  : ((X)=='C' || (X)=='c') ? ADJ    \
+                  : 0xff)
+
+// Decodes a side character (case-insensitive): 'L' -> LEFT, 'R' -> RIGHT;
+// any other character yields 0xff.
+#define SIDE(X) (   ((X)=='L' || (X)=='l') ? LEFT   \
+                  : ((X)=='R' || (X)=='r') ? RIGHT  \
+                  : 0xff)
+
+// Decodes a triangle selector (case-insensitive): 'U' -> UP, 'L' -> LO;
+// any other character yields 0xff.
+#define UPLO(X) (   ((X)=='U' || (X)=='u') ? UP     \
+                  : ((X)=='L' || (X)=='l') ? LO     \
+                  : 0xff)
+
+// Decodes a diagonal-kind character (case-insensitive): 'N' -> NUNIT,
+// 'U' -> UNIT; any other character yields 0xff.
+// The lowercase 'n' is accepted, like the lowercase forms in OP/SIDE/UPLO.
+#define DIAG(X) (   ((X)=='N' || (X)=='n') ? NUNIT  \
+                  : ((X)=='U' || (X)=='u') ? UNIT   \
+                  : 0xff)
+
+#include <Eigen/Core>
+#include <Eigen/Jacobi>
+using namespace Eigen;
+
+// Wraps a raw buffer as a rows x cols matrix expression without copying.
+// The buffer is mapped as a (stride x cols) matrix in Eigen's default storage
+// order and the top-left rows x cols block of that map is returned, so
+// `stride` acts as the leading dimension of the buffer.
+// NOTE(review): presumably requires stride >= rows; nothing checks it here.
+template<typename T>
+Block<NestByValue<Map<Matrix<T,Dynamic,Dynamic> > >, Dynamic, Dynamic>
+matrix(T* data, int rows, int cols, int stride)
+{
+  return Map<Matrix<T,Dynamic,Dynamic> >(data, stride, cols).nestByValue().block(0,0,rows,cols);
+}
+
+// Wraps a raw buffer as a strided vector expression of `size` elements.
+// The buffer is mapped as a row-major (size x incr) matrix and its first
+// column is returned, i.e. the elements data[0], data[incr], data[2*incr], ...
+// NOTE(review): incr is used as a column count, so this presumably only
+// works for incr >= 1 (negative BLAS increments are not handled) - confirm.
+template<typename T>
+Block<NestByValue<Map<Matrix<T,Dynamic,Dynamic,RowMajor> > >, Dynamic, 1>
+vector(T* data, int size, int incr)
+{
+  return Map<Matrix<T,Dynamic,Dynamic,RowMajor> >(data, size, incr).nestByValue().col(0);
+}
+
+// Wraps a raw buffer of `size` contiguous elements as a column-vector
+// expression without copying (the unit-stride counterpart of the strided
+// overload).
+template<typename T>
+Map<Matrix<T,Dynamic,1> >
+vector(T* data, int size)
+{
+  return Map<Matrix<T,Dynamic,1> >(data, size);
+}
+
+// Scalar is the type selected by the including .cpp file through SCALAR;
+// RealScalar is its real counterpart and Complex the complex type built on it.
+typedef SCALAR Scalar;
+typedef NumTraits<Scalar>::Real RealScalar;
+typedef std::complex<RealScalar> Complex;
+
+// Compile-time flags derived from the scalar type. Conj mirrors IsComplex.
+enum
+{
+  IsComplex = Eigen::NumTraits<SCALAR>::IsComplex,
+  Conj = IsComplex
+};
+
+// Expression types returned by matrix(), vector(data,size,incr) and
+// vector(data,size) respectively when instantiated with Scalar.
+typedef Block<NestByValue<Map<Matrix<Scalar,Dynamic,Dynamic> > >, Dynamic, Dynamic> MatrixType;
+typedef Block<NestByValue<Map<Matrix<Scalar,Dynamic,Dynamic, RowMajor> > >, Dynamic, 1> StridedVectorType;
+typedef Map<Matrix<Scalar,Dynamic,1> > CompactVectorType;
+
+// Builds the exported symbol name for BLAS routine X from SCALAR_SUFFIX, X and
+// a trailing underscore (presumably EIGEN_CAT token-pastes its two arguments,
+// e.g. suffix d and axpy giving daxpy_ - confirm against Eigen's Macros.h).
+#define EIGEN_BLAS_FUNC(X) EIGEN_CAT(SCALAR_SUFFIX,X##_)
+
+#endif // EIGEN_BLAS_COMMON_H
--- a/blas/complex_double.cpp
+++ b/blas/complex_double.cpp
@ -0,0 +1,31 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#define SCALAR        std::complex<double>
+// BLAS naming convention: "z" is the prefix for double-precision complex.
+#define SCALAR_SUFFIX z
+#define ISCOMPLEX     1
+
+#include "level1_impl.h"
+#include "level2_impl.h"
+#include "level3_impl.h"
--- a/blas/complex_single.cpp
+++ b/blas/complex_single.cpp
@ -0,0 +1,31 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#define SCALAR        std::complex<float>
+// BLAS naming convention: "c" is the prefix for single-precision complex.
+#define SCALAR_SUFFIX c
+#define ISCOMPLEX     1
+
+#include "level1_impl.h"
+#include "level2_impl.h"
+#include "level3_impl.h"
--- a/blas/double.cpp
+++ b/blas/double.cpp
@ -0,0 +1,31 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+// Instantiates the level 1/2/3 implementations for real double precision.
+// SCALAR selects the scalar type, SCALAR_SUFFIX the symbol-name prefix used by
+// EIGEN_BLAS_FUNC, and ISCOMPLEX gates the complex-only / real-only routines.
+#define SCALAR        double
+#define SCALAR_SUFFIX d
+#define ISCOMPLEX     0
+
+#include "level1_impl.h"
+#include "level2_impl.h"
+#include "level3_impl.h"
--- a/blas/level1_impl.h
+++ b/blas/level1_impl.h
@ -0,0 +1,225 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#include "common.h"
+
+// Computes y += alpha * x for two vectors of *n elements.
+// The raw pointers are reinterpreted as Scalar; *incx / *incy are the element
+// strides of x and y. Always returns 1.
+int EIGEN_BLAS_FUNC(axpy)(int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy)
+{
+  const Scalar factor = *reinterpret_cast<Scalar*>(palpha);
+  Scalar* const src = reinterpret_cast<Scalar*>(px);
+  Scalar* const dst = reinterpret_cast<Scalar*>(py);
+  const int count = *n;
+
+  // Use the compact (unit-stride) maps only when both vectors are contiguous.
+  const bool strided = (*incx!=1) || (*incy!=1);
+  if(strided)
+    vector(dst,count,*incy) += factor * vector(src,count,*incx);
+  else
+    vector(dst,count) += factor * vector(src,count);
+
+  return 1;
+}
+
+// Computes the sum of magnitudes of all vector elements or, for a complex vector x, the sum
+// res = |Rex1| + |Imx1| + |Rex2| + |Imx2| + ... + |Rexn| + |Imxn|, where x is a vector of order n.
+// px is the vector data viewed as RealScalar; *incx is the stride counted in
+// vector elements (i.e. in complex numbers for a complex Scalar).
+RealScalar EIGEN_BLAS_FUNC(asum)(int *n, RealScalar *px, int *incx)
+{
+  // Contiguous data: a complex vector is simply 2*n consecutive reals.
+  if(*incx==1)
+    return vector(px, IsComplex ? 2* *n : *n).cwise().abs().sum();
+
+  if(!IsComplex)
+    return vector(px,*n,*incx).cwise().abs().sum();
+
+  // Strided complex data: element i starts at real index 2*i*incx, so the real
+  // and imaginary parts form two real vectors with stride 2*incx.
+  const int step = 2 * *incx;
+  return vector(px,  *n,step).cwise().abs().sum()
+       + vector(px+1,*n,step).cwise().abs().sum();
+}
+
+// Copies the *n elements of x into y; *incx / *incy are the element strides.
+// The data is mapped as Scalar (as axpy does) so that strides are counted in
+// whole elements for complex scalars too. Always returns 1.
+int EIGEN_BLAS_FUNC(copy)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy)
+{
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  Scalar* y = reinterpret_cast<Scalar*>(py);
+
+  if(*incx==1 && *incy==1)
+    vector(y,*n) = vector(x,*n);
+  else
+    vector(y,*n,*incy) = vector(x,*n,*incx);
+
+  return 1;
+}
+
+// Vector-vector dot product: the sum of the coefficient-wise products of
+// x and y (*n elements each, strides *incx / *incy).
+Scalar EIGEN_BLAS_FUNC(dot)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy)
+{
+  Scalar* const lhs = reinterpret_cast<Scalar*>(px);
+  Scalar* const rhs = reinterpret_cast<Scalar*>(py);
+  const int count = *n;
+
+  const bool strided = (*incx!=1) || (*incy!=1);
+  if(strided)
+    return (vector(lhs,count,*incx).cwise()*vector(rhs,count,*incy)).sum();
+  else
+    return (vector(lhs,count).cwise()*vector(rhs,count)).sum();
+}
+
+/*
+
+// computes a vector-vector dot product with extended precision.
+Scalar EIGEN_BLAS_FUNC(sdot)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy)
+{
+  // TODO
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  Scalar* y = reinterpret_cast<Scalar*>(py);
+
+  if(*incx==1 && *incy==1)
+    return vector(x,*n).dot(vector(y,*n));
+
+  return vector(x,*n,*incx).dot(vector(y,*n,*incy));
+}
+
+*/
+
+#if ISCOMPLEX
+
+// Dot product of a conjugated vector with another vector, delegated to
+// Eigen's dot() on the mapped vectors (*n elements, strides *incx / *incy).
+Scalar EIGEN_BLAS_FUNC(dotc)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy)
+{
+  Scalar* const lhs = reinterpret_cast<Scalar*>(px);
+  Scalar* const rhs = reinterpret_cast<Scalar*>(py);
+  const int count = *n;
+
+  const bool strided = (*incx!=1) || (*incy!=1);
+  if(strided)
+    return vector(lhs,count,*incx).dot(vector(rhs,count,*incy));
+  else
+    return vector(lhs,count).dot(vector(rhs,count));
+}
+
+// Dot product without complex conjugation: the sum of the coefficient-wise
+// products of x and y (*n elements each, strides *incx / *incy).
+Scalar EIGEN_BLAS_FUNC(dotu)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy)
+{
+  Scalar* const lhs = reinterpret_cast<Scalar*>(px);
+  Scalar* const rhs = reinterpret_cast<Scalar*>(py);
+  const int count = *n;
+
+  const bool strided = (*incx!=1) || (*incy!=1);
+  if(strided)
+    return (vector(lhs,count,*incx).cwise()*vector(rhs,count,*incy)).sum();
+  else
+    return (vector(lhs,count).cwise()*vector(rhs,count)).sum();
+}
+
+#endif // ISCOMPLEX
+
+// Computes the Euclidean norm of a vector of *n elements with stride *incx.
+// The norm is a real number even for complex vectors, so the result is
+// returned as RealScalar (identical to Scalar for the real instantiations).
+RealScalar EIGEN_BLAS_FUNC(nrm2)(int *n, RealScalar *px, int *incx)
+{
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+
+  if(*incx==1)
+    return vector(x,*n).norm();
+
+  return vector(x,*n,*incx).norm();
+}
+
+// Applies the plane rotation described by (*pc, *ps) to the vector pair (x, y),
+// each of *n elements with strides *incx / *incy. Always returns 1.
+// NOTE(review): c and s are read as Scalar; for the complex instantiations the
+// reference BLAS routines take real c/s - confirm which is intended.
+int EIGEN_BLAS_FUNC(rot)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, RealScalar *ps)
+{
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  Scalar* y = reinterpret_cast<Scalar*>(py);
+  Scalar c = *reinterpret_cast<Scalar*>(pc);
+  Scalar s = *reinterpret_cast<Scalar*>(ps);
+
+  // The strided views are used unconditionally, including for unit strides.
+  StridedVectorType vx(vector(x,*n,*incx));
+  StridedVectorType vy(vector(y,*n,*incy));
+  ei_apply_rotation_in_the_plane(vx, vy, PlanarRotation<Scalar>(c,s));
+  return 1;
+}
+
+// Builds a Givens rotation from the values *pa and *pb using
+// PlanarRotation::makeGivens and stores its c() and s() into *pc and *ps.
+// *pa and *pb are only read here. Always returns 1.
+// NOTE(review): the reference BLAS rotg also overwrites a and b - confirm
+// whether that is still to be implemented.
+int EIGEN_BLAS_FUNC(rotg)(RealScalar *pa, RealScalar *pb, RealScalar *pc, RealScalar *ps)
+{
+  Scalar a = *reinterpret_cast<Scalar*>(pa);
+  Scalar b = *reinterpret_cast<Scalar*>(pb);
+  Scalar* c = reinterpret_cast<Scalar*>(pc);
+  Scalar* s = reinterpret_cast<Scalar*>(ps);
+
+  PlanarRotation<Scalar> r;
+  r.makeGivens(a,b);
+  *c = r.c();
+  *s = r.s();
+
+  return 1;
+}
+
+#if !ISCOMPLEX
+/*
+// performs rotation of points in the modified plane.
+int EIGEN_BLAS_FUNC(rotm)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *param)
+{
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  Scalar* y = reinterpret_cast<Scalar*>(py);
+
+  // TODO
+
+  return 0;
+}
+
+// computes the modified parameters for a Givens rotation.
+int EIGEN_BLAS_FUNC(rotmg)(RealScalar *d1, RealScalar *d2, RealScalar *x1, RealScalar *x2, RealScalar *param)
+{
+  // TODO
+
+  return 0;
+}
+*/
+#endif // !ISCOMPLEX
+
+// Scales the *n elements of x (stride *incx) in place by alpha. Always returns 1.
+// NOTE(review): the reference BLAS argument order is (n, alpha, x, incx);
+// the order used here is kept as-is so existing callers are not affected.
+int EIGEN_BLAS_FUNC(scal)(int *n, RealScalar *px, int *incx, RealScalar *palpha)
+{
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
+
+  // Exactly one of the two paths must run, otherwise a unit-stride vector
+  // would be scaled twice.
+  if(*incx==1)
+    vector(x,*n) *= alpha;
+  else
+    vector(x,*n,*incx) *= alpha;
+
+  return 1;
+}
+
+// Exchanges the *n elements of x and y; *incx / *incy are the element strides.
+// The data is mapped as Scalar (as axpy does) so that strides are counted in
+// whole elements for complex scalars too. Always returns 1.
+int EIGEN_BLAS_FUNC(swap)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy)
+{
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  Scalar* y = reinterpret_cast<Scalar*>(py);
+
+  if(*incx==1 && *incy==1)
+    vector(y,*n).swap(vector(x,*n));
+  else
+    vector(y,*n,*incy).swap(vector(x,*n,*incx));
+
+  return 1;
+}
+
+#if !ISCOMPLEX
+
+// Real-valued entry point operating on a complex vector of *n elements:
+// returns |Rex1| + |Imx1| + ... + |Rexn| + |Imxn| (not the sum of moduli).
+// px is the complex data viewed as RealScalar; *incx is the stride counted in
+// complex elements.
+RealScalar EIGEN_BLAS_FUNC(casum)(int *n, RealScalar *px, int *incx)
+{
+  // Contiguous data: the complex vector is 2*n consecutive reals.
+  if(*incx==1)
+    return vector(px,2* *n).cwise().abs().sum();
+
+  // Strided data: element i starts at real index 2*i*incx, so the real and
+  // imaginary parts form two real vectors with stride 2*incx.
+  const int step = 2 * *incx;
+  return vector(px,  *n,step).cwise().abs().sum()
+       + vector(px+1,*n,step).cwise().abs().sum();
+}
+
+#endif // !ISCOMPLEX
--- a/blas/level2_impl.h
+++ b/blas/level2_impl.h
@ -0,0 +1,214 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#include "common.h"
+
+// General matrix-vector product: c = alpha * op(a) * b + beta * c.
+//   opa        'N'/'n' (no transpose), 'T'/'t' (transpose) or 'C'/'c' (adjoint)
+//   m, n       dimensions passed to matrix()/vector() as shown below
+//   pa, lda    matrix data and its leading dimension
+//   pb, incb   input vector and its stride
+//   pc, incc   input/output vector and its stride
+// Returns 1 on success and 0 when *opa is not recognized; in that case c has
+// already been scaled by beta.
+// NOTE(review): reference BLAS defines A as m x n for every op, with x of
+// length m and y of length n in the transposed cases; the transposed branches
+// here map a as n x m and keep c of length m - confirm the intended convention.
+int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *incb, RealScalar *pbeta, RealScalar *pc, int *incc)
+{
+  Scalar* a = reinterpret_cast<Scalar*>(pa);
+  Scalar* b = reinterpret_cast<Scalar*>(pb);
+  Scalar* c = reinterpret_cast<Scalar*>(pc);
+  Scalar alpha  = *reinterpret_cast<Scalar*>(palpha);
+  Scalar beta   = *reinterpret_cast<Scalar*>(pbeta);
+
+  if(beta!=Scalar(1))
+    vector(c, *m, *incc) *= beta;
+
+  const int op = OP(*opa);
+
+  if(op==NOTR)
+  {
+    if(*incc==1)
+      vector(c,*m)        += alpha * matrix(a,*m,*n,*lda) * vector(b,*n,*incb);
+    else
+      vector(c,*m,*incc)  += alpha * matrix(a,*m,*n,*lda) * vector(b,*n,*incb);
+  }
+  else if(op==TR)
+  {
+    if(*incb==1)
+      vector(c,*m,*incc)  += alpha * matrix(a,*n,*m,*lda).transpose() * vector(b,*n);
+    else
+      vector(c,*m,*incc)  += alpha * matrix(a,*n,*m,*lda).transpose() * vector(b,*n,*incb);
+  }
+  else if(op==ADJ)
+  {
+    // Adjoint case; this branch used to test TR a second time and was
+    // therefore unreachable.
+    if(*incb==1)
+      vector(c,*m,*incc)  += alpha * matrix(a,*n,*m,*lda).adjoint() * vector(b,*n);
+    else
+      vector(c,*m,*incc)  += alpha * matrix(a,*n,*m,*lda).adjoint() * vector(b,*n,*incb);
+  }
+  else
+    return 0;
+
+  return 1;
+}
+
+/*
+int EIGEN_BLAS_FUNC(trsv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pa, int *lda, RealScalar *pb, int *incb)
+{
+  typedef void (*functype)(int, const Scalar *, int, Scalar *, int);
+  functype func[16];
+
+  static bool init = false;
+  if(!init)
+  {
+    for(int k=0; k<16; ++k)
+      func[k] = 0;
+
+//     func[NOTR  | (UP << 2) | (NUNIT << 3)] = (ei_triangular_solve_vector<Scalar, UpperTriangular|0,          false,ColMajor,ColMajor>::run);
+//     func[TR    | (UP << 2) | (NUNIT << 3)] = (ei_triangular_solve_vector<Scalar, UpperTriangular|0,          false,RowMajor,ColMajor>::run);
+//     func[ADJ   | (UP << 2) | (NUNIT << 3)] = (ei_triangular_solve_vector<Scalar, UpperTriangular|0,          Conj, RowMajor,ColMajor>::run);
+//
+//     func[NOTR  | (LO << 2) | (NUNIT << 3)] = (ei_triangular_solve_vector<Scalar, LowerTriangular|0,          false,ColMajor,ColMajor>::run);
+//     func[TR    | (LO << 2) | (NUNIT << 3)] = (ei_triangular_solve_vector<Scalar, LowerTriangular|0,          false,RowMajor,ColMajor>::run);
+//     func[ADJ   | (LO << 2) | (NUNIT << 3)] = (ei_triangular_solve_vector<Scalar, LowerTriangular|0,          Conj, RowMajor,ColMajor>::run);
+//
+//     func[NOTR  | (UP << 3) | (UNIT  << 3)] = (ei_triangular_solve_vector<Scalar, UpperTriangular|UnitDiagBit,false,ColMajor,ColMajor>::run);
+//     func[TR    | (UP << 2) | (UNIT  << 3)] = (ei_triangular_solve_vector<Scalar, UpperTriangular|UnitDiagBit,false,RowMajor,ColMajor>::run);
+//     func[ADJ   | (UP << 2) | (UNIT  << 3)] = (ei_triangular_solve_vector<Scalar, UpperTriangular|UnitDiagBit,Conj, RowMajor,ColMajor>::run);
+//
+//     func[NOTR  | (LO << 2) | (UNIT  << 3)] = (ei_triangular_solve_vector<Scalar, LowerTriangular|UnitDiagBit,false,ColMajor,ColMajor>::run);
+//     func[TR    | (LO << 2) | (UNIT  << 3)] = (ei_triangular_solve_vector<Scalar, LowerTriangular|UnitDiagBit,false,RowMajor,ColMajor>::run);
+//     func[ADJ   | (LO << 2) | (UNIT  << 3)] = (ei_triangular_solve_vector<Scalar, LowerTriangular|UnitDiagBit,Conj, RowMajor,ColMajor>::run);
+
+    init = true;
+  }
+
+  Scalar* a = reinterpret_cast<Scalar*>(pa);
+  Scalar* b = reinterpret_cast<Scalar*>(pb);
+
+  int code = OP(*opa) | (UPLO(*uplo) << 2) | (DIAG(*diag) << 3);
+  if(code>=16 || func[code]==0)
+    return 0;
+
+  func[code](*n, a, *lda, b, *incb);
+  return 1;
+}
+*/
+
+/*
+int EIGEN_BLAS_FUNC(trmv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pa, int *lda, RealScalar *pb, int *incb)
+{
+  // TODO
+
+  typedef void (*functype)(int, const Scalar *, int, const Scalar *, int, Scalar *, int);
+  functype func[16];
+
+  static bool init = false;
+  if(!init)
+  {
+    for(int k=0; k<16; ++k)
+      func[k] = 0;
+
+//     func[NOTR  | (UP << 2) | (NUNIT << 3)] = (ei_product_triangular_matrix_vector<Scalar,UpperTriangular|0,          true, ColMajor,false,ColMajor,false,ColMajor>::run);
+//     func[TR    | (UP << 2) | (NUNIT << 3)] = (ei_product_triangular_matrix_vector<Scalar,UpperTriangular|0,          true, RowMajor,false,ColMajor,false,ColMajor>::run);
+//     func[ADJ   | (UP << 2) | (NUNIT << 3)] = (ei_product_triangular_matrix_vector<Scalar,UpperTriangular|0,          true, RowMajor,Conj, ColMajor,false,ColMajor>::run);
+//
+//     func[NOTR  | (LO << 2) | (NUNIT << 3)] = (ei_product_triangular_matrix_vector<Scalar,LowerTriangular|0,          true, ColMajor,false,ColMajor,false,ColMajor>::run);
+//     func[TR    | (LO << 2) | (NUNIT << 3)] = (ei_product_triangular_matrix_vector<Scalar,LowerTriangular|0,          true, RowMajor,false,ColMajor,false,ColMajor>::run);
+//     func[ADJ   | (LO << 2) | (NUNIT << 3)] = (ei_product_triangular_matrix_vector<Scalar,LowerTriangular|0,          true, RowMajor,Conj, ColMajor,false,ColMajor>::run);
+//
+//     func[NOTR  | (UP << 2) | (UNIT  << 3)] = (ei_product_triangular_matrix_vector<Scalar,UpperTriangular|UnitDiagBit,true, ColMajor,false,ColMajor,false,ColMajor>::run);
+//     func[TR    | (UP << 2) | (UNIT  << 3)] = (ei_product_triangular_matrix_vector<Scalar,UpperTriangular|UnitDiagBit,true, RowMajor,false,ColMajor,false,ColMajor>::run);
+//     func[ADJ   | (UP << 2) | (UNIT  << 3)] = (ei_product_triangular_matrix_vector<Scalar,UpperTriangular|UnitDiagBit,true, RowMajor,Conj, ColMajor,false,ColMajor>::run);
+//
+//     func[NOTR  | (LO << 2) | (UNIT  << 3)] = (ei_product_triangular_matrix_vector<Scalar,LowerTriangular|UnitDiagBit,true, ColMajor,false,ColMajor,false,ColMajor>::run);
+//     func[TR    | (LO << 2) | (UNIT  << 3)] = (ei_product_triangular_matrix_vector<Scalar,LowerTriangular|UnitDiagBit,true, RowMajor,false,ColMajor,false,ColMajor>::run);
+//     func[ADJ   | (LO << 2) | (UNIT  << 3)] = (ei_product_triangular_matrix_vector<Scalar,LowerTriangular|UnitDiagBit,true, RowMajor,Conj, ColMajor,false,ColMajor>::run);
+
+    init = true;
+  }
+
+  Scalar* a = reinterpret_cast<Scalar*>(pa);
+  Scalar* b = reinterpret_cast<Scalar*>(pb);
+
+  int code = OP(*opa) | (UPLO(*uplo) << 2) | (DIAG(*diag) << 3);
+  if(code>=16 || func[code]==0)
+    return 0;
+
+  func[code](*n, a, *lda, b, *incb, b, *incb);
+  return 1;
+}
+*/
+
+/*
+int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *inca, RealScalar *pc, int *ldc)
+{
+  // TODO
+  typedef void (*functype)(int, const Scalar *, int, Scalar *, int, Scalar);
+  functype func[2];
+
+  static bool init = false;
+  if(!init)
+  {
+    for(int k=0; k<2; ++k)
+      func[k] = 0;
+
+//     func[UP] = (ei_selfadjoint_product<Scalar,ColMajor,ColMajor,false,UpperTriangular>::run);
+//     func[LO] = (ei_selfadjoint_product<Scalar,ColMajor,ColMajor,false,LowerTriangular>::run);
+
+    init = true;
+  }
+
+  Scalar* a = reinterpret_cast<Scalar*>(pa);
+  Scalar* c = reinterpret_cast<Scalar*>(pc);
+  Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
+
+  int code = UPLO(*uplo);
+  if(code>=2 || func[code]==0)
+    return 0;
+
+  func[code](*n, a, *inca, c, *ldc, alpha);
+  return 1;
+}
+*/
+
+/*
+int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *inca, RealScalar *pb, int *incb, RealScalar *pc, int *ldc)
+{
+  // TODO
+  typedef void (*functype)(int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar);
+  functype func[2];
+
+  static bool init = false;
+  if(!init)
+  {
+    for(int k=0; k<2; ++k)
+      func[k] = 0;
+
+//     func[UP] = (ei_selfadjoint_product<Scalar,ColMajor,ColMajor,false,UpperTriangular>::run);
+//     func[LO] = (ei_selfadjoint_product<Scalar,ColMajor,ColMajor,false,LowerTriangular>::run);
+
+    init = true;
+  }
+
+  Scalar* a = reinterpret_cast<Scalar*>(pa);
+  Scalar* b = reinterpret_cast<Scalar*>(pb);
+  Scalar* c = reinterpret_cast<Scalar*>(pc);
+  Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
+
+  int code = UPLO(*uplo);
+  if(code>=2 || func[code]==0)
+    return 0;
+
+  func[code](*n, a, *inca, b, *incb, c, *ldc, alpha);
+  return 1;
+}
+*/
+
+#if ISCOMPLEX
+
+#endif // ISCOMPLEX
--- a/blas/level3_impl.h
+++ b/blas/level3_impl.h
@ -0,0 +1,365 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#include "common.h"
+
+// General matrix-matrix product: c = alpha*op(a)*op(b) + beta*c.
+//
+// opa and opb are decoded by OP() into NOTR, TR or ADJ and select the kernel
+// instantiation used for the product. All scalar and matrix arguments arrive
+// as RealScalar* and are reinterpreted as Scalar.
+//
+// Returns 1 on success, and 0 when no kernel is registered for the requested
+// opa/opb combination; in that case c is left untouched.
+int EIGEN_BLAS_FUNC(gemm)(char *opa, char *opb, int *m, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc)
+{
+  typedef void (*functype)(int, int, int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar);
+  // The table has to be static: it is filled only once (guarded by `init`),
+  // so an automatic array would contain garbage on every call but the first.
+  static functype func[12];
+
+  static bool init = false;
+  if(!init)
+  {
+    // `i` rather than `k`, so the loop index does not shadow the k parameter
+    for(int i=0; i<12; ++i)
+      func[i] = 0;
+    func[NOTR  | (NOTR << 2)] = (ei_general_matrix_matrix_product<Scalar,ColMajor,false,ColMajor,false,ColMajor>::run);
+    func[TR    | (NOTR << 2)] = (ei_general_matrix_matrix_product<Scalar,RowMajor,false,ColMajor,false,ColMajor>::run);
+    func[ADJ   | (NOTR << 2)] = (ei_general_matrix_matrix_product<Scalar,RowMajor,Conj, ColMajor,false,ColMajor>::run);
+    func[NOTR  | (TR   << 2)] = (ei_general_matrix_matrix_product<Scalar,ColMajor,false,RowMajor,false,ColMajor>::run);
+    func[TR    | (TR   << 2)] = (ei_general_matrix_matrix_product<Scalar,RowMajor,false,RowMajor,false,ColMajor>::run);
+    func[ADJ   | (TR   << 2)] = (ei_general_matrix_matrix_product<Scalar,RowMajor,Conj, RowMajor,false,ColMajor>::run);
+    func[NOTR  | (ADJ  << 2)] = (ei_general_matrix_matrix_product<Scalar,ColMajor,false,RowMajor,Conj, ColMajor>::run);
+    func[TR    | (ADJ  << 2)] = (ei_general_matrix_matrix_product<Scalar,RowMajor,false,RowMajor,Conj, ColMajor>::run);
+    func[ADJ   | (ADJ  << 2)] = (ei_general_matrix_matrix_product<Scalar,RowMajor,Conj, RowMajor,Conj, ColMajor>::run);
+    init = true;
+  }
+
+  Scalar* a = reinterpret_cast<Scalar*>(pa);
+  Scalar* b = reinterpret_cast<Scalar*>(pb);
+  Scalar* c = reinterpret_cast<Scalar*>(pc);
+  Scalar alpha  = *reinterpret_cast<Scalar*>(palpha);
+  Scalar beta   = *reinterpret_cast<Scalar*>(pbeta);
+
+  // Validate before touching c, so that a rejected call has no side effect.
+  int code = OP(*opa) | (OP(*opb) << 2);
+  if(code>=12 || func[code]==0)
+    return 0;
+
+  // Apply beta first; the kernel is then expected to add alpha*op(a)*op(b).
+  if(beta!=Scalar(1))
+    matrix(c, *m, *n, *ldc) *= beta;
+
+  func[code](*m, *n, *k, a, *lda, b, *ldb, c, *ldc, alpha);
+  return 1;
+}
+
+// Triangular solve with multiple right-hand sides, performed in place in b:
+//   op(a)*x = alpha*b  for side = 'L'or'l'
+//   x*op(a) = alpha*b  for side = 'R'or'r'
+//
+// side, uplo, opa and diag are decoded by SIDE(), UPLO(), OP() and DIAG() and
+// packed into one index that selects the kernel instantiation.
+//
+// Returns 1 on success, and 0 when no kernel is registered for the requested
+// combination; in that case b is left untouched.
+int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m, int *n, RealScalar *palpha,  RealScalar *pa, int *lda, RealScalar *pb, int *ldb)
+{
+  typedef void (*functype)(int, int, const Scalar *, int, Scalar *, int);
+  // The table has to be static: it is filled only once (guarded by `init`),
+  // so an automatic array would contain garbage on every call but the first.
+  static functype func[32];
+
+  static bool init = false;
+  if(!init)
+  {
+    for(int i=0; i<32; ++i)
+      func[i] = 0;
+
+    // non-unit diagonal
+    func[NOTR  | (LEFT  << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, UpperTriangular|0,          false,ColMajor,ColMajor>::run);
+    func[TR    | (LEFT  << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, UpperTriangular|0,          false,RowMajor,ColMajor>::run);
+    func[ADJ   | (LEFT  << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, UpperTriangular|0,          Conj, RowMajor,ColMajor>::run);
+
+    func[NOTR  | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,UpperTriangular|0,          false,ColMajor,ColMajor>::run);
+    func[TR    | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,UpperTriangular|0,          false,RowMajor,ColMajor>::run);
+    func[ADJ   | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,UpperTriangular|0,          Conj, RowMajor,ColMajor>::run);
+
+    func[NOTR  | (LEFT  << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, LowerTriangular|0,          false,ColMajor,ColMajor>::run);
+    func[TR    | (LEFT  << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, LowerTriangular|0,          false,RowMajor,ColMajor>::run);
+    func[ADJ   | (LEFT  << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, LowerTriangular|0,          Conj, RowMajor,ColMajor>::run);
+
+    func[NOTR  | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,LowerTriangular|0,          false,ColMajor,ColMajor>::run);
+    func[TR    | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,LowerTriangular|0,          false,RowMajor,ColMajor>::run);
+    func[ADJ   | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,LowerTriangular|0,          Conj, RowMajor,ColMajor>::run);
+
+
+    // unit diagonal
+    func[NOTR  | (LEFT  << 2) | (UP << 3) | (UNIT  << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, UpperTriangular|UnitDiagBit,false,ColMajor,ColMajor>::run);
+    func[TR    | (LEFT  << 2) | (UP << 3) | (UNIT  << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, UpperTriangular|UnitDiagBit,false,RowMajor,ColMajor>::run);
+    func[ADJ   | (LEFT  << 2) | (UP << 3) | (UNIT  << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, UpperTriangular|UnitDiagBit,Conj, RowMajor,ColMajor>::run);
+
+    func[NOTR  | (RIGHT << 2) | (UP << 3) | (UNIT  << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,UpperTriangular|UnitDiagBit,false,ColMajor,ColMajor>::run);
+    func[TR    | (RIGHT << 2) | (UP << 3) | (UNIT  << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,UpperTriangular|UnitDiagBit,false,RowMajor,ColMajor>::run);
+    func[ADJ   | (RIGHT << 2) | (UP << 3) | (UNIT  << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,UpperTriangular|UnitDiagBit,Conj, RowMajor,ColMajor>::run);
+
+    func[NOTR  | (LEFT  << 2) | (LO << 3) | (UNIT  << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, LowerTriangular|UnitDiagBit,false,ColMajor,ColMajor>::run);
+    func[TR    | (LEFT  << 2) | (LO << 3) | (UNIT  << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, LowerTriangular|UnitDiagBit,false,RowMajor,ColMajor>::run);
+    func[ADJ   | (LEFT  << 2) | (LO << 3) | (UNIT  << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheLeft, LowerTriangular|UnitDiagBit,Conj, RowMajor,ColMajor>::run);
+
+    func[NOTR  | (RIGHT << 2) | (LO << 3) | (UNIT  << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,LowerTriangular|UnitDiagBit,false,ColMajor,ColMajor>::run);
+    func[TR    | (RIGHT << 2) | (LO << 3) | (UNIT  << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,LowerTriangular|UnitDiagBit,false,RowMajor,ColMajor>::run);
+    func[ADJ   | (RIGHT << 2) | (LO << 3) | (UNIT  << 4)] = (ei_triangular_solve_matrix<Scalar,OnTheRight,LowerTriangular|UnitDiagBit,Conj, RowMajor,ColMajor>::run);
+
+    init = true;
+  }
+
+  Scalar* a = reinterpret_cast<Scalar*>(pa);
+  Scalar* b = reinterpret_cast<Scalar*>(pb);
+  Scalar  alpha = *reinterpret_cast<Scalar*>(palpha);
+
+  int code = OP(*opa) | (SIDE(*side) << 2) | (UPLO(*uplo) << 3) | (DIAG(*diag) << 4);
+  if(code>=32 || func[code]==0)
+    return 0;
+
+  // The kernels take no alpha. The solve is linear in b, so scaling the
+  // right-hand side up front gives the same result as scaling the solution.
+  if(alpha!=Scalar(1))
+    matrix(b, *m, *n, *ldb) *= alpha;
+
+  func[code](*m, *n, a, *lda, b, *ldb);
+  return 1;
+}
+
+
+// Triangular matrix product, written back into b:
+//   b = alpha*op(a)*b  for side = 'L'or'l'
+//   b = alpha*b*op(a)  for side = 'R'or'r'
+//
+// side, uplo, opa and diag are decoded by SIDE(), UPLO(), OP() and DIAG() and
+// packed into one index that selects the kernel instantiation.
+//
+// Returns 1 on success, and 0 when no kernel is registered for the requested
+// combination.
+int EIGEN_BLAS_FUNC(trmm)(char *side, char *uplo, char *opa, char *diag, int *m, int *n, RealScalar *palpha,  RealScalar *pa, int *lda, RealScalar *pb, int *ldb)
+{
+  typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar);
+  // The table has to be static: it is filled only once (guarded by `init`),
+  // so an automatic array would contain garbage on every call but the first.
+  static functype func[32];
+
+  static bool init = false;
+  if(!init)
+  {
+    for(int i=0; i<32; ++i)
+      func[i] = 0;
+
+    // non-unit diagonal
+    func[NOTR  | (LEFT  << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,UpperTriangular|0,          true, ColMajor,false,ColMajor,false,ColMajor>::run);
+    func[TR    | (LEFT  << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,UpperTriangular|0,          true, RowMajor,false,ColMajor,false,ColMajor>::run);
+    func[ADJ   | (LEFT  << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,UpperTriangular|0,          true, RowMajor,Conj, ColMajor,false,ColMajor>::run);
+
+    func[NOTR  | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,UpperTriangular|0,          false,ColMajor,false,ColMajor,false,ColMajor>::run);
+    func[TR    | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,UpperTriangular|0,          false,ColMajor,false,RowMajor,false,ColMajor>::run);
+    func[ADJ   | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,UpperTriangular|0,          false,ColMajor,false,RowMajor,Conj, ColMajor>::run);
+
+    func[NOTR  | (LEFT  << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,LowerTriangular|0,          true, ColMajor,false,ColMajor,false,ColMajor>::run);
+    func[TR    | (LEFT  << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,LowerTriangular|0,          true, RowMajor,false,ColMajor,false,ColMajor>::run);
+    func[ADJ   | (LEFT  << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,LowerTriangular|0,          true, RowMajor,Conj, ColMajor,false,ColMajor>::run);
+
+    func[NOTR  | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,LowerTriangular|0,          false,ColMajor,false,ColMajor,false,ColMajor>::run);
+    func[TR    | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,LowerTriangular|0,          false,ColMajor,false,RowMajor,false,ColMajor>::run);
+    func[ADJ   | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix<Scalar,LowerTriangular|0,          false,ColMajor,false,RowMajor,Conj, ColMajor>::run);
+
+    // unit diagonal
+    func[NOTR  | (LEFT  << 2) | (UP << 3) | (UNIT  << 4)] = (ei_product_triangular_matrix_matrix<Scalar,UpperTriangular|UnitDiagBit,true, ColMajor,false,ColMajor,false,ColMajor>::run);
+    func[TR    | (LEFT  << 2) | (UP << 3) | (UNIT  << 4)] = (ei_product_triangular_matrix_matrix<Scalar,UpperTriangular|UnitDiagBit,true, RowMajor,false,ColMajor,false,ColMajor>::run);
+    func[ADJ   | (LEFT  << 2) | (UP << 3) | (UNIT  << 4)] = (ei_product_triangular_matrix_matrix<Scalar,UpperTriangular|UnitDiagBit,true, RowMajor,Conj, ColMajor,false,ColMajor>::run);
+
+    func[NOTR  | (RIGHT << 2) | (UP << 3) | (UNIT  << 4)] = (ei_product_triangular_matrix_matrix<Scalar,UpperTriangular|UnitDiagBit,false,ColMajor,false,ColMajor,false,ColMajor>::run);
+    func[TR    | (RIGHT << 2) | (UP << 3) | (UNIT  << 4)] = (ei_product_triangular_matrix_matrix<Scalar,UpperTriangular|UnitDiagBit,false,ColMajor,false,RowMajor,false,ColMajor>::run);
+    func[ADJ   | (RIGHT << 2) | (UP << 3) | (UNIT  << 4)] = (ei_product_triangular_matrix_matrix<Scalar,UpperTriangular|UnitDiagBit,false,ColMajor,false,RowMajor,Conj, ColMajor>::run);
+
+    func[NOTR  | (LEFT  << 2) | (LO << 3) | (UNIT  << 4)] = (ei_product_triangular_matrix_matrix<Scalar,LowerTriangular|UnitDiagBit,true, ColMajor,false,ColMajor,false,ColMajor>::run);
+    func[TR    | (LEFT  << 2) | (LO << 3) | (UNIT  << 4)] = (ei_product_triangular_matrix_matrix<Scalar,LowerTriangular|UnitDiagBit,true, RowMajor,false,ColMajor,false,ColMajor>::run);
+    func[ADJ   | (LEFT  << 2) | (LO << 3) | (UNIT  << 4)] = (ei_product_triangular_matrix_matrix<Scalar,LowerTriangular|UnitDiagBit,true, RowMajor,Conj, ColMajor,false,ColMajor>::run);
+
+    func[NOTR  | (RIGHT << 2) | (LO << 3) | (UNIT  << 4)] = (ei_product_triangular_matrix_matrix<Scalar,LowerTriangular|UnitDiagBit,false,ColMajor,false,ColMajor,false,ColMajor>::run);
+    func[TR    | (RIGHT << 2) | (LO << 3) | (UNIT  << 4)] = (ei_product_triangular_matrix_matrix<Scalar,LowerTriangular|UnitDiagBit,false,ColMajor,false,RowMajor,false,ColMajor>::run);
+    func[ADJ   | (RIGHT << 2) | (LO << 3) | (UNIT  << 4)] = (ei_product_triangular_matrix_matrix<Scalar,LowerTriangular|UnitDiagBit,false,ColMajor,false,RowMajor,Conj, ColMajor>::run);
+
+    init = true;
+  }
+
+  Scalar* a = reinterpret_cast<Scalar*>(pa);
+  Scalar* b = reinterpret_cast<Scalar*>(pb);
+  Scalar  alpha = *reinterpret_cast<Scalar*>(palpha);
+
+  int code = OP(*opa) | (SIDE(*side) << 2) | (UPLO(*uplo) << 3) | (DIAG(*diag) << 4);
+  if(code>=32 || func[code]==0)
+    return 0;
+
+  // NOTE(review): b is passed both as an input operand and as the destination,
+  // and `a` always goes into the first operand slot even for the RIGHT entries,
+  // whose template arguments apply the transpose/conjugate options to the
+  // second operand. Confirm against ei_product_triangular_matrix_matrix::run
+  // that in-place use is supported and that the RIGHT case gets its operands
+  // (and m/n) in the expected order.
+  func[code](*m, *n, a, *lda, b, *ldb, b, *ldb, alpha);
+  return 1;
+}
+
+// Symmetric matrix product, a being the symmetric operand:
+//   c = alpha*a*b + beta*c  for side = 'L'or'l'
+//   c = alpha*b*a + beta*c  for side = 'R'or'r'
+//
+// uplo (decoded by UPLO()) tells which triangular half of a is referenced.
+// Returns 1 on success and 0 when side or uplo is not recognised; note that
+// c has already been scaled by beta when 0 is returned.
+int EIGEN_BLAS_FUNC(symm)(char *side, char *uplo, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc)
+{
+  Scalar* sym = reinterpret_cast<Scalar*>(pa);
+  Scalar* gen = reinterpret_cast<Scalar*>(pb);
+  Scalar* res = reinterpret_cast<Scalar*>(pc);
+  Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
+  Scalar beta  = *reinterpret_cast<Scalar*>(pbeta);
+
+  // beta is applied up front, before the product is dispatched
+  if(beta!=Scalar(1))
+    matrix(res, *m, *n, *ldc) *= beta;
+
+  if(SIDE(*side)==LEFT)
+  {
+    // symmetric operand on the left
+    if(UPLO(*uplo)==UP)
+    {
+      ei_product_selfadjoint_matrix<Scalar, RowMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, sym, *lda, gen, *ldb, res, *ldc, alpha);
+      return 1;
+    }
+    if(UPLO(*uplo)==LO)
+    {
+      ei_product_selfadjoint_matrix<Scalar, ColMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, sym, *lda, gen, *ldb, res, *ldc, alpha);
+      return 1;
+    }
+    return 0;
+  }
+
+  if(SIDE(*side)==RIGHT)
+  {
+    // symmetric operand on the right: the general matrix is passed first
+    if(UPLO(*uplo)==UP)
+    {
+      ei_product_selfadjoint_matrix<Scalar, ColMajor,false,false, RowMajor,true,false, ColMajor>::run(*m, *n, gen, *ldb, sym, *lda, res, *ldc, alpha);
+      return 1;
+    }
+    if(UPLO(*uplo)==LO)
+    {
+      ei_product_selfadjoint_matrix<Scalar, ColMajor,false,false, ColMajor,true,false, ColMajor>::run(*m, *n, gen, *ldb, sym, *lda, res, *ldc, alpha);
+      return 1;
+    }
+    return 0;
+  }
+
+  // unknown side
+  return 0;
+}
+
+// Symmetric rank-k update:
+//   c = alpha*a*a' + beta*c  for op = 'N'or'n'
+//   c = alpha*a'*a + beta*c  for op = 'T'or't','C'or'c'
+//
+// uplo and op are decoded by UPLO() and OP() and packed into one index that
+// selects the kernel instantiation.
+//
+// Returns 1 on success, and 0 when no kernel is registered for the requested
+// combination; in that case c is left untouched.
+int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pbeta, RealScalar *pc, int *ldc)
+{
+  typedef void (*functype)(int, int, const Scalar *, int, Scalar *, int, Scalar);
+  // The table has to be static: it is filled only once (guarded by `init`),
+  // so an automatic array would contain garbage on every call but the first.
+  static functype func[8];
+
+  static bool init = false;
+  if(!init)
+  {
+    // `i` rather than `k`, so the loop index does not shadow the k parameter
+    for(int i=0; i<8; ++i)
+      func[i] = 0;
+
+    func[NOTR  | (UP << 2)] = (ei_selfadjoint_product<Scalar,ColMajor,ColMajor,true, UpperTriangular>::run);
+    func[TR    | (UP << 2)] = (ei_selfadjoint_product<Scalar,RowMajor,ColMajor,false,UpperTriangular>::run);
+    func[ADJ   | (UP << 2)] = (ei_selfadjoint_product<Scalar,RowMajor,ColMajor,false,UpperTriangular>::run);
+
+    func[NOTR  | (LO << 2)] = (ei_selfadjoint_product<Scalar,ColMajor,ColMajor,true, LowerTriangular>::run);
+    func[TR    | (LO << 2)] = (ei_selfadjoint_product<Scalar,RowMajor,ColMajor,false,LowerTriangular>::run);
+    func[ADJ   | (LO << 2)] = (ei_selfadjoint_product<Scalar,RowMajor,ColMajor,false,LowerTriangular>::run);
+
+    init = true;
+  }
+
+  Scalar* a = reinterpret_cast<Scalar*>(pa);
+  Scalar* c = reinterpret_cast<Scalar*>(pc);
+  Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
+  Scalar beta  = *reinterpret_cast<Scalar*>(pbeta);
+
+  int code = OP(*op) | (UPLO(*uplo) << 2);
+  if(code>=8 || func[code]==0)
+    return 0;
+
+  // Apply beta first; the kernel is then expected to add the rank-k term.
+  if(beta!=Scalar(1))
+    matrix(c, *n, *n, *ldc) *= beta;
+
+  func[code](*n, *k, a, *lda, c, *ldc, alpha);
+  return 1;
+}
+
+// Symmetric rank-2k update (not implemented yet):
+//   c = alpha*a*b' + alpha*b*a' + beta*c  for op = 'N'or'n'
+//   c = alpha*a'*b + alpha*b'*a + beta*c  for op = 'T'or't'
+//
+// Currently a stub: the arguments are decoded but never used, c is not
+// modified, and the function always returns 0.
+int EIGEN_BLAS_FUNC(syr2k)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc)
+{
+  // Decoded views of the raw arguments, kept ready for the implementation.
+  Scalar* a = reinterpret_cast<Scalar*>(pa);
+  Scalar* b = reinterpret_cast<Scalar*>(pb);
+  Scalar* c = reinterpret_cast<Scalar*>(pc);
+  Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
+  Scalar beta  = *reinterpret_cast<Scalar*>(pbeta);
+
+  // TODO
+
+  // 0 is the failure code: nothing has been computed
+  return 0;
+}
+
+
+#if ISCOMPLEX
+
+// Hermitian matrix product, a being the hermitian operand:
+//   c = alpha*a*b + beta*c  for side = 'L'or'l'
+//   c = alpha*b*a + beta*c  for side = 'R'or'r'
+//
+// uplo (decoded by UPLO()) tells which triangular half of a is referenced.
+// Returns 1 on success and 0 when side or uplo is not recognised; note that
+// c has already been scaled by beta when 0 is returned.
+int EIGEN_BLAS_FUNC(hemm)(char *side, char *uplo, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc)
+{
+  Scalar* herm = reinterpret_cast<Scalar*>(pa);
+  Scalar* gen  = reinterpret_cast<Scalar*>(pb);
+  Scalar* res  = reinterpret_cast<Scalar*>(pc);
+  Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
+  Scalar beta  = *reinterpret_cast<Scalar*>(pbeta);
+
+  // beta is applied up front, before the product is dispatched
+  if(beta!=Scalar(1))
+    matrix(res, *m, *n, *ldc) *= beta;
+
+  if(SIDE(*side)==LEFT)
+  {
+    // hermitian operand on the left
+    if(UPLO(*uplo)==UP)
+    {
+      ei_product_selfadjoint_matrix<Scalar, RowMajor,true,Conj,  ColMajor,false,false, ColMajor>::run(*m, *n, herm, *lda, gen, *ldb, res, *ldc, alpha);
+      return 1;
+    }
+    if(UPLO(*uplo)==LO)
+    {
+      ei_product_selfadjoint_matrix<Scalar, ColMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, herm, *lda, gen, *ldb, res, *ldc, alpha);
+      return 1;
+    }
+    return 0;
+  }
+
+  if(SIDE(*side)==RIGHT)
+  {
+    // hermitian operand on the right: the general matrix is passed first
+    if(UPLO(*uplo)==UP)
+    {
+      ei_product_selfadjoint_matrix<Scalar, ColMajor,false,false, RowMajor,true,Conj,  ColMajor>::run(*m, *n, gen, *ldb, herm, *lda, res, *ldc, alpha);
+      return 1;
+    }
+    if(UPLO(*uplo)==LO)
+    {
+      ei_product_selfadjoint_matrix<Scalar, ColMajor,false,false, ColMajor,true,false, ColMajor>::run(*m, *n, gen, *ldb, herm, *lda, res, *ldc, alpha);
+      return 1;
+    }
+    return 0;
+  }
+
+  // unknown side
+  return 0;
+}
+
+// Hermitian rank-k update:
+//   c = alpha*a*conj(a') + beta*c  for op = 'N'or'n'
+//   c = alpha*conj(a')*a + beta*c  for op  = 'C'or'c'
+//
+// alpha and beta are REAL scalars for this routine, as in the reference BLAS.
+// uplo and op are decoded by UPLO() and OP() and packed into one index that
+// selects the kernel instantiation.
+//
+// Returns 1 on success, and 0 when no kernel is registered for the requested
+// combination (this includes op = 'T'); in that case c is left untouched.
+int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pbeta, RealScalar *pc, int *ldc)
+{
+  typedef void (*functype)(int, int, const Scalar *, int, Scalar *, int, Scalar);
+  // The table has to be static: it is filled only once (guarded by `init`),
+  // so an automatic array would contain garbage on every call but the first.
+  static functype func[8];
+
+  static bool init = false;
+  if(!init)
+  {
+    // `i` rather than `k`, so the loop index does not shadow the k parameter
+    for(int i=0; i<8; ++i)
+      func[i] = 0;
+
+    func[NOTR  | (UP << 2)] = (ei_selfadjoint_product<Scalar,ColMajor,ColMajor,true, UpperTriangular>::run);
+    func[ADJ   | (UP << 2)] = (ei_selfadjoint_product<Scalar,RowMajor,ColMajor,false,UpperTriangular>::run);
+
+    func[NOTR  | (LO << 2)] = (ei_selfadjoint_product<Scalar,ColMajor,ColMajor,true, LowerTriangular>::run);
+    func[ADJ   | (LO << 2)] = (ei_selfadjoint_product<Scalar,RowMajor,ColMajor,false,LowerTriangular>::run);
+
+    init = true;
+  }
+
+  Scalar* a = reinterpret_cast<Scalar*>(pa);
+  Scalar* c = reinterpret_cast<Scalar*>(pc);
+  // Only one real value lives behind palpha/pbeta here; reading them through a
+  // Scalar* would pick up a bogus imaginary part from past the argument.
+  const Scalar alpha(*palpha);
+  const Scalar beta(*pbeta);
+
+  int code = OP(*op) | (UPLO(*uplo) << 2);
+  if(code>=8 || func[code]==0)
+    return 0;
+
+  // Apply beta first; the kernel is then expected to add the rank-k term.
+  if(beta!=Scalar(1))
+    matrix(c, *n, *n, *ldc) *= beta;
+
+  func[code](*n, *k, a, *lda, c, *ldc, alpha);
+  return 1;
+}
+
+// Hermitian rank-2k update (not implemented yet):
+//   c = alpha*a*conj(b') + conj(alpha)*b*conj(a') + beta*c,  for op = 'N'or'n'
+//   c = alpha*conj(a')*b + conj(alpha)*conj(b')*a + beta*c,  for op = 'C'or'c'
+//
+// Currently a stub: the arguments are decoded but never used, c is not
+// modified, and the function always returns 0.
+// NOTE(review): the reference BLAS declares beta as a real scalar for her2k;
+// reading it through a Scalar* below needs revisiting when this is implemented.
+int EIGEN_BLAS_FUNC(her2k)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc)
+{
+  // Decoded views of the raw arguments, kept ready for the implementation.
+  Scalar* a = reinterpret_cast<Scalar*>(pa);
+  Scalar* b = reinterpret_cast<Scalar*>(pb);
+  Scalar* c = reinterpret_cast<Scalar*>(pc);
+  Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
+  Scalar beta  = *reinterpret_cast<Scalar*>(pbeta);
+
+  // TODO
+
+  // 0 is the failure code: nothing has been computed
+  return 0;
+}
+
+#endif // ISCOMPLEX
--- a/blas/single.cpp
+++ b/blas/single.cpp
@ -0,0 +1,31 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#define SCALAR        float
+#define SCALAR_SUFFIX s
+#define ISCOMPLEX     0
+
+#include "level1_impl.h"
+#include "level2_impl.h"
+#include "level3_impl.h"