mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-01-18 14:34:17 +08:00
optimize "apply Jacobi" for small sizes, and move it to Jacobi.h
This commit is contained in:
parent
f2536416da
commit
13e95f7f68
1
Eigen/QR
1
Eigen/QR
@ -6,6 +6,7 @@
|
|||||||
#include "src/Core/util/DisableMSVCWarnings.h"
|
#include "src/Core/util/DisableMSVCWarnings.h"
|
||||||
|
|
||||||
#include "Cholesky"
|
#include "Cholesky"
|
||||||
|
#include "Jacobi"
|
||||||
|
|
||||||
// Note that EIGEN_HIDE_HEAVY_CODE has to be defined per module
|
// Note that EIGEN_HIDE_HEAVY_CODE has to be defined per module
|
||||||
#if (defined EIGEN_EXTERN_INSTANTIATIONS) && (EIGEN_EXTERN_INSTANTIATIONS>=2)
|
#if (defined EIGEN_EXTERN_INSTANTIATIONS) && (EIGEN_EXTERN_INSTANTIATIONS>=2)
|
||||||
|
@ -1,127 +0,0 @@
|
|||||||
// This file is part of Eigen, a lightweight C++ template library
|
|
||||||
// for linear algebra.
|
|
||||||
//
|
|
||||||
// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
|
|
||||||
//
|
|
||||||
// Eigen is free software; you can redistribute it and/or
|
|
||||||
// modify it under the terms of the GNU Lesser General Public
|
|
||||||
// License as published by the Free Software Foundation; either
|
|
||||||
// version 3 of the License, or (at your option) any later version.
|
|
||||||
//
|
|
||||||
// Alternatively, you can redistribute it and/or
|
|
||||||
// modify it under the terms of the GNU General Public License as
|
|
||||||
// published by the Free Software Foundation; either version 2 of
|
|
||||||
// the License, or (at your option) any later version.
|
|
||||||
//
|
|
||||||
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
||||||
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
||||||
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
|
|
||||||
// GNU General Public License for more details.
|
|
||||||
//
|
|
||||||
// You should have received a copy of the GNU Lesser General Public
|
|
||||||
// License and a copy of the GNU General Public License along with
|
|
||||||
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
|
|
||||||
#ifndef EIGEN_ROTATION_IN_THE_PLANE_H
|
|
||||||
#define EIGEN_ROTATION_IN_THE_PLANE_H
|
|
||||||
|
|
||||||
/**********************************************************************
|
|
||||||
* This file implement ...
|
|
||||||
**********************************************************************/
|
|
||||||
|
|
||||||
template<typename Scalar, int Incr>
|
|
||||||
struct ei_apply_rotation_in_the_plane_selector;
|
|
||||||
|
|
||||||
template<typename VectorX, typename VectorY>
|
|
||||||
void ei_apply_rotation_in_the_plane(VectorX& x, VectorY& y, typename VectorX::Scalar c, typename VectorY::Scalar s)
|
|
||||||
{
|
|
||||||
ei_assert(x.size() == y.size());
|
|
||||||
int size = x.size();
|
|
||||||
int incrx = size ==1 ? 1 : &x.coeffRef(1) - &x.coeffRef(0);
|
|
||||||
int incry = size ==1 ? 1 : &y.coeffRef(1) - &y.coeffRef(0);
|
|
||||||
if (incrx==1 && incry==1)
|
|
||||||
ei_apply_rotation_in_the_plane_selector<typename VectorX::Scalar,1>
|
|
||||||
::run(&x.coeffRef(0), &y.coeffRef(0), x.size(), c, s, 1, 1);
|
|
||||||
else
|
|
||||||
ei_apply_rotation_in_the_plane_selector<typename VectorX::Scalar,Dynamic>
|
|
||||||
::run(&x.coeffRef(0), &y.coeffRef(0), x.size(), c, s, incrx, incry);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename Scalar>
|
|
||||||
struct ei_apply_rotation_in_the_plane_selector<Scalar,Dynamic>
|
|
||||||
{
|
|
||||||
static void run(Scalar* x, Scalar* y, int size, Scalar c, Scalar s, int incrx, int incry)
|
|
||||||
{
|
|
||||||
for(int i=0; i<size; ++i)
|
|
||||||
{
|
|
||||||
Scalar xi = *x;
|
|
||||||
Scalar yi = *y;
|
|
||||||
*x = c * xi - s * yi;
|
|
||||||
*y = s * xi + c * yi;
|
|
||||||
x += incrx;
|
|
||||||
y += incry;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// both vectors are sequentially stored in memory => vectorization
|
|
||||||
template<typename Scalar>
|
|
||||||
struct ei_apply_rotation_in_the_plane_selector<Scalar,1>
|
|
||||||
{
|
|
||||||
static void run(Scalar* x, Scalar* y, int size, Scalar c, Scalar s, int, int)
|
|
||||||
{
|
|
||||||
typedef typename ei_packet_traits<Scalar>::type Packet;
|
|
||||||
enum { PacketSize = ei_packet_traits<Scalar>::size, Peeling = 2 };
|
|
||||||
int alignedStart = ei_alignmentOffset(y, size);
|
|
||||||
int alignedEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize);
|
|
||||||
|
|
||||||
const Packet pc = ei_pset1(c);
|
|
||||||
const Packet ps = ei_pset1(s);
|
|
||||||
|
|
||||||
for(int i=0; i<alignedStart; ++i)
|
|
||||||
{
|
|
||||||
Scalar xi = x[i];
|
|
||||||
Scalar yi = y[i];
|
|
||||||
x[i] = c * xi - s * yi;
|
|
||||||
y[i] = s * xi + c * yi;
|
|
||||||
}
|
|
||||||
|
|
||||||
Scalar* px = x + alignedStart;
|
|
||||||
Scalar* py = y + alignedStart;
|
|
||||||
|
|
||||||
if(ei_alignmentOffset(x, size)==alignedStart)
|
|
||||||
for(int i=alignedStart; i<alignedEnd; i+=PacketSize)
|
|
||||||
{
|
|
||||||
Packet xi = ei_pload(px);
|
|
||||||
Packet yi = ei_pload(py);
|
|
||||||
ei_pstore(px, ei_psub(ei_pmul(pc,xi),ei_pmul(ps,yi)));
|
|
||||||
ei_pstore(py, ei_padd(ei_pmul(ps,xi),ei_pmul(pc,yi)));
|
|
||||||
px += PacketSize;
|
|
||||||
py += PacketSize;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
for(int i=alignedStart; i<alignedEnd; i+=Peeling*PacketSize)
|
|
||||||
{
|
|
||||||
Packet xi = ei_ploadu(px);
|
|
||||||
Packet xi1 = ei_ploadu(px+PacketSize);
|
|
||||||
Packet yi = ei_pload (py);
|
|
||||||
Packet yi1 = ei_pload (py+PacketSize);
|
|
||||||
ei_pstoreu(px, ei_psub(ei_pmul(pc,xi),ei_pmul(ps,yi)));
|
|
||||||
ei_pstoreu(px+PacketSize, ei_psub(ei_pmul(pc,xi1),ei_pmul(ps,yi1)));
|
|
||||||
ei_pstore (py, ei_padd(ei_pmul(ps,xi),ei_pmul(pc,yi)));
|
|
||||||
ei_pstore (py+PacketSize, ei_padd(ei_pmul(ps,xi1),ei_pmul(pc,yi1)));
|
|
||||||
px += Peeling*PacketSize;
|
|
||||||
py += Peeling*PacketSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
for(int i=alignedEnd; i<size; ++i)
|
|
||||||
{
|
|
||||||
Scalar xi = x[i];
|
|
||||||
Scalar yi = y[i];
|
|
||||||
x[i] = c * xi - s * yi;
|
|
||||||
y[i] = s * xi + c * yi;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif // EIGEN_ROTATION_IN_THE_PLANE_H
|
|
@ -2,6 +2,7 @@
|
|||||||
// for linear algebra.
|
// for linear algebra.
|
||||||
//
|
//
|
||||||
// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||||
|
// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
|
||||||
//
|
//
|
||||||
// Eigen is free software; you can redistribute it and/or
|
// Eigen is free software; you can redistribute it and/or
|
||||||
// modify it under the terms of the GNU Lesser General Public
|
// modify it under the terms of the GNU Lesser General Public
|
||||||
@ -25,6 +26,9 @@
|
|||||||
#ifndef EIGEN_JACOBI_H
|
#ifndef EIGEN_JACOBI_H
|
||||||
#define EIGEN_JACOBI_H
|
#define EIGEN_JACOBI_H
|
||||||
|
|
||||||
|
template<typename VectorX, typename VectorY>
|
||||||
|
void ei_apply_rotation_in_the_plane(VectorX& _x, VectorY& _y, typename VectorX::Scalar c, typename VectorY::Scalar s);
|
||||||
|
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
inline void MatrixBase<Derived>::applyJacobiOnTheLeft(int p, int q, Scalar c, Scalar s)
|
inline void MatrixBase<Derived>::applyJacobiOnTheLeft(int p, int q, Scalar c, Scalar s)
|
||||||
{
|
{
|
||||||
@ -101,5 +105,98 @@ inline void ei_normalizeJacobi(Scalar *c, Scalar *s, const Scalar& x, const Scal
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename VectorX, typename VectorY>
|
||||||
|
void /*EIGEN_DONT_INLINE*/ ei_apply_rotation_in_the_plane(VectorX& _x, VectorY& _y, typename VectorX::Scalar c, typename VectorY::Scalar s)
|
||||||
|
{
|
||||||
|
typedef typename VectorX::Scalar Scalar;
|
||||||
|
ei_assert(_x.size() == _y.size());
|
||||||
|
int size = _x.size();
|
||||||
|
int incrx = size ==1 ? 1 : &_x.coeffRef(1) - &_x.coeffRef(0);
|
||||||
|
int incry = size ==1 ? 1 : &_y.coeffRef(1) - &_y.coeffRef(0);
|
||||||
|
|
||||||
|
Scalar* EIGEN_RESTRICT x = &_x.coeffRef(0);
|
||||||
|
Scalar* EIGEN_RESTRICT y = &_y.coeffRef(0);
|
||||||
|
|
||||||
|
if (incrx==1 && incry==1)
|
||||||
|
{
|
||||||
|
// both vectors are sequentially stored in memory => vectorization
|
||||||
|
typedef typename ei_packet_traits<Scalar>::type Packet;
|
||||||
|
enum { PacketSize = ei_packet_traits<Scalar>::size, Peeling = 2 };
|
||||||
|
|
||||||
|
int alignedStart = ei_alignmentOffset(y, size);
|
||||||
|
int alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize;
|
||||||
|
|
||||||
|
const Packet pc = ei_pset1(c);
|
||||||
|
const Packet ps = ei_pset1(s);
|
||||||
|
|
||||||
|
for(int i=0; i<alignedStart; ++i)
|
||||||
|
{
|
||||||
|
Scalar xi = x[i];
|
||||||
|
Scalar yi = y[i];
|
||||||
|
x[i] = c * xi - s * yi;
|
||||||
|
y[i] = s * xi + c * yi;
|
||||||
|
}
|
||||||
|
|
||||||
|
Scalar* px = x + alignedStart;
|
||||||
|
Scalar* py = y + alignedStart;
|
||||||
|
|
||||||
|
if(ei_alignmentOffset(x, size)==alignedStart)
|
||||||
|
{
|
||||||
|
for(int i=alignedStart; i<alignedEnd; i+=PacketSize)
|
||||||
|
{
|
||||||
|
Packet xi = ei_pload(px);
|
||||||
|
Packet yi = ei_pload(py);
|
||||||
|
ei_pstore(px, ei_psub(ei_pmul(pc,xi),ei_pmul(ps,yi)));
|
||||||
|
ei_pstore(py, ei_padd(ei_pmul(ps,xi),ei_pmul(pc,yi)));
|
||||||
|
px += PacketSize;
|
||||||
|
py += PacketSize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int peelingEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize);
|
||||||
|
for(int i=alignedStart; i<peelingEnd; i+=Peeling*PacketSize)
|
||||||
|
{
|
||||||
|
Packet xi = ei_ploadu(px);
|
||||||
|
Packet xi1 = ei_ploadu(px+PacketSize);
|
||||||
|
Packet yi = ei_pload (py);
|
||||||
|
Packet yi1 = ei_pload (py+PacketSize);
|
||||||
|
ei_pstoreu(px, ei_psub(ei_pmul(pc,xi),ei_pmul(ps,yi)));
|
||||||
|
ei_pstoreu(px+PacketSize, ei_psub(ei_pmul(pc,xi1),ei_pmul(ps,yi1)));
|
||||||
|
ei_pstore (py, ei_padd(ei_pmul(ps,xi),ei_pmul(pc,yi)));
|
||||||
|
ei_pstore (py+PacketSize, ei_padd(ei_pmul(ps,xi1),ei_pmul(pc,yi1)));
|
||||||
|
px += Peeling*PacketSize;
|
||||||
|
py += Peeling*PacketSize;
|
||||||
|
}
|
||||||
|
if(alignedEnd!=peelingEnd)
|
||||||
|
{
|
||||||
|
Packet xi = ei_ploadu(x+peelingEnd);
|
||||||
|
Packet yi = ei_pload (y+peelingEnd);
|
||||||
|
ei_pstoreu(x+peelingEnd, ei_psub(ei_pmul(pc,xi),ei_pmul(ps,yi)));
|
||||||
|
ei_pstore (y+peelingEnd, ei_padd(ei_pmul(ps,xi),ei_pmul(pc,yi)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int i=alignedEnd; i<size; ++i)
|
||||||
|
{
|
||||||
|
Scalar xi = x[i];
|
||||||
|
Scalar yi = y[i];
|
||||||
|
x[i] = c * xi - s * yi;
|
||||||
|
y[i] = s * xi + c * yi;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for(int i=0; i<size; ++i)
|
||||||
|
{
|
||||||
|
Scalar xi = *x;
|
||||||
|
Scalar yi = *y;
|
||||||
|
*x = c * xi - s * yi;
|
||||||
|
*y = s * xi + c * yi;
|
||||||
|
x += incrx;
|
||||||
|
y += incry;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#endif // EIGEN_JACOBI_H
|
#endif // EIGEN_JACOBI_H
|
||||||
|
Loading…
Reference in New Issue
Block a user